@oculum/scanner 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/formatters/cli-terminal.d.ts +27 -0
- package/dist/formatters/cli-terminal.d.ts.map +1 -0
- package/dist/formatters/cli-terminal.js +412 -0
- package/dist/formatters/cli-terminal.js.map +1 -0
- package/dist/formatters/github-comment.d.ts +41 -0
- package/dist/formatters/github-comment.d.ts.map +1 -0
- package/dist/formatters/github-comment.js +306 -0
- package/dist/formatters/github-comment.js.map +1 -0
- package/dist/formatters/grouping.d.ts +52 -0
- package/dist/formatters/grouping.d.ts.map +1 -0
- package/dist/formatters/grouping.js +152 -0
- package/dist/formatters/grouping.js.map +1 -0
- package/dist/formatters/index.d.ts +9 -0
- package/dist/formatters/index.d.ts.map +1 -0
- package/dist/formatters/index.js +35 -0
- package/dist/formatters/index.js.map +1 -0
- package/dist/formatters/vscode-diagnostic.d.ts +103 -0
- package/dist/formatters/vscode-diagnostic.d.ts.map +1 -0
- package/dist/formatters/vscode-diagnostic.js +151 -0
- package/dist/formatters/vscode-diagnostic.js.map +1 -0
- package/dist/index.d.ts +52 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +648 -0
- package/dist/index.js.map +1 -0
- package/dist/layer1/comments.d.ts +8 -0
- package/dist/layer1/comments.d.ts.map +1 -0
- package/dist/layer1/comments.js +203 -0
- package/dist/layer1/comments.js.map +1 -0
- package/dist/layer1/config-audit.d.ts +8 -0
- package/dist/layer1/config-audit.d.ts.map +1 -0
- package/dist/layer1/config-audit.js +252 -0
- package/dist/layer1/config-audit.js.map +1 -0
- package/dist/layer1/entropy.d.ts +8 -0
- package/dist/layer1/entropy.d.ts.map +1 -0
- package/dist/layer1/entropy.js +500 -0
- package/dist/layer1/entropy.js.map +1 -0
- package/dist/layer1/file-flags.d.ts +7 -0
- package/dist/layer1/file-flags.d.ts.map +1 -0
- package/dist/layer1/file-flags.js +112 -0
- package/dist/layer1/file-flags.js.map +1 -0
- package/dist/layer1/index.d.ts +36 -0
- package/dist/layer1/index.d.ts.map +1 -0
- package/dist/layer1/index.js +132 -0
- package/dist/layer1/index.js.map +1 -0
- package/dist/layer1/patterns.d.ts +8 -0
- package/dist/layer1/patterns.d.ts.map +1 -0
- package/dist/layer1/patterns.js +482 -0
- package/dist/layer1/patterns.js.map +1 -0
- package/dist/layer1/urls.d.ts +8 -0
- package/dist/layer1/urls.d.ts.map +1 -0
- package/dist/layer1/urls.js +296 -0
- package/dist/layer1/urls.js.map +1 -0
- package/dist/layer1/weak-crypto.d.ts +7 -0
- package/dist/layer1/weak-crypto.d.ts.map +1 -0
- package/dist/layer1/weak-crypto.js +291 -0
- package/dist/layer1/weak-crypto.js.map +1 -0
- package/dist/layer2/ai-agent-tools.d.ts +19 -0
- package/dist/layer2/ai-agent-tools.d.ts.map +1 -0
- package/dist/layer2/ai-agent-tools.js +528 -0
- package/dist/layer2/ai-agent-tools.js.map +1 -0
- package/dist/layer2/ai-endpoint-protection.d.ts +36 -0
- package/dist/layer2/ai-endpoint-protection.d.ts.map +1 -0
- package/dist/layer2/ai-endpoint-protection.js +332 -0
- package/dist/layer2/ai-endpoint-protection.js.map +1 -0
- package/dist/layer2/ai-execution-sinks.d.ts +18 -0
- package/dist/layer2/ai-execution-sinks.d.ts.map +1 -0
- package/dist/layer2/ai-execution-sinks.js +496 -0
- package/dist/layer2/ai-execution-sinks.js.map +1 -0
- package/dist/layer2/ai-fingerprinting.d.ts +7 -0
- package/dist/layer2/ai-fingerprinting.d.ts.map +1 -0
- package/dist/layer2/ai-fingerprinting.js +654 -0
- package/dist/layer2/ai-fingerprinting.js.map +1 -0
- package/dist/layer2/ai-prompt-hygiene.d.ts +19 -0
- package/dist/layer2/ai-prompt-hygiene.d.ts.map +1 -0
- package/dist/layer2/ai-prompt-hygiene.js +356 -0
- package/dist/layer2/ai-prompt-hygiene.js.map +1 -0
- package/dist/layer2/ai-rag-safety.d.ts +21 -0
- package/dist/layer2/ai-rag-safety.d.ts.map +1 -0
- package/dist/layer2/ai-rag-safety.js +459 -0
- package/dist/layer2/ai-rag-safety.js.map +1 -0
- package/dist/layer2/ai-schema-validation.d.ts +25 -0
- package/dist/layer2/ai-schema-validation.d.ts.map +1 -0
- package/dist/layer2/ai-schema-validation.js +375 -0
- package/dist/layer2/ai-schema-validation.js.map +1 -0
- package/dist/layer2/auth-antipatterns.d.ts +20 -0
- package/dist/layer2/auth-antipatterns.d.ts.map +1 -0
- package/dist/layer2/auth-antipatterns.js +333 -0
- package/dist/layer2/auth-antipatterns.js.map +1 -0
- package/dist/layer2/byok-patterns.d.ts +12 -0
- package/dist/layer2/byok-patterns.d.ts.map +1 -0
- package/dist/layer2/byok-patterns.js +299 -0
- package/dist/layer2/byok-patterns.js.map +1 -0
- package/dist/layer2/dangerous-functions.d.ts +7 -0
- package/dist/layer2/dangerous-functions.d.ts.map +1 -0
- package/dist/layer2/dangerous-functions.js +1375 -0
- package/dist/layer2/dangerous-functions.js.map +1 -0
- package/dist/layer2/data-exposure.d.ts +16 -0
- package/dist/layer2/data-exposure.d.ts.map +1 -0
- package/dist/layer2/data-exposure.js +279 -0
- package/dist/layer2/data-exposure.js.map +1 -0
- package/dist/layer2/framework-checks.d.ts +7 -0
- package/dist/layer2/framework-checks.d.ts.map +1 -0
- package/dist/layer2/framework-checks.js +388 -0
- package/dist/layer2/framework-checks.js.map +1 -0
- package/dist/layer2/index.d.ts +58 -0
- package/dist/layer2/index.d.ts.map +1 -0
- package/dist/layer2/index.js +380 -0
- package/dist/layer2/index.js.map +1 -0
- package/dist/layer2/logic-gates.d.ts +7 -0
- package/dist/layer2/logic-gates.d.ts.map +1 -0
- package/dist/layer2/logic-gates.js +182 -0
- package/dist/layer2/logic-gates.js.map +1 -0
- package/dist/layer2/risky-imports.d.ts +7 -0
- package/dist/layer2/risky-imports.d.ts.map +1 -0
- package/dist/layer2/risky-imports.js +161 -0
- package/dist/layer2/risky-imports.js.map +1 -0
- package/dist/layer2/variables.d.ts +8 -0
- package/dist/layer2/variables.d.ts.map +1 -0
- package/dist/layer2/variables.js +152 -0
- package/dist/layer2/variables.js.map +1 -0
- package/dist/layer3/anthropic.d.ts +83 -0
- package/dist/layer3/anthropic.d.ts.map +1 -0
- package/dist/layer3/anthropic.js +1745 -0
- package/dist/layer3/anthropic.js.map +1 -0
- package/dist/layer3/index.d.ts +24 -0
- package/dist/layer3/index.d.ts.map +1 -0
- package/dist/layer3/index.js +119 -0
- package/dist/layer3/index.js.map +1 -0
- package/dist/layer3/openai.d.ts +25 -0
- package/dist/layer3/openai.d.ts.map +1 -0
- package/dist/layer3/openai.js +238 -0
- package/dist/layer3/openai.js.map +1 -0
- package/dist/layer3/package-check.d.ts +63 -0
- package/dist/layer3/package-check.d.ts.map +1 -0
- package/dist/layer3/package-check.js +508 -0
- package/dist/layer3/package-check.js.map +1 -0
- package/dist/modes/incremental.d.ts +66 -0
- package/dist/modes/incremental.d.ts.map +1 -0
- package/dist/modes/incremental.js +200 -0
- package/dist/modes/incremental.js.map +1 -0
- package/dist/tiers.d.ts +125 -0
- package/dist/tiers.d.ts.map +1 -0
- package/dist/tiers.js +234 -0
- package/dist/tiers.js.map +1 -0
- package/dist/types.d.ts +175 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +50 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/auth-helper-detector.d.ts +56 -0
- package/dist/utils/auth-helper-detector.d.ts.map +1 -0
- package/dist/utils/auth-helper-detector.js +360 -0
- package/dist/utils/auth-helper-detector.js.map +1 -0
- package/dist/utils/context-helpers.d.ts +96 -0
- package/dist/utils/context-helpers.d.ts.map +1 -0
- package/dist/utils/context-helpers.js +493 -0
- package/dist/utils/context-helpers.js.map +1 -0
- package/dist/utils/diff-detector.d.ts +53 -0
- package/dist/utils/diff-detector.d.ts.map +1 -0
- package/dist/utils/diff-detector.js +104 -0
- package/dist/utils/diff-detector.js.map +1 -0
- package/dist/utils/diff-parser.d.ts +80 -0
- package/dist/utils/diff-parser.d.ts.map +1 -0
- package/dist/utils/diff-parser.js +202 -0
- package/dist/utils/diff-parser.js.map +1 -0
- package/dist/utils/imported-auth-detector.d.ts +37 -0
- package/dist/utils/imported-auth-detector.d.ts.map +1 -0
- package/dist/utils/imported-auth-detector.js +251 -0
- package/dist/utils/imported-auth-detector.js.map +1 -0
- package/dist/utils/middleware-detector.d.ts +55 -0
- package/dist/utils/middleware-detector.d.ts.map +1 -0
- package/dist/utils/middleware-detector.js +260 -0
- package/dist/utils/middleware-detector.js.map +1 -0
- package/dist/utils/oauth-flow-detector.d.ts +41 -0
- package/dist/utils/oauth-flow-detector.d.ts.map +1 -0
- package/dist/utils/oauth-flow-detector.js +202 -0
- package/dist/utils/oauth-flow-detector.js.map +1 -0
- package/dist/utils/path-exclusions.d.ts +55 -0
- package/dist/utils/path-exclusions.d.ts.map +1 -0
- package/dist/utils/path-exclusions.js +222 -0
- package/dist/utils/path-exclusions.js.map +1 -0
- package/dist/utils/project-context-builder.d.ts +119 -0
- package/dist/utils/project-context-builder.d.ts.map +1 -0
- package/dist/utils/project-context-builder.js +534 -0
- package/dist/utils/project-context-builder.js.map +1 -0
- package/dist/utils/registry-clients.d.ts +93 -0
- package/dist/utils/registry-clients.d.ts.map +1 -0
- package/dist/utils/registry-clients.js +273 -0
- package/dist/utils/registry-clients.js.map +1 -0
- package/dist/utils/trpc-analyzer.d.ts +78 -0
- package/dist/utils/trpc-analyzer.d.ts.map +1 -0
- package/dist/utils/trpc-analyzer.js +297 -0
- package/dist/utils/trpc-analyzer.js.map +1 -0
- package/package.json +45 -0
- package/src/__tests__/benchmark/fixtures/false-positives.ts +227 -0
- package/src/__tests__/benchmark/fixtures/index.ts +68 -0
- package/src/__tests__/benchmark/fixtures/layer1/config-audit.ts +364 -0
- package/src/__tests__/benchmark/fixtures/layer1/hardcoded-secrets.ts +173 -0
- package/src/__tests__/benchmark/fixtures/layer1/high-entropy.ts +234 -0
- package/src/__tests__/benchmark/fixtures/layer1/index.ts +31 -0
- package/src/__tests__/benchmark/fixtures/layer1/sensitive-urls.ts +90 -0
- package/src/__tests__/benchmark/fixtures/layer1/weak-crypto.ts +197 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-agent-tools.ts +170 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-endpoint-protection.ts +418 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-execution-sinks.ts +189 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-fingerprinting.ts +316 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-prompt-hygiene.ts +178 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-rag-safety.ts +184 -0
- package/src/__tests__/benchmark/fixtures/layer2/ai-schema-validation.ts +434 -0
- package/src/__tests__/benchmark/fixtures/layer2/auth-antipatterns.ts +159 -0
- package/src/__tests__/benchmark/fixtures/layer2/byok-patterns.ts +112 -0
- package/src/__tests__/benchmark/fixtures/layer2/dangerous-functions.ts +246 -0
- package/src/__tests__/benchmark/fixtures/layer2/data-exposure.ts +168 -0
- package/src/__tests__/benchmark/fixtures/layer2/framework-checks.ts +346 -0
- package/src/__tests__/benchmark/fixtures/layer2/index.ts +67 -0
- package/src/__tests__/benchmark/fixtures/layer2/injection-vulnerabilities.ts +239 -0
- package/src/__tests__/benchmark/fixtures/layer2/logic-gates.ts +246 -0
- package/src/__tests__/benchmark/fixtures/layer2/risky-imports.ts +231 -0
- package/src/__tests__/benchmark/fixtures/layer2/variables.ts +167 -0
- package/src/__tests__/benchmark/index.ts +29 -0
- package/src/__tests__/benchmark/run-benchmark.ts +144 -0
- package/src/__tests__/benchmark/run-depth-validation.ts +206 -0
- package/src/__tests__/benchmark/run-real-world-test.ts +243 -0
- package/src/__tests__/benchmark/security-benchmark-script.ts +1737 -0
- package/src/__tests__/benchmark/tier-integration-script.ts +177 -0
- package/src/__tests__/benchmark/types.ts +144 -0
- package/src/__tests__/benchmark/utils/test-runner.ts +475 -0
- package/src/__tests__/regression/known-false-positives.test.ts +467 -0
- package/src/__tests__/snapshots/__snapshots__/scan-depth.test.ts.snap +178 -0
- package/src/__tests__/snapshots/scan-depth.test.ts +258 -0
- package/src/__tests__/validation/analyze-results.ts +542 -0
- package/src/__tests__/validation/extract-for-triage.ts +146 -0
- package/src/__tests__/validation/fp-deep-analysis.ts +327 -0
- package/src/__tests__/validation/run-validation.ts +364 -0
- package/src/__tests__/validation/triage-template.md +132 -0
- package/src/formatters/cli-terminal.ts +446 -0
- package/src/formatters/github-comment.ts +382 -0
- package/src/formatters/grouping.ts +190 -0
- package/src/formatters/index.ts +47 -0
- package/src/formatters/vscode-diagnostic.ts +243 -0
- package/src/index.ts +823 -0
- package/src/layer1/comments.ts +218 -0
- package/src/layer1/config-audit.ts +289 -0
- package/src/layer1/entropy.ts +583 -0
- package/src/layer1/file-flags.ts +127 -0
- package/src/layer1/index.ts +181 -0
- package/src/layer1/patterns.ts +516 -0
- package/src/layer1/urls.ts +334 -0
- package/src/layer1/weak-crypto.ts +328 -0
- package/src/layer2/ai-agent-tools.ts +601 -0
- package/src/layer2/ai-endpoint-protection.ts +387 -0
- package/src/layer2/ai-execution-sinks.ts +580 -0
- package/src/layer2/ai-fingerprinting.ts +758 -0
- package/src/layer2/ai-prompt-hygiene.ts +411 -0
- package/src/layer2/ai-rag-safety.ts +511 -0
- package/src/layer2/ai-schema-validation.ts +421 -0
- package/src/layer2/auth-antipatterns.ts +394 -0
- package/src/layer2/byok-patterns.ts +336 -0
- package/src/layer2/dangerous-functions.ts +1563 -0
- package/src/layer2/data-exposure.ts +315 -0
- package/src/layer2/framework-checks.ts +433 -0
- package/src/layer2/index.ts +473 -0
- package/src/layer2/logic-gates.ts +206 -0
- package/src/layer2/risky-imports.ts +186 -0
- package/src/layer2/variables.ts +166 -0
- package/src/layer3/anthropic.ts +2030 -0
- package/src/layer3/index.ts +130 -0
- package/src/layer3/package-check.ts +604 -0
- package/src/modes/incremental.ts +293 -0
- package/src/tiers.ts +318 -0
- package/src/types.ts +284 -0
- package/src/utils/auth-helper-detector.ts +443 -0
- package/src/utils/context-helpers.ts +535 -0
- package/src/utils/diff-detector.ts +135 -0
- package/src/utils/diff-parser.ts +272 -0
- package/src/utils/imported-auth-detector.ts +320 -0
- package/src/utils/middleware-detector.ts +333 -0
- package/src/utils/oauth-flow-detector.ts +246 -0
- package/src/utils/path-exclusions.ts +266 -0
- package/src/utils/project-context-builder.ts +707 -0
- package/src/utils/registry-clients.ts +351 -0
- package/src/utils/trpc-analyzer.ts +382 -0
|
@@ -0,0 +1,475 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Test runner utilities for the security benchmark suite
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { runLayer1Scan } from '../../../layer1'
|
|
6
|
+
import { runLayer2Scan } from '../../../layer2'
|
|
7
|
+
import { computeTierStats, formatTierStats, getTierForCategory } from '../../../tiers'
|
|
8
|
+
import type {
|
|
9
|
+
TestFixture,
|
|
10
|
+
TestResult,
|
|
11
|
+
TestGroup,
|
|
12
|
+
BenchmarkSummary,
|
|
13
|
+
BenchmarkMetrics,
|
|
14
|
+
DetectorMetrics,
|
|
15
|
+
SeverityMetrics
|
|
16
|
+
} from '../types'
|
|
17
|
+
import type { Vulnerability, VulnerabilityCategory } from '../../../types'
|
|
18
|
+
|
|
19
|
+
/**
 * Run a single test fixture through the Layer 1 and Layer 2 scanners and
 * grade the combined findings against the fixture's expectations.
 *
 * Grading rules:
 * - `fixture.expectFindings` truthy (true positive test): fails when nothing
 *   is found, or when any entry of `fixture.expectedCategories` is absent
 *   from the findings. Every missing category is named in `failureReason`.
 * - otherwise (false negative test): any critical/high/medium finding fails
 *   the test. A low/info finding fails it unless its category is listed in
 *   `fixture.allowedInfoFindings`; within an allowed category the first
 *   `maxCount` findings are acceptable and any beyond that are problematic.
 *
 * `failureReason` always describes the first failure detected; later
 * failures never overwrite it.
 *
 * @param fixture - Fixture describing the file to scan and the expected outcome.
 * @returns The per-layer findings together with the pass/fail verdict.
 */
export async function runTestFixture(fixture: TestFixture): Promise<TestResult> {
  const layer1Result = await runLayer1Scan([fixture.file])
  const layer2Result = await runLayer2Scan([fixture.file])

  const allFindings = [...layer1Result.vulnerabilities, ...layer2Result.vulnerabilities]

  const unexpectedCategories: VulnerabilityCategory[] = []
  const acceptableInfoFindings: Vulnerability[] = []
  const problematicFindings: Vulnerability[] = []
  let passed = true
  let failureReason: string | undefined

  // Marks the fixture as failed while keeping the first reported reason.
  const fail = (reason: string): void => {
    passed = false
    if (failureReason === undefined) {
      failureReason = reason
    }
  }

  if (fixture.expectFindings) {
    // True positive test - should find vulnerabilities
    if (allFindings.length === 0) {
      fail('Expected vulnerabilities but found none')
    } else {
      // Report every expected category that is absent, not just the last one
      const foundCategories = new Set(allFindings.map(f => f.category))
      const missing = (fixture.expectedCategories || []).filter(cat => !foundCategories.has(cat))
      if (missing.length === 1) {
        fail(`Missing expected category: ${missing[0]}`)
      } else if (missing.length > 1) {
        fail(`Missing expected categories: ${missing.join(', ')}`)
      }
    }
  } else {
    // False negative test - should NOT flag actual vulnerabilities
    // Default: Fail on medium+ severity (critical, high, medium)
    // Optional: Allow specific info/low findings via allowedInfoFindings
    const mediumOrHigher = allFindings.filter(f =>
      f.severity === 'critical' || f.severity === 'high' || f.severity === 'medium'
    )

    if (mediumOrHigher.length > 0) {
      fail(`Found ${mediumOrHigher.length} false positive(s) with medium+ severity`)
      problematicFindings.push(...mediumOrHigher)
      unexpectedCategories.push(...mediumOrHigher.map(f => f.category))
    }

    const lowSeverity = allFindings.filter(f =>
      f.severity === 'low' || f.severity === 'info'
    )

    if (lowSeverity.length > 0) {
      const allowedCounts = new Map<VulnerabilityCategory, number>()
      for (const allowed of fixture.allowedInfoFindings || []) {
        allowedCounts.set(allowed.category, allowed.maxCount)
      }

      // Count each category once up front instead of re-filtering per finding
      const totalPerCategory = new Map<VulnerabilityCategory, number>()
      for (const finding of lowSeverity) {
        totalPerCategory.set(finding.category, (totalPerCategory.get(finding.category) ?? 0) + 1)
      }

      const seenPerCategory = new Map<VulnerabilityCategory, number>()
      for (const finding of lowSeverity) {
        const allowedCount = allowedCounts.get(finding.category)

        if (allowedCount === undefined) {
          // Not allowed - this is a false positive
          fail(`Found unexpected ${finding.severity} finding: ${finding.category}`)
          problematicFindings.push(finding)
          unexpectedCategories.push(finding.category)
          continue
        }

        const position = (seenPerCategory.get(finding.category) ?? 0) + 1
        seenPerCategory.set(finding.category, position)

        if (position <= allowedCount) {
          // Within the allowance - acceptable, but tracked
          acceptableInfoFindings.push(finding)
        } else {
          // Beyond the allowance - only the excess is problematic
          fail(`Exceeded allowed count for ${finding.category}: ${totalPerCategory.get(finding.category)} > ${allowedCount}`)
          problematicFindings.push(finding)
        }
      }
    }
  }

  return {
    name: fixture.name,
    file: fixture.file,
    layer1Findings: layer1Result.vulnerabilities,
    layer2Findings: layer2Result.vulnerabilities,
    expectedCategories: fixture.expectedCategories || [],
    unexpectedCategories,
    passed,
    failureReason,
    acceptableInfoFindings,
    problematicFindings,
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Execute every fixture of a test group, one at a time.
 *
 * True positive fixtures run first, followed by the false negative fixtures;
 * results are returned in that same order.
 *
 * @param group - Group holding the `truePositives` and `falseNegatives` fixture lists.
 * @returns One result per fixture, in execution order.
 */
export async function runTestGroup(group: TestGroup): Promise<TestResult[]> {
  const collected: TestResult[] = []

  // Each fixture is awaited before the next one starts.
  for (const fixtures of [group.truePositives, group.falseNegatives]) {
    for (const fixture of fixtures) {
      const outcome = await runTestFixture(fixture)
      collected.push(outcome)
    }
  }

  return collected
}
|
|
138
|
+
|
|
139
|
+
/**
 * Print a human-readable report for a single test result to the console.
 *
 * Output: the test name and file path, per-layer finding counts, and - when
 * there are findings - breakdowns by category, by severity, and by tier.
 * The final section is the verdict: a PASS line (worded differently for true
 * positive and false negative tests) or a FAIL line followed by the
 * problematic findings.
 *
 * A result is treated as a true positive test when it lists expected
 * categories or its name contains "True Positive".
 *
 * @param result - Test result to report.
 */
export function printTestResult(result: TestResult): void {
  const totalFindings = result.layer1Findings.length + result.layer2Findings.length

  console.log(`\n📋 ${result.name}`)
  console.log(` File: ${result.file.path}`)
  console.log(` Layer 1 findings: ${result.layer1Findings.length}`)
  console.log(` Layer 2 findings: ${result.layer2Findings.length}`)

  if (totalFindings > 0) {
    const allFindings = [...result.layer1Findings, ...result.layer2Findings]
    const byCategory: Record<string, number> = {}
    const bySeverity: Record<string, number> = {}

    for (const f of allFindings) {
      byCategory[f.category] = (byCategory[f.category] || 0) + 1
      bySeverity[f.severity] = (bySeverity[f.severity] || 0) + 1
    }

    console.log(' By category:', Object.entries(byCategory).map(([k, v]) => `${k}:${v}`).join(', '))
    console.log(' By severity:', Object.entries(bySeverity).map(([k, v]) => `${k}:${v}`).join(', '))

    // Tier breakdown
    const tierStats = computeTierStats(allFindings.map(f => ({ category: f.category, layer: f.layer })))
    console.log(` ${formatTierStats(tierStats)}`)
  }

  if (result.passed) {
    const isTPTest = result.expectedCategories.length > 0 || result.name.includes('True Positive')
    if (isTPTest) {
      console.log(' ✅ PASS: Detected expected vulnerabilities')
    } else {
      console.log(' ✅ PASS: No false positives')
      if (result.acceptableInfoFindings && result.acceptableInfoFindings.length > 0) {
        console.log(` ℹ️ Note: ${result.acceptableInfoFindings.length} acceptable low-severity finding(s)`)
      }
    }
  } else {
    console.log(` ❌ FAIL: ${result.failureReason}`)
    if (result.problematicFindings && result.problematicFindings.length > 0) {
      console.log(' Problematic findings:')
      for (const f of result.problematicFindings) {
        console.log(` - ${f.category} (${f.severity}): ${f.title}`)
      }
    }
  }
}
|
|
188
|
+
|
|
189
|
+
/**
 * Aggregate detailed benchmark metrics over a list of test results.
 *
 * A result counts as a true positive test when it lists expected categories
 * or its name contains "True Positive"; otherwise it counts as a false
 * negative test when its name contains "False Negative" or "Should Not Flag".
 *
 * For every finding the function updates the tier buckets (`core` -> tierA,
 * `ai_assisted` -> tierB, `experimental` -> tierC), the overall severity
 * distribution, and the per-category detector counters:
 * - true positive test: a finding in an expected category is a true positive
 * - false negative test: a finding whose `id` is not among the result's
 *   acceptable info findings is a false positive
 *
 * For failed true positive tests, each expected category with no finding is
 * counted as a false negative (missed vulnerability).
 *
 * @param results - Test results to aggregate.
 * @returns Detection totals, per-detector precision/recall (sorted by false
 *   positive count, descending), severity distributions, category coverage
 *   against the built-in category list, and per-tier counters.
 */
export function computeMetrics(results: TestResult[]): BenchmarkMetrics {
  const seenCategories = new Set<VulnerabilityCategory>()
  const perDetector = new Map<string, { tp: number; fp: number; fn: number }>()

  // Fetch the counters for a category, registering a zeroed entry on first use.
  const countersFor = (category: string): { tp: number; fp: number; fn: number } => {
    let counters = perDetector.get(category)
    if (!counters) {
      counters = { tp: 0, fp: 0, fn: 0 }
      perDetector.set(category, counters)
    }
    return counters
  }

  // Guarded division: an empty denominator yields 0 rather than NaN.
  const ratio = (hits: number, total: number): number => (total > 0 ? hits / total : 0)

  const severityDistribution: SeverityMetrics = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }
  const falsePositiveSeverity: SeverityMetrics = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }

  const detection = {
    totalVulnerabilitiesDetected: 0,
    truePositiveDetections: 0,
    falsePositiveDetections: 0,
    missedVulnerabilities: 0,
  }

  const byTier = {
    tierA: { tested: 0, passed: 0 },
    tierB: { tested: 0, passed: 0 },
    tierC: { tested: 0, passed: 0 },
  }

  for (const result of results) {
    const findings = [...result.layer1Findings, ...result.layer2Findings]
    const isTPTest = result.expectedCategories.length > 0 || result.name.includes('True Positive')
    const isFNTest = result.name.includes('False Negative') || result.name.includes('Should Not Flag')

    for (const finding of findings) {
      // Tier bookkeeping: findings in any other tier are not tracked.
      let bucket: { tested: number; passed: number } | undefined
      switch (getTierForCategory(finding.category, finding.layer)) {
        case 'core':
          bucket = byTier.tierA
          break
        case 'ai_assisted':
          bucket = byTier.tierB
          break
        case 'experimental':
          bucket = byTier.tierC
          break
      }
      if (bucket) {
        bucket.tested++
        if (result.passed) bucket.passed++
      }

      // Global tallies
      seenCategories.add(finding.category)
      detection.totalVulnerabilitiesDetected++
      severityDistribution[finding.severity]++

      // Per-detector tallies (the entry is registered even if nothing is counted)
      const counters = countersFor(finding.category)
      if (isTPTest) {
        if (result.expectedCategories.includes(finding.category)) {
          counters.tp++
          detection.truePositiveDetections++
        }
      } else if (isFNTest) {
        const acceptable = result.acceptableInfoFindings?.some(f => f.id === finding.id)
        if (!acceptable) {
          counters.fp++
          detection.falsePositiveDetections++
          falsePositiveSeverity[finding.severity]++
        }
      }
    }

    // Expected categories that never showed up in a failed true positive test
    if (isTPTest && !result.passed) {
      for (const expectedCat of result.expectedCategories || []) {
        const counters = countersFor(expectedCat)
        if (!findings.some(f => f.category === expectedCat)) {
          counters.fn++
          detection.missedVulnerabilities++
        }
      }
    }
  }

  const byDetector: DetectorMetrics[] = []
  for (const [name, counters] of perDetector) {
    byDetector.push({
      name,
      truePositives: counters.tp,
      falsePositives: counters.fp,
      falseNegatives: counters.fn,
      precision: ratio(counters.tp, counters.tp + counters.fp),
      recall: ratio(counters.tp, counters.tp + counters.fn),
    })
  }
  // Detectors with the most false positives come first
  byDetector.sort((a, b) => b.falsePositives - a.falsePositives)

  const knownCategories: VulnerabilityCategory[] = [
    'hardcoded_secret',
    'high_entropy_string',
    'weak_crypto',
    'sensitive_url',
    'dangerous_function',
    'missing_auth',
    'data_exposure',
    'ai_pattern',
    'ai_prompt_injection',
    'ai_unsafe_execution',
    'ai_overpermissive_tool',
    'insecure_config',
    'suspicious_package',
    'sensitive_variable',
  ]

  let categoriesTested = 0
  for (const category of seenCategories) {
    if (knownCategories.includes(category)) categoriesTested++
  }
  const untestedCategories = knownCategories.filter(category => !seenCategories.has(category))

  return {
    detection,
    byDetector,
    severityDistribution,
    falsePositiveSeverity,
    coverage: {
      totalCategories: knownCategories.length,
      categoriesTested,
      coveragePercent: (categoriesTested / knownCategories.length) * 100,
      untestedCategories,
    },
    byTier,
  }
}
|
|
334
|
+
|
|
335
|
+
/**
 * Build the benchmark summary for a list of test results.
 *
 * Each result is classified as a true positive test (it lists expected
 * categories, or its name contains "True Positive") or, failing that, as a
 * false negative test (its name contains "False Negative" or
 * "Should Not Flag"). Results matching neither are left out of the totals.
 *
 * @param results - Test results to summarise.
 * @returns Pass/fail counts per test kind, the overall pass rate in percent
 *   (0 when no test was counted), the original results, and the detailed
 *   metrics from `computeMetrics`.
 */
export function computeSummary(results: TestResult[]): BenchmarkSummary {
  const tally = {
    truePositivePassed: 0,
    truePositiveFailed: 0,
    falseNegativePassed: 0,
    falseNegativeFailed: 0,
  }

  for (const result of results) {
    const isTPTest =
      result.expectedCategories.length > 0 || result.name.includes('True Positive')
    const isFNTest =
      result.name.includes('False Negative') || result.name.includes('Should Not Flag')

    // True positive classification takes precedence over false negative.
    if (isTPTest) {
      if (result.passed) tally.truePositivePassed += 1
      else tally.truePositiveFailed += 1
      continue
    }
    if (isFNTest) {
      if (result.passed) tally.falseNegativePassed += 1
      else tally.falseNegativeFailed += 1
    }
  }

  const passedTests = tally.truePositivePassed + tally.falseNegativePassed
  const totalTests =
    tally.truePositivePassed +
    tally.truePositiveFailed +
    tally.falseNegativePassed +
    tally.falseNegativeFailed

  let passRate = 0
  if (totalTests > 0) {
    passRate = (passedTests / totalTests) * 100
  }

  const metrics = computeMetrics(results)

  return {
    ...tally,
    totalTests,
    passedTests,
    passRate,
    results,
    metrics,
  }
}
|
|
383
|
+
|
|
384
|
+
/**
 * Print the detailed benchmark metrics to the console.
 *
 * Sections, in order: detection totals, severity distribution of all
 * findings, severity distribution of false positives (only when there is at
 * least one), a per-detector table (only when there is at least one
 * detector), category coverage, and per-tier counters.
 *
 * The detector table header is built with the same column widths as the
 * rows so the two stay aligned.
 *
 * @param metrics - Metrics to print.
 */
export function printMetrics(metrics: BenchmarkMetrics): void {
  console.log('\n' + '='.repeat(80))
  console.log('DETAILED PERFORMANCE METRICS')
  console.log('='.repeat(80))

  // Overall detection stats
  console.log('\n📊 Detection Statistics:')
  console.log(` Total findings: ${metrics.detection.totalVulnerabilitiesDetected}`)
  console.log(` True positives: ${metrics.detection.truePositiveDetections}`)
  console.log(` False positives: ${metrics.detection.falsePositiveDetections}`)
  console.log(` Missed vulnerabilities: ${metrics.detection.missedVulnerabilities}`)

  // Severity distribution
  console.log('\n🎯 Severity Distribution (All Findings):')
  console.log(` Critical: ${metrics.severityDistribution.critical}`)
  console.log(` High: ${metrics.severityDistribution.high}`)
  console.log(` Medium: ${metrics.severityDistribution.medium}`)
  console.log(` Low: ${metrics.severityDistribution.low}`)
  console.log(` Info: ${metrics.severityDistribution.info}`)

  if (metrics.detection.falsePositiveDetections > 0) {
    console.log('\n⚠️ False Positive Severity Distribution:')
    console.log(` Critical: ${metrics.falsePositiveSeverity.critical}`)
    console.log(` High: ${metrics.falsePositiveSeverity.high}`)
    console.log(` Medium: ${metrics.falsePositiveSeverity.medium}`)
    console.log(` Low: ${metrics.falsePositiveSeverity.low}`)
    console.log(` Info: ${metrics.falsePositiveSeverity.info}`)
  }

  // Detector performance
  if (metrics.byDetector.length > 0) {
    console.log('\n🔍 Detector Performance (sorted by false positives):')
    // Column widths mirror the row formatting below (the percent columns
    // are one wider than their padStart width because of the trailing '%').
    const header = [
      'Detector'.padEnd(25),
      'TP'.padStart(4),
      'FP'.padStart(4),
      'FN'.padStart(4),
      'Precision'.padStart(11),
      'Recall'.padStart(9),
    ].join(' ')
    console.log(` ${header}`)
    console.log(' ' + '─'.repeat(70))
    for (const detector of metrics.byDetector) {
      const name = detector.name.padEnd(25)
      const tp = detector.truePositives.toString().padStart(4)
      const fp = detector.falsePositives.toString().padStart(4)
      const fn = detector.falseNegatives.toString().padStart(4)
      const precision = (detector.precision * 100).toFixed(1).padStart(10) + '%'
      const recall = (detector.recall * 100).toFixed(1).padStart(8) + '%'
      console.log(` ${name} ${tp} ${fp} ${fn} ${precision} ${recall}`)
    }
  }

  // Coverage
  console.log('\n📈 Category Coverage:')
  console.log(` Categories tested: ${metrics.coverage.categoriesTested}/${metrics.coverage.totalCategories} (${metrics.coverage.coveragePercent.toFixed(1)}%)`)
  if (metrics.coverage.untestedCategories.length > 0) {
    console.log(' Untested categories:', metrics.coverage.untestedCategories.join(', '))
  }

  // Tier stats
  console.log('\n🏗️ Performance by Tier:')
  console.log(` Tier A (Core): ${metrics.byTier.tierA.passed}/${metrics.byTier.tierA.tested} findings validated`)
  console.log(` Tier B (AI-Assisted): ${metrics.byTier.tierB.passed}/${metrics.byTier.tierB.tested} findings validated`)
  console.log(` Tier C (Experimental): ${metrics.byTier.tierC.passed}/${metrics.byTier.tierC.tested} findings validated`)
}
|
|
445
|
+
|
|
446
|
+
/**
 * Print the final benchmark score to the console.
 *
 * Shows passed/total counts for true positive and false negative tests and
 * the overall pass rate. When the pass rate is exactly 100 a success banner
 * is printed; otherwise a warning is printed followed by the name and
 * failure reason of every result that did not pass.
 *
 * @param summary - Benchmark summary to print.
 */
export function printSummary(summary: BenchmarkSummary): void {
  console.log('\n' + '='.repeat(80))
  console.log('FINAL SCORE')
  console.log('='.repeat(80))

  console.log(`\n True Positive Tests: ${summary.truePositivePassed}/${summary.truePositivePassed + summary.truePositiveFailed} passed`)
  console.log(` False Negative Tests: ${summary.falseNegativePassed}/${summary.falseNegativePassed + summary.falseNegativeFailed} passed`)
  console.log(` ─────────────────────────────`)
  console.log(` Total: ${summary.passedTests}/${summary.totalTests} (${summary.passRate.toFixed(1)}%)`)

  if (summary.passRate === 100) {
    console.log('\n 🎉 ALL TESTS PASSED!')
  } else {
    console.log('\n ⚠️ Some tests need attention')

    // Show failed tests
    const failedTests = summary.results.filter(r => !r.passed)
    if (failedTests.length > 0) {
      console.log('\n Failed tests:')
      for (const test of failedTests) {
        console.log(` - ${test.name}: ${test.failureReason}`)
      }
    }
  }

  console.log('\n' + '='.repeat(80))
}
|