@oculum/scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/dist/formatters/cli-terminal.d.ts +27 -0
  2. package/dist/formatters/cli-terminal.d.ts.map +1 -0
  3. package/dist/formatters/cli-terminal.js +412 -0
  4. package/dist/formatters/cli-terminal.js.map +1 -0
  5. package/dist/formatters/github-comment.d.ts +41 -0
  6. package/dist/formatters/github-comment.d.ts.map +1 -0
  7. package/dist/formatters/github-comment.js +306 -0
  8. package/dist/formatters/github-comment.js.map +1 -0
  9. package/dist/formatters/grouping.d.ts +52 -0
  10. package/dist/formatters/grouping.d.ts.map +1 -0
  11. package/dist/formatters/grouping.js +152 -0
  12. package/dist/formatters/grouping.js.map +1 -0
  13. package/dist/formatters/index.d.ts +9 -0
  14. package/dist/formatters/index.d.ts.map +1 -0
  15. package/dist/formatters/index.js +35 -0
  16. package/dist/formatters/index.js.map +1 -0
  17. package/dist/formatters/vscode-diagnostic.d.ts +103 -0
  18. package/dist/formatters/vscode-diagnostic.d.ts.map +1 -0
  19. package/dist/formatters/vscode-diagnostic.js +151 -0
  20. package/dist/formatters/vscode-diagnostic.js.map +1 -0
  21. package/dist/index.d.ts +52 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +648 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/layer1/comments.d.ts +8 -0
  26. package/dist/layer1/comments.d.ts.map +1 -0
  27. package/dist/layer1/comments.js +203 -0
  28. package/dist/layer1/comments.js.map +1 -0
  29. package/dist/layer1/config-audit.d.ts +8 -0
  30. package/dist/layer1/config-audit.d.ts.map +1 -0
  31. package/dist/layer1/config-audit.js +252 -0
  32. package/dist/layer1/config-audit.js.map +1 -0
  33. package/dist/layer1/entropy.d.ts +8 -0
  34. package/dist/layer1/entropy.d.ts.map +1 -0
  35. package/dist/layer1/entropy.js +500 -0
  36. package/dist/layer1/entropy.js.map +1 -0
  37. package/dist/layer1/file-flags.d.ts +7 -0
  38. package/dist/layer1/file-flags.d.ts.map +1 -0
  39. package/dist/layer1/file-flags.js +112 -0
  40. package/dist/layer1/file-flags.js.map +1 -0
  41. package/dist/layer1/index.d.ts +36 -0
  42. package/dist/layer1/index.d.ts.map +1 -0
  43. package/dist/layer1/index.js +132 -0
  44. package/dist/layer1/index.js.map +1 -0
  45. package/dist/layer1/patterns.d.ts +8 -0
  46. package/dist/layer1/patterns.d.ts.map +1 -0
  47. package/dist/layer1/patterns.js +482 -0
  48. package/dist/layer1/patterns.js.map +1 -0
  49. package/dist/layer1/urls.d.ts +8 -0
  50. package/dist/layer1/urls.d.ts.map +1 -0
  51. package/dist/layer1/urls.js +296 -0
  52. package/dist/layer1/urls.js.map +1 -0
  53. package/dist/layer1/weak-crypto.d.ts +7 -0
  54. package/dist/layer1/weak-crypto.d.ts.map +1 -0
  55. package/dist/layer1/weak-crypto.js +291 -0
  56. package/dist/layer1/weak-crypto.js.map +1 -0
  57. package/dist/layer2/ai-agent-tools.d.ts +19 -0
  58. package/dist/layer2/ai-agent-tools.d.ts.map +1 -0
  59. package/dist/layer2/ai-agent-tools.js +528 -0
  60. package/dist/layer2/ai-agent-tools.js.map +1 -0
  61. package/dist/layer2/ai-endpoint-protection.d.ts +36 -0
  62. package/dist/layer2/ai-endpoint-protection.d.ts.map +1 -0
  63. package/dist/layer2/ai-endpoint-protection.js +332 -0
  64. package/dist/layer2/ai-endpoint-protection.js.map +1 -0
  65. package/dist/layer2/ai-execution-sinks.d.ts +18 -0
  66. package/dist/layer2/ai-execution-sinks.d.ts.map +1 -0
  67. package/dist/layer2/ai-execution-sinks.js +496 -0
  68. package/dist/layer2/ai-execution-sinks.js.map +1 -0
  69. package/dist/layer2/ai-fingerprinting.d.ts +7 -0
  70. package/dist/layer2/ai-fingerprinting.d.ts.map +1 -0
  71. package/dist/layer2/ai-fingerprinting.js +654 -0
  72. package/dist/layer2/ai-fingerprinting.js.map +1 -0
  73. package/dist/layer2/ai-prompt-hygiene.d.ts +19 -0
  74. package/dist/layer2/ai-prompt-hygiene.d.ts.map +1 -0
  75. package/dist/layer2/ai-prompt-hygiene.js +356 -0
  76. package/dist/layer2/ai-prompt-hygiene.js.map +1 -0
  77. package/dist/layer2/ai-rag-safety.d.ts +21 -0
  78. package/dist/layer2/ai-rag-safety.d.ts.map +1 -0
  79. package/dist/layer2/ai-rag-safety.js +459 -0
  80. package/dist/layer2/ai-rag-safety.js.map +1 -0
  81. package/dist/layer2/ai-schema-validation.d.ts +25 -0
  82. package/dist/layer2/ai-schema-validation.d.ts.map +1 -0
  83. package/dist/layer2/ai-schema-validation.js +375 -0
  84. package/dist/layer2/ai-schema-validation.js.map +1 -0
  85. package/dist/layer2/auth-antipatterns.d.ts +20 -0
  86. package/dist/layer2/auth-antipatterns.d.ts.map +1 -0
  87. package/dist/layer2/auth-antipatterns.js +333 -0
  88. package/dist/layer2/auth-antipatterns.js.map +1 -0
  89. package/dist/layer2/byok-patterns.d.ts +12 -0
  90. package/dist/layer2/byok-patterns.d.ts.map +1 -0
  91. package/dist/layer2/byok-patterns.js +299 -0
  92. package/dist/layer2/byok-patterns.js.map +1 -0
  93. package/dist/layer2/dangerous-functions.d.ts +7 -0
  94. package/dist/layer2/dangerous-functions.d.ts.map +1 -0
  95. package/dist/layer2/dangerous-functions.js +1375 -0
  96. package/dist/layer2/dangerous-functions.js.map +1 -0
  97. package/dist/layer2/data-exposure.d.ts +16 -0
  98. package/dist/layer2/data-exposure.d.ts.map +1 -0
  99. package/dist/layer2/data-exposure.js +279 -0
  100. package/dist/layer2/data-exposure.js.map +1 -0
  101. package/dist/layer2/framework-checks.d.ts +7 -0
  102. package/dist/layer2/framework-checks.d.ts.map +1 -0
  103. package/dist/layer2/framework-checks.js +388 -0
  104. package/dist/layer2/framework-checks.js.map +1 -0
  105. package/dist/layer2/index.d.ts +58 -0
  106. package/dist/layer2/index.d.ts.map +1 -0
  107. package/dist/layer2/index.js +380 -0
  108. package/dist/layer2/index.js.map +1 -0
  109. package/dist/layer2/logic-gates.d.ts +7 -0
  110. package/dist/layer2/logic-gates.d.ts.map +1 -0
  111. package/dist/layer2/logic-gates.js +182 -0
  112. package/dist/layer2/logic-gates.js.map +1 -0
  113. package/dist/layer2/risky-imports.d.ts +7 -0
  114. package/dist/layer2/risky-imports.d.ts.map +1 -0
  115. package/dist/layer2/risky-imports.js +161 -0
  116. package/dist/layer2/risky-imports.js.map +1 -0
  117. package/dist/layer2/variables.d.ts +8 -0
  118. package/dist/layer2/variables.d.ts.map +1 -0
  119. package/dist/layer2/variables.js +152 -0
  120. package/dist/layer2/variables.js.map +1 -0
  121. package/dist/layer3/anthropic.d.ts +83 -0
  122. package/dist/layer3/anthropic.d.ts.map +1 -0
  123. package/dist/layer3/anthropic.js +1745 -0
  124. package/dist/layer3/anthropic.js.map +1 -0
  125. package/dist/layer3/index.d.ts +24 -0
  126. package/dist/layer3/index.d.ts.map +1 -0
  127. package/dist/layer3/index.js +119 -0
  128. package/dist/layer3/index.js.map +1 -0
  129. package/dist/layer3/openai.d.ts +25 -0
  130. package/dist/layer3/openai.d.ts.map +1 -0
  131. package/dist/layer3/openai.js +238 -0
  132. package/dist/layer3/openai.js.map +1 -0
  133. package/dist/layer3/package-check.d.ts +63 -0
  134. package/dist/layer3/package-check.d.ts.map +1 -0
  135. package/dist/layer3/package-check.js +508 -0
  136. package/dist/layer3/package-check.js.map +1 -0
  137. package/dist/modes/incremental.d.ts +66 -0
  138. package/dist/modes/incremental.d.ts.map +1 -0
  139. package/dist/modes/incremental.js +200 -0
  140. package/dist/modes/incremental.js.map +1 -0
  141. package/dist/tiers.d.ts +125 -0
  142. package/dist/tiers.d.ts.map +1 -0
  143. package/dist/tiers.js +234 -0
  144. package/dist/tiers.js.map +1 -0
  145. package/dist/types.d.ts +175 -0
  146. package/dist/types.d.ts.map +1 -0
  147. package/dist/types.js +50 -0
  148. package/dist/types.js.map +1 -0
  149. package/dist/utils/auth-helper-detector.d.ts +56 -0
  150. package/dist/utils/auth-helper-detector.d.ts.map +1 -0
  151. package/dist/utils/auth-helper-detector.js +360 -0
  152. package/dist/utils/auth-helper-detector.js.map +1 -0
  153. package/dist/utils/context-helpers.d.ts +96 -0
  154. package/dist/utils/context-helpers.d.ts.map +1 -0
  155. package/dist/utils/context-helpers.js +493 -0
  156. package/dist/utils/context-helpers.js.map +1 -0
  157. package/dist/utils/diff-detector.d.ts +53 -0
  158. package/dist/utils/diff-detector.d.ts.map +1 -0
  159. package/dist/utils/diff-detector.js +104 -0
  160. package/dist/utils/diff-detector.js.map +1 -0
  161. package/dist/utils/diff-parser.d.ts +80 -0
  162. package/dist/utils/diff-parser.d.ts.map +1 -0
  163. package/dist/utils/diff-parser.js +202 -0
  164. package/dist/utils/diff-parser.js.map +1 -0
  165. package/dist/utils/imported-auth-detector.d.ts +37 -0
  166. package/dist/utils/imported-auth-detector.d.ts.map +1 -0
  167. package/dist/utils/imported-auth-detector.js +251 -0
  168. package/dist/utils/imported-auth-detector.js.map +1 -0
  169. package/dist/utils/middleware-detector.d.ts +55 -0
  170. package/dist/utils/middleware-detector.d.ts.map +1 -0
  171. package/dist/utils/middleware-detector.js +260 -0
  172. package/dist/utils/middleware-detector.js.map +1 -0
  173. package/dist/utils/oauth-flow-detector.d.ts +41 -0
  174. package/dist/utils/oauth-flow-detector.d.ts.map +1 -0
  175. package/dist/utils/oauth-flow-detector.js +202 -0
  176. package/dist/utils/oauth-flow-detector.js.map +1 -0
  177. package/dist/utils/path-exclusions.d.ts +55 -0
  178. package/dist/utils/path-exclusions.d.ts.map +1 -0
  179. package/dist/utils/path-exclusions.js +222 -0
  180. package/dist/utils/path-exclusions.js.map +1 -0
  181. package/dist/utils/project-context-builder.d.ts +119 -0
  182. package/dist/utils/project-context-builder.d.ts.map +1 -0
  183. package/dist/utils/project-context-builder.js +534 -0
  184. package/dist/utils/project-context-builder.js.map +1 -0
  185. package/dist/utils/registry-clients.d.ts +93 -0
  186. package/dist/utils/registry-clients.d.ts.map +1 -0
  187. package/dist/utils/registry-clients.js +273 -0
  188. package/dist/utils/registry-clients.js.map +1 -0
  189. package/dist/utils/trpc-analyzer.d.ts +78 -0
  190. package/dist/utils/trpc-analyzer.d.ts.map +1 -0
  191. package/dist/utils/trpc-analyzer.js +297 -0
  192. package/dist/utils/trpc-analyzer.js.map +1 -0
  193. package/package.json +45 -0
  194. package/src/__tests__/benchmark/fixtures/false-positives.ts +227 -0
  195. package/src/__tests__/benchmark/fixtures/index.ts +68 -0
  196. package/src/__tests__/benchmark/fixtures/layer1/config-audit.ts +364 -0
  197. package/src/__tests__/benchmark/fixtures/layer1/hardcoded-secrets.ts +173 -0
  198. package/src/__tests__/benchmark/fixtures/layer1/high-entropy.ts +234 -0
  199. package/src/__tests__/benchmark/fixtures/layer1/index.ts +31 -0
  200. package/src/__tests__/benchmark/fixtures/layer1/sensitive-urls.ts +90 -0
  201. package/src/__tests__/benchmark/fixtures/layer1/weak-crypto.ts +197 -0
  202. package/src/__tests__/benchmark/fixtures/layer2/ai-agent-tools.ts +170 -0
  203. package/src/__tests__/benchmark/fixtures/layer2/ai-endpoint-protection.ts +418 -0
  204. package/src/__tests__/benchmark/fixtures/layer2/ai-execution-sinks.ts +189 -0
  205. package/src/__tests__/benchmark/fixtures/layer2/ai-fingerprinting.ts +316 -0
  206. package/src/__tests__/benchmark/fixtures/layer2/ai-prompt-hygiene.ts +178 -0
  207. package/src/__tests__/benchmark/fixtures/layer2/ai-rag-safety.ts +184 -0
  208. package/src/__tests__/benchmark/fixtures/layer2/ai-schema-validation.ts +434 -0
  209. package/src/__tests__/benchmark/fixtures/layer2/auth-antipatterns.ts +159 -0
  210. package/src/__tests__/benchmark/fixtures/layer2/byok-patterns.ts +112 -0
  211. package/src/__tests__/benchmark/fixtures/layer2/dangerous-functions.ts +246 -0
  212. package/src/__tests__/benchmark/fixtures/layer2/data-exposure.ts +168 -0
  213. package/src/__tests__/benchmark/fixtures/layer2/framework-checks.ts +346 -0
  214. package/src/__tests__/benchmark/fixtures/layer2/index.ts +67 -0
  215. package/src/__tests__/benchmark/fixtures/layer2/injection-vulnerabilities.ts +239 -0
  216. package/src/__tests__/benchmark/fixtures/layer2/logic-gates.ts +246 -0
  217. package/src/__tests__/benchmark/fixtures/layer2/risky-imports.ts +231 -0
  218. package/src/__tests__/benchmark/fixtures/layer2/variables.ts +167 -0
  219. package/src/__tests__/benchmark/index.ts +29 -0
  220. package/src/__tests__/benchmark/run-benchmark.ts +144 -0
  221. package/src/__tests__/benchmark/run-depth-validation.ts +206 -0
  222. package/src/__tests__/benchmark/run-real-world-test.ts +243 -0
  223. package/src/__tests__/benchmark/security-benchmark-script.ts +1737 -0
  224. package/src/__tests__/benchmark/tier-integration-script.ts +177 -0
  225. package/src/__tests__/benchmark/types.ts +144 -0
  226. package/src/__tests__/benchmark/utils/test-runner.ts +475 -0
  227. package/src/__tests__/regression/known-false-positives.test.ts +467 -0
  228. package/src/__tests__/snapshots/__snapshots__/scan-depth.test.ts.snap +178 -0
  229. package/src/__tests__/snapshots/scan-depth.test.ts +258 -0
  230. package/src/__tests__/validation/analyze-results.ts +542 -0
  231. package/src/__tests__/validation/extract-for-triage.ts +146 -0
  232. package/src/__tests__/validation/fp-deep-analysis.ts +327 -0
  233. package/src/__tests__/validation/run-validation.ts +364 -0
  234. package/src/__tests__/validation/triage-template.md +132 -0
  235. package/src/formatters/cli-terminal.ts +446 -0
  236. package/src/formatters/github-comment.ts +382 -0
  237. package/src/formatters/grouping.ts +190 -0
  238. package/src/formatters/index.ts +47 -0
  239. package/src/formatters/vscode-diagnostic.ts +243 -0
  240. package/src/index.ts +823 -0
  241. package/src/layer1/comments.ts +218 -0
  242. package/src/layer1/config-audit.ts +289 -0
  243. package/src/layer1/entropy.ts +583 -0
  244. package/src/layer1/file-flags.ts +127 -0
  245. package/src/layer1/index.ts +181 -0
  246. package/src/layer1/patterns.ts +516 -0
  247. package/src/layer1/urls.ts +334 -0
  248. package/src/layer1/weak-crypto.ts +328 -0
  249. package/src/layer2/ai-agent-tools.ts +601 -0
  250. package/src/layer2/ai-endpoint-protection.ts +387 -0
  251. package/src/layer2/ai-execution-sinks.ts +580 -0
  252. package/src/layer2/ai-fingerprinting.ts +758 -0
  253. package/src/layer2/ai-prompt-hygiene.ts +411 -0
  254. package/src/layer2/ai-rag-safety.ts +511 -0
  255. package/src/layer2/ai-schema-validation.ts +421 -0
  256. package/src/layer2/auth-antipatterns.ts +394 -0
  257. package/src/layer2/byok-patterns.ts +336 -0
  258. package/src/layer2/dangerous-functions.ts +1563 -0
  259. package/src/layer2/data-exposure.ts +315 -0
  260. package/src/layer2/framework-checks.ts +433 -0
  261. package/src/layer2/index.ts +473 -0
  262. package/src/layer2/logic-gates.ts +206 -0
  263. package/src/layer2/risky-imports.ts +186 -0
  264. package/src/layer2/variables.ts +166 -0
  265. package/src/layer3/anthropic.ts +2030 -0
  266. package/src/layer3/index.ts +130 -0
  267. package/src/layer3/package-check.ts +604 -0
  268. package/src/modes/incremental.ts +293 -0
  269. package/src/tiers.ts +318 -0
  270. package/src/types.ts +284 -0
  271. package/src/utils/auth-helper-detector.ts +443 -0
  272. package/src/utils/context-helpers.ts +535 -0
  273. package/src/utils/diff-detector.ts +135 -0
  274. package/src/utils/diff-parser.ts +272 -0
  275. package/src/utils/imported-auth-detector.ts +320 -0
  276. package/src/utils/middleware-detector.ts +333 -0
  277. package/src/utils/oauth-flow-detector.ts +246 -0
  278. package/src/utils/path-exclusions.ts +266 -0
  279. package/src/utils/project-context-builder.ts +707 -0
  280. package/src/utils/registry-clients.ts +351 -0
  281. package/src/utils/trpc-analyzer.ts +382 -0
@@ -0,0 +1,475 @@
1
+ /**
2
+ * Test runner utilities for the security benchmark suite
3
+ */
4
+
5
+ import { runLayer1Scan } from '../../../layer1'
6
+ import { runLayer2Scan } from '../../../layer2'
7
+ import { computeTierStats, formatTierStats, getTierForCategory } from '../../../tiers'
8
+ import type {
9
+ TestFixture,
10
+ TestResult,
11
+ TestGroup,
12
+ BenchmarkSummary,
13
+ BenchmarkMetrics,
14
+ DetectorMetrics,
15
+ SeverityMetrics
16
+ } from '../types'
17
+ import type { Vulnerability, VulnerabilityCategory } from '../../../types'
18
+
19
/**
 * Run one benchmark fixture through the Layer 1 and Layer 2 scanners and grade
 * the combined findings.
 *
 * Grading rules:
 * - `fixture.expectFindings` truthy (true-positive fixture): fails when no
 *   finding is reported at all, or when any entry of
 *   `fixture.expectedCategories` is absent from the reported categories.
 * - otherwise (clean-code / false-positive fixture): every critical, high or
 *   medium finding is a false positive and fails the fixture. A low or info
 *   finding fails the fixture unless its category is listed in
 *   `fixture.allowedInfoFindings`; for a listed category the first `maxCount`
 *   findings are accepted and any further ones are problematic.
 *
 * A finding is recorded as either acceptable or problematic, never both, and
 * `failureReason` always holds the first failure that was detected.
 *
 * @param fixture - Fixture describing the file to scan and the expectations.
 * @returns The per-layer findings plus the pass/fail verdict and its reason.
 */
export async function runTestFixture(fixture: TestFixture): Promise<TestResult> {
  const layer1Result = await runLayer1Scan([fixture.file])
  const layer2Result = await runLayer2Scan([fixture.file])

  const allFindings = [...layer1Result.vulnerabilities, ...layer2Result.vulnerabilities]

  const unexpectedCategories: VulnerabilityCategory[] = []
  const acceptableInfoFindings: Vulnerability[] = []
  const problematicFindings: Vulnerability[] = []
  // Every detected failure in detection order; the first one becomes failureReason.
  const failures: string[] = []

  if (fixture.expectFindings) {
    // True positive test - should find vulnerabilities
    if (allFindings.length === 0) {
      failures.push('Expected vulnerabilities but found none')
    } else {
      const foundCategories = new Set(allFindings.map(f => f.category))
      const missing = (fixture.expectedCategories ?? []).filter(cat => !foundCategories.has(cat))
      if (missing.length === 1) {
        failures.push(`Missing expected category: ${missing[0]}`)
      } else if (missing.length > 1) {
        // Report every missing category rather than only the last one checked.
        failures.push(`Missing expected categories: ${missing.join(', ')}`)
      }
    }
  } else {
    // False positive test - clean code should NOT be flagged.
    // Default: fail on medium+ severity (critical, high, medium).
    // Optional: allow specific info/low findings via allowedInfoFindings.
    const mediumOrHigher = allFindings.filter(f =>
      f.severity === 'critical' || f.severity === 'high' || f.severity === 'medium'
    )

    if (mediumOrHigher.length > 0) {
      failures.push(`Found ${mediumOrHigher.length} false positive(s) with medium+ severity`)
      problematicFindings.push(...mediumOrHigher)
      for (const f of mediumOrHigher) {
        unexpectedCategories.push(f.category)
      }
    }

    const lowSeverity = allFindings.filter(f =>
      f.severity === 'low' || f.severity === 'info'
    )

    if (lowSeverity.length > 0) {
      const allowedCategories = new Map(
        (fixture.allowedInfoFindings ?? []).map(a => [a.category, a.maxCount])
      )

      // Count low-severity findings per category once, for the "exceeded" message.
      const totalByCategory = new Map<VulnerabilityCategory, number>()
      for (const f of lowSeverity) {
        totalByCategory.set(f.category, (totalByCategory.get(f.category) ?? 0) + 1)
      }

      // How many findings of each allowed category have been accepted so far.
      const acceptedByCategory = new Map<VulnerabilityCategory, number>()

      for (const finding of lowSeverity) {
        const allowedCount = allowedCategories.get(finding.category)

        if (allowedCount === undefined) {
          // Category is not on the allow-list - this is a false positive.
          failures.push(`Found unexpected ${finding.severity} finding: ${finding.category}`)
          problematicFindings.push(finding)
          unexpectedCategories.push(finding.category)
          continue
        }

        const acceptedSoFar = acceptedByCategory.get(finding.category) ?? 0
        if (acceptedSoFar < allowedCount) {
          // Still within the allowance for this category.
          acceptedByCategory.set(finding.category, acceptedSoFar + 1)
          acceptableInfoFindings.push(finding)
        } else {
          // Allowance used up: the excess finding is problematic only.
          const countForCategory = totalByCategory.get(finding.category) ?? 0
          failures.push(`Exceeded allowed count for ${finding.category}: ${countForCategory} > ${allowedCount}`)
          problematicFindings.push(finding)
        }
      }
    }
  }

  return {
    name: fixture.name,
    file: fixture.file,
    layer1Findings: layer1Result.vulnerabilities,
    layer2Findings: layer2Result.vulnerabilities,
    expectedCategories: fixture.expectedCategories ?? [],
    unexpectedCategories,
    passed: failures.length === 0,
    failureReason: failures[0],
    acceptableInfoFindings,
    problematicFindings,
  }
}
119
+
120
/**
 * Execute every fixture of a test group, one at a time.
 *
 * Fixtures are awaited sequentially: all of `group.truePositives` first, then
 * all of `group.falseNegatives`. The returned array keeps that order.
 *
 * @param group - Group holding the two fixture lists to run.
 * @returns One result per fixture, in execution order.
 */
export async function runTestGroup(group: TestGroup): Promise<TestResult[]> {
  const outcomes: TestResult[] = []

  for (const fixtures of [group.truePositives, group.falseNegatives]) {
    for (const fixture of fixtures) {
      const outcome = await runTestFixture(fixture)
      outcomes.push(outcome)
    }
  }

  return outcomes
}
138
+
139
/**
 * Print a human-readable report for a single test result to the console.
 *
 * Output, in order: the test name and scanned file path, the number of
 * findings per layer, a per-category / per-severity / per-tier breakdown (only
 * when there is at least one finding), and finally the verdict. A failed test
 * also lists its problematic findings.
 *
 * A result counts as a true-positive test when it has expected categories or
 * its name contains "True Positive"; this only selects the wording of the
 * PASS line.
 *
 * @param result - The graded result to print.
 */
export function printTestResult(result: TestResult): void {
  const allFindings = [...result.layer1Findings, ...result.layer2Findings]

  console.log(`\n📋 ${result.name}`)
  console.log(` File: ${result.file.path}`)
  console.log(` Layer 1 findings: ${result.layer1Findings.length}`)
  console.log(` Layer 2 findings: ${result.layer2Findings.length}`)

  if (allFindings.length > 0) {
    const byCategory: Record<string, number> = {}
    const bySeverity: Record<string, number> = {}

    for (const f of allFindings) {
      byCategory[f.category] = (byCategory[f.category] ?? 0) + 1
      bySeverity[f.severity] = (bySeverity[f.severity] ?? 0) + 1
    }

    console.log(' By category:', Object.entries(byCategory).map(([k, v]) => `${k}:${v}`).join(', '))
    console.log(' By severity:', Object.entries(bySeverity).map(([k, v]) => `${k}:${v}`).join(', '))

    // Tier breakdown
    const tierStats = computeTierStats(allFindings.map(f => ({ category: f.category, layer: f.layer })))
    console.log(` ${formatTierStats(tierStats)}`)
  }

  if (result.passed) {
    const isTPTest = result.expectedCategories.length > 0 || result.name.includes('True Positive')
    if (isTPTest) {
      console.log(' ✅ PASS: Detected expected vulnerabilities')
    } else {
      console.log(' ✅ PASS: No false positives')
      if (result.acceptableInfoFindings && result.acceptableInfoFindings.length > 0) {
        console.log(` ℹ️ Note: ${result.acceptableInfoFindings.length} acceptable low-severity finding(s)`)
      }
    }
  } else {
    console.log(` ❌ FAIL: ${result.failureReason}`)
    if (result.problematicFindings && result.problematicFindings.length > 0) {
      console.log(' Problematic findings:')
      for (const f of result.problematicFindings) {
        console.log(` - ${f.category} (${f.severity}): ${f.title}`)
      }
    }
  }
}
188
+
189
/**
 * Aggregate per-test results into benchmark-wide metrics.
 *
 * For every finding of every result this tallies the overall count, the
 * severity distribution and the tier bucket (via `getTierForCategory`), and
 * keeps a tp/fp/fn tally per finding category:
 * - in a true-positive test (expected categories present, or a name containing
 *   "True Positive") a finding whose category was expected counts as a true
 *   positive;
 * - in a test whose name contains "False Negative" or "Should Not Flag" a
 *   finding counts as a false positive unless a finding with the same `id` is
 *   listed in the result's `acceptableInfoFindings`.
 * A failed true-positive test additionally counts one miss per expected
 * category that no finding reported.
 *
 * @param results - Graded results to aggregate.
 * @returns Detection totals, per-detector precision/recall sorted by
 *   descending false-positive count, severity distributions, coverage of the
 *   known category list, and per-tier counters.
 */
export function computeMetrics(results: TestResult[]): BenchmarkMetrics {
  const seenCategories = new Set<VulnerabilityCategory>()
  const tallies = new Map<string, { tp: number; fp: number; fn: number }>()

  // Look up the tally of a detector, registering a zeroed one on first use.
  const tallyFor = (detector: string) => {
    let tally = tallies.get(detector)
    if (!tally) {
      tally = { tp: 0, fp: 0, fn: 0 }
      tallies.set(detector, tally)
    }
    return tally
  }

  const severityDistribution: SeverityMetrics = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }
  const falsePositiveSeverity: SeverityMetrics = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }

  const detection = {
    totalVulnerabilitiesDetected: 0,
    truePositiveDetections: 0,
    falsePositiveDetections: 0,
    missedVulnerabilities: 0,
  }

  const byTier = {
    tierA: { tested: 0, passed: 0 },
    tierB: { tested: 0, passed: 0 },
    tierC: { tested: 0, passed: 0 },
  }

  for (const result of results) {
    const findings = [...result.layer1Findings, ...result.layer2Findings]
    const expectsFindings = result.expectedCategories.length > 0 || result.name.includes('True Positive')
    const expectsClean = result.name.includes('False Negative') || result.name.includes('Should Not Flag')

    for (const finding of findings) {
      // Tier bookkeeping: every finding counts as tested, and as passed when
      // the test it belongs to passed.
      let bucket: { tested: number; passed: number } | undefined
      switch (getTierForCategory(finding.category, finding.layer)) {
        case 'core':
          bucket = byTier.tierA
          break
        case 'ai_assisted':
          bucket = byTier.tierB
          break
        case 'experimental':
          bucket = byTier.tierC
          break
      }
      if (bucket) {
        bucket.tested += 1
        if (result.passed) bucket.passed += 1
      }

      seenCategories.add(finding.category)
      detection.totalVulnerabilitiesDetected += 1
      severityDistribution[finding.severity] += 1

      const tally = tallyFor(finding.category)

      if (expectsFindings) {
        // Only findings in an expected category count as true positives.
        if (result.expectedCategories?.includes(finding.category)) {
          tally.tp += 1
          detection.truePositiveDetections += 1
        }
        continue
      }

      if (!expectsClean) continue

      // Findings in a clean-code test are false positives unless accepted.
      const accepted = result.acceptableInfoFindings?.some(ok => ok.id === finding.id)
      if (!accepted) {
        tally.fp += 1
        detection.falsePositiveDetections += 1
        falsePositiveSeverity[finding.severity] += 1
      }
    }

    // Missed vulnerabilities: expected categories that a failed
    // true-positive test never reported.
    if (expectsFindings && !result.passed) {
      for (const expected of result.expectedCategories || []) {
        const tally = tallyFor(expected)
        if (findings.every(f => f.category !== expected)) {
          tally.fn += 1
          detection.missedVulnerabilities += 1
        }
      }
    }
  }

  // hits / (hits + others), or 0 when there is nothing to divide by.
  const ratio = (hits: number, others: number): number => {
    const total = hits + others
    return total > 0 ? hits / total : 0
  }

  const byDetector: DetectorMetrics[] = []
  for (const [name, tally] of tallies) {
    byDetector.push({
      name,
      truePositives: tally.tp,
      falsePositives: tally.fp,
      falseNegatives: tally.fn,
      precision: ratio(tally.tp, tally.fp),
      recall: ratio(tally.tp, tally.fn),
    })
  }
  // Detectors with the most false positives come first.
  byDetector.sort((left, right) => right.falsePositives - left.falsePositives)

  // Coverage is measured against this fixed list of categories.
  const knownCategories: VulnerabilityCategory[] = [
    'hardcoded_secret',
    'high_entropy_string',
    'weak_crypto',
    'sensitive_url',
    'dangerous_function',
    'missing_auth',
    'data_exposure',
    'ai_pattern',
    'ai_prompt_injection',
    'ai_unsafe_execution',
    'ai_overpermissive_tool',
    'insecure_config',
    'suspicious_package',
    'sensitive_variable',
  ]
  const untestedCategories = knownCategories.filter(known => !seenCategories.has(known))
  const categoriesTested = knownCategories.length - untestedCategories.length

  return {
    detection,
    byDetector,
    severityDistribution,
    falsePositiveSeverity,
    coverage: {
      totalCategories: knownCategories.length,
      categoriesTested,
      coveragePercent: (categoriesTested / knownCategories.length) * 100,
      untestedCategories,
    },
    byTier,
  }
}
334
+
335
/**
 * Condense a list of test results into the final benchmark summary.
 *
 * Each result is classified by the same heuristic used elsewhere in this
 * file: it is a true-positive test when it has expected categories or its
 * name contains "True Positive"; otherwise it is a false-negative test when
 * its name contains "False Negative" or "Should Not Flag". Results matching
 * neither rule are left out of the counters (they are still passed on in
 * `results` and to `computeMetrics`).
 *
 * @param results - Graded results to summarise.
 * @returns Pass/fail counters, the pass rate in percent (0 when nothing was
 *   counted), the original results and the metrics from `computeMetrics`.
 */
export function computeSummary(results: TestResult[]): BenchmarkSummary {
  const counters = {
    truePositivePassed: 0,
    truePositiveFailed: 0,
    falseNegativePassed: 0,
    falseNegativeFailed: 0,
  }

  for (const entry of results) {
    const expectsFindings =
      entry.expectedCategories.length > 0 || entry.name.includes('True Positive')
    const expectsClean =
      entry.name.includes('False Negative') || entry.name.includes('Should Not Flag')

    if (expectsFindings) {
      if (entry.passed) {
        counters.truePositivePassed += 1
      } else {
        counters.truePositiveFailed += 1
      }
      continue
    }

    if (!expectsClean) continue

    if (entry.passed) {
      counters.falseNegativePassed += 1
    } else {
      counters.falseNegativeFailed += 1
    }
  }

  const passedTests = counters.truePositivePassed + counters.falseNegativePassed
  const totalTests =
    counters.truePositivePassed +
    counters.truePositiveFailed +
    counters.falseNegativePassed +
    counters.falseNegativeFailed

  let passRate = 0
  if (totalTests > 0) {
    passRate = (passedTests / totalTests) * 100
  }

  const metrics = computeMetrics(results)

  return {
    truePositivePassed: counters.truePositivePassed,
    truePositiveFailed: counters.truePositiveFailed,
    falseNegativePassed: counters.falseNegativePassed,
    falseNegativeFailed: counters.falseNegativeFailed,
    totalTests,
    passedTests,
    passRate,
    results,
    metrics,
  }
}
383
+
384
/**
 * Print the detailed benchmark metrics to the console.
 *
 * Sections, in order: overall detection counts, severity distribution of all
 * findings, severity distribution of false positives (only when there is at
 * least one), a per-detector table (only when there is at least one
 * detector), category coverage, and the per-tier counters.
 *
 * The detector table header is built from the same column widths as the rows
 * so that the headings line up with the values beneath them.
 *
 * @param metrics - Metrics to print, as produced by `computeMetrics`.
 */
export function printMetrics(metrics: BenchmarkMetrics): void {
  // Prints the five severity counters of one distribution.
  const printSeverities = (dist: SeverityMetrics): void => {
    console.log(` Critical: ${dist.critical}`)
    console.log(` High: ${dist.high}`)
    console.log(` Medium: ${dist.medium}`)
    console.log(` Low: ${dist.low}`)
    console.log(` Info: ${dist.info}`)
  }

  console.log('\n' + '='.repeat(80))
  console.log('DETAILED PERFORMANCE METRICS')
  console.log('='.repeat(80))

  // Overall detection stats
  console.log('\n📊 Detection Statistics:')
  console.log(` Total findings: ${metrics.detection.totalVulnerabilitiesDetected}`)
  console.log(` True positives: ${metrics.detection.truePositiveDetections}`)
  console.log(` False positives: ${metrics.detection.falsePositiveDetections}`)
  console.log(` Missed vulnerabilities: ${metrics.detection.missedVulnerabilities}`)

  // Severity distribution
  console.log('\n🎯 Severity Distribution (All Findings):')
  printSeverities(metrics.severityDistribution)

  if (metrics.detection.falsePositiveDetections > 0) {
    console.log('\n⚠️ False Positive Severity Distribution:')
    printSeverities(metrics.falsePositiveSeverity)
  }

  // Detector performance
  if (metrics.byDetector.length > 0) {
    // Column widths shared by the header and every row. The percentage
    // columns are one wider than their padStart width because of the '%'.
    const nameWidth = 25
    const countWidth = 4
    const precisionWidth = 10
    const recallWidth = 8

    const header = [
      'Detector'.padEnd(nameWidth),
      'TP'.padStart(countWidth),
      'FP'.padStart(countWidth),
      'FN'.padStart(countWidth),
      'Precision'.padStart(precisionWidth + 1),
      'Recall'.padStart(recallWidth + 1),
    ].join(' ')

    console.log('\n🔍 Detector Performance (sorted by false positives):')
    console.log(` ${header}`)
    console.log(' ' + '─'.repeat(70))
    for (const detector of metrics.byDetector) {
      const name = detector.name.padEnd(nameWidth)
      const tp = detector.truePositives.toString().padStart(countWidth)
      const fp = detector.falsePositives.toString().padStart(countWidth)
      const fn = detector.falseNegatives.toString().padStart(countWidth)
      const precision = (detector.precision * 100).toFixed(1).padStart(precisionWidth) + '%'
      const recall = (detector.recall * 100).toFixed(1).padStart(recallWidth) + '%'
      console.log(` ${name} ${tp} ${fp} ${fn} ${precision} ${recall}`)
    }
  }

  // Coverage
  console.log('\n📋 Category Coverage:')
  console.log(` Categories tested: ${metrics.coverage.categoriesTested}/${metrics.coverage.totalCategories} (${metrics.coverage.coveragePercent.toFixed(1)}%)`)
  if (metrics.coverage.untestedCategories.length > 0) {
    console.log(' Untested categories:', metrics.coverage.untestedCategories.join(', '))
  }

  // Tier stats
  console.log('\n🎚️ Performance by Tier:')
  console.log(` Tier A (Core): ${metrics.byTier.tierA.passed}/${metrics.byTier.tierA.tested} findings validated`)
  console.log(` Tier B (AI-Assisted): ${metrics.byTier.tierB.passed}/${metrics.byTier.tierB.tested} findings validated`)
  console.log(` Tier C (Experimental): ${metrics.byTier.tierC.passed}/${metrics.byTier.tierC.tested} findings validated`)
}
445
+
446
/**
 * Print the final benchmark score to the console.
 *
 * Shows the passed/total counts for true-positive and false-negative tests
 * and the overall pass rate. When the pass rate is not exactly 100 it also
 * lists every result in `summary.results` that did not pass, together with
 * its failure reason.
 *
 * @param summary - Summary to print, as produced by `computeSummary`.
 */
export function printSummary(summary: BenchmarkSummary): void {
  const divider = '='.repeat(80)
  const truePositiveTotal = summary.truePositivePassed + summary.truePositiveFailed
  const falseNegativeTotal = summary.falseNegativePassed + summary.falseNegativeFailed

  console.log('\n' + divider)
  console.log('FINAL SCORE')
  console.log(divider)

  console.log(`\n True Positive Tests: ${summary.truePositivePassed}/${truePositiveTotal} passed`)
  console.log(` False Negative Tests: ${summary.falseNegativePassed}/${falseNegativeTotal} passed`)
  console.log(` ─────────────────────────────`)
  console.log(` Total: ${summary.passedTests}/${summary.totalTests} (${summary.passRate.toFixed(1)}%)`)

  if (summary.passRate === 100) {
    console.log('\n 🎉 ALL TESTS PASSED!')
  } else {
    console.log('\n ⚠️ Some tests need attention')

    // Show failed tests
    const failedTests = summary.results.filter(r => !r.passed)
    if (failedTests.length > 0) {
      console.log('\n Failed tests:')
      for (const test of failedTests) {
        console.log(` - ${test.name}: ${test.failureReason}`)
      }
    }
  }

  console.log('\n' + divider)
}