@oculum/scanner 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (281) hide show
  1. package/dist/formatters/cli-terminal.d.ts +27 -0
  2. package/dist/formatters/cli-terminal.d.ts.map +1 -0
  3. package/dist/formatters/cli-terminal.js +412 -0
  4. package/dist/formatters/cli-terminal.js.map +1 -0
  5. package/dist/formatters/github-comment.d.ts +41 -0
  6. package/dist/formatters/github-comment.d.ts.map +1 -0
  7. package/dist/formatters/github-comment.js +306 -0
  8. package/dist/formatters/github-comment.js.map +1 -0
  9. package/dist/formatters/grouping.d.ts +52 -0
  10. package/dist/formatters/grouping.d.ts.map +1 -0
  11. package/dist/formatters/grouping.js +152 -0
  12. package/dist/formatters/grouping.js.map +1 -0
  13. package/dist/formatters/index.d.ts +9 -0
  14. package/dist/formatters/index.d.ts.map +1 -0
  15. package/dist/formatters/index.js +35 -0
  16. package/dist/formatters/index.js.map +1 -0
  17. package/dist/formatters/vscode-diagnostic.d.ts +103 -0
  18. package/dist/formatters/vscode-diagnostic.d.ts.map +1 -0
  19. package/dist/formatters/vscode-diagnostic.js +151 -0
  20. package/dist/formatters/vscode-diagnostic.js.map +1 -0
  21. package/dist/index.d.ts +52 -0
  22. package/dist/index.d.ts.map +1 -0
  23. package/dist/index.js +648 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/layer1/comments.d.ts +8 -0
  26. package/dist/layer1/comments.d.ts.map +1 -0
  27. package/dist/layer1/comments.js +203 -0
  28. package/dist/layer1/comments.js.map +1 -0
  29. package/dist/layer1/config-audit.d.ts +8 -0
  30. package/dist/layer1/config-audit.d.ts.map +1 -0
  31. package/dist/layer1/config-audit.js +252 -0
  32. package/dist/layer1/config-audit.js.map +1 -0
  33. package/dist/layer1/entropy.d.ts +8 -0
  34. package/dist/layer1/entropy.d.ts.map +1 -0
  35. package/dist/layer1/entropy.js +500 -0
  36. package/dist/layer1/entropy.js.map +1 -0
  37. package/dist/layer1/file-flags.d.ts +7 -0
  38. package/dist/layer1/file-flags.d.ts.map +1 -0
  39. package/dist/layer1/file-flags.js +112 -0
  40. package/dist/layer1/file-flags.js.map +1 -0
  41. package/dist/layer1/index.d.ts +36 -0
  42. package/dist/layer1/index.d.ts.map +1 -0
  43. package/dist/layer1/index.js +132 -0
  44. package/dist/layer1/index.js.map +1 -0
  45. package/dist/layer1/patterns.d.ts +8 -0
  46. package/dist/layer1/patterns.d.ts.map +1 -0
  47. package/dist/layer1/patterns.js +482 -0
  48. package/dist/layer1/patterns.js.map +1 -0
  49. package/dist/layer1/urls.d.ts +8 -0
  50. package/dist/layer1/urls.d.ts.map +1 -0
  51. package/dist/layer1/urls.js +296 -0
  52. package/dist/layer1/urls.js.map +1 -0
  53. package/dist/layer1/weak-crypto.d.ts +7 -0
  54. package/dist/layer1/weak-crypto.d.ts.map +1 -0
  55. package/dist/layer1/weak-crypto.js +291 -0
  56. package/dist/layer1/weak-crypto.js.map +1 -0
  57. package/dist/layer2/ai-agent-tools.d.ts +19 -0
  58. package/dist/layer2/ai-agent-tools.d.ts.map +1 -0
  59. package/dist/layer2/ai-agent-tools.js +528 -0
  60. package/dist/layer2/ai-agent-tools.js.map +1 -0
  61. package/dist/layer2/ai-endpoint-protection.d.ts +36 -0
  62. package/dist/layer2/ai-endpoint-protection.d.ts.map +1 -0
  63. package/dist/layer2/ai-endpoint-protection.js +332 -0
  64. package/dist/layer2/ai-endpoint-protection.js.map +1 -0
  65. package/dist/layer2/ai-execution-sinks.d.ts +18 -0
  66. package/dist/layer2/ai-execution-sinks.d.ts.map +1 -0
  67. package/dist/layer2/ai-execution-sinks.js +496 -0
  68. package/dist/layer2/ai-execution-sinks.js.map +1 -0
  69. package/dist/layer2/ai-fingerprinting.d.ts +7 -0
  70. package/dist/layer2/ai-fingerprinting.d.ts.map +1 -0
  71. package/dist/layer2/ai-fingerprinting.js +654 -0
  72. package/dist/layer2/ai-fingerprinting.js.map +1 -0
  73. package/dist/layer2/ai-prompt-hygiene.d.ts +19 -0
  74. package/dist/layer2/ai-prompt-hygiene.d.ts.map +1 -0
  75. package/dist/layer2/ai-prompt-hygiene.js +356 -0
  76. package/dist/layer2/ai-prompt-hygiene.js.map +1 -0
  77. package/dist/layer2/ai-rag-safety.d.ts +21 -0
  78. package/dist/layer2/ai-rag-safety.d.ts.map +1 -0
  79. package/dist/layer2/ai-rag-safety.js +459 -0
  80. package/dist/layer2/ai-rag-safety.js.map +1 -0
  81. package/dist/layer2/ai-schema-validation.d.ts +25 -0
  82. package/dist/layer2/ai-schema-validation.d.ts.map +1 -0
  83. package/dist/layer2/ai-schema-validation.js +375 -0
  84. package/dist/layer2/ai-schema-validation.js.map +1 -0
  85. package/dist/layer2/auth-antipatterns.d.ts +20 -0
  86. package/dist/layer2/auth-antipatterns.d.ts.map +1 -0
  87. package/dist/layer2/auth-antipatterns.js +333 -0
  88. package/dist/layer2/auth-antipatterns.js.map +1 -0
  89. package/dist/layer2/byok-patterns.d.ts +12 -0
  90. package/dist/layer2/byok-patterns.d.ts.map +1 -0
  91. package/dist/layer2/byok-patterns.js +299 -0
  92. package/dist/layer2/byok-patterns.js.map +1 -0
  93. package/dist/layer2/dangerous-functions.d.ts +7 -0
  94. package/dist/layer2/dangerous-functions.d.ts.map +1 -0
  95. package/dist/layer2/dangerous-functions.js +1375 -0
  96. package/dist/layer2/dangerous-functions.js.map +1 -0
  97. package/dist/layer2/data-exposure.d.ts +16 -0
  98. package/dist/layer2/data-exposure.d.ts.map +1 -0
  99. package/dist/layer2/data-exposure.js +279 -0
  100. package/dist/layer2/data-exposure.js.map +1 -0
  101. package/dist/layer2/framework-checks.d.ts +7 -0
  102. package/dist/layer2/framework-checks.d.ts.map +1 -0
  103. package/dist/layer2/framework-checks.js +388 -0
  104. package/dist/layer2/framework-checks.js.map +1 -0
  105. package/dist/layer2/index.d.ts +58 -0
  106. package/dist/layer2/index.d.ts.map +1 -0
  107. package/dist/layer2/index.js +380 -0
  108. package/dist/layer2/index.js.map +1 -0
  109. package/dist/layer2/logic-gates.d.ts +7 -0
  110. package/dist/layer2/logic-gates.d.ts.map +1 -0
  111. package/dist/layer2/logic-gates.js +182 -0
  112. package/dist/layer2/logic-gates.js.map +1 -0
  113. package/dist/layer2/risky-imports.d.ts +7 -0
  114. package/dist/layer2/risky-imports.d.ts.map +1 -0
  115. package/dist/layer2/risky-imports.js +161 -0
  116. package/dist/layer2/risky-imports.js.map +1 -0
  117. package/dist/layer2/variables.d.ts +8 -0
  118. package/dist/layer2/variables.d.ts.map +1 -0
  119. package/dist/layer2/variables.js +152 -0
  120. package/dist/layer2/variables.js.map +1 -0
  121. package/dist/layer3/anthropic.d.ts +83 -0
  122. package/dist/layer3/anthropic.d.ts.map +1 -0
  123. package/dist/layer3/anthropic.js +1745 -0
  124. package/dist/layer3/anthropic.js.map +1 -0
  125. package/dist/layer3/index.d.ts +24 -0
  126. package/dist/layer3/index.d.ts.map +1 -0
  127. package/dist/layer3/index.js +119 -0
  128. package/dist/layer3/index.js.map +1 -0
  129. package/dist/layer3/openai.d.ts +25 -0
  130. package/dist/layer3/openai.d.ts.map +1 -0
  131. package/dist/layer3/openai.js +238 -0
  132. package/dist/layer3/openai.js.map +1 -0
  133. package/dist/layer3/package-check.d.ts +63 -0
  134. package/dist/layer3/package-check.d.ts.map +1 -0
  135. package/dist/layer3/package-check.js +508 -0
  136. package/dist/layer3/package-check.js.map +1 -0
  137. package/dist/modes/incremental.d.ts +66 -0
  138. package/dist/modes/incremental.d.ts.map +1 -0
  139. package/dist/modes/incremental.js +200 -0
  140. package/dist/modes/incremental.js.map +1 -0
  141. package/dist/tiers.d.ts +125 -0
  142. package/dist/tiers.d.ts.map +1 -0
  143. package/dist/tiers.js +234 -0
  144. package/dist/tiers.js.map +1 -0
  145. package/dist/types.d.ts +175 -0
  146. package/dist/types.d.ts.map +1 -0
  147. package/dist/types.js +50 -0
  148. package/dist/types.js.map +1 -0
  149. package/dist/utils/auth-helper-detector.d.ts +56 -0
  150. package/dist/utils/auth-helper-detector.d.ts.map +1 -0
  151. package/dist/utils/auth-helper-detector.js +360 -0
  152. package/dist/utils/auth-helper-detector.js.map +1 -0
  153. package/dist/utils/context-helpers.d.ts +96 -0
  154. package/dist/utils/context-helpers.d.ts.map +1 -0
  155. package/dist/utils/context-helpers.js +493 -0
  156. package/dist/utils/context-helpers.js.map +1 -0
  157. package/dist/utils/diff-detector.d.ts +53 -0
  158. package/dist/utils/diff-detector.d.ts.map +1 -0
  159. package/dist/utils/diff-detector.js +104 -0
  160. package/dist/utils/diff-detector.js.map +1 -0
  161. package/dist/utils/diff-parser.d.ts +80 -0
  162. package/dist/utils/diff-parser.d.ts.map +1 -0
  163. package/dist/utils/diff-parser.js +202 -0
  164. package/dist/utils/diff-parser.js.map +1 -0
  165. package/dist/utils/imported-auth-detector.d.ts +37 -0
  166. package/dist/utils/imported-auth-detector.d.ts.map +1 -0
  167. package/dist/utils/imported-auth-detector.js +251 -0
  168. package/dist/utils/imported-auth-detector.js.map +1 -0
  169. package/dist/utils/middleware-detector.d.ts +55 -0
  170. package/dist/utils/middleware-detector.d.ts.map +1 -0
  171. package/dist/utils/middleware-detector.js +260 -0
  172. package/dist/utils/middleware-detector.js.map +1 -0
  173. package/dist/utils/oauth-flow-detector.d.ts +41 -0
  174. package/dist/utils/oauth-flow-detector.d.ts.map +1 -0
  175. package/dist/utils/oauth-flow-detector.js +202 -0
  176. package/dist/utils/oauth-flow-detector.js.map +1 -0
  177. package/dist/utils/path-exclusions.d.ts +55 -0
  178. package/dist/utils/path-exclusions.d.ts.map +1 -0
  179. package/dist/utils/path-exclusions.js +222 -0
  180. package/dist/utils/path-exclusions.js.map +1 -0
  181. package/dist/utils/project-context-builder.d.ts +119 -0
  182. package/dist/utils/project-context-builder.d.ts.map +1 -0
  183. package/dist/utils/project-context-builder.js +534 -0
  184. package/dist/utils/project-context-builder.js.map +1 -0
  185. package/dist/utils/registry-clients.d.ts +93 -0
  186. package/dist/utils/registry-clients.d.ts.map +1 -0
  187. package/dist/utils/registry-clients.js +273 -0
  188. package/dist/utils/registry-clients.js.map +1 -0
  189. package/dist/utils/trpc-analyzer.d.ts +78 -0
  190. package/dist/utils/trpc-analyzer.d.ts.map +1 -0
  191. package/dist/utils/trpc-analyzer.js +297 -0
  192. package/dist/utils/trpc-analyzer.js.map +1 -0
  193. package/package.json +45 -0
  194. package/src/__tests__/benchmark/fixtures/false-positives.ts +227 -0
  195. package/src/__tests__/benchmark/fixtures/index.ts +68 -0
  196. package/src/__tests__/benchmark/fixtures/layer1/config-audit.ts +364 -0
  197. package/src/__tests__/benchmark/fixtures/layer1/hardcoded-secrets.ts +173 -0
  198. package/src/__tests__/benchmark/fixtures/layer1/high-entropy.ts +234 -0
  199. package/src/__tests__/benchmark/fixtures/layer1/index.ts +31 -0
  200. package/src/__tests__/benchmark/fixtures/layer1/sensitive-urls.ts +90 -0
  201. package/src/__tests__/benchmark/fixtures/layer1/weak-crypto.ts +197 -0
  202. package/src/__tests__/benchmark/fixtures/layer2/ai-agent-tools.ts +170 -0
  203. package/src/__tests__/benchmark/fixtures/layer2/ai-endpoint-protection.ts +418 -0
  204. package/src/__tests__/benchmark/fixtures/layer2/ai-execution-sinks.ts +189 -0
  205. package/src/__tests__/benchmark/fixtures/layer2/ai-fingerprinting.ts +316 -0
  206. package/src/__tests__/benchmark/fixtures/layer2/ai-prompt-hygiene.ts +178 -0
  207. package/src/__tests__/benchmark/fixtures/layer2/ai-rag-safety.ts +184 -0
  208. package/src/__tests__/benchmark/fixtures/layer2/ai-schema-validation.ts +434 -0
  209. package/src/__tests__/benchmark/fixtures/layer2/auth-antipatterns.ts +159 -0
  210. package/src/__tests__/benchmark/fixtures/layer2/byok-patterns.ts +112 -0
  211. package/src/__tests__/benchmark/fixtures/layer2/dangerous-functions.ts +246 -0
  212. package/src/__tests__/benchmark/fixtures/layer2/data-exposure.ts +168 -0
  213. package/src/__tests__/benchmark/fixtures/layer2/framework-checks.ts +346 -0
  214. package/src/__tests__/benchmark/fixtures/layer2/index.ts +67 -0
  215. package/src/__tests__/benchmark/fixtures/layer2/injection-vulnerabilities.ts +239 -0
  216. package/src/__tests__/benchmark/fixtures/layer2/logic-gates.ts +246 -0
  217. package/src/__tests__/benchmark/fixtures/layer2/risky-imports.ts +231 -0
  218. package/src/__tests__/benchmark/fixtures/layer2/variables.ts +167 -0
  219. package/src/__tests__/benchmark/index.ts +29 -0
  220. package/src/__tests__/benchmark/run-benchmark.ts +144 -0
  221. package/src/__tests__/benchmark/run-depth-validation.ts +206 -0
  222. package/src/__tests__/benchmark/run-real-world-test.ts +243 -0
  223. package/src/__tests__/benchmark/security-benchmark-script.ts +1737 -0
  224. package/src/__tests__/benchmark/tier-integration-script.ts +177 -0
  225. package/src/__tests__/benchmark/types.ts +144 -0
  226. package/src/__tests__/benchmark/utils/test-runner.ts +475 -0
  227. package/src/__tests__/regression/known-false-positives.test.ts +467 -0
  228. package/src/__tests__/snapshots/__snapshots__/scan-depth.test.ts.snap +178 -0
  229. package/src/__tests__/snapshots/scan-depth.test.ts +258 -0
  230. package/src/__tests__/validation/analyze-results.ts +542 -0
  231. package/src/__tests__/validation/extract-for-triage.ts +146 -0
  232. package/src/__tests__/validation/fp-deep-analysis.ts +327 -0
  233. package/src/__tests__/validation/run-validation.ts +364 -0
  234. package/src/__tests__/validation/triage-template.md +132 -0
  235. package/src/formatters/cli-terminal.ts +446 -0
  236. package/src/formatters/github-comment.ts +382 -0
  237. package/src/formatters/grouping.ts +190 -0
  238. package/src/formatters/index.ts +47 -0
  239. package/src/formatters/vscode-diagnostic.ts +243 -0
  240. package/src/index.ts +823 -0
  241. package/src/layer1/comments.ts +218 -0
  242. package/src/layer1/config-audit.ts +289 -0
  243. package/src/layer1/entropy.ts +583 -0
  244. package/src/layer1/file-flags.ts +127 -0
  245. package/src/layer1/index.ts +181 -0
  246. package/src/layer1/patterns.ts +516 -0
  247. package/src/layer1/urls.ts +334 -0
  248. package/src/layer1/weak-crypto.ts +328 -0
  249. package/src/layer2/ai-agent-tools.ts +601 -0
  250. package/src/layer2/ai-endpoint-protection.ts +387 -0
  251. package/src/layer2/ai-execution-sinks.ts +580 -0
  252. package/src/layer2/ai-fingerprinting.ts +758 -0
  253. package/src/layer2/ai-prompt-hygiene.ts +411 -0
  254. package/src/layer2/ai-rag-safety.ts +511 -0
  255. package/src/layer2/ai-schema-validation.ts +421 -0
  256. package/src/layer2/auth-antipatterns.ts +394 -0
  257. package/src/layer2/byok-patterns.ts +336 -0
  258. package/src/layer2/dangerous-functions.ts +1563 -0
  259. package/src/layer2/data-exposure.ts +315 -0
  260. package/src/layer2/framework-checks.ts +433 -0
  261. package/src/layer2/index.ts +473 -0
  262. package/src/layer2/logic-gates.ts +206 -0
  263. package/src/layer2/risky-imports.ts +186 -0
  264. package/src/layer2/variables.ts +166 -0
  265. package/src/layer3/anthropic.ts +2030 -0
  266. package/src/layer3/index.ts +130 -0
  267. package/src/layer3/package-check.ts +604 -0
  268. package/src/modes/incremental.ts +293 -0
  269. package/src/tiers.ts +318 -0
  270. package/src/types.ts +284 -0
  271. package/src/utils/auth-helper-detector.ts +443 -0
  272. package/src/utils/context-helpers.ts +535 -0
  273. package/src/utils/diff-detector.ts +135 -0
  274. package/src/utils/diff-parser.ts +272 -0
  275. package/src/utils/imported-auth-detector.ts +320 -0
  276. package/src/utils/middleware-detector.ts +333 -0
  277. package/src/utils/oauth-flow-detector.ts +246 -0
  278. package/src/utils/path-exclusions.ts +266 -0
  279. package/src/utils/project-context-builder.ts +707 -0
  280. package/src/utils/registry-clients.ts +351 -0
  281. package/src/utils/trpc-analyzer.ts +382 -0
@@ -0,0 +1,2030 @@
1
+ /**
2
+ * Layer 3: AI Semantic Analysis
3
+ * Uses Claude to perform deep security analysis including:
4
+ * - Taint analysis (data flow from sources to sinks)
5
+ * - Business logic flaw detection
6
+ * - Missing authorization checks
7
+ * - Cryptography validation
8
+ * - Data exposure detection
9
+ * - Framework-specific deep analysis
10
+ */
11
+
12
+ import Anthropic from '@anthropic-ai/sdk'
13
+ import OpenAI from 'openai'
14
+ import type { Vulnerability, VulnerabilitySeverity, VulnerabilityCategory, ScanFile, ValidationStatus } from '../types'
15
+ import {
16
+ isTestOrMockFile,
17
+ isExampleFile,
18
+ isScannerOrFixtureFile,
19
+ isEnvVarReference,
20
+ isPublicEndpoint,
21
+ isComment,
22
+ } from '../utils/context-helpers'
23
+ import { buildProjectContext, getFileValidationContext, type ProjectContext } from '../utils/project-context-builder'
24
+ // Import tier system for tier-aware auto-dismiss
25
+ import { getTierForCategory, type DetectorTier } from '../tiers'
26
+
27
// ============================================================================
// Cost Monitoring Types
// ============================================================================

/**
 * Aggregate statistics for one AI-validation pass: how findings were
 * triaged (validated / confirmed / dismissed / downgraded / auto-dismissed)
 * plus estimated token usage, cache behavior, and cost.
 */
export interface ValidationStats {
  /** Total findings processed (input) */
  totalFindings: number
  /** Findings that went through AI validation */
  validatedFindings: number
  /** Findings confirmed as true positives */
  confirmedFindings: number
  /** Findings dismissed as false positives */
  dismissedFindings: number
  /** Findings with severity adjusted down */
  downgradedFindings: number
  /** Findings auto-dismissed before AI (test files, etc.) */
  autoDismissedFindings: number
  /** Estimated input tokens used */
  estimatedInputTokens: number
  /** Estimated output tokens used */
  estimatedOutputTokens: number
  // NOTE(review): originally documented as "based on Haiku pricing", but this
  // file defines GPT5_MINI_PRICING — confirm which model's rates are billed.
  /** Estimated cost in USD (based on the model pricing constants in this file) */
  estimatedCost: number
  /** Number of API calls made */
  apiCalls: number
  /** Cache creation tokens (first write to cache) */
  cacheCreationTokens: number
  /** Cache read tokens (subsequent reads from cache) */
  cacheReadTokens: number
  /** Cache hit rate (0-1) */
  cacheHitRate: number
}
59
+
60
/** Result of running AI validation over a set of findings. */
export interface AIValidationResult {
  /** The vulnerability list produced by the validation run. */
  vulnerabilities: Vulnerability[]
  /** Outcome counts and token/cost accounting for the run. */
  stats: ValidationStats
}
64
+
65
// ============================================================================
// Phase 2: Multi-File Batching Configuration
// ============================================================================

// Number of files to include in each API call (Phase 2 optimization).
// Batching multiple files reduces per-call API overhead and leverages
// prompt caching better (the shared system prompt is reused across files).
const FILES_PER_API_BATCH = 5

// Number of API batches to process in parallel (Phase 3 optimization).
// Higher values = faster scans but more API load; OpenAI handles this well.
// NOTE(review): tuning comment mentions OpenAI although this module is named
// anthropic.ts — both providers are used here; confirm which one this targets.
const PARALLEL_API_BATCHES = 4
76
+
77
+ // Initialize Anthropic client
78
+ function getAnthropicClient(): Anthropic {
79
+ const apiKey = process.env.ANTHROPIC_API_KEY
80
+ if (!apiKey) {
81
+ throw new Error('ANTHROPIC_API_KEY environment variable is not set')
82
+ }
83
+ return new Anthropic({ apiKey })
84
+ }
85
+
86
+ // Initialize OpenAI client
87
+ let openaiClient: OpenAI | null = null
88
+ function getOpenAIClient(): OpenAI {
89
+ if (!openaiClient) {
90
+ const apiKey = process.env.OPENAI_API_KEY
91
+ if (!apiKey) {
92
+ throw new Error('OPENAI_API_KEY environment variable is not set')
93
+ }
94
+ openaiClient = new OpenAI({ apiKey })
95
+ }
96
+ return openaiClient
97
+ }
98
+
99
+ // GPT-5-mini pricing constants (per 1M tokens)
100
+ const GPT5_MINI_PRICING = {
101
+ input: 0.25, // $0.25 per 1M tokens
102
+ cached: 0.025, // $0.025 per 1M tokens (10% of input)
103
+ output: 2.00, // $2.00 per 1M tokens
104
+ }
105
+
106
// ============================================================================
// Smart Auto-Dismiss Rules (No AI needed - instant filtering)
// ============================================================================

/** A single cheap heuristic that can dismiss a finding without an AI call. */
interface AutoDismissRule {
  /** Machine-readable rule identifier, recorded on the dismissal record. */
  name: string
  /** Returns true when this rule should auto-dismiss the finding. */
  check: (finding: Vulnerability, fileContent?: string) => boolean
  /** Human-readable explanation, recorded alongside the dismissal. */
  reason: string
}
115
+
116
// Ordered list of deterministic pre-filters applied before any AI call.
// The FIRST matching rule dismisses the finding (see applyAutoDismissRules),
// so broader file-path rules are listed before category-specific ones.
const AUTO_DISMISS_RULES: AutoDismissRule[] = [
  // Test files - often contain intentional "vulnerable" patterns for testing
  {
    name: 'test_file',
    check: (finding) => isTestOrMockFile(finding.filePath),
    reason: 'Finding in test/mock file',
  },

  // Example/demo code - not production code
  {
    name: 'example_file',
    check: (finding) => isExampleFile(finding.filePath),
    reason: 'Finding in example/demo file',
  },

  // Documentation files (matched purely by file extension)
  {
    name: 'documentation_file',
    check: (finding) => /\.(md|mdx|txt|rst)$/i.test(finding.filePath),
    reason: 'Finding in documentation file',
  },

  // Scanner/security tool code itself
  {
    name: 'scanner_code',
    check: (finding) => isScannerOrFixtureFile(finding.filePath),
    reason: 'Finding in scanner/fixture code',
  },

  // Environment variable references (not hardcoded secrets).
  // Only applies to secret-ish categories; other categories fall through.
  {
    name: 'env_var_reference',
    check: (finding) => {
      if (finding.category !== 'hardcoded_secret' && finding.category !== 'high_entropy_string') {
        return false
      }
      return isEnvVarReference(finding.lineContent)
    },
    reason: 'Uses environment variable (not hardcoded)',
  },

  // Public health check endpoints don't need auth
  {
    name: 'health_check_endpoint',
    check: (finding) => {
      if (finding.category !== 'missing_auth') return false
      return isPublicEndpoint(finding.lineContent, finding.filePath)
    },
    reason: 'Public health check endpoint (auth not required)',
  },

  // CSS/Tailwind classes flagged as high entropy.
  // Requires at least two indicators so one incidental word doesn't match.
  {
    name: 'css_classes',
    check: (finding) => {
      if (finding.category !== 'high_entropy_string') return false
      const cssIndicators = ['flex', 'grid', 'text-', 'bg-', 'px-', 'py-', 'rounded', 'shadow', 'hover:', 'dark:']
      const lowerLine = finding.lineContent.toLowerCase()
      const matchCount = cssIndicators.filter(ind => lowerLine.includes(ind)).length
      return matchCount >= 2
    },
    reason: 'CSS/Tailwind classes (not a secret)',
  },

  // Comment lines shouldn't be flagged for most categories
  {
    name: 'comment_line',
    check: (finding) => {
      // Some categories are valid in comments (e.g., TODO security)
      if (finding.category === 'ai_pattern') return false
      return isComment(finding.lineContent)
    },
    reason: 'Code comment (not executable)',
  },

  // Info severity already - no need to validate.
  // BUT: Only auto-dismiss info-severity for Tier A (core) findings.
  // Tier B (ai_assisted) findings MUST go through AI validation even at info
  // severity because detectors may have pre-downgraded them based on partial
  // context.
  {
    name: 'info_severity_core_only',
    check: (finding) => {
      if (finding.severity !== 'info') return false
      // Only auto-dismiss info-severity for Tier A (core) findings.
      // Tier B should always go through AI for proper validation.
      const tier = getTierForCategory(finding.category, finding.layer)
      return tier === 'core'
    },
    reason: 'Already info severity for core detector (low priority)',
  },

  // Generic success/error messages in ai_pattern
  {
    name: 'generic_message',
    check: (finding) => {
      if (finding.category !== 'ai_pattern') return false
      const genericPatterns = [
        /['"`](success|done|ok|completed|finished|saved|updated|deleted|created)['"`]/i,
        /['"`]something went wrong['"`]/i,
        /['"`]an error occurred['"`]/i,
        /console\.(log|info|debug)\s*\(\s*['"`][^'"]+['"`]\s*\)/i,
      ]
      return genericPatterns.some(p => p.test(finding.lineContent))
    },
    reason: 'Generic UI message (not security-relevant)',
  },

  // Type definitions with 'any' - often necessary for third-party libs
  {
    name: 'type_definition_any',
    check: (finding) => {
      if (finding.category !== 'ai_pattern') return false
      if (!finding.title.toLowerCase().includes('any')) return false
      // Check if it's in a .d.ts file or type definition context
      if (finding.filePath.includes('.d.ts')) return true
      const typeDefPatterns = [/^type\s+\w+\s*=/, /^interface\s+\w+/, /declare\s+(const|let|var|function|class)/]
      return typeDefPatterns.some(p => p.test(finding.lineContent.trim()))
    },
    reason: 'Type definition (not runtime code)',
  },

  // setTimeout/setInterval magic numbers - code style, not security
  {
    name: 'timeout_magic_number',
    check: (finding) => {
      if (finding.category !== 'ai_pattern') return false
      return /set(Timeout|Interval)\s*\([^,]+,\s*\d+\s*\)/.test(finding.lineContent)
    },
    reason: 'Timeout value (code style, not security)',
  },
]
247
+
248
+ /**
249
+ * Apply smart auto-dismiss rules to filter obvious false positives
250
+ * Returns findings that should be sent to AI validation
251
+ */
252
+ export function applyAutoDismissRules(findings: Vulnerability[]): {
253
+ toValidate: Vulnerability[]
254
+ dismissed: Array<{ finding: Vulnerability; rule: string; reason: string }>
255
+ } {
256
+ const toValidate: Vulnerability[] = []
257
+ const dismissed: Array<{ finding: Vulnerability; rule: string; reason: string }> = []
258
+
259
+ for (const finding of findings) {
260
+ let wasDismissed = false
261
+
262
+ for (const rule of AUTO_DISMISS_RULES) {
263
+ if (rule.check(finding)) {
264
+ dismissed.push({
265
+ finding,
266
+ rule: rule.name,
267
+ reason: rule.reason,
268
+ })
269
+ wasDismissed = true
270
+ break
271
+ }
272
+ }
273
+
274
+ if (!wasDismissed) {
275
+ toValidate.push(finding)
276
+ }
277
+ }
278
+
279
+ return { toValidate, dismissed }
280
+ }
281
+
282
// ============================================================================
// Security Analysis Prompt (Layer 3)
// ============================================================================

// System prompt for security analysis.
// NOTE(review): the template literal below is a runtime string sent verbatim
// to the model — do not reflow, re-indent, or "fix" its wording casually,
// since any whitespace or text change alters model behavior.
const SECURITY_ANALYSIS_PROMPT = `You are an expert security code reviewer. Analyze the provided code for security vulnerabilities.

Focus on these specific vulnerability types:

1. **Taint Analysis (Data Flow)**
- Track user input from sources (req.query, req.params, req.body, searchParams, URL parameters)
- To dangerous sinks (eval, dangerouslySetInnerHTML, exec, SQL queries, file operations)
- Flag any path where untrusted data reaches a dangerous function without sanitization

2. **SQL Injection**
- String concatenation in SQL queries
- Template literals with user input in queries
- Missing parameterized queries

3. **XSS (Cross-Site Scripting)**
- User input rendered without escaping
- dangerouslySetInnerHTML with user data
- innerHTML assignments
- NOTE: React/Next.js JSX automatically escapes content, so {variable} in JSX is NOT XSS

4. **Command Injection**
- exec, spawn, execSync with user input
- Shell command construction with variables

5. **Missing Authorization**
- API routes that modify data without auth checks
- Database writes in GET handlers
- Missing permission checks before sensitive operations

6. **Insecure Deserialization**
- JSON.parse on untrusted data without validation
- eval of serialized data

7. **Cryptography Validation**
- Weak algorithms: MD5 (for security), SHA1 (for security), DES, RC4
- Insecure random: Math.random() for tokens/keys/secrets
- Hardcoded encryption keys or IVs (not from env vars)
- ECB mode usage (patterns indicate cipher mode)
- Low iteration counts for PBKDF2 (< 10000)
- Short key lengths (< 256 bits for symmetric)
- Missing salt for password hashing
- createCipher() instead of createCipheriv()

8. **Data Exposure Detection**
- Logging sensitive data: console.log with passwords, tokens, secrets, API keys
- Stack traces exposed to clients: err.stack in response
- Returning entire user objects (may include password hash)
- Debug endpoints left in code: /debug, /test, /_internal routes
- Verbose error messages exposing internal details
- Sensitive data in error responses

9. **Framework-Specific Security**

**Next.js:**
- Server actions ('use server') without authentication
- Client components ('use client') accessing non-NEXT_PUBLIC_ env vars
- Middleware that returns NextResponse.next() without auth checks
- getServerSideProps without session validation
- Exposed API routes without rate limiting

**React:**
- Sensitive data stored in useState (visible in devtools)
- dangerouslySetInnerHTML with props/state
- useEffect making authenticated API calls without token validation

**Express:**
- Missing helmet() middleware for security headers
- CORS with origin: "*" in production
- Missing body-parser limits (DoS risk)
- Trust proxy without verification
- Error handlers exposing stack traces

IMPORTANT - DO NOT FLAG THESE AS VULNERABILITIES (common false positives):

**Framework Patterns (Safe by Design):**
- Next.js middleware using request.url for redirects (standard pattern)
- React/Next.js JSX rendering variables like {user.name} (auto-escaped by React)
- Supabase/Firebase client creation with NEXT_PUBLIC_ environment variables
- Using headers().get('host') in Next.js server actions

**Data Handling (Low Risk):**
- JSON.parse on data from YOUR OWN database (the app wrote it, it's trusted). Do NOT report this as a vulnerability. At most, you may mention an info-level robustness note if there is no error handling, but generally you should omit it.
- JSON.parse on localStorage data (same-origin, XSS is a separate issue). This is also not a security vulnerability. At most, you may suggest an info-level robustness improvement, and usually it is not worth mentioning.
- Passing user's own data to external APIs (user embedding their own content).
- Error messages that use error.message in catch blocks or are returned to the client as a generic error string are standard error handling. Treat them as LOW/INFO hardening at most, and DO NOT mark them as medium/high unless the message clearly includes credentials, secrets, or full stack traces.
- Generic configuration or feature messages like "OpenAI API key not configured" or "service disabled" are operational information, not security vulnerabilities. Treat them as info at most, or ignore them.

**Authentication Patterns (Context Matters):**
- Internal server-side functions only called from trusted code paths (OAuth callbacks, etc.)
- Functions with userId parameters called with session.user.id from authenticated contexts
- Service role keys used in server-side code with proper auth checks elsewhere
- API routes that call getCurrentUserId() and use the result (the auth check IS the userId call)

**BYOK (Bring Your Own Key) Patterns:**
- User-provided API keys in BYOK mode are INTENTIONAL - the user wants to use their own key
- This is a feature, not a vulnerability - don't flag it unless there's actual abuse potential
- When a BYOK key is only used TRANSIENTLY in memory for a single provider call (and is never logged or stored), and the route is authenticated, do NOT report this as a medium/high vulnerability. At most, you may surface a low/info note reminding the developer not to log or persist keys.
- Frontend components sending a BYOK key to an authenticated backend endpoint for one-shot use are expected behavior, not a vulnerability. Do NOT flag these as data_exposure or dangerous_function unless the key is logged, stored, or echoed back to the client.
- Only raise medium/high BYOK findings when keys are clearly stored (e.g., written to a database or long-term logs), logged in plaintext, or accepted by unauthenticated endpoints that attackers could abuse at scale.

**What TO Flag (Real Vulnerabilities):**
- SQL string concatenation with user input
- eval() or Function() with user-controlled strings
- Missing auth checks where sensitive data could be accessed by wrong user
- Actual hardcoded secrets (real API keys, not env var references)
- Command injection (exec/spawn with user input)

Respond ONLY with a JSON array of findings. Each finding must have:
{
"lineNumber": <number>,
"severity": "critical" | "high" | "medium" | "low",
"category": "sql_injection" | "xss" | "command_injection" | "missing_auth" | "dangerous_function",
"title": "<short title>",
"description": "<detailed explanation of the vulnerability>",
"suggestedFix": "<how to fix it>"
}

If no vulnerabilities are found, return an empty array: []

CRITICAL: Only report REAL vulnerabilities with HIGH confidence. Be conservative - it's better to miss a low-confidence issue than to report false positives. The code is likely using modern frameworks with built-in protections.`
407
+
408
/** One finding parsed from the model's JSON reply (see parseAIResponse). */
interface AIFinding {
  /** 1-based line number in the analyzed file, as reported by the model. */
  lineNumber: number
  /** Severity normalized through validateSeverity (unknown values → 'medium'). */
  severity: VulnerabilitySeverity
  /** Category normalized through validateCategory (unknown values → 'dangerous_function'). */
  category: VulnerabilityCategory
  /** Short human-readable title of the finding. */
  title: string
  /** Detailed explanation of the vulnerability. */
  description: string
  /** Suggested remediation; defaulted by parseAIResponse when the model omits it. */
  suggestedFix: string
}
416
+
417
/**
 * Optional project-level context fed into Layer-3 AI analysis so the model
 * can suppress false positives (e.g. "missing auth" on routes already
 * covered by middleware). Rendered into the prompt by
 * buildAuthContextForPrompt.
 */
export interface Layer3Context {
  /** Middleware configuration from project scan */
  middlewareConfig?: {
    /** True when an auth middleware was detected in the project. */
    hasAuthMiddleware: boolean
    /** Name of the detected auth provider, when known (falls back to 'auth' in the prompt). */
    authType?: string
    /** Route prefixes covered by the middleware; an empty list is rendered as "all /api/** routes protected". */
    protectedPaths: string[]
  }
  /** Auth helper context */
  authHelpers?: {
    /** True when the project uses auth helpers that throw on missing auth. */
    hasThrowingHelpers: boolean
    /** Human-readable summary of those helpers, injected verbatim into the prompt. */
    summary: string
  }
  /** Additional context string appended verbatim to the prompt. */
  additionalContext?: string
}
432
+
433
+ /**
434
+ * Build auth context string for AI prompt
435
+ */
436
+ function buildAuthContextForPrompt(ctx?: Layer3Context): string {
437
+ if (!ctx) return ''
438
+
439
+ const parts: string[] = []
440
+
441
+ if (ctx.middlewareConfig?.hasAuthMiddleware) {
442
+ parts.push(`**IMPORTANT AUTH CONTEXT**: This project uses ${ctx.middlewareConfig.authType || 'auth'} middleware.`)
443
+ if (ctx.middlewareConfig.protectedPaths.length > 0) {
444
+ parts.push(`Protected paths: ${ctx.middlewareConfig.protectedPaths.join(', ')}`)
445
+ } else {
446
+ parts.push('All /api/** routes are protected by default.')
447
+ }
448
+ parts.push('Routes under these paths are ALREADY AUTHENTICATED - do NOT flag them as "missing auth".')
449
+ parts.push('Client components calling these protected API routes are also safe - the backend handles auth.')
450
+ }
451
+
452
+ if (ctx.authHelpers?.hasThrowingHelpers) {
453
+ parts.push('')
454
+ parts.push('**AUTH HELPER FUNCTIONS**: This project uses throwing auth helpers that guarantee authenticated context:')
455
+ parts.push(ctx.authHelpers.summary)
456
+ parts.push('Code after these helper calls is GUARANTEED to be authenticated. Do NOT flag "missing auth" after these calls.')
457
+ }
458
+
459
+ if (ctx.additionalContext) {
460
+ parts.push('')
461
+ parts.push(ctx.additionalContext)
462
+ }
463
+
464
+ return parts.length > 0 ? '\n\n' + parts.join('\n') : ''
465
+ }
466
+
467
+ export async function analyzeWithAI(
468
+ file: ScanFile,
469
+ context?: Layer3Context
470
+ ): Promise<Vulnerability[]> {
471
+ const client = getAnthropicClient()
472
+
473
+ // Prepare the code with line numbers for reference
474
+ const numberedCode = file.content
475
+ .split('\n')
476
+ .map((line, i) => `${i + 1}: ${line}`)
477
+ .join('\n')
478
+
479
+ // Build auth context for the prompt
480
+ const authContext = buildAuthContextForPrompt(context)
481
+
482
+ const userMessage = `Analyze this ${file.language} file for security vulnerabilities:
483
+
484
+ File: ${file.path}${authContext}
485
+
486
+ \`\`\`${file.language}
487
+ ${numberedCode}
488
+ \`\`\`
489
+
490
+ Return ONLY a JSON array of findings.`
491
+
492
+ try {
493
+ const response = await client.messages.create({
494
+ model: 'claude-3-5-haiku-20241022',
495
+ max_tokens: 4096,
496
+ system: SECURITY_ANALYSIS_PROMPT,
497
+ messages: [
498
+ {
499
+ role: 'user',
500
+ content: userMessage,
501
+ },
502
+ ],
503
+ })
504
+
505
+ // Extract text content from response
506
+ const textContent = response.content.find((block: { type: string }) => block.type === 'text')
507
+ if (!textContent || textContent.type !== 'text') {
508
+ console.error('No text content in AI response')
509
+ return []
510
+ }
511
+
512
+ // Parse the JSON response
513
+ const findings = parseAIResponse(textContent.text)
514
+
515
+ // Convert to Vulnerability format
516
+ return findings.map((finding, index) => ({
517
+ id: `ai-${file.path}-${finding.lineNumber}-${index}`,
518
+ filePath: file.path,
519
+ lineNumber: finding.lineNumber,
520
+ lineContent: getLineContent(file.content, finding.lineNumber),
521
+ severity: finding.severity,
522
+ category: finding.category,
523
+ title: finding.title,
524
+ description: finding.description,
525
+ suggestedFix: finding.suggestedFix,
526
+ confidence: 'high' as const,
527
+ layer: 3 as const,
528
+ }))
529
+ } catch (error) {
530
+ console.error('AI analysis error:', error)
531
+ return []
532
+ }
533
+ }
534
+
535
+ // Parse the AI response JSON
536
+ function parseAIResponse(response: string): AIFinding[] {
537
+ try {
538
+ // Try to extract JSON from the response
539
+ const jsonMatch = response.match(/\[[\s\S]*\]/)
540
+ if (!jsonMatch) {
541
+ return []
542
+ }
543
+
544
+ const parsed = JSON.parse(jsonMatch[0])
545
+
546
+ // Validate the structure
547
+ if (!Array.isArray(parsed)) {
548
+ return []
549
+ }
550
+
551
+ return parsed.filter(item =>
552
+ typeof item.lineNumber === 'number' &&
553
+ typeof item.severity === 'string' &&
554
+ typeof item.category === 'string' &&
555
+ typeof item.title === 'string' &&
556
+ typeof item.description === 'string'
557
+ ).map(item => ({
558
+ lineNumber: item.lineNumber,
559
+ severity: validateSeverity(item.severity),
560
+ category: validateCategory(item.category),
561
+ title: item.title,
562
+ description: item.description,
563
+ suggestedFix: item.suggestedFix || 'Review and fix the security issue',
564
+ }))
565
+ } catch (error) {
566
+ console.error('Failed to parse AI response:', error)
567
+ return []
568
+ }
569
+ }
570
+
571
+ function validateSeverity(severity: string): VulnerabilitySeverity {
572
+ const valid: VulnerabilitySeverity[] = ['critical', 'high', 'medium', 'low']
573
+ return valid.includes(severity as VulnerabilitySeverity)
574
+ ? severity as VulnerabilitySeverity
575
+ : 'medium'
576
+ }
577
+
578
+ function validateCategory(category: string): VulnerabilityCategory {
579
+ const valid: VulnerabilityCategory[] = [
580
+ 'sql_injection', 'xss', 'command_injection', 'missing_auth',
581
+ 'dangerous_function', 'hardcoded_secret', 'high_entropy_string',
582
+ 'sensitive_variable', 'security_bypass', 'insecure_config',
583
+ 'suspicious_package', 'cors_misconfiguration', 'root_container',
584
+ 'weak_crypto', 'sensitive_url', 'ai_pattern', 'dangerous_file',
585
+ 'data_exposure', // NEW: for logging/exposing sensitive data
586
+ ]
587
+ return valid.includes(category as VulnerabilityCategory)
588
+ ? category as VulnerabilityCategory
589
+ : 'dangerous_function'
590
+ }
591
+
592
+ function getLineContent(content: string, lineNumber: number): string {
593
+ const lines = content.split('\n')
594
+ return lines[lineNumber - 1]?.trim() || ''
595
+ }
596
+
597
+ // Batch analyze multiple files (with rate limiting)
598
+ export async function batchAnalyzeWithAI(
599
+ files: ScanFile[],
600
+ context?: Layer3Context,
601
+ maxConcurrent: number = 3
602
+ ): Promise<Vulnerability[]> {
603
+ const vulnerabilities: Vulnerability[] = []
604
+
605
+ // Process files in batches to avoid rate limits
606
+ for (let i = 0; i < files.length; i += maxConcurrent) {
607
+ const batch = files.slice(i, i + maxConcurrent)
608
+ const results = await Promise.all(
609
+ batch.map(file => analyzeWithAI(file, context).catch(err => {
610
+ console.error(`AI analysis failed for ${file.path}:`, err)
611
+ return []
612
+ }))
613
+ )
614
+ vulnerabilities.push(...results.flat())
615
+
616
+ // Small delay between batches to avoid rate limits
617
+ if (i + maxConcurrent < files.length) {
618
+ await new Promise(resolve => setTimeout(resolve, 500))
619
+ }
620
+ }
621
+
622
+ return vulnerabilities
623
+ }
624
+
625
+ // ============================================================================
626
+ // High-Context Validation Prompt (Section 3 Generalised Rules)
627
+ // ============================================================================
628
+
629
+ /**
630
+ * This prompt encodes the generalised security rules from CURRENTTASK.md Section 3.
631
+ * It is designed to work with full-file content and project context.
632
+ */
633
+ const HIGH_CONTEXT_VALIDATION_PROMPT = `You are an expert security code reviewer acting as a "Second-opinion AI Reviewer" for vulnerability findings from an automated scanner.
634
+
635
+ Your PRIMARY task: AGGRESSIVELY REJECT false positives and marginal findings. Only keep findings that are clearly exploitable or represent real security risk.
636
+
637
+ **CORE PHILOSOPHY**: A professional scanner should surface very few, high-confidence findings. When in doubt, REJECT the finding or downgrade to info.
638
+
639
+ ## Input Format
640
+ You will receive:
641
+ 1. **Project Context** - Architectural information about auth, data access, and secrets handling
642
+ 2. **Full File Content** - The entire file with line numbers
643
+ 3. **Candidate Findings** - List of potential vulnerabilities to validate
644
+
645
+ ## Core Validation Principles
646
+
647
+ ### 3.1 Authentication & Access Control
648
+ Recognise these SAFE patterns (downgrade to info or REJECT entirely):
649
+ - **Middleware-protected routes**: If project context shows auth middleware (Clerk, NextAuth, Auth0, custom), routes under protected paths are ALREADY GUARDED - do NOT flag as missing auth
650
+ - **Auth helper functions that THROW**: Functions like getCurrentUserId(), getSession(), auth() that throw/abort on missing auth guarantee authenticated context. Code AFTER these calls is authenticated.
651
+ - Do NOT suggest "if (!userId)" checks after calling throwing helpers - the check is redundant
652
+ - If helper throws, it returns Promise<string> not Promise<string|null> - userId is guaranteed non-null
653
+ - Common throwing helpers: getCurrentUserId(), requireAuth(), getUser(), auth().protect(), getSession() with throw
654
+ - **User-scoped queries**: Database queries filtered by user_id/tenant_id from authenticated session
655
+ - **Guard patterns**: Early returns or throws when auth fails (if (!user) return/throw)
656
+
657
+ Flag as REAL vulnerability (keep high severity) ONLY when:
658
+ - Route has no visible auth check AND is NOT covered by middleware AND has no throwing auth helper
659
+ - Sensitive operations without user scoping (cross-tenant access possible)
660
+ - Auth checks that can be bypassed (e.g., checking wrong variable)
661
+
662
+ **CRITICAL CONTRADICTION HANDLING**:
663
+ - If we detect both "protected by middleware" and "missing auth" on the same route - REJECT the "missing auth" finding
664
+ - If we detect both "uses throwing auth helper" and "missing auth" - REJECT the "missing auth" finding
665
+ - Client components calling these protected API routes should NOT be flagged for "missing auth"
666
+ - Adding "if (!userId)" after a throwing helper is a FALSE POSITIVE - reject it
667
+
668
+ ### 3.2 Deserialization & Unsafe Parsing
669
+ Distinguish by INPUT ORIGIN and error handling:
670
+ - **Application-controlled data** (database, config, localStorage): Low risk - downgrade to info
671
+ - JSON.parse on data YOUR app wrote is trusted
672
+ - Failures affect robustness, not security
673
+ - If ALSO wrapped in try-catch: REJECT the finding entirely
674
+ - **External/untrusted data** (HTTP request body, URL params): Higher risk
675
+ - With try-catch: downgrade to low, suggest SCHEMA VALIDATION (zod/joi/yup) not more try-catch
676
+ - Without try-catch: keep as medium, suggest both try-catch AND schema validation
677
+ - **request.json() / req.json()**: NOT a dangerous function
678
+ - This is the standard way to parse request bodies in modern frameworks
679
+ - Only suggest schema validation if none is visible nearby
680
+ - Severity: info at most
681
+
682
+ **CRITICAL JSON.parse RULES**:
683
+ - Do NOT suggest "add try/catch" when JSON.parse is ALREADY inside a try-catch block - this creates contradictory advice
684
+ - If JSON.parse is in try-catch with app-controlled data: REJECT the finding
685
+ - Prefer suggesting schema validation over generic try-catch for user input
686
+ - For sensitive sinks (DB writes, code execution): medium severity
687
+ - For display-only uses: low/info severity
688
+
689
+ ### 3.3 Logging & Error Handling
690
+ Distinguish LOGS vs RESPONSES with this severity ladder:
691
+
692
+ **Response Sinks (res.json, NextResponse.json, return) - Higher Risk:**
693
+ - Full error object or stack trace in response → **HIGH severity**
694
+ - Detailed internal fields (debug, trace, internal) → **MEDIUM severity**
695
+ - error.message only or static error strings → **LOW/INFO severity** (this is the RECOMMENDED pattern)
696
+
697
+ **Log Sinks (console.log, logger.info) - Lower Risk:**
698
+ - Logging error objects for debugging → **INFO severity** (hygiene, not security)
699
+ - Logging userId, query strings → **INFO severity** (privacy note)
700
+ - Logging passwords/secrets → **MEDIUM+ severity**
701
+ - JSON.stringify(error) in logs → **INFO severity**
702
+
703
+ **CRITICAL ERROR HANDLING RULES**:
704
+ - "error.message" in responses is usually SAFE and should NOT be HIGH severity
705
+ - HIGH severity is ONLY for responses that expose stacks, internal fields, or raw error objects
706
+ - Logging errors is STANDARD PRACTICE - don't flag it as a security issue unless it logs secrets
707
+
708
+ ### 3.4 XSS vs Prompt Injection
709
+ Keep these SEPARATE:
710
+ - **XSS**: Writing untrusted data into DOM/HTML sinks without escaping
711
+ - innerHTML with dynamic user data: flag as XSS
712
+ - React JSX {variable}: NOT XSS (auto-escaped)
713
+ - dangerouslySetInnerHTML with static content: info severity
714
+ - **Prompt Injection**: User content in LLM prompts
715
+ - NOT XSS - different threat model
716
+ - Downgrade to low/info unless clear path to high-impact actions
717
+ - Never label prompt issues as XSS
718
+
719
+ ### 3.5 Secrets, BYOK, and External Services
720
+ Distinguish these patterns:
721
+ - **Hardcoded secrets**: Real API keys in code = critical/high
722
+ - **Environment variables**: process.env.SECRET = safe (REJECT finding)
723
+ - **BYOK (Bring Your Own Key)**: User provides their own key for AI services
724
+ - This is a FEATURE, not a vulnerability
725
+ - Distinguish TRANSIENT USE vs STORAGE:
726
+ - Transient use (key in request body → API call → discarded): info severity, this is the IDEAL pattern
727
+ - Storage (key saved to database): check for user-scoping and encryption
728
+ - Severity ladder:
729
+ - Authenticated + transient use: info (feature, not vuln)
730
+ - Authenticated + user-scoped storage: low (suggest encryption at rest)
731
+ - Unauthenticated: medium (cost/abuse risk)
732
+ - Cross-tenant storage: medium (data isolation risk)
733
+ - Do NOT describe transient BYOK keys as "stored without encryption" - they are NOT stored
734
+
735
+ ### 3.6 DOM Sinks and Bootstrap Scripts
736
+ Recognise LOW-RISK patterns:
737
+ - Static scripts reading localStorage for theme/preferences
738
+ - Setting attributes from config without user input
739
+ - innerHTML with string literals only (no interpolation)
740
+
741
+ Flag as REAL when:
742
+ - User input flows to innerHTML/eval without sanitization
743
+ - Template literals with \${userInput} in DOM sinks
744
+
745
+ ### 3.7 AI/LLM-Specific Patterns
746
+
747
+ **Prompt Injection (ai_prompt_injection):**
748
+ - User input in system prompt WITHOUT delimiters (code fences, XML tags, separators) -> **HIGH** (real risk)
749
+ - User input in system prompt WITH clear delimiters -> **INFO** (properly fenced)
750
+ - Static prompts with no user interpolation -> **REJECT** (false positive)
751
+ - Prompt templates using proper parameterization/placeholders -> **REJECT**
752
+
753
+ **LLM Output Execution (ai_unsafe_execution):**
754
+ - LLM output fed to eval()/Function()/exec() WITHOUT sandbox -> **CRITICAL** (arbitrary code execution)
755
+ - LLM output to execution WITH sandbox (vm2, isolated-vm) -> **MEDIUM** (risk mitigated)
756
+ - LLM output to execution WITH validation AND sandbox -> **LOW** (well-protected)
757
+ - LLM output used for display only (console.log, UI) -> **REJECT** (not execution)
758
+ - Generated SQL from LLM without parameterization -> **CRITICAL** (SQL injection)
759
+ - Generated SQL with parameterized queries -> **MEDIUM** (logic may still be wrong)
760
+
761
+ **Agent Tool Permissions (ai_overpermissive_tool):**
762
+ - Tool with unrestricted file/network/exec access -> **HIGH** (overpermissive)
763
+ - Tool without user context verification -> **MEDIUM** (missing authorization)
764
+ - Tool with proper scoping, allowlists, and user verification -> **LOW** or **REJECT**
765
+ - Test files with tool definitions -> **INFO** or **REJECT**
766
+
767
+ **Hallucinated Dependencies (suspicious_package):**
768
+ - Package not found in registry -> **CRITICAL** (likely AI-hallucinated name)
769
+ - Very new package (less than 7 days old) with low downloads and typosquat pattern -> **HIGH**
770
+ - Legitimate looking package with source/repo but low popularity -> **MEDIUM** (needs review)
771
+ - Known legitimate package with unusual name (in allowlist) -> **REJECT**
772
+
773
+ **CRITICAL AI PATTERN RULES**:
774
+ - AI code generation often produces non-existent package names - flag these prominently
775
+ - Prompt injection is NOT the same as XSS - different threat model and severity
776
+ - Sandboxed code execution (vm2, isolated-vm) significantly reduces risk
777
+ - Agent tools need both access restrictions AND user context verification
778
+
779
+ ### 3.8 RAG Data Exfiltration (ai_rag_exfiltration)
780
+ Retrieval Augmented Generation systems can leak sensitive data across tenant boundaries.
781
+
782
+ **Unscoped Retrieval Queries:**
783
+ - Vector store query WITHOUT user/tenant filter -> **HIGH** (cross-tenant data access)
784
+ - .query(), .search(), .similaritySearch() without filter/where/userId/tenantId parameter
785
+ - LangChain retriever.invoke() without metadata filter
786
+ - Pinecone/Chroma/Weaviate query without namespace or metadata filter
787
+ - Query WITH proper scoping (filter by userId/tenantId) -> **REJECT** (properly scoped)
788
+ - Query with RLS-enabled Supabase tables -> **LOW/INFO** (verify RLS policy)
789
+
790
+ **Raw Context Exposure:**
791
+ - Raw sourceDocuments/chunks returned in API response -> **MEDIUM** (data leak to client)
792
+ - Raw context returned WITHOUT authentication -> **HIGH** (public data leak)
793
+ - Filtered response (only IDs, titles, metadata) -> **REJECT** (properly filtered)
794
+ - Response filtering visible nearby (.map, sanitize, redact) -> **INFO**
795
+
796
+ **Context Logging:**
797
+ - Logging retrieved documents (debug) -> **INFO** (hygiene, not direct risk)
798
+ - Logging full prompts with context -> **LOW** (audit concern if logs are accessible)
799
+ - Persisting prompts/context to database -> **MEDIUM** (sensitive data retention)
800
+
801
+ **CRITICAL RAG RULES**:
802
+ - Cross-tenant data access is the PRIMARY risk - always check for user/tenant scoping
803
+ - Authenticated endpoints exposing context are MEDIUM; unauthenticated are HIGH
804
+ - Debug logging is INFO severity - it's not a direct vulnerability
805
+ - If RLS or middleware protection is visible, downgrade significantly
806
+
807
+ ### 3.9 AI Endpoint Protection (ai_endpoint_unprotected)
808
+ AI/LLM API endpoints can incur significant costs and enable data exfiltration.
809
+
810
+ **No Authentication + No Rate Limiting -> HIGH:**
811
+ - Endpoint calls OpenAI/Anthropic/etc. without any auth check or rate limit
812
+ - Anyone on the internet can abuse the endpoint and run up API costs
813
+ - Potential for prompt exfiltration or model abuse
814
+
815
+ **Has Rate Limiting but No Authentication -> MEDIUM:**
816
+ - Rate limit provides some protection against abuse
817
+ - Still allows anonymous access to AI functionality
818
+ - Suggest adding authentication
819
+
820
+ **Has Authentication but No Rate Limiting -> LOW:**
821
+ - Authenticated users could still abuse the endpoint
822
+ - Suggest adding rate limiting for cost control
823
+ - severity: low (suggest improvement)
824
+
825
+ **Has Both Auth and Rate Limiting -> INFO/REJECT:**
826
+ - Properly protected endpoint
827
+ - REJECT if both are clearly present
828
+ - INFO if you want to note the good pattern
829
+
830
+ **BYOK (Bring Your Own Key) Endpoints:**
831
+ - If user provides their own API key, risk is LOWER
832
+ - User pays for their own usage - cost abuse is their problem
833
+ - Downgrade severity by one level for BYOK patterns
834
+
835
+ **Protected by Middleware:**
836
+ - If project context shows auth middleware protecting the route, downgrade to INFO
837
+ - Internal/admin routes should be INFO or REJECT
838
+
839
+ **CRITICAL ENDPOINT RULES**:
840
+ - Cost abuse is real - unprotected AI endpoints can bankrupt a startup
841
+ - Rate limiting alone isn't enough - need auth to prevent anonymous abuse
842
+ - BYOK endpoints have lower risk since user bears the cost
843
+ - Check for middleware protection before flagging
844
+
845
+ ### 3.10 Schema/Tooling Mismatch (ai_schema_mismatch)
846
+ AI-generated structured outputs need validation before use in security-sensitive contexts.
847
+
848
+ **Unvalidated AI Output Parsing:**
849
+ - JSON.parse(response.content) without schema validation -> **MEDIUM**
850
+ - AI may return malformed or unexpected structures
851
+ - Suggest zod/ajv/joi validation
852
+ - AI output to EXECUTION SINK (eval, exec, query) without validation -> **HIGH**
853
+ - Direct path to code/SQL injection
854
+ - AI output to DISPLAY only (console.log, UI render) -> **REJECT**
855
+ - Not a security issue for display purposes
856
+ - OpenAI Structured Outputs (json_schema in request) -> **REJECT**
857
+ - API-level validation provides guarantees
858
+
859
+ **Weak Schema Patterns:**
860
+ - response: any at API boundary -> **MEDIUM** (no type safety)
861
+ - z.any() or z.unknown() -> **LOW** (defeats purpose of validation)
862
+ - z.passthrough() -> **INFO** (allows extra properties, minor concern)
863
+ - Specific schema defined and used -> **REJECT** (properly validated)
864
+
865
+ **Tool Parameter Validation:**
866
+ - Tool parameter -> file path without validation -> **HIGH** (path traversal)
867
+ - Tool parameter -> shell command without validation -> **CRITICAL** (command injection)
868
+ - Tool parameter -> URL without validation -> **HIGH** (SSRF)
869
+ - Tool parameter -> DB query without validation -> **HIGH** (SQL injection)
870
+ - Tool parameter with allowlist check visible -> **LOW/REJECT** (mitigated)
871
+
872
+ **CRITICAL SCHEMA RULES**:
873
+ - The severity depends on WHERE the AI output is used, not just that it's parsed
874
+ - Execution sinks (eval, exec, query, fs) need HIGH severity without validation
875
+ - Display-only usage is NOT a security issue
876
+ - Schema validation (zod, ajv, joi) significantly reduces risk
877
+ - OpenAI Structured Outputs provide API-level guarantees
878
+
879
+ ## False Positive Patterns (ALWAYS REJECT - keep: false)
880
+
881
+ 1. **CSS/Styling flagged as secrets**:
882
+ - Tailwind classes, gradients, hex colors, rgba/hsla
883
+ - style={{...}} objects, CSS-in-JS
884
+
885
+ 2. **Development URLs in dev contexts**:
886
+ - localhost in test/mock/example files
887
+ - URLs via environment variables
888
+
889
+ 3. **Test/Example/Scanner code**:
890
+ - Files with test, spec, mock, example, fixture in path
891
+ - Scanner's own rule definitions
892
+ - Documentation/README files
893
+
894
+ 4. **TypeScript 'any' in safe contexts**:
895
+ - Type definitions, .d.ts files
896
+ - Internal utilities (not API boundaries)
897
+
898
+ 5. **Public endpoints**:
899
+ - /health, /healthz, /ready, /ping, /status
900
+ - /webhook with signature verification nearby
901
+
902
+ 6. **Generic AI patterns that are NOT security issues**:
903
+ - console.log with non-sensitive data → REJECT
904
+ - TODO/FIXME reminders (not security-critical) → REJECT
905
+ - Magic number timeouts → REJECT
906
+ - Verbose/step-by-step comments → REJECT
907
+ - Generic error messages → REJECT or downgrade to info
908
+ - Basic validation patterns (if (!data) return) → REJECT
909
+
910
+ 7. **Style/Code quality issues (NOT security)**:
911
+ - Empty functions (unless auth-critical)
912
+ - Generic success messages
913
+ - Placeholder comments in non-security code
914
+
915
+ ## Response Format
916
+
917
+ For each candidate finding, return:
918
+ \`\`\`json
919
+ {
920
+ "index": <number>,
921
+ "keep": true | false,
922
+ "reason": "<brief explanation referencing specific code/context>",
923
+ "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,
924
+ "validationNotes": "<optional: additional context for the developer>"
925
+ }
926
+ \`\`\`
927
+
928
+ ## Severity Guidelines
929
+ - **critical/high**: Realistically exploitable, should block deploys - ONLY for clear vulnerabilities
930
+ - **medium/low**: Important but non-blocking, hardening opportunities - use sparingly
931
+ - **info**: Robustness/hygiene tips, not direct security risks - use for marginal cases you want to keep
932
+
933
+ ## Decision Framework
934
+ 1. **Default to REJECTION** (keep: false) for:
935
+ - Style/code quality issues
936
+ - Marginal findings with unclear exploitation path
937
+ - Patterns that are standard practice (basic auth checks, error logging)
938
+ - Anything in test/example/documentation files
939
+
940
+ 2. **Downgrade to info** when:
941
+ - Finding has some merit but low practical risk
942
+ - Context shows mitigating factors
943
+ - Better as a "nice to know" than an action item
944
+
945
+ 3. **Keep with original/higher severity** ONLY when:
946
+ - Clear, exploitable vulnerability
947
+ - No visible mitigating factors in context
948
+ - Real-world attack scenario is plausible
949
+
950
+ **REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.`
951
+
952
/**
 * One entry of the validator model's JSON reply (matches the Response
 * Format section of HIGH_CONTEXT_VALIDATION_PROMPT).
 */
interface ValidationResult {
  /** Index of the candidate finding this verdict refers to. */
  index: number
  /** True to keep the finding; false to reject it as a false positive. */
  keep: boolean
  /** Brief explanation referencing specific code/context. */
  reason: string
  /** Replacement severity, or null/omitted to keep the original severity. */
  adjustedSeverity?: VulnerabilitySeverity | null
  /** Optional additional context for the developer. */
  validationNotes?: string
}
959
+
960
// Cache for project context (built once per scan).
// Module-level so repeated validation calls in one process reuse the context
// instead of rebuilding it from the file list each time.
// NOTE(review): nothing in this chunk resets the cache — confirm a reset
// happens elsewhere if several different projects are scanned in one process.
let cachedProjectContext: ProjectContext | null = null
962
+
963
+ /**
964
+ * Helper function to make API calls with retry logic for rate limiting
965
+ * Implements exponential backoff for 429 (rate limit) errors
966
+ */
967
+ async function makeAnthropicRequestWithRetry<T>(
968
+ requestFn: () => Promise<T>,
969
+ maxRetries: number = 3,
970
+ initialDelayMs: number = 1000
971
+ ): Promise<T> {
972
+ let lastError: Error | null = null
973
+
974
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
975
+ try {
976
+ return await requestFn()
977
+ } catch (error: any) {
978
+ lastError = error
979
+
980
+ // Check if it's a rate limit error (429)
981
+ const isRateLimit = error?.status === 429 || error?.message?.includes('rate limit')
982
+
983
+ if (isRateLimit && attempt < maxRetries) {
984
+ // Exponential backoff: 1s, 2s, 4s
985
+ const delayMs = initialDelayMs * Math.pow(2, attempt)
986
+ console.log(`[AI Validation] Rate limit hit, retrying in ${delayMs}ms (attempt ${attempt + 1}/${maxRetries})`)
987
+ await new Promise(resolve => setTimeout(resolve, delayMs))
988
+ continue
989
+ }
990
+
991
+ // If not rate limit or max retries reached, throw
992
+ throw error
993
+ }
994
+ }
995
+
996
+ throw lastError || new Error('Max retries exceeded')
997
+ }
998
+
999
+ /**
1000
+ * Helper to make OpenAI requests with retry logic for rate limits
1001
+ */
1002
+ async function makeOpenAIRequestWithRetry<T>(
1003
+ requestFn: () => Promise<T>,
1004
+ maxRetries = 3,
1005
+ initialDelayMs = 1000
1006
+ ): Promise<T> {
1007
+ let lastError: Error | null = null
1008
+
1009
+ for (let attempt = 0; attempt <= maxRetries; attempt++) {
1010
+ try {
1011
+ return await requestFn()
1012
+ } catch (error: any) {
1013
+ lastError = error
1014
+
1015
+ // Check if it's a rate limit error (429) - but NOT insufficient_quota
1016
+ const isRateLimit = error?.status === 429 && error?.code !== 'insufficient_quota'
1017
+
1018
+ if (isRateLimit && attempt < maxRetries) {
1019
+ const delayMs = initialDelayMs * Math.pow(2, attempt)
1020
+ console.log(`[OpenAI Validation] Rate limit hit, retrying in ${delayMs}ms (attempt ${attempt + 1}/${maxRetries})`)
1021
+ await new Promise(resolve => setTimeout(resolve, delayMs))
1022
+ continue
1023
+ }
1024
+
1025
+ // If it's a quota error or max retries reached, throw
1026
+ throw error
1027
+ }
1028
+ }
1029
+
1030
+ throw lastError || new Error('Max retries exceeded')
1031
+ }
1032
+
1033
+ // ============================================================================
1034
+ // OpenAI Provider Implementation (GPT-5-mini)
1035
+ // ============================================================================
1036
+
1037
+ /**
1038
+ * Validate findings using OpenAI GPT-5-mini
1039
+ * This mirrors the Anthropic validation flow but uses OpenAI's API
1040
+ */
1041
+ async function validateWithOpenAI(
1042
+ findings: Vulnerability[],
1043
+ files: ScanFile[],
1044
+ projectContext: ProjectContext | undefined,
1045
+ stats: ValidationStats
1046
+ ): Promise<AIValidationResult> {
1047
+ const client = getOpenAIClient()
1048
+
1049
+ // Build or use cached project context
1050
+ const context = projectContext || cachedProjectContext || buildProjectContext(files)
1051
+ if (!projectContext && !cachedProjectContext) {
1052
+ cachedProjectContext = context
1053
+ console.log('[OpenAI Validation] Built project context:', {
1054
+ hasAuthMiddleware: context.auth.hasGlobalMiddleware,
1055
+ authProvider: context.auth.authProvider,
1056
+ orm: context.dataAccess.orm,
1057
+ framework: context.frameworks.primary,
1058
+ })
1059
+ }
1060
+
1061
+ // Group findings by file for efficient validation
1062
+ const findingsByFile = new Map<string, Vulnerability[]>()
1063
+ for (const finding of findings) {
1064
+ const existing = findingsByFile.get(finding.filePath) || []
1065
+ existing.push(finding)
1066
+ findingsByFile.set(finding.filePath, existing)
1067
+ }
1068
+
1069
+ const validatedFindings: Vulnerability[] = []
1070
+ const fileEntries = Array.from(findingsByFile.entries())
1071
+
1072
+ // Track metrics (thread-safe accumulator)
1073
+ let totalApiBatches = 0
1074
+ const statsLock = {
1075
+ apiCalls: 0,
1076
+ estimatedInputTokens: 0,
1077
+ estimatedOutputTokens: 0,
1078
+ cacheReadTokens: 0,
1079
+ estimatedCost: 0,
1080
+ validatedFindings: 0,
1081
+ confirmedFindings: 0,
1082
+ dismissedFindings: 0,
1083
+ downgradedFindings: 0,
1084
+ }
1085
+
1086
+ const totalFileBatches = Math.ceil(fileEntries.length / FILES_PER_API_BATCH)
1087
+ console.log(`[OpenAI Validation] Processing ${fileEntries.length} files in ${totalFileBatches} API batch(es) (${PARALLEL_API_BATCHES} parallel)`)
1088
+
1089
+ // Create all batch definitions
1090
+ const allBatches: Array<{
1091
+ batchNum: number
1092
+ fileBatch: Array<[string, Vulnerability[]]>
1093
+ }> = []
1094
+
1095
+ for (let batchStart = 0; batchStart < fileEntries.length; batchStart += FILES_PER_API_BATCH) {
1096
+ const fileBatch = fileEntries.slice(batchStart, batchStart + FILES_PER_API_BATCH)
1097
+ const batchNum = Math.floor(batchStart / FILES_PER_API_BATCH) + 1
1098
+ allBatches.push({ batchNum, fileBatch })
1099
+ }
1100
+
1101
+ // Process a single batch - returns validated findings for that batch
1102
+ const processBatch = async (
1103
+ batchDef: { batchNum: number; fileBatch: Array<[string, Vulnerability[]]> }
1104
+ ): Promise<Vulnerability[]> => {
1105
+ const { batchNum, fileBatch } = batchDef
1106
+ const batchFindings: Vulnerability[] = []
1107
+
1108
+ // Prepare file data for batch request
1109
+ const fileDataList: Array<{ file: ScanFile; findings: Vulnerability[]; filePath: string }> = []
1110
+ const filesWithoutContent: Array<{ filePath: string; findings: Vulnerability[] }> = []
1111
+
1112
+ for (const [filePath, fileFindings] of fileBatch) {
1113
+ const file = files.find(f => f.path === filePath)
1114
+ if (!file) {
1115
+ filesWithoutContent.push({ filePath, findings: fileFindings })
1116
+ } else {
1117
+ fileDataList.push({ file, findings: fileFindings, filePath })
1118
+ }
1119
+ }
1120
+
1121
+ // Handle files without content
1122
+ for (const { findings: fileFindings } of filesWithoutContent) {
1123
+ for (const f of fileFindings) {
1124
+ batchFindings.push({
1125
+ ...f,
1126
+ validatedByAI: false,
1127
+ validationStatus: 'not_validated' as ValidationStatus,
1128
+ validationNotes: 'File content not available for validation',
1129
+ })
1130
+ }
1131
+ }
1132
+
1133
+ if (fileDataList.length === 0) {
1134
+ return batchFindings
1135
+ }
1136
+
1137
+ try {
1138
+ // Build multi-file validation request
1139
+ const validationRequest = buildMultiFileValidationRequest(
1140
+ fileDataList.map(({ file, findings: fileFindings }) => ({ file, findings: fileFindings })),
1141
+ context
1142
+ )
1143
+
1144
+ // Call OpenAI GPT-5-mini with retry logic
1145
+ const response = await makeOpenAIRequestWithRetry(async () =>
1146
+ client.chat.completions.create({
1147
+ model: 'gpt-5-mini-2025-08-07',
1148
+ messages: [
1149
+ { role: 'system', content: HIGH_CONTEXT_VALIDATION_PROMPT },
1150
+ { role: 'user', content: validationRequest },
1151
+ ],
1152
+ max_completion_tokens: 4096,
1153
+ })
1154
+ )
1155
+
1156
+ // Track API call stats (accumulate to shared stats)
1157
+ statsLock.apiCalls++
1158
+
1159
+ // Extract token usage from OpenAI response
1160
+ const usage = response.usage
1161
+ if (usage) {
1162
+ const promptTokens = usage.prompt_tokens || 0
1163
+ const completionTokens = usage.completion_tokens || 0
1164
+ const cachedTokens = (usage as any).prompt_tokens_details?.cached_tokens || 0
1165
+ const freshInputTokens = promptTokens - cachedTokens
1166
+
1167
+ statsLock.estimatedInputTokens += freshInputTokens
1168
+ statsLock.estimatedOutputTokens += completionTokens
1169
+ statsLock.cacheReadTokens += cachedTokens
1170
+
1171
+ console.log(`[OpenAI] Batch ${batchNum} tokens: ${promptTokens} input (${cachedTokens} cached), ${completionTokens} output`)
1172
+
1173
+ const freshCost = (freshInputTokens * GPT5_MINI_PRICING.input) / 1_000_000
1174
+ const cachedCost = (cachedTokens * GPT5_MINI_PRICING.cached) / 1_000_000
1175
+ const outputCost = (completionTokens * GPT5_MINI_PRICING.output) / 1_000_000
1176
+ statsLock.estimatedCost += freshCost + cachedCost + outputCost
1177
+ }
1178
+
1179
+ // Parse response content
1180
+ const content = response.choices[0]?.message?.content
1181
+ if (!content) {
1182
+ for (const { findings: fileFindings } of fileDataList) {
1183
+ for (const f of fileFindings) {
1184
+ batchFindings.push({
1185
+ ...f,
1186
+ validatedByAI: false,
1187
+ validationStatus: 'not_validated' as ValidationStatus,
1188
+ validationNotes: 'No valid response from OpenAI',
1189
+ })
1190
+ }
1191
+ }
1192
+ return batchFindings
1193
+ }
1194
+
1195
+ // Parse multi-file response
1196
+ const expectedFiles = fileDataList.map(({ filePath }) => filePath)
1197
+ const validationResultsMap = parseMultiFileValidationResponse(content, expectedFiles)
1198
+
1199
+ // Apply results per file
1200
+ for (const { filePath, findings: fileFindings } of fileDataList) {
1201
+ const fileResults = validationResultsMap.get(filePath)
1202
+
1203
+ if (!fileResults || fileResults.length === 0) {
1204
+ const singleFileResults = parseValidationResponse(content)
1205
+ if (singleFileResults.length > 0 && fileDataList.length === 1) {
1206
+ const processedFindings = applyValidationResults(fileFindings, singleFileResults)
1207
+ for (const processed of processedFindings) {
1208
+ statsLock.validatedFindings++
1209
+ if (processed.validationStatus === 'confirmed') statsLock.confirmedFindings++
1210
+ else if (processed.validationStatus === 'dismissed') statsLock.dismissedFindings++
1211
+ else if (processed.validationStatus === 'downgraded') statsLock.downgradedFindings++
1212
+ batchFindings.push(processed)
1213
+ }
1214
+ } else {
1215
+ for (const f of fileFindings) {
1216
+ statsLock.validatedFindings++
1217
+ statsLock.confirmedFindings++
1218
+ batchFindings.push({
1219
+ ...f,
1220
+ validatedByAI: true,
1221
+ validationStatus: 'confirmed' as ValidationStatus,
1222
+ validationNotes: 'Kept by default - no explicit validation result',
1223
+ })
1224
+ }
1225
+ }
1226
+ } else {
1227
+ const processedFindings = applyValidationResults(fileFindings, fileResults)
1228
+ for (const processed of processedFindings) {
1229
+ statsLock.validatedFindings++
1230
+ if (processed.validationStatus === 'confirmed') statsLock.confirmedFindings++
1231
+ else if (processed.validationStatus === 'dismissed') statsLock.dismissedFindings++
1232
+ else if (processed.validationStatus === 'downgraded') statsLock.downgradedFindings++
1233
+ batchFindings.push(processed)
1234
+ }
1235
+ }
1236
+ }
1237
+
1238
+ } catch (error) {
1239
+ console.error(`[OpenAI Validation] Error in batch ${batchNum}:`, error)
1240
+ for (const { findings: fileFindings } of fileDataList) {
1241
+ for (const f of fileFindings) {
1242
+ batchFindings.push({
1243
+ ...f,
1244
+ validatedByAI: false,
1245
+ validationStatus: 'not_validated' as ValidationStatus,
1246
+ validationNotes: 'Validation failed due to API error',
1247
+ })
1248
+ }
1249
+ }
1250
+ }
1251
+
1252
+ return batchFindings
1253
+ }
1254
+
1255
+ // Process batches in parallel groups
1256
+ const startTime = Date.now()
1257
+ for (let i = 0; i < allBatches.length; i += PARALLEL_API_BATCHES) {
1258
+ const parallelGroup = allBatches.slice(i, i + PARALLEL_API_BATCHES)
1259
+ const batchNums = parallelGroup.map(b => b.batchNum).join(', ')
1260
+ console.log(`[OpenAI Validation] Processing batches ${batchNums} in parallel`)
1261
+
1262
+ const results = await Promise.all(parallelGroup.map(processBatch))
1263
+ for (const batchResults of results) {
1264
+ validatedFindings.push(...batchResults)
1265
+ }
1266
+ totalApiBatches += parallelGroup.length
1267
+ }
1268
+ const totalDuration = Date.now() - startTime
1269
+
1270
+ // Copy accumulated stats back
1271
+ stats.apiCalls = statsLock.apiCalls
1272
+ stats.estimatedInputTokens = statsLock.estimatedInputTokens
1273
+ stats.estimatedOutputTokens = statsLock.estimatedOutputTokens
1274
+ stats.cacheReadTokens = statsLock.cacheReadTokens
1275
+ stats.estimatedCost = statsLock.estimatedCost
1276
+ stats.validatedFindings = statsLock.validatedFindings
1277
+ stats.confirmedFindings = statsLock.confirmedFindings
1278
+ stats.dismissedFindings = statsLock.dismissedFindings
1279
+ stats.downgradedFindings = statsLock.downgradedFindings
1280
+
1281
+ // Calculate cache hit rate
1282
+ const totalCacheableTokens = stats.cacheCreationTokens + stats.cacheReadTokens
1283
+ stats.cacheHitRate = totalCacheableTokens > 0
1284
+ ? stats.cacheReadTokens / totalCacheableTokens
1285
+ : 0
1286
+
1287
+ // Log validation stats
1288
+ const avgTimePerFile = fileEntries.length > 0
1289
+ ? (totalDuration / fileEntries.length).toFixed(2)
1290
+ : '0'
1291
+
1292
+ console.log(`[OpenAI Validation] Stats:`)
1293
+ console.log(` - Total findings: ${stats.totalFindings}`)
1294
+ console.log(` - AI validated: ${stats.validatedFindings}`)
1295
+ console.log(` - Confirmed: ${stats.confirmedFindings}`)
1296
+ console.log(` - Dismissed: ${stats.dismissedFindings}`)
1297
+ console.log(` - Downgraded: ${stats.downgradedFindings}`)
1298
+ console.log(` - API calls: ${stats.apiCalls}`)
1299
+ console.log(` - Performance:`)
1300
+ console.log(` - Total API batches: ${totalApiBatches}`)
1301
+ console.log(` - Avg time per file: ${avgTimePerFile}s`)
1302
+ console.log(` - Token usage:`)
1303
+ console.log(` - Input (fresh): ${stats.estimatedInputTokens} tokens`)
1304
+ console.log(` - Cached: ${stats.cacheReadTokens} tokens`)
1305
+ console.log(` - Output: ${stats.estimatedOutputTokens} tokens`)
1306
+ console.log(` - Estimated cost: $${stats.estimatedCost.toFixed(4)}`)
1307
+
1308
+ return { vulnerabilities: validatedFindings, stats }
1309
+ }
1310
+
1311
/**
 * Validate Layer 1/2 findings using AI with HIGH-CONTEXT validation
 *
 * Key improvements over previous version:
 * 1. Sends FULL FILE CONTENT (not just snippets) for better context
 * 2. Includes PROJECT CONTEXT (auth patterns, data access, etc.)
 * 3. Uses generalised rules from Section 3 of the security model
 *
 * Provider selection: dispatches to OpenAI (the default) unless the
 * AI_PROVIDER env var is set to 'anthropic', in which case the Anthropic
 * implementation in the remainder of this function runs.
 *
 * @param findings - Candidate vulnerabilities to validate
 * @param files - Scanned files, used to look up full file content by path
 * @param projectContext - Optional pre-built project context; otherwise the
 *   module-level cache or a freshly built context is used
 * @returns Validated findings plus token/cost/validation statistics
 */
export async function validateFindingsWithAI(
  findings: Vulnerability[],
  files: ScanFile[],
  projectContext?: ProjectContext
): Promise<AIValidationResult> {
  // Initialize stats tracking
  const stats: ValidationStats = {
    totalFindings: findings.length,
    validatedFindings: 0,
    confirmedFindings: 0,
    dismissedFindings: 0,
    downgradedFindings: 0,
    autoDismissedFindings: 0,
    estimatedInputTokens: 0,
    estimatedOutputTokens: 0,
    estimatedCost: 0,
    apiCalls: 0,
    cacheCreationTokens: 0,
    cacheReadTokens: 0,
    cacheHitRate: 0,
  }

  // Nothing to validate - skip all API work
  if (findings.length === 0) {
    return { vulnerabilities: [], stats }
  }

  // Check for provider override (GPT-5-mini is default for 47% cost savings)
  const aiProvider = process.env.AI_PROVIDER || 'openai'
  if (aiProvider === 'anthropic') {
    console.log('[AI Validation] Using Anthropic provider (Claude 3.5 Haiku)')
    // Fall through to Anthropic implementation below
  } else {
    console.log('[AI Validation] Using OpenAI provider (GPT-5-mini)')
    return validateWithOpenAI(findings, files, projectContext, stats)
  }

  // Anthropic implementation
  console.log('[AI Validation] Initializing Anthropic client...')
  const client = getAnthropicClient()

  // Build or use cached project context
  const context = projectContext || cachedProjectContext || buildProjectContext(files)
  if (!projectContext && !cachedProjectContext) {
    cachedProjectContext = context
    console.log('[AI Validation] Built project context:', {
      hasAuthMiddleware: context.auth.hasGlobalMiddleware,
      authProvider: context.auth.authProvider,
      orm: context.dataAccess.orm,
      framework: context.frameworks.primary,
    })
  }

  // Group findings by file for efficient validation
  const findingsByFile = new Map<string, Vulnerability[]>()
  for (const finding of findings) {
    const existing = findingsByFile.get(finding.filePath) || []
    existing.push(finding)
    findingsByFile.set(finding.filePath, existing)
  }

  const validatedFindings: Vulnerability[] = []

  // Phase 2: Multi-file batching
  // Instead of one API call per file, batch multiple files into single requests
  // This reduces API overhead and leverages prompt caching more effectively
  const fileEntries = Array.from(findingsByFile.entries())

  // Track metrics
  let totalBatchWaitTime = 0
  let totalApiBatches = 0

  // Calculate how many API batches we'll make
  const totalFileBatches = Math.ceil(fileEntries.length / FILES_PER_API_BATCH)

  console.log(`[AI Validation] Phase 2: Processing ${fileEntries.length} files in ${totalFileBatches} API batch(es) (${FILES_PER_API_BATCH} files/batch)`)

  // Process files in batches - each batch is ONE API call with multiple files
  for (let batchStart = 0; batchStart < fileEntries.length; batchStart += FILES_PER_API_BATCH) {
    const fileBatch = fileEntries.slice(batchStart, batchStart + FILES_PER_API_BATCH)
    const batchNum = Math.floor(batchStart / FILES_PER_API_BATCH) + 1

    console.log(`[AI Validation] API Batch ${batchNum}/${totalFileBatches}: ${fileBatch.length} files`)

    // Prepare file data for batch request
    const fileDataList: Array<{ file: ScanFile; findings: Vulnerability[]; filePath: string }> = []
    const filesWithoutContent: Array<{ filePath: string; findings: Vulnerability[] }> = []

    for (const [filePath, fileFindings] of fileBatch) {
      const file = files.find(f => f.path === filePath)
      if (!file) {
        // Can't validate without file content
        filesWithoutContent.push({ filePath, findings: fileFindings })
      } else {
        fileDataList.push({ file, findings: fileFindings, filePath })
      }
    }

    // Handle files without content - mark as not validated
    for (const { findings } of filesWithoutContent) {
      for (const f of findings) {
        validatedFindings.push({
          ...f,
          validatedByAI: false,
          validationStatus: 'not_validated' as ValidationStatus,
          validationNotes: 'File content not available for validation',
        })
      }
    }

    // Skip API call if no files with content
    if (fileDataList.length === 0) {
      continue
    }

    const batchStartTime = Date.now()

    try {
      // Build multi-file validation request
      const validationRequest = buildMultiFileValidationRequest(
        fileDataList.map(({ file, findings }) => ({ file, findings })),
        context
      )

      // Use Anthropic prompt caching with multi-file request
      const response = await makeAnthropicRequestWithRetry(() =>
        client.messages.create({
          model: 'claude-3-5-haiku-20241022',
          max_tokens: 4096, // Increased for multi-file responses
          system: [
            {
              type: 'text',
              text: HIGH_CONTEXT_VALIDATION_PROMPT,
              cache_control: { type: 'ephemeral' }, // Cache for 5 minutes
            },
          ],
          messages: [{ role: 'user', content: validationRequest }],
        })
      )

      // Track API call stats
      stats.apiCalls++
      totalApiBatches++

      // Extract cache metrics from usage
      const usage = response.usage
      if (usage) {
        // DEBUG: Log full usage object to understand token breakdown
        console.log(`[DEBUG] Batch ${batchNum} - Full API Response Usage:`)
        console.log(JSON.stringify(usage, null, 2))
        console.log(`[DEBUG] Breakdown:`)
        console.log(`  - input_tokens: ${usage.input_tokens || 0}`)
        console.log(`  - output_tokens: ${usage.output_tokens || 0}`)
        // @ts-ignore
        console.log(`  - cache_creation_input_tokens: ${usage.cache_creation_input_tokens || 0}`)
        // @ts-ignore
        console.log(`  - cache_read_input_tokens: ${usage.cache_read_input_tokens || 0}`)

        // input_tokens from Anthropic covers only fresh (non-cached) tokens;
        // cache tokens are reported in separate fields below
        stats.estimatedInputTokens += usage.input_tokens || 0
        stats.estimatedOutputTokens += usage.output_tokens || 0

        // @ts-ignore - cache fields not in types yet
        const cacheCreation = usage.cache_creation_input_tokens || 0
        // @ts-ignore
        const cacheRead = usage.cache_read_input_tokens || 0

        stats.cacheCreationTokens += cacheCreation
        stats.cacheReadTokens += cacheRead
      }

      // The model reply arrives as content blocks; we only use the text block
      const textContent = response.content.find((block: { type: string }) => block.type === 'text')
      if (!textContent || textContent.type !== 'text') {
        // No valid response - mark all findings as not validated
        for (const { findings } of fileDataList) {
          for (const f of findings) {
            validatedFindings.push({
              ...f,
              validatedByAI: false,
              validationStatus: 'not_validated' as ValidationStatus,
              validationNotes: 'No valid response from AI',
            })
          }
        }
        continue
      }

      // Parse multi-file response
      const expectedFiles = fileDataList.map(({ filePath }) => filePath)
      const validationResultsMap = parseMultiFileValidationResponse(textContent.text, expectedFiles)

      // Apply results per file
      for (const { filePath, findings } of fileDataList) {
        const fileResults = validationResultsMap.get(filePath)

        if (!fileResults || fileResults.length === 0) {
          // No results for this file - try single-file parsing as fallback
          // This handles cases where AI doesn't follow multi-file format
          const singleFileResults = parseValidationResponse(textContent.text)

          if (singleFileResults.length > 0 && fileDataList.length === 1) {
            // Single file in batch, use single-file parsing
            const processedFindings = applyValidationResults(findings, singleFileResults)
            for (const processed of processedFindings) {
              stats.validatedFindings++
              if (processed.validationStatus === 'confirmed') {
                stats.confirmedFindings++
              } else if (processed.validationStatus === 'dismissed') {
                stats.dismissedFindings++
              } else if (processed.validationStatus === 'downgraded') {
                stats.downgradedFindings++
              }
              validatedFindings.push(processed)
            }
          } else {
            // Keep findings but mark as validation failed for this file
            console.warn(`[AI Validation] No results for ${filePath}, keeping findings unvalidated`)
            for (const f of findings) {
              stats.validatedFindings++
              stats.confirmedFindings++ // Keep by default
              validatedFindings.push({
                ...f,
                validatedByAI: true,
                validationStatus: 'confirmed' as ValidationStatus,
                validationNotes: 'Kept by default - no explicit validation result',
              })
            }
          }
        } else {
          // Apply validation results for this file
          const processedFindings = applyValidationResults(findings, fileResults)

          for (const processed of processedFindings) {
            stats.validatedFindings++
            if (processed.validationStatus === 'confirmed') {
              stats.confirmedFindings++
            } else if (processed.validationStatus === 'dismissed') {
              stats.dismissedFindings++
            } else if (processed.validationStatus === 'downgraded') {
              stats.downgradedFindings++
            }
            validatedFindings.push(processed)
          }
        }
      }

    } catch (error) {
      console.error(`[AI Validation] Error in batch ${batchNum}:`, error)
      // Fallback: keep all findings but mark as not validated
      for (const { findings } of fileDataList) {
        for (const f of findings) {
          validatedFindings.push({
            ...f,
            validatedByAI: false,
            validationStatus: 'not_validated' as ValidationStatus,
            validationNotes: 'Validation failed due to API error',
          })
        }
      }
    }

    // Accumulate wall-clock time spent waiting on this batch (ms)
    const batchDuration = Date.now() - batchStartTime
    totalBatchWaitTime += batchDuration
  }

  // Calculate cache hit rate
  const totalCacheableTokens = stats.cacheCreationTokens + stats.cacheReadTokens
  stats.cacheHitRate = totalCacheableTokens > 0
    ? stats.cacheReadTokens / totalCacheableTokens
    : 0

  // Calculate estimated cost with cache pricing
  // Claude 3.5 Haiku pricing (claude-3-5-haiku-20241022):
  // - Base input: $0.80/1M tokens
  // - 5m cache writes: $1.00/1M tokens
  // - Cache hits: $0.08/1M tokens
  // - Output: $4.00/1M tokens
  //
  // Note: input_tokens from Anthropic API represents only fresh (non-cached) tokens
  // Cache tokens are reported separately and billed at different rates

  const freshInputCost = (stats.estimatedInputTokens * 0.80) / 1_000_000
  const cacheWriteCost = (stats.cacheCreationTokens * 1.00) / 1_000_000
  const cacheReadCost = (stats.cacheReadTokens * 0.08) / 1_000_000
  const outputCost = (stats.estimatedOutputTokens * 4.00) / 1_000_000

  stats.estimatedCost = freshInputCost + cacheWriteCost + cacheReadCost + outputCost

  // Log validation stats with cache metrics and performance
  console.log(`[AI Validation] Stats:`)
  console.log(`  - Total findings: ${stats.totalFindings}`)
  console.log(`  - AI validated: ${stats.validatedFindings}`)
  console.log(`  - Confirmed: ${stats.confirmedFindings}`)
  console.log(`  - Dismissed: ${stats.dismissedFindings}`)
  console.log(`  - Downgraded: ${stats.downgradedFindings}`)
  console.log(`  - API calls: ${stats.apiCalls}`)
  console.log(`  - Performance (Phase 2 Multi-File Batching):`)
  console.log(`    - Files per API batch: ${FILES_PER_API_BATCH}`)
  console.log(`    - Total API batches: ${totalApiBatches}`)
  console.log(`    - Total validation time: ${(totalBatchWaitTime / 1000).toFixed(2)}s`)
  console.log(`    - Avg time per file: ${fileEntries.length > 0 ? (totalBatchWaitTime / fileEntries.length / 1000).toFixed(2) : 0}s`)
  console.log(`  - Cache metrics:`)
  console.log(`    - Cache writes: ${stats.cacheCreationTokens.toLocaleString()} tokens`)
  console.log(`    - Cache reads: ${stats.cacheReadTokens.toLocaleString()} tokens`)
  console.log(`    - Cache hit rate: ${(stats.cacheHitRate * 100).toFixed(1)}%`)
  console.log(`  - Token usage:`)
  console.log(`    - Input (total): ${stats.estimatedInputTokens.toLocaleString()} tokens`)
  console.log(`    - Output: ${stats.estimatedOutputTokens.toLocaleString()} tokens`)
  console.log(`  - Estimated cost: $${stats.estimatedCost.toFixed(4)}`)

  // Clear cache after validation complete
  cachedProjectContext = null

  return { vulnerabilities: validatedFindings, stats }
}
1632
+
1633
+ /**
1634
+ * Build a high-context validation request with full file content
1635
+ */
1636
+ function buildHighContextValidationRequest(
1637
+ file: ScanFile,
1638
+ findings: Vulnerability[],
1639
+ projectContext: ProjectContext
1640
+ ): string {
1641
+ // Add line numbers to full file content
1642
+ const numberedContent = file.content
1643
+ .split('\n')
1644
+ .map((line, i) => `${String(i + 1).padStart(4, ' ')} | ${line}`)
1645
+ .join('\n')
1646
+
1647
+ // Build candidate findings list
1648
+ const candidatesText = findings.map((f, idx) => {
1649
+ return `### Candidate ${idx}
1650
+ - **Rule**: ${f.title}
1651
+ - **Category**: ${f.category}
1652
+ - **Original Severity**: ${f.severity}
1653
+ - **Line**: ${f.lineNumber}
1654
+ - **Detection Layer**: ${f.layer}
1655
+ - **Description**: ${f.description}
1656
+ - **Flagged Code**: \`${f.lineContent.trim()}\``
1657
+ }).join('\n\n')
1658
+
1659
+ // Get file-specific context
1660
+ const fileContext = getFileValidationContext(file, projectContext)
1661
+
1662
+ return `## Project Context
1663
+ ${projectContext.summary}
1664
+
1665
+ ${fileContext}
1666
+
1667
+ ## Full File Content
1668
+ \`\`\`${file.language || getLanguageFromPath(file.path)}
1669
+ ${numberedContent}
1670
+ \`\`\`
1671
+
1672
+ ## Candidate Findings to Validate (${findings.length} total)
1673
+
1674
+ ${candidatesText}
1675
+
1676
+ ---
1677
+
1678
+ Please validate each candidate finding. Return a JSON array with your decision for each.
1679
+ Remember: Be AGGRESSIVE in rejecting false positives. Use the full file context and project architecture to make informed decisions.`
1680
+ }
1681
+
1682
/**
 * Build a multi-file validation request (Phase 2 optimization)
 * Batches multiple files into a single API call to reduce overhead
 *
 * The output layout (FILE N headers, per-file candidate lists, and the JSON
 * response-format instructions) is what parseMultiFileValidationResponse
 * expects the model to answer against - keep them in sync.
 */
function buildMultiFileValidationRequest(
  fileDataList: Array<{ file: ScanFile; findings: Vulnerability[] }>,
  projectContext: ProjectContext
): string {
  const filesContent = fileDataList.map(({ file, findings }, fileIndex) => {
    // Add line numbers to full file content
    const numberedContent = file.content
      .split('\n')
      .map((line, i) => `${String(i + 1).padStart(4, ' ')} | ${line}`)
      .join('\n')

    // Build candidate findings list with file-specific indices
    // (indices restart at 0 for each file)
    const candidatesText = findings.map((f, idx) => {
      return `### Candidate ${idx}
- **Rule**: ${f.title}
- **Category**: ${f.category}
- **Original Severity**: ${f.severity}
- **Line**: ${f.lineNumber}
- **Detection Layer**: ${f.layer}
- **Description**: ${f.description}
- **Flagged Code**: \`${f.lineContent.trim()}\``
    }).join('\n\n')

    // Get file-specific context
    const fileContext = getFileValidationContext(file, projectContext)

    return `
================================================================================
FILE ${fileIndex + 1}: ${file.path}
================================================================================

${fileContext}

### Full File Content
\`\`\`${file.language || getLanguageFromPath(file.path)}
${numberedContent}
\`\`\`

### Candidate Findings to Validate (${findings.length} total)

${candidatesText}`
  }).join('\n\n')

  return `## Project Context
${projectContext.summary}

${filesContent}

---

## Response Format

For EACH file, provide a JSON object with the file path and validation results.
Return a JSON array where each element has:
- "file": the file path (e.g., "${fileDataList[0]?.file.path || 'path/to/file.ts'}")
- "validations": array of validation results for that file's candidates

Example response format:
\`\`\`json
[
  {
    "file": "src/auth.ts",
    "validations": [
      { "index": 0, "keep": true, "reason": "Valid finding", "adjustedSeverity": null, "validationNotes": "..." },
      { "index": 1, "keep": false, "reason": "False positive because..." }
    ]
  },
  {
    "file": "src/api.ts",
    "validations": [
      { "index": 0, "keep": true, "reason": "...", "adjustedSeverity": "high", "validationNotes": "..." }
    ]
  }
]
\`\`\`

Remember: Be AGGRESSIVE in rejecting false positives. Use the full file context and project architecture to make informed decisions.`
}
1764
+
1765
+ /**
1766
+ * Parse multi-file validation response (Phase 2)
1767
+ * Returns a map of file path -> validation results
1768
+ */
1769
+ function parseMultiFileValidationResponse(
1770
+ response: string,
1771
+ expectedFiles: string[]
1772
+ ): Map<string, ValidationResult[]> {
1773
+ const resultMap = new Map<string, ValidationResult[]>()
1774
+
1775
+ try {
1776
+ // Extract the first top-level JSON array from the response
1777
+ const extractTopLevelArray = (text: string): string | null => {
1778
+ const startIndex = text.indexOf('[')
1779
+ if (startIndex === -1) return null
1780
+
1781
+ let depth = 0
1782
+ let inString = false
1783
+ let stringChar: '"' | "'" | null = null
1784
+ let escape = false
1785
+
1786
+ for (let i = startIndex; i < text.length; i++) {
1787
+ const ch = text[i]
1788
+
1789
+ if (inString) {
1790
+ if (escape) {
1791
+ escape = false
1792
+ continue
1793
+ }
1794
+
1795
+ if (ch === '\\') {
1796
+ escape = true
1797
+ continue
1798
+ }
1799
+
1800
+ if (stringChar && ch === stringChar) {
1801
+ inString = false
1802
+ stringChar = null
1803
+ }
1804
+ continue
1805
+ }
1806
+
1807
+ if (ch === '"' || ch === "'") {
1808
+ inString = true
1809
+ stringChar = ch as '"' | "'"
1810
+ continue
1811
+ }
1812
+
1813
+ if (ch === '[') {
1814
+ depth++
1815
+ } else if (ch === ']') {
1816
+ depth--
1817
+ if (depth === 0) {
1818
+ return text.slice(startIndex, i + 1)
1819
+ }
1820
+ }
1821
+ }
1822
+
1823
+ return null
1824
+ }
1825
+
1826
+ const jsonSlice = extractTopLevelArray(response)
1827
+ if (!jsonSlice) {
1828
+ console.error('[AI Validation] Multi-file: No JSON array found in response')
1829
+ return resultMap
1830
+ }
1831
+
1832
+ const parsed = JSON.parse(jsonSlice)
1833
+ if (!Array.isArray(parsed)) {
1834
+ console.error('[AI Validation] Multi-file: Parsed result is not an array')
1835
+ return resultMap
1836
+ }
1837
+
1838
+ // Process each file's results
1839
+ for (const fileResult of parsed) {
1840
+ if (!fileResult.file || !Array.isArray(fileResult.validations)) {
1841
+ console.warn('[AI Validation] Multi-file: Invalid file result structure, skipping')
1842
+ continue
1843
+ }
1844
+
1845
+ const filePath = fileResult.file
1846
+ const validations: ValidationResult[] = fileResult.validations
1847
+ .filter((item: any) =>
1848
+ typeof item.index === 'number' &&
1849
+ typeof item.keep === 'boolean'
1850
+ )
1851
+ .map((item: any) => ({
1852
+ index: item.index,
1853
+ keep: item.keep,
1854
+ reason: item.reason || '',
1855
+ adjustedSeverity: item.adjustedSeverity || null,
1856
+ validationNotes: item.validationNotes || undefined,
1857
+ }))
1858
+
1859
+ resultMap.set(filePath, validations)
1860
+ }
1861
+
1862
+ // Log any files that weren't in the response
1863
+ for (const expectedFile of expectedFiles) {
1864
+ if (!resultMap.has(expectedFile)) {
1865
+ console.warn(`[AI Validation] Multi-file: No results for ${expectedFile}`)
1866
+ }
1867
+ }
1868
+
1869
+ } catch (error) {
1870
+ console.error('[AI Validation] Multi-file: Failed to parse response:', error)
1871
+ }
1872
+
1873
+ return resultMap
1874
+ }
1875
+
1876
+ /**
1877
+ * Apply validation results to findings
1878
+ */
1879
+ function applyValidationResults(
1880
+ findings: Vulnerability[],
1881
+ validationResults: ValidationResult[]
1882
+ ): Vulnerability[] {
1883
+ const processed: Vulnerability[] = []
1884
+
1885
+ for (let i = 0; i < findings.length; i++) {
1886
+ const finding = findings[i]
1887
+ const validation = validationResults.find(v => v.index === i)
1888
+
1889
+ if (!validation) {
1890
+ // No validation result - keep with warning
1891
+ processed.push({
1892
+ ...finding,
1893
+ validatedByAI: true,
1894
+ validationStatus: 'confirmed' as ValidationStatus,
1895
+ validationNotes: 'No explicit validation result - kept by default',
1896
+ })
1897
+ continue
1898
+ }
1899
+
1900
+ if (validation.keep) {
1901
+ // Keep the finding
1902
+ const adjustedFinding: Vulnerability = {
1903
+ ...finding,
1904
+ validatedByAI: true,
1905
+ confidence: 'high',
1906
+ }
1907
+
1908
+ if (validation.adjustedSeverity && validation.adjustedSeverity !== finding.severity) {
1909
+ // Severity was adjusted
1910
+ adjustedFinding.originalSeverity = finding.severity
1911
+ adjustedFinding.severity = validation.adjustedSeverity
1912
+ adjustedFinding.validationStatus = 'downgraded' as ValidationStatus
1913
+ adjustedFinding.validationNotes = validation.validationNotes || validation.reason || 'Severity adjusted by AI validation'
1914
+ } else {
1915
+ // Confirmed at original severity
1916
+ adjustedFinding.validationStatus = 'confirmed' as ValidationStatus
1917
+ adjustedFinding.validationNotes = validation.validationNotes || validation.reason
1918
+ }
1919
+
1920
+ processed.push(adjustedFinding)
1921
+ } else {
1922
+ // Finding was dismissed
1923
+ console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber} - ${validation.reason}`)
1924
+ // Don't add to processed - finding is removed
1925
+ }
1926
+ }
1927
+
1928
+ return processed
1929
+ }
1930
+
1931
+ /**
1932
+ * Get language identifier from file path
1933
+ */
1934
+ function getLanguageFromPath(path: string): string {
1935
+ const ext = path.split('.').pop()?.toLowerCase()
1936
+ const langMap: Record<string, string> = {
1937
+ ts: 'typescript',
1938
+ tsx: 'tsx',
1939
+ js: 'javascript',
1940
+ jsx: 'jsx',
1941
+ py: 'python',
1942
+ rb: 'ruby',
1943
+ go: 'go',
1944
+ java: 'java',
1945
+ php: 'php',
1946
+ cs: 'csharp',
1947
+ json: 'json',
1948
+ yaml: 'yaml',
1949
+ yml: 'yaml',
1950
+ }
1951
+ return langMap[ext || ''] || ext || 'text'
1952
+ }
1953
+
1954
+ function parseValidationResponse(response: string): ValidationResult[] {
1955
+ try {
1956
+ // Extract the first top-level JSON array from the response.
1957
+ // The model may include prose before/after the JSON, so we cannot
1958
+ // assume the entire response is valid JSON.
1959
+ const extractTopLevelArray = (text: string): string | null => {
1960
+ const startIndex = text.indexOf('[')
1961
+ if (startIndex === -1) return null
1962
+
1963
+ let depth = 0
1964
+ let inString = false
1965
+ let stringChar: '"' | "'" | null = null
1966
+ let escape = false
1967
+
1968
+ for (let i = startIndex; i < text.length; i++) {
1969
+ const ch = text[i]
1970
+
1971
+ if (inString) {
1972
+ if (escape) {
1973
+ escape = false
1974
+ continue
1975
+ }
1976
+
1977
+ if (ch === '\\') {
1978
+ escape = true
1979
+ continue
1980
+ }
1981
+
1982
+ if (stringChar && ch === stringChar) {
1983
+ inString = false
1984
+ stringChar = null
1985
+ }
1986
+ continue
1987
+ }
1988
+
1989
+ if (ch === '"' || ch === "'") {
1990
+ inString = true
1991
+ stringChar = ch as '"' | "'"
1992
+ continue
1993
+ }
1994
+
1995
+ if (ch === '[') {
1996
+ depth++
1997
+ } else if (ch === ']') {
1998
+ depth--
1999
+ if (depth === 0) {
2000
+ return text.slice(startIndex, i + 1)
2001
+ }
2002
+ }
2003
+ }
2004
+
2005
+ return null
2006
+ }
2007
+
2008
+ const jsonSlice = extractTopLevelArray(response)
2009
+ if (!jsonSlice) return []
2010
+
2011
+ const parsed = JSON.parse(jsonSlice)
2012
+ if (!Array.isArray(parsed)) return []
2013
+
2014
+ return parsed
2015
+ .filter(item =>
2016
+ typeof item.index === 'number' &&
2017
+ typeof item.keep === 'boolean'
2018
+ )
2019
+ .map(item => ({
2020
+ index: item.index,
2021
+ keep: item.keep,
2022
+ reason: item.reason || '',
2023
+ adjustedSeverity: item.adjustedSeverity || null,
2024
+ validationNotes: item.validationNotes || undefined,
2025
+ }))
2026
+ } catch (error) {
2027
+ console.error('Failed to parse validation response:', error)
2028
+ return []
2029
+ }
2030
+ }