@oculum/scanner 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -138,6 +138,8 @@ export interface ScanOptions {
138
138
  scanMode?: ScanMode | ScanModeConfig
139
139
  /** Scan depth (cheap/validated/deep) - controls AI usage */
140
140
  scanDepth?: ScanDepth
141
+ /** Suppress console.log output (for interactive CLI mode) */
142
+ quiet?: boolean
141
143
  }
142
144
 
143
145
  export interface ScanProgress {
@@ -214,10 +216,18 @@ export async function runScan(
214
216
  const scanModeConfig = resolveScanModeConfig(options)
215
217
  const isIncremental = scanModeConfig.mode === 'incremental'
216
218
  const depth = scanModeConfig.scanDepth || 'cheap'
219
+ const quiet = options.quiet ?? false
217
220
 
218
- console.log(`[Scanner] repo=${repoInfo.name} mode=${scanModeConfig.mode} depth=${depth} files=${files.length}`)
221
+ // Conditional logging helper - suppresses output in quiet mode (interactive CLI)
222
+ const log = (message: string) => {
223
+ if (!quiet) {
224
+ console.log(message)
225
+ }
226
+ }
227
+
228
+ log(`[Scanner] repo=${repoInfo.name} mode=${scanModeConfig.mode} depth=${depth} files=${files.length}`)
219
229
  if (isIncremental && scanModeConfig.changedFiles) {
220
- console.log(`[Scanner] repo=${repoInfo.name} incremental_files=${scanModeConfig.changedFiles.length}`)
230
+ log(`[Scanner] repo=${repoInfo.name} incremental_files=${scanModeConfig.changedFiles.length}`)
221
231
  }
222
232
 
223
233
  // Report progress helper
@@ -247,19 +257,19 @@ export async function runScan(
247
257
  // Detect global auth middleware before scanning (always on all files for context)
248
258
  const middlewareConfig = detectGlobalAuthMiddleware(files)
249
259
  if (middlewareConfig.hasAuthMiddleware) {
250
- console.log(`[Scanner] repo=${repoInfo.name} auth_middleware=${middlewareConfig.authType || 'unknown'} file=${middlewareConfig.middlewareFile}`)
260
+ log(`[Scanner] repo=${repoInfo.name} auth_middleware=${middlewareConfig.authType || 'unknown'} file=${middlewareConfig.middlewareFile}`)
251
261
  }
252
262
 
253
263
  // Build imported auth registry for cross-file middleware detection
254
264
  const fileAuthImports = buildFileAuthImports(files)
255
265
  const filesWithImportedAuth = Array.from(fileAuthImports.values()).filter(f => f.usesImportedAuth).length
256
266
  if (filesWithImportedAuth > 0) {
257
- console.log(`[Scanner] repo=${repoInfo.name} files_with_imported_auth=${filesWithImportedAuth}`)
267
+ log(`[Scanner] repo=${repoInfo.name} files_with_imported_auth=${filesWithImportedAuth}`)
258
268
  }
259
269
 
260
270
  // Layer 1: Surface Scan
261
271
  reportProgress('layer1', 'Running surface scan (patterns, entropy, config)...')
262
- let layer1Result = await runLayer1Scan(files)
272
+ let layer1Result = await runLayer1Scan(files, onProgress)
263
273
 
264
274
  // Aggregate repeated localhost findings
265
275
  const layer1RawCount = layer1Result.vulnerabilities.length
@@ -267,18 +277,18 @@ export async function runScan(
267
277
  ...layer1Result,
268
278
  vulnerabilities: aggregateLocalhostFindings(layer1Result.vulnerabilities)
269
279
  }
270
- console.log(`[Layer1] repo=${repoInfo.name} findings_raw=${layer1RawCount} findings_deduped=${layer1Result.vulnerabilities.length}`)
280
+ log(`[Layer1] repo=${repoInfo.name} findings_raw=${layer1RawCount} findings_deduped=${layer1Result.vulnerabilities.length}`)
271
281
 
272
282
  // Layer 2: Structural Scan
273
283
  reportProgress('layer2', 'Running structural scan (variables, logic gates)...', layer1Result.vulnerabilities.length)
274
- const layer2Result = await runLayer2Scan(files, { middlewareConfig, fileAuthImports })
284
+ const layer2Result = await runLayer2Scan(files, { middlewareConfig, fileAuthImports }, onProgress)
275
285
 
276
286
  // Format heuristic breakdown for logging
277
287
  const heuristicBreakdown = Object.entries(layer2Result.stats.raw)
278
288
  .filter(([, count]) => count > 0)
279
289
  .map(([name, count]) => `${name}:${count}`)
280
290
  .join(',')
281
- console.log(`[Layer2] repo=${repoInfo.name} findings_raw=${Object.values(layer2Result.stats.raw).reduce((a, b) => a + b, 0)} findings_deduped=${layer2Result.vulnerabilities.length} heuristic_breakdown={${heuristicBreakdown}}`)
291
+ log(`[Layer2] repo=${repoInfo.name} findings_raw=${Object.values(layer2Result.stats.raw).reduce((a, b) => a + b, 0)} findings_deduped=${layer2Result.vulnerabilities.length} heuristic_breakdown={${heuristicBreakdown}}`)
282
292
 
283
293
  // Combine Layer 1 and Layer 2 findings
284
294
  const layer12Findings = [...layer1Result.vulnerabilities, ...layer2Result.vulnerabilities]
@@ -293,8 +303,8 @@ export async function runScan(
293
303
  const tierFiltered = filterByTierAndDepth(aggregatedFindings, depth)
294
304
 
295
305
  // Log tier breakdown
296
- console.log(`[Scanner] repo=${repoInfo.name} tier_breakdown=${formatTierStats(tierFiltered.tierStats)}`)
297
- console.log(`[Scanner] repo=${repoInfo.name} depth=${depth} tier_routing: surface=${tierFiltered.toSurface.length} validate=${tierFiltered.toValidate.length} hidden=${tierFiltered.hidden.length}`)
306
+ log(`[Scanner] repo=${repoInfo.name} tier_breakdown=${formatTierStats(tierFiltered.tierStats)}`)
307
+ log(`[Scanner] repo=${repoInfo.name} depth=${depth} tier_routing: surface=${tierFiltered.toSurface.length} validate=${tierFiltered.toValidate.length} hidden=${tierFiltered.hidden.length}`)
298
308
 
299
309
  // For cheap scans: Tier A surfaces directly, Tier B/C are hidden
300
310
  // For validated/deep: Tier A surfaces, Tier B goes through AI validation, Tier C hidden
@@ -322,7 +332,7 @@ export async function runScan(
322
332
  autoDismissBySeverity[d.finding.severity] = (autoDismissBySeverity[d.finding.severity] || 0) + 1
323
333
  }
324
334
  if (autoDismissed.length > 0) {
325
- console.log(`[Layer2] repo=${repoInfo.name} auto_dismissed_total=${autoDismissed.length} by_severity={info:${autoDismissBySeverity.info},low:${autoDismissBySeverity.low},medium:${autoDismissBySeverity.medium},high:${autoDismissBySeverity.high}}`)
335
+ log(`[Layer2] repo=${repoInfo.name} auto_dismissed_total=${autoDismissed.length} by_severity={info:${autoDismissBySeverity.info},low:${autoDismissBySeverity.low},medium:${autoDismissBySeverity.medium},high:${autoDismissBySeverity.high}}`)
326
336
  }
327
337
 
328
338
  // Apply per-file cap to validation candidates (cost control)
@@ -349,15 +359,15 @@ export async function runScan(
349
359
  const { stats: validationStats } = validationResult
350
360
  capturedValidationStats = validationStats // Capture for return
351
361
 
352
- console.log(`[AI Validation] repo=${repoInfo.name} depth=${depth} candidates=${findingsToValidate.length} capped_from=${requiresValidation.length} auto_dismissed=${autoDismissed.length} kept=${validationStats.confirmedFindings} rejected=${validationStats.dismissedFindings} downgraded=${validationStats.downgradedFindings}`)
353
- console.log(`[AI Validation] cost_estimate: input_tokens=${validationStats.estimatedInputTokens} output_tokens=${validationStats.estimatedOutputTokens} cost=$${validationStats.estimatedCost.toFixed(4)} api_calls=${validationStats.apiCalls}`)
362
+ log(`[AI Validation] repo=${repoInfo.name} depth=${depth} candidates=${findingsToValidate.length} capped_from=${requiresValidation.length} auto_dismissed=${autoDismissed.length} kept=${validationStats.confirmedFindings} rejected=${validationStats.dismissedFindings} downgraded=${validationStats.downgradedFindings}`)
363
+ log(`[AI Validation] cost_estimate: input_tokens=${validationStats.estimatedInputTokens} output_tokens=${validationStats.estimatedOutputTokens} cost=$${validationStats.estimatedCost.toFixed(4)} api_calls=${validationStats.apiCalls}`)
354
364
 
355
365
  // Add back findings that weren't validated (not in changed files)
356
366
  const notValidated = cappedValidation.filter(v => !findingsToValidate.includes(v))
357
367
  validatedFindings.push(...notValidated)
358
368
  }
359
369
  } else if (scanModeConfig.skipAIValidation) {
360
- console.log(`[AI Validation] repo=${repoInfo.name} depth=${depth} skipped=true reason=scan_mode_config`)
370
+ log(`[AI Validation] repo=${repoInfo.name} depth=${depth} skipped=true reason=scan_mode_config`)
361
371
  }
362
372
 
363
373
  // Combine validated and non-validated findings
@@ -393,9 +403,9 @@ export async function runScan(
393
403
  },
394
404
  })
395
405
  allVulnerabilities.push(...layer3Result.vulnerabilities)
396
- console.log(`[Layer3] repo=${repoInfo.name} depth=${depth} files_analyzed=${layer3Result.aiAnalyzed} findings=${layer3Result.vulnerabilities.length}`)
406
+ log(`[Layer3] repo=${repoInfo.name} depth=${depth} files_analyzed=${layer3Result.aiAnalyzed} findings=${layer3Result.vulnerabilities.length}`)
397
407
  } else if (scanModeConfig.skipLayer3) {
398
- console.log(`[Layer3] repo=${repoInfo.name} depth=${depth} skipped=true reason=scan_mode_config`)
408
+ log(`[Layer3] repo=${repoInfo.name} depth=${depth} skipped=true reason=scan_mode_config`)
399
409
  }
400
410
 
401
411
  // Deduplicate vulnerabilities
@@ -553,9 +563,7 @@ function capValidationCandidatesPerFile(
553
563
  const capped = sorted.slice(0, maxPerFile)
554
564
  result.push(...capped)
555
565
 
556
- if (sorted.length > maxPerFile) {
557
- console.log(`[Scanner] Capped ${filePath}: ${sorted.length} → ${maxPerFile} validation candidates`)
558
- }
566
+ // Note: Capping log removed to support quiet mode - this is debug info only
559
567
  }
560
568
 
561
569
  return result
@@ -683,7 +691,7 @@ function resolveContradictions(
683
691
  : isClientCallingProtectedAPI
684
692
  ? 'client component calling protected API'
685
693
  : 'route is protected'
686
- console.log(`[Contradiction] Dropping "${vuln.title}" (${vuln.severity}) - ${reason}`)
694
+ // Note: Contradiction log removed to support quiet mode - this is debug info only
687
695
  continue // Skip this finding
688
696
  }
689
697
 
@@ -5,6 +5,7 @@
5
5
  */
6
6
 
7
7
  import type { Vulnerability, ScanFile } from '../types'
8
+ import type { ProgressCallback } from '../index'
8
9
  import { detectHighEntropyStrings } from './entropy'
9
10
  import { detectKnownPatterns } from './patterns'
10
11
  import { auditConfiguration } from './config-audit'
@@ -46,12 +47,12 @@ export interface Layer1Result {
46
47
  stats: Layer1Stats
47
48
  }
48
49
 
49
- export async function runLayer1Scan(files: ScanFile[]): Promise<Layer1Result> {
50
- const startTime = Date.now()
51
- const vulnerabilities: Vulnerability[] = []
52
-
53
- // Track raw counts per detector (before dedupe)
54
- const rawStats: Record<Layer1DetectorName, number> = {
50
+ // Process a single file through all Layer 1 detectors
51
+ function processFileLayer1(file: ScanFile): {
52
+ findings: Vulnerability[],
53
+ stats: Record<Layer1DetectorName, number>
54
+ } {
55
+ const stats: Record<Layer1DetectorName, number> = {
55
56
  known_secrets: 0,
56
57
  weak_crypto: 0,
57
58
  sensitive_urls: 0,
@@ -61,33 +62,78 @@ export async function runLayer1Scan(files: ScanFile[]): Promise<Layer1Result> {
61
62
  ai_comments: 0,
62
63
  }
63
64
 
64
- for (const file of files) {
65
- // Run all Layer 1 detectors and track raw counts
66
- const entropyFindings = detectHighEntropyStrings(file.content, file.path)
67
- const patternFindings = detectKnownPatterns(file.content, file.path)
68
- const configFindings = auditConfiguration(file.content, file.path)
69
- const fileFlags = detectDangerousFiles(file.content, file.path)
70
- const commentFindings = detectAICommentPatterns(file.content, file.path)
71
- const urlFindings = detectSensitiveURLs(file.content, file.path)
72
- const cryptoFindings = detectWeakCrypto(file.content, file.path)
73
-
74
- rawStats.entropy += entropyFindings.length
75
- rawStats.known_secrets += patternFindings.length
76
- rawStats.config_audit += configFindings.length
77
- rawStats.file_flags += fileFlags.length
78
- rawStats.ai_comments += commentFindings.length
79
- rawStats.sensitive_urls += urlFindings.length
80
- rawStats.weak_crypto += cryptoFindings.length
81
-
82
- vulnerabilities.push(
65
+ const entropyFindings = detectHighEntropyStrings(file.content, file.path)
66
+ const patternFindings = detectKnownPatterns(file.content, file.path)
67
+ const configFindings = auditConfiguration(file.content, file.path)
68
+ const fileFlags = detectDangerousFiles(file.content, file.path)
69
+ const commentFindings = detectAICommentPatterns(file.content, file.path)
70
+ const urlFindings = detectSensitiveURLs(file.content, file.path)
71
+ const cryptoFindings = detectWeakCrypto(file.content, file.path)
72
+
73
+ stats.entropy = entropyFindings.length
74
+ stats.known_secrets = patternFindings.length
75
+ stats.config_audit = configFindings.length
76
+ stats.file_flags = fileFlags.length
77
+ stats.ai_comments = commentFindings.length
78
+ stats.sensitive_urls = urlFindings.length
79
+ stats.weak_crypto = cryptoFindings.length
80
+
81
+ return {
82
+ findings: [
83
83
  ...entropyFindings,
84
84
  ...patternFindings,
85
85
  ...configFindings,
86
86
  ...fileFlags,
87
87
  ...commentFindings,
88
88
  ...urlFindings,
89
- ...cryptoFindings
90
- )
89
+ ...cryptoFindings,
90
+ ],
91
+ stats,
92
+ }
93
+ }
94
+
95
+ // Parallel batch size for Layer 1 processing
96
+ const LAYER1_PARALLEL_BATCH_SIZE = 50
97
+
98
+ export async function runLayer1Scan(files: ScanFile[], onProgress?: ProgressCallback): Promise<Layer1Result> {
99
+ const startTime = Date.now()
100
+ const vulnerabilities: Vulnerability[] = []
101
+
102
+ // Track raw counts per detector (before dedupe)
103
+ const rawStats: Record<Layer1DetectorName, number> = {
104
+ known_secrets: 0,
105
+ weak_crypto: 0,
106
+ sensitive_urls: 0,
107
+ entropy: 0,
108
+ config_audit: 0,
109
+ file_flags: 0,
110
+ ai_comments: 0,
111
+ }
112
+
113
+ // Process files in parallel batches for better performance on large codebases
114
+ for (let i = 0; i < files.length; i += LAYER1_PARALLEL_BATCH_SIZE) {
115
+ const batch = files.slice(i, i + LAYER1_PARALLEL_BATCH_SIZE)
116
+ const results = await Promise.all(batch.map(file => Promise.resolve(processFileLayer1(file))))
117
+
118
+ for (const result of results) {
119
+ vulnerabilities.push(...result.findings)
120
+ // Accumulate stats
121
+ for (const [key, value] of Object.entries(result.stats)) {
122
+ rawStats[key as Layer1DetectorName] += value
123
+ }
124
+ }
125
+
126
+ // Report progress after each batch
127
+ if (onProgress) {
128
+ const filesProcessed = Math.min(i + LAYER1_PARALLEL_BATCH_SIZE, files.length)
129
+ onProgress({
130
+ status: 'layer1',
131
+ message: 'Running surface scan (patterns, entropy, config)...',
132
+ filesProcessed,
133
+ totalFiles: files.length,
134
+ vulnerabilitiesFound: vulnerabilities.length,
135
+ })
136
+ }
91
137
  }
92
138
 
93
139
  // Deduplicate findings (same line might be caught by multiple detectors)
@@ -96,18 +142,7 @@ export async function runLayer1Scan(files: ScanFile[]): Promise<Layer1Result> {
96
142
  // Apply path exclusions to filter out findings in test/seed/example files
97
143
  const { kept: uniqueVulnerabilities, suppressed } = filterFindingsByPath(dedupedVulnerabilities)
98
144
 
99
- // Log suppressed findings
100
- if (suppressed.length > 0) {
101
- const byReason: Record<string, number> = {}
102
- for (const s of suppressed) {
103
- const reason = s.reason || 'unknown'
104
- byReason[reason] = (byReason[reason] || 0) + 1
105
- }
106
- console.log(`[Layer 1] Suppressed ${suppressed.length} findings in test/seed/example files:`)
107
- for (const [reason, count] of Object.entries(byReason)) {
108
- console.log(` - ${reason}: ${count}`)
109
- }
110
- }
145
+ // Track suppressed findings (debug info available in stats)
111
146
 
112
147
  // Compute deduped counts per category
113
148
  const dedupedStats: Record<string, number> = {}
@@ -121,15 +156,7 @@ export async function runLayer1Scan(files: ScanFile[]): Promise<Layer1Result> {
121
156
  uniqueVulnerabilities.map(v => ({ category: v.category, layer: 1 as const }))
122
157
  )
123
158
 
124
- // Log heuristic breakdown with tier info
125
- console.log('[Layer 1] Heuristic breakdown (raw findings before dedupe):')
126
- for (const [name, count] of Object.entries(rawStats)) {
127
- if (count > 0) {
128
- const tier = getLayer1DetectorTier(name as Layer1DetectorName)
129
- console.log(` - ${name}: ${count} (${tier})`)
130
- }
131
- }
132
- console.log(`[Layer 1] Tier breakdown (after dedupe): ${formatTierStats(tierStats)}`)
159
+ // Heuristic breakdown available in stats.raw and stats.tiers for debugging
133
160
 
134
161
  return {
135
162
  vulnerabilities: uniqueVulnerabilities,
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { Vulnerability, ScanFile } from '../types'
9
+ import type { ProgressCallback } from '../index'
9
10
  import type { MiddlewareAuthConfig } from '../utils/middleware-detector'
10
11
  import { detectAuthHelpers, type AuthHelperContext } from '../utils/auth-helper-detector'
11
12
  import type { FileAuthImports } from '../utils/imported-auth-detector'
@@ -72,13 +73,127 @@ export interface Layer2Result {
72
73
  stats: Layer2Stats
73
74
  }
74
75
 
76
+ // Layer 2 detector stats type
77
+ type Layer2DetectorStats = {
78
+ variables: number
79
+ logicGates: number
80
+ dangerousFunctions: number
81
+ riskyImports: number
82
+ authAntipatterns: number
83
+ frameworkIssues: number
84
+ aiFingerprints: number
85
+ dataExposure: number
86
+ byokPatterns: number
87
+ promptHygiene: number
88
+ executionSinks: number
89
+ agentTools: number
90
+ ragSafety: number
91
+ endpointProtection: number
92
+ schemaValidation: number
93
+ }
94
+
95
+ // Process a single file through all Layer 2 detectors
96
+ function processFileLayer2(
97
+ file: ScanFile,
98
+ options: Layer2Options,
99
+ authHelperContext: ReturnType<typeof detectAuthHelpers>
100
+ ): { findings: Vulnerability[], stats: Layer2DetectorStats } {
101
+ const stats: Layer2DetectorStats = {
102
+ variables: 0,
103
+ logicGates: 0,
104
+ dangerousFunctions: 0,
105
+ riskyImports: 0,
106
+ authAntipatterns: 0,
107
+ frameworkIssues: 0,
108
+ aiFingerprints: 0,
109
+ dataExposure: 0,
110
+ byokPatterns: 0,
111
+ promptHygiene: 0,
112
+ executionSinks: 0,
113
+ agentTools: 0,
114
+ ragSafety: 0,
115
+ endpointProtection: 0,
116
+ schemaValidation: 0,
117
+ }
118
+
119
+ // Skip non-code files
120
+ if (!isCodeFile(file.path)) {
121
+ return { findings: [], stats }
122
+ }
123
+
124
+ // Run all detectors
125
+ const variableFindings = detectSensitiveVariables(file.content, file.path)
126
+ const logicFindings = detectLogicGates(file.content, file.path)
127
+ const dangerousFuncFindings = detectDangerousFunctions(file.content, file.path)
128
+ const riskyImportFindings = detectRiskyImports(file.content, file.path)
129
+ const authFindings = detectAuthAntipatterns(file.content, file.path, {
130
+ middlewareConfig: options.middlewareConfig,
131
+ authHelpers: authHelperContext,
132
+ fileAuthImports: options.fileAuthImports,
133
+ })
134
+ const frameworkFindings = detectFrameworkIssues(file.content, file.path)
135
+ const aiFindings = detectAIFingerprints(file.content, file.path)
136
+ const dataExposureFindings = detectDataExposure(file.content, file.path)
137
+ const byokFindings = detectBYOKPatterns(file.content, file.path, options.middlewareConfig)
138
+ const promptHygieneFindings = detectAIPromptHygiene(file.content, file.path)
139
+ const executionSinkFindings = detectAIExecutionSinks(file.content, file.path)
140
+ const agentToolFindings = detectAIAgentTools(file.content, file.path)
141
+ const ragSafetyFindings = detectRAGSafetyIssues(file.content, file.path)
142
+ const endpointProtectionFindings = detectAIEndpointProtection(file.content, file.path, {
143
+ middlewareConfig: options.middlewareConfig,
144
+ })
145
+ const schemaValidationFindings = detectAISchemaValidation(file.content, file.path)
146
+
147
+ // Update stats
148
+ stats.variables = variableFindings.length
149
+ stats.logicGates = logicFindings.length
150
+ stats.dangerousFunctions = dangerousFuncFindings.length
151
+ stats.riskyImports = riskyImportFindings.length
152
+ stats.authAntipatterns = authFindings.length
153
+ stats.frameworkIssues = frameworkFindings.length
154
+ stats.aiFingerprints = aiFindings.length
155
+ stats.dataExposure = dataExposureFindings.length
156
+ stats.byokPatterns = byokFindings.length
157
+ stats.promptHygiene = promptHygieneFindings.length
158
+ stats.executionSinks = executionSinkFindings.length
159
+ stats.agentTools = agentToolFindings.length
160
+ stats.ragSafety = ragSafetyFindings.length
161
+ stats.endpointProtection = endpointProtectionFindings.length
162
+ stats.schemaValidation = schemaValidationFindings.length
163
+
164
+ return {
165
+ findings: [
166
+ ...variableFindings,
167
+ ...logicFindings,
168
+ ...dangerousFuncFindings,
169
+ ...riskyImportFindings,
170
+ ...authFindings,
171
+ ...frameworkFindings,
172
+ ...aiFindings,
173
+ ...dataExposureFindings,
174
+ ...byokFindings,
175
+ ...promptHygieneFindings,
176
+ ...executionSinkFindings,
177
+ ...agentToolFindings,
178
+ ...ragSafetyFindings,
179
+ ...endpointProtectionFindings,
180
+ ...schemaValidationFindings,
181
+ ],
182
+ stats,
183
+ }
184
+ }
185
+
186
+ // Parallel batch size for Layer 2 processing
187
+ const LAYER2_PARALLEL_BATCH_SIZE = 50
188
+
75
189
  export async function runLayer2Scan(
76
190
  files: ScanFile[],
77
- options: Layer2Options = {}
191
+ options: Layer2Options = {},
192
+ onProgress?: ProgressCallback
78
193
  ): Promise<Layer2Result> {
79
194
  const startTime = Date.now()
80
195
  const vulnerabilities: Vulnerability[] = []
81
- const stats = {
196
+ const stats: Layer2DetectorStats = {
82
197
  variables: 0,
83
198
  logicGates: 0,
84
199
  dangerousFunctions: 0,
@@ -91,7 +206,6 @@ export async function runLayer2Scan(
91
206
  promptHygiene: 0,
92
207
  executionSinks: 0,
93
208
  agentTools: 0,
94
- // M5: New AI-era detectors
95
209
  ragSafety: 0,
96
210
  endpointProtection: 0,
97
211
  schemaValidation: 0,
@@ -100,71 +214,31 @@ export async function runLayer2Scan(
100
214
  // Detect auth helpers once for all files (if not already provided)
101
215
  const authHelperContext = options.authHelperContext || detectAuthHelpers(files)
102
216
 
103
- for (const file of files) {
104
- // Only scan code files for Layer 2 (skip configs, etc.)
105
- if (isCodeFile(file.path)) {
106
- // Existing scanners
107
- const variableFindings = detectSensitiveVariables(file.content, file.path)
108
- const logicFindings = detectLogicGates(file.content, file.path)
109
-
110
- // New Layer 2 scanners
111
- const dangerousFuncFindings = detectDangerousFunctions(file.content, file.path)
112
- const riskyImportFindings = detectRiskyImports(file.content, file.path)
113
- const authFindings = detectAuthAntipatterns(file.content, file.path, {
114
- middlewareConfig: options.middlewareConfig,
115
- authHelpers: authHelperContext,
116
- fileAuthImports: options.fileAuthImports,
117
- })
118
- const frameworkFindings = detectFrameworkIssues(file.content, file.path)
119
- const aiFindings = detectAIFingerprints(file.content, file.path)
120
- const dataExposureFindings = detectDataExposure(file.content, file.path)
121
- const byokFindings = detectBYOKPatterns(file.content, file.path, options.middlewareConfig)
122
-
123
- // Story B: AI-specific detection (prompt hygiene, execution sinks, agent tools)
124
- const promptHygieneFindings = detectAIPromptHygiene(file.content, file.path)
125
- const executionSinkFindings = detectAIExecutionSinks(file.content, file.path)
126
- const agentToolFindings = detectAIAgentTools(file.content, file.path)
127
-
128
- // M5: New AI-era detectors
129
- const ragSafetyFindings = detectRAGSafetyIssues(file.content, file.path)
130
- const endpointProtectionFindings = detectAIEndpointProtection(file.content, file.path, {
131
- middlewareConfig: options.middlewareConfig,
217
+ // Process files in parallel batches for better performance on large codebases
218
+ for (let i = 0; i < files.length; i += LAYER2_PARALLEL_BATCH_SIZE) {
219
+ const batch = files.slice(i, i + LAYER2_PARALLEL_BATCH_SIZE)
220
+ const results = await Promise.all(
221
+ batch.map(file => Promise.resolve(processFileLayer2(file, options, authHelperContext)))
222
+ )
223
+
224
+ for (const result of results) {
225
+ vulnerabilities.push(...result.findings)
226
+ // Accumulate stats
227
+ for (const [key, value] of Object.entries(result.stats)) {
228
+ stats[key as keyof Layer2DetectorStats] += value
229
+ }
230
+ }
231
+
232
+ // Report progress after each batch
233
+ if (onProgress) {
234
+ const filesProcessed = Math.min(i + LAYER2_PARALLEL_BATCH_SIZE, files.length)
235
+ onProgress({
236
+ status: 'layer2',
237
+ message: 'Running structural scan (variables, logic gates)...',
238
+ filesProcessed,
239
+ totalFiles: files.length,
240
+ vulnerabilitiesFound: vulnerabilities.length,
132
241
  })
133
- const schemaValidationFindings = detectAISchemaValidation(file.content, file.path)
134
-
135
- stats.variables += variableFindings.length
136
- stats.logicGates += logicFindings.length
137
- stats.dangerousFunctions += dangerousFuncFindings.length
138
- stats.riskyImports += riskyImportFindings.length
139
- stats.authAntipatterns += authFindings.length
140
- stats.frameworkIssues += frameworkFindings.length
141
- stats.aiFingerprints += aiFindings.length
142
- stats.dataExposure += dataExposureFindings.length
143
- stats.byokPatterns += byokFindings.length
144
- stats.promptHygiene += promptHygieneFindings.length
145
- stats.executionSinks += executionSinkFindings.length
146
- stats.agentTools += agentToolFindings.length
147
- stats.ragSafety += ragSafetyFindings.length
148
- stats.endpointProtection += endpointProtectionFindings.length
149
- stats.schemaValidation += schemaValidationFindings.length
150
-
151
- vulnerabilities.push(
152
- ...variableFindings,
153
- ...logicFindings,
154
- ...dangerousFuncFindings,
155
- ...riskyImportFindings,
156
- ...authFindings,
157
- ...frameworkFindings,
158
- ...aiFindings,
159
- ...dataExposureFindings,
160
- ...byokFindings,
161
- ...promptHygieneFindings,
162
- ...executionSinkFindings,
163
- ...agentToolFindings,
164
- ...ragSafetyFindings,
165
- ...endpointProtectionFindings,
166
- ...schemaValidationFindings
167
- )
168
242
  }
169
243
  }
170
244
 
@@ -197,17 +271,7 @@ export async function runLayer2Scan(
197
271
  Object.keys(exclusionConfig).length > 0 ? exclusionConfig : undefined
198
272
  )
199
273
 
200
- // Log suppressed findings
201
- if (suppressed.length > 0) {
202
- console.log(`[Layer 2] Suppressed ${suppressed.length} findings in test/seed/example files:`)
203
- const byReason = new Map<string, number>()
204
- for (const { reason } of suppressed) {
205
- byReason.set(reason || 'unknown', (byReason.get(reason || 'unknown') || 0) + 1)
206
- }
207
- for (const [reason, count] of byReason) {
208
- console.log(` - ${reason}: ${count}`)
209
- }
210
- }
274
+ // Track suppressed findings (debug info available in stats)
211
275
 
212
276
  // Build raw stats map for logging
213
277
  const rawStats: Record<string, number> = {
@@ -267,16 +331,7 @@ export async function runLayer2Scan(
267
331
  ai_schema_validation: 'ai_schema_validation',
268
332
  }
269
333
 
270
- // Log heuristic breakdown (raw findings before dedupe) with tier info
271
- console.log('[Layer 2] Heuristic breakdown (raw findings before dedupe):')
272
- for (const [name, count] of Object.entries(rawStats)) {
273
- if (count > 0) {
274
- const detectorName = detectorNameMap[name]
275
- const tier = detectorName ? getLayer2DetectorTier(detectorName) : 'unknown'
276
- console.log(` - ${name}: ${count} (${tier})`)
277
- }
278
- }
279
- console.log(`[Layer 2] Tier breakdown (after dedupe): ${formatTierStats(tierStats)}`)
334
+ // Heuristic breakdown available in stats.raw and stats.tiers for debugging
280
335
 
281
336
  return {
282
337
  vulnerabilities: uniqueVulnerabilities,
@@ -68,11 +68,12 @@ export interface AIValidationResult {
68
68
 
69
69
  // Number of files to include in each API call (Phase 2 optimization)
70
70
  // Batching multiple files reduces API overhead and leverages prompt caching better
71
- const FILES_PER_API_BATCH = 5
71
+ const FILES_PER_API_BATCH = 8
72
72
 
73
73
  // Number of API batches to process in parallel (Phase 3 optimization)
74
- // Higher values = faster scans but more API load; OpenAI handles this well
75
- const PARALLEL_API_BATCHES = 4
74
+ // Higher values = faster scans but more API load; OpenAI/GPT-5-mini handles this well
75
+ // Increased from 4 to 6 for better throughput on large codebases
76
+ const PARALLEL_API_BATCHES = 6
76
77
 
77
78
  // Initialize Anthropic client
78
79
  function getAnthropicClient(): Anthropic {