@oculum/scanner 1.0.0 → 1.0.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -72,13 +72,126 @@ export interface Layer2Result {
72
72
  stats: Layer2Stats
73
73
  }
74
74
 
75
+ // Layer 2 detector stats type
76
+ type Layer2DetectorStats = {
77
+ variables: number
78
+ logicGates: number
79
+ dangerousFunctions: number
80
+ riskyImports: number
81
+ authAntipatterns: number
82
+ frameworkIssues: number
83
+ aiFingerprints: number
84
+ dataExposure: number
85
+ byokPatterns: number
86
+ promptHygiene: number
87
+ executionSinks: number
88
+ agentTools: number
89
+ ragSafety: number
90
+ endpointProtection: number
91
+ schemaValidation: number
92
+ }
93
+
94
+ // Process a single file through all Layer 2 detectors
95
+ function processFileLayer2(
96
+ file: ScanFile,
97
+ options: Layer2Options,
98
+ authHelperContext: ReturnType<typeof detectAuthHelpers>
99
+ ): { findings: Vulnerability[], stats: Layer2DetectorStats } {
100
+ const stats: Layer2DetectorStats = {
101
+ variables: 0,
102
+ logicGates: 0,
103
+ dangerousFunctions: 0,
104
+ riskyImports: 0,
105
+ authAntipatterns: 0,
106
+ frameworkIssues: 0,
107
+ aiFingerprints: 0,
108
+ dataExposure: 0,
109
+ byokPatterns: 0,
110
+ promptHygiene: 0,
111
+ executionSinks: 0,
112
+ agentTools: 0,
113
+ ragSafety: 0,
114
+ endpointProtection: 0,
115
+ schemaValidation: 0,
116
+ }
117
+
118
+ // Skip non-code files
119
+ if (!isCodeFile(file.path)) {
120
+ return { findings: [], stats }
121
+ }
122
+
123
+ // Run all detectors
124
+ const variableFindings = detectSensitiveVariables(file.content, file.path)
125
+ const logicFindings = detectLogicGates(file.content, file.path)
126
+ const dangerousFuncFindings = detectDangerousFunctions(file.content, file.path)
127
+ const riskyImportFindings = detectRiskyImports(file.content, file.path)
128
+ const authFindings = detectAuthAntipatterns(file.content, file.path, {
129
+ middlewareConfig: options.middlewareConfig,
130
+ authHelpers: authHelperContext,
131
+ fileAuthImports: options.fileAuthImports,
132
+ })
133
+ const frameworkFindings = detectFrameworkIssues(file.content, file.path)
134
+ const aiFindings = detectAIFingerprints(file.content, file.path)
135
+ const dataExposureFindings = detectDataExposure(file.content, file.path)
136
+ const byokFindings = detectBYOKPatterns(file.content, file.path, options.middlewareConfig)
137
+ const promptHygieneFindings = detectAIPromptHygiene(file.content, file.path)
138
+ const executionSinkFindings = detectAIExecutionSinks(file.content, file.path)
139
+ const agentToolFindings = detectAIAgentTools(file.content, file.path)
140
+ const ragSafetyFindings = detectRAGSafetyIssues(file.content, file.path)
141
+ const endpointProtectionFindings = detectAIEndpointProtection(file.content, file.path, {
142
+ middlewareConfig: options.middlewareConfig,
143
+ })
144
+ const schemaValidationFindings = detectAISchemaValidation(file.content, file.path)
145
+
146
+ // Update stats
147
+ stats.variables = variableFindings.length
148
+ stats.logicGates = logicFindings.length
149
+ stats.dangerousFunctions = dangerousFuncFindings.length
150
+ stats.riskyImports = riskyImportFindings.length
151
+ stats.authAntipatterns = authFindings.length
152
+ stats.frameworkIssues = frameworkFindings.length
153
+ stats.aiFingerprints = aiFindings.length
154
+ stats.dataExposure = dataExposureFindings.length
155
+ stats.byokPatterns = byokFindings.length
156
+ stats.promptHygiene = promptHygieneFindings.length
157
+ stats.executionSinks = executionSinkFindings.length
158
+ stats.agentTools = agentToolFindings.length
159
+ stats.ragSafety = ragSafetyFindings.length
160
+ stats.endpointProtection = endpointProtectionFindings.length
161
+ stats.schemaValidation = schemaValidationFindings.length
162
+
163
+ return {
164
+ findings: [
165
+ ...variableFindings,
166
+ ...logicFindings,
167
+ ...dangerousFuncFindings,
168
+ ...riskyImportFindings,
169
+ ...authFindings,
170
+ ...frameworkFindings,
171
+ ...aiFindings,
172
+ ...dataExposureFindings,
173
+ ...byokFindings,
174
+ ...promptHygieneFindings,
175
+ ...executionSinkFindings,
176
+ ...agentToolFindings,
177
+ ...ragSafetyFindings,
178
+ ...endpointProtectionFindings,
179
+ ...schemaValidationFindings,
180
+ ],
181
+ stats,
182
+ }
183
+ }
184
+
185
+ // Parallel batch size for Layer 2 processing
186
+ const LAYER2_PARALLEL_BATCH_SIZE = 50
187
+
75
188
  export async function runLayer2Scan(
76
189
  files: ScanFile[],
77
190
  options: Layer2Options = {}
78
191
  ): Promise<Layer2Result> {
79
192
  const startTime = Date.now()
80
193
  const vulnerabilities: Vulnerability[] = []
81
- const stats = {
194
+ const stats: Layer2DetectorStats = {
82
195
  variables: 0,
83
196
  logicGates: 0,
84
197
  dangerousFunctions: 0,
@@ -91,7 +204,6 @@ export async function runLayer2Scan(
91
204
  promptHygiene: 0,
92
205
  executionSinks: 0,
93
206
  agentTools: 0,
94
- // M5: New AI-era detectors
95
207
  ragSafety: 0,
96
208
  endpointProtection: 0,
97
209
  schemaValidation: 0,
@@ -100,71 +212,19 @@ export async function runLayer2Scan(
100
212
  // Detect auth helpers once for all files (if not already provided)
101
213
  const authHelperContext = options.authHelperContext || detectAuthHelpers(files)
102
214
 
103
- for (const file of files) {
104
- // Only scan code files for Layer 2 (skip configs, etc.)
105
- if (isCodeFile(file.path)) {
106
- // Existing scanners
107
- const variableFindings = detectSensitiveVariables(file.content, file.path)
108
- const logicFindings = detectLogicGates(file.content, file.path)
109
-
110
- // New Layer 2 scanners
111
- const dangerousFuncFindings = detectDangerousFunctions(file.content, file.path)
112
- const riskyImportFindings = detectRiskyImports(file.content, file.path)
113
- const authFindings = detectAuthAntipatterns(file.content, file.path, {
114
- middlewareConfig: options.middlewareConfig,
115
- authHelpers: authHelperContext,
116
- fileAuthImports: options.fileAuthImports,
117
- })
118
- const frameworkFindings = detectFrameworkIssues(file.content, file.path)
119
- const aiFindings = detectAIFingerprints(file.content, file.path)
120
- const dataExposureFindings = detectDataExposure(file.content, file.path)
121
- const byokFindings = detectBYOKPatterns(file.content, file.path, options.middlewareConfig)
122
-
123
- // Story B: AI-specific detection (prompt hygiene, execution sinks, agent tools)
124
- const promptHygieneFindings = detectAIPromptHygiene(file.content, file.path)
125
- const executionSinkFindings = detectAIExecutionSinks(file.content, file.path)
126
- const agentToolFindings = detectAIAgentTools(file.content, file.path)
127
-
128
- // M5: New AI-era detectors
129
- const ragSafetyFindings = detectRAGSafetyIssues(file.content, file.path)
130
- const endpointProtectionFindings = detectAIEndpointProtection(file.content, file.path, {
131
- middlewareConfig: options.middlewareConfig,
132
- })
133
- const schemaValidationFindings = detectAISchemaValidation(file.content, file.path)
134
-
135
- stats.variables += variableFindings.length
136
- stats.logicGates += logicFindings.length
137
- stats.dangerousFunctions += dangerousFuncFindings.length
138
- stats.riskyImports += riskyImportFindings.length
139
- stats.authAntipatterns += authFindings.length
140
- stats.frameworkIssues += frameworkFindings.length
141
- stats.aiFingerprints += aiFindings.length
142
- stats.dataExposure += dataExposureFindings.length
143
- stats.byokPatterns += byokFindings.length
144
- stats.promptHygiene += promptHygieneFindings.length
145
- stats.executionSinks += executionSinkFindings.length
146
- stats.agentTools += agentToolFindings.length
147
- stats.ragSafety += ragSafetyFindings.length
148
- stats.endpointProtection += endpointProtectionFindings.length
149
- stats.schemaValidation += schemaValidationFindings.length
150
-
151
- vulnerabilities.push(
152
- ...variableFindings,
153
- ...logicFindings,
154
- ...dangerousFuncFindings,
155
- ...riskyImportFindings,
156
- ...authFindings,
157
- ...frameworkFindings,
158
- ...aiFindings,
159
- ...dataExposureFindings,
160
- ...byokFindings,
161
- ...promptHygieneFindings,
162
- ...executionSinkFindings,
163
- ...agentToolFindings,
164
- ...ragSafetyFindings,
165
- ...endpointProtectionFindings,
166
- ...schemaValidationFindings
167
- )
215
+ // Process files in parallel batches for better performance on large codebases
216
+ for (let i = 0; i < files.length; i += LAYER2_PARALLEL_BATCH_SIZE) {
217
+ const batch = files.slice(i, i + LAYER2_PARALLEL_BATCH_SIZE)
218
+ const results = await Promise.all(
219
+ batch.map(file => Promise.resolve(processFileLayer2(file, options, authHelperContext)))
220
+ )
221
+
222
+ for (const result of results) {
223
+ vulnerabilities.push(...result.findings)
224
+ // Accumulate stats
225
+ for (const [key, value] of Object.entries(result.stats)) {
226
+ stats[key as keyof Layer2DetectorStats] += value
227
+ }
168
228
  }
169
229
  }
170
230
 
@@ -197,17 +257,7 @@ export async function runLayer2Scan(
197
257
  Object.keys(exclusionConfig).length > 0 ? exclusionConfig : undefined
198
258
  )
199
259
 
200
- // Log suppressed findings
201
- if (suppressed.length > 0) {
202
- console.log(`[Layer 2] Suppressed ${suppressed.length} findings in test/seed/example files:`)
203
- const byReason = new Map<string, number>()
204
- for (const { reason } of suppressed) {
205
- byReason.set(reason || 'unknown', (byReason.get(reason || 'unknown') || 0) + 1)
206
- }
207
- for (const [reason, count] of byReason) {
208
- console.log(` - ${reason}: ${count}`)
209
- }
210
- }
260
+ // Track suppressed findings (debug info available in stats)
211
261
 
212
262
  // Build raw stats map for logging
213
263
  const rawStats: Record<string, number> = {
@@ -267,16 +317,7 @@ export async function runLayer2Scan(
267
317
  ai_schema_validation: 'ai_schema_validation',
268
318
  }
269
319
 
270
- // Log heuristic breakdown (raw findings before dedupe) with tier info
271
- console.log('[Layer 2] Heuristic breakdown (raw findings before dedupe):')
272
- for (const [name, count] of Object.entries(rawStats)) {
273
- if (count > 0) {
274
- const detectorName = detectorNameMap[name]
275
- const tier = detectorName ? getLayer2DetectorTier(detectorName) : 'unknown'
276
- console.log(` - ${name}: ${count} (${tier})`)
277
- }
278
- }
279
- console.log(`[Layer 2] Tier breakdown (after dedupe): ${formatTierStats(tierStats)}`)
320
+ // Heuristic breakdown available in stats.raw and stats.tiers for debugging
280
321
 
281
322
  return {
282
323
  vulnerabilities: uniqueVulnerabilities,
@@ -68,11 +68,12 @@ export interface AIValidationResult {
68
68
 
69
69
  // Number of files to include in each API call (Phase 2 optimization)
70
70
  // Batching multiple files reduces API overhead and leverages prompt caching better
71
- const FILES_PER_API_BATCH = 5
71
+ const FILES_PER_API_BATCH = 8
72
72
 
73
73
  // Number of API batches to process in parallel (Phase 3 optimization)
74
- // Higher values = faster scans but more API load; OpenAI handles this well
75
- const PARALLEL_API_BATCHES = 4
74
+ // Higher values = faster scans but more API load; OpenAI/GPT-5-mini handles this well
75
+ // Increased from 4 to 6 for better throughput on large codebases
76
+ const PARALLEL_API_BATCHES = 6
76
77
 
77
78
  // Initialize Anthropic client
78
79
  function getAnthropicClient(): Anthropic {