@oculum/scanner 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -825,8 +825,7 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
825
825
 
826
826
  // Context indicators of cosmetic use
827
827
  const cosmeticContextPatterns = [
828
- // UI component files
829
- /\/(components?|ui|widgets?|animations?|contexts?)\//i,
828
+ // UI component files - REMOVED, let severity classification handle these
830
829
  // Style-related variables/functions
831
830
  /\b(style|styles|css|className|animation|transition)/i,
832
831
  /\b(width|height|opacity|color|transform|rotate|scale|translate)/i,
@@ -840,11 +839,8 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
840
839
  /duration.*Math\.random/i,
841
840
  // UI state variations
842
841
  /\b(variant|theme|layout|position).*Math\.random/i,
843
- // UI identifier variable names (toast, notification, element, component IDs)
844
- /\b(toast|notification|element|component|widget|modal|dialog|popup).*id\b/i,
845
- /\bid\s*=.*Math\.random/i,
846
- /\bkey\s*=.*Math\.random/i, // React keys
847
- /\btempId|temporaryId|uniqueId\b/i,
842
+ // NOTE: Removed UI identifier patterns (key, id, tempId, etc.) - these should be
843
+ // classified with info/low severity by the severity classification logic, not skipped entirely
848
844
  ]
849
845
 
850
846
  if (cosmeticContextPatterns.some(p => p.test(context))) {
@@ -885,12 +881,20 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
885
881
  */
886
882
  function extractFunctionContext(content: string, lineNumber: number): string | null {
887
883
  const lines = content.split('\n')
888
- const start = Math.max(0, lineNumber - 10)
884
+ const start = Math.max(0, lineNumber - 20) // Increased from 10 to 20 for nested callbacks
889
885
 
890
886
  // Look backwards for function declaration
891
887
  for (let i = lineNumber; i >= start; i--) {
892
888
  const line = lines[i]
893
889
 
890
+ // Skip anonymous arrow functions in callbacks (e.g., .map((x) => ...), .replace(/x/g, (c) => ...))
891
+ // These are not the function context we're looking for
892
+ // Look for pattern: .methodName(..., (param) => or .methodName(...(param) =>
893
+ const hasMethodCallWithArrowCallback = /\.\w+\(.*\([^)]*\)\s*=>/.test(line)
894
+ if (hasMethodCallWithArrowCallback) {
895
+ continue // Skip this line and keep looking
896
+ }
897
+
894
898
  // Match various function declaration patterns
895
899
  // 1. function functionName
896
900
  // 2. export function functionName
@@ -898,8 +902,9 @@ function extractFunctionContext(content: string, lineNumber: number): string | n
898
902
  // 4. const/let functionName = (arrow function)
899
903
  // 5. export const functionName =
900
904
 
901
- // Traditional function declaration
902
- const funcDeclMatch = line.match(/(?:export\s+)?function\s+(\w+)/i)
905
+ // Traditional function declaration, optionally prefixed with `export` and/or `async`
906
+ // Matches: function name(...), export function name(...), async function name(...), function name<T>(...), etc.
907
+ const funcDeclMatch = line.match(/(?:export\s+)?(?:async\s+)?function\s+(\w+)/i)
903
908
  if (funcDeclMatch) {
904
909
  return funcDeclMatch[1].toLowerCase()
905
910
  }
@@ -1057,7 +1062,11 @@ function classifyVariableNameRisk(varName: string | null): 'high' | 'medium' | '
1057
1062
  'random', 'temp', 'temporary', 'generated', 'dummy',
1058
1063
  // UI identifiers
1059
1064
  'toast', 'notification', 'element', 'component', 'widget',
1060
- 'modal', 'dialog', 'popup', 'unique', 'react'
1065
+ 'modal', 'dialog', 'popup', 'unique', 'react',
1066
+ // Non-security randomness usage (backoff/sampling/experiments)
1067
+ 'jitter', 'retry', 'backoff', 'delay', 'timeout', 'latency',
1068
+ 'sample', 'sampling', 'probability', 'chance', 'rollout',
1069
+ 'experiment', 'abtest', 'cohort', 'bucket', 'variant'
1061
1070
  ]
1062
1071
  if (lowRiskPatterns.some(p => lower.includes(p))) {
1063
1072
  return 'low'
@@ -1116,6 +1125,8 @@ function analyzeMathRandomContext(
1116
1125
  const businessLogicPatterns = [
1117
1126
  /\b(business|order|invoice|customer|product|transaction)Id\b/i,
1118
1127
  /\b(reference|tracking|confirmation)Number\b/i,
1128
+ /\b(backoff|retry|jitter|delay|timeout|latency)\b/i,
1129
+ /\b(sample|sampling|probability|chance|rollout|experiment|abtest|cohort|bucket|variant)\b/i,
1119
1130
  ]
1120
1131
  const inBusinessLogicContext = businessLogicPatterns.some(p => p.test(context)) &&
1121
1132
  !inSecurityContext
@@ -1129,7 +1140,7 @@ function analyzeMathRandomContext(
1129
1140
  } else if (inUIContext) {
1130
1141
  contextDescription = 'UI/cosmetic usage'
1131
1142
  } else if (inBusinessLogicContext) {
1132
- contextDescription = 'business identifier generation'
1143
+ contextDescription = 'non-security usage'
1133
1144
  }
1134
1145
 
1135
1146
  return {
@@ -1485,6 +1496,15 @@ export function detectDangerousFunctions(
1485
1496
  description = 'Math.random() used for generating fixture/seed data. Not security-critical in development contexts.'
1486
1497
  suggestedFix = 'Acceptable for seed data. Use crypto.randomUUID() if uniqueness guarantees needed.'
1487
1498
  }
1499
+ // Short UI ID pattern - INFO (check before variable name to avoid false positives)
1500
+ // e.g., "const key = Math.random().toString(36).substring(2, 9)" is a UI ID, not a security key
1501
+ else if (toStringPattern.intent === 'short-ui-id') {
1502
+ severity = 'info'
1503
+ confidence = 'low'
1504
+ explanation = ' (UI correlation ID)'
1505
+ description = 'Math.random() used for short UI correlation IDs. Not security-critical, but collisions possible in high-volume scenarios.'
1506
+ suggestedFix = 'For UI correlation, crypto.randomUUID() provides better uniqueness guarantees'
1507
+ }
1488
1508
  // Security context - HIGH
1489
1509
  else if (nameRisk === 'high' || context.inSecurityContext || functionIntent === 'security') {
1490
1510
  severity = 'high'
@@ -1493,21 +1513,13 @@ export function detectDangerousFunctions(
1493
1513
  description = 'Math.random() is NOT cryptographically secure and MUST NOT be used for tokens, keys, passwords, or session IDs. This can lead to predictable values that attackers can exploit.'
1494
1514
  suggestedFix = 'Replace with crypto.randomBytes() or crypto.randomUUID() for security-sensitive operations'
1495
1515
  }
1496
- // Short UI ID pattern - INFO
1497
- else if (toStringPattern.intent === 'short-ui-id') {
1498
- severity = 'info'
1499
- confidence = 'low'
1500
- explanation = ' (UI correlation ID)'
1501
- description = 'Math.random() used for short UI correlation IDs. Not security-critical, but collisions possible in high-volume scenarios.'
1502
- suggestedFix = 'For UI correlation, crypto.randomUUID() provides better uniqueness guarantees'
1503
- }
1504
- // Business ID pattern - LOW
1516
+ // Business/non-security pattern - LOW
1505
1517
  else if (nameRisk === 'low' || context.inBusinessLogicContext || toStringPattern.intent === 'business-id') {
1506
1518
  severity = 'low'
1507
1519
  confidence = 'low'
1508
- explanation = ' (business identifier)'
1509
- description = 'Math.random() is being used for non-security purposes (business IDs, tracking numbers). While not critical, Math.random() can produce collisions in high-volume scenarios.'
1510
- suggestedFix = 'Consider crypto.randomUUID() for better uniqueness guarantees and collision resistance'
1520
+ explanation = ' (non-security usage)'
1521
+ description = 'Math.random() is being used for non-security purposes (business IDs, sampling, jitter/backoff, experiments). While not critical, Math.random() can produce collisions or bias in high-volume scenarios.'
1522
+ suggestedFix = 'Use crypto.randomUUID() for uniqueness-sensitive IDs. For sampling/backoff, consider a seeded PRNG if determinism is needed.'
1511
1523
  }
1512
1524
  // Unknown context - MEDIUM
1513
1525
  else {
@@ -942,19 +942,23 @@ AI-generated structured outputs need validation before use in security-sensitive
942
942
  - Generic success messages
943
943
  - Placeholder comments in non-security code
944
944
 
945
- ## Response Format
945
+ ## Response Format (OPTIMIZED FOR MINIMAL OUTPUT)
946
946
 
947
947
  For each candidate finding, return:
948
948
  \`\`\`json
949
949
  {
950
950
  "index": <number>,
951
951
  "keep": true | false,
952
- "reason": "<brief explanation referencing specific code/context>",
953
- "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,
954
- "validationNotes": "<optional: additional context for the developer>"
952
+ "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null, // Only if keep=true
953
+ "notes": "<concise context for developer>" // Only if keep=true, 1-2 sentences max
955
954
  }
956
955
  \`\`\`
957
956
 
957
+ **CRITICAL**: To minimize costs:
958
+ - For \`keep: false\` (rejected): ONLY include \`index\` and \`keep\` fields. NO explanation needed.
959
+ - For \`keep: true\` (accepted): Include \`notes\` field with brief context (10-30 words). Be concise.
960
+ - Omit \`adjustedSeverity\` if keeping original severity (null is wasteful).
961
+
958
962
  ## Severity Guidelines
959
963
  - **critical/high**: Realistically exploitable, should block deploys - ONLY for clear vulnerabilities
960
964
  - **medium/low**: Important but non-blocking, hardening opportunities - use sparingly
@@ -977,13 +981,44 @@ For each candidate finding, return:
977
981
  - No visible mitigating factors in context
978
982
  - Real-world attack scenario is plausible
979
983
 
980
- **REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.`
984
+ **REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.
985
+
986
+ ## Response Format
987
+
988
+ For EACH file, provide a JSON object with the file path and validation results.
989
+ Return a JSON array where each element has:
990
+ - "file": the file path (e.g., "src/routes/api.ts")
991
+ - "validations": array of validation results for that file's candidates
992
+
993
+ Example response format (OPTIMIZED):
994
+ \`\`\`json
995
+ [
996
+ {
997
+ "file": "src/auth.ts",
998
+ "validations": [
999
+ { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
1000
+ { "index": 1, "keep": false }
1001
+ ]
1002
+ },
1003
+ {
1004
+ "file": "src/api.ts",
1005
+ "validations": [
1006
+ { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
1007
+ ]
1008
+ }
1009
+ ]
1010
+ \`\`\`
1011
+
1012
+ **REMEMBER**: Rejected findings (keep: false) need NO explanation. Keep notes brief (10-30 words).`
981
1013
 
982
1014
  interface ValidationResult {
983
1015
  index: number
984
1016
  keep: boolean
985
- reason: string
1017
+ // Optimized format: single notes field (replaces reason + validationNotes)
1018
+ notes?: string // Only for keep=true, concise explanation
986
1019
  adjustedSeverity?: VulnerabilitySeverity | null
1020
+ // Legacy fields for backward compatibility during parsing
1021
+ reason?: string
987
1022
  validationNotes?: string
988
1023
  }
989
1024
 
@@ -1179,7 +1214,44 @@ async function validateWithOpenAI(
1179
1214
  { role: 'system', content: HIGH_CONTEXT_VALIDATION_PROMPT },
1180
1215
  { role: 'user', content: validationRequest },
1181
1216
  ],
1182
- max_completion_tokens: 4096,
1217
+ max_completion_tokens: 1500, // Reduced from 4096 - optimized format needs less output
1218
+ response_format: {
1219
+ type: 'json_schema',
1220
+ json_schema: {
1221
+ name: 'validation_response',
1222
+ strict: true,
1223
+ schema: {
1224
+ type: 'object',
1225
+ properties: {
1226
+ validations: {
1227
+ type: 'array',
1228
+ items: {
1229
+ type: 'object',
1230
+ properties: {
1231
+ file: { type: 'string' },
1232
+ validations: {
1233
+ type: 'array',
1234
+ items: {
1235
+ type: 'object',
1236
+ properties: {
1237
+ index: { type: 'number' },
1238
+ keep: { type: 'boolean' }
1239
+ },
1240
+ required: ['index', 'keep'],
1241
+ additionalProperties: false
1242
+ }
1243
+ }
1244
+ },
1245
+ required: ['file', 'validations'],
1246
+ additionalProperties: false
1247
+ }
1248
+ }
1249
+ },
1250
+ required: ['validations'],
1251
+ additionalProperties: false
1252
+ }
1253
+ }
1254
+ }
1183
1255
  })
1184
1256
  )
1185
1257
 
@@ -1222,9 +1294,25 @@ async function validateWithOpenAI(
1222
1294
  return batchFindings
1223
1295
  }
1224
1296
 
1297
+ // Parse structured JSON response (with validations wrapper from response_format)
1298
+ let parsedContent: any
1299
+ try {
1300
+ parsedContent = JSON.parse(content)
1301
+ // Unwrap the validations array if present (from structured output)
1302
+ if (parsedContent.validations && Array.isArray(parsedContent.validations)) {
1303
+ parsedContent = parsedContent.validations
1304
+ }
1305
+ } catch (e) {
1306
+ console.warn('[OpenAI] Failed to parse JSON response:', e)
1307
+ parsedContent = content
1308
+ }
1309
+
1225
1310
  // Parse multi-file response
1226
1311
  const expectedFiles = fileDataList.map(({ filePath }) => filePath)
1227
- const validationResultsMap = parseMultiFileValidationResponse(content, expectedFiles)
1312
+ const validationResultsMap = parseMultiFileValidationResponse(
1313
+ typeof parsedContent === 'string' ? parsedContent : JSON.stringify(parsedContent),
1314
+ expectedFiles
1315
+ )
1228
1316
 
1229
1317
  // Apply results per file
1230
1318
  for (const { filePath, findings: fileFindings } of fileDataList) {
@@ -1486,7 +1574,7 @@ export async function validateFindingsWithAI(
1486
1574
  const response = await makeAnthropicRequestWithRetry(() =>
1487
1575
  client.messages.create({
1488
1576
  model: 'claude-3-5-haiku-20241022',
1489
- max_tokens: 4096, // Increased for multi-file responses
1577
+ max_tokens: 1500, // Reduced from 4096 - optimized format needs less output
1490
1578
  system: [
1491
1579
  {
1492
1580
  type: 'text',
@@ -1801,14 +1889,14 @@ Example response format:
1801
1889
  {
1802
1890
  "file": "src/auth.ts",
1803
1891
  "validations": [
1804
- { "index": 0, "keep": true, "reason": "Valid finding", "adjustedSeverity": null, "validationNotes": "..." },
1805
- { "index": 1, "keep": false, "reason": "False positive because..." }
1892
+ { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
1893
+ { "index": 1, "keep": false }
1806
1894
  ]
1807
1895
  },
1808
1896
  {
1809
1897
  "file": "src/api.ts",
1810
1898
  "validations": [
1811
- { "index": 0, "keep": true, "reason": "...", "adjustedSeverity": "high", "validationNotes": "..." }
1899
+ { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
1812
1900
  ]
1813
1901
  }
1814
1902
  ]
@@ -1903,13 +1991,20 @@ function parseMultiFileValidationResponse(
1903
1991
  typeof item.index === 'number' &&
1904
1992
  typeof item.keep === 'boolean'
1905
1993
  )
1906
- .map((item: any) => ({
1907
- index: item.index,
1908
- keep: item.keep,
1909
- reason: item.reason || '',
1910
- adjustedSeverity: item.adjustedSeverity || null,
1911
- validationNotes: item.validationNotes || undefined,
1912
- }))
1994
+ .map((item: any) => {
1995
+ // Normalize notes field: prefer new 'notes', then fall back to legacy 'validationNotes', then 'reason'
1996
+ const notes = item.notes || item.validationNotes || item.reason || undefined
1997
+
1998
+ return {
1999
+ index: item.index,
2000
+ keep: item.keep,
2001
+ notes,
2002
+ adjustedSeverity: item.adjustedSeverity || null,
2003
+ // Keep legacy fields for backward compatibility
2004
+ reason: item.reason,
2005
+ validationNotes: item.validationNotes,
2006
+ }
2007
+ })
1913
2008
 
1914
2009
  resultMap.set(filePath, validations)
1915
2010
  }
@@ -1960,22 +2055,25 @@ function applyValidationResults(
1960
2055
  confidence: 'high',
1961
2056
  }
1962
2057
 
2058
+ // Extract notes from optimized or legacy format
2059
+ const validationNotes = validation.notes || validation.validationNotes || validation.reason || undefined
2060
+
1963
2061
  if (validation.adjustedSeverity && validation.adjustedSeverity !== finding.severity) {
1964
2062
  // Severity was adjusted
1965
2063
  adjustedFinding.originalSeverity = finding.severity
1966
2064
  adjustedFinding.severity = validation.adjustedSeverity
1967
2065
  adjustedFinding.validationStatus = 'downgraded' as ValidationStatus
1968
- adjustedFinding.validationNotes = validation.validationNotes || validation.reason || 'Severity adjusted by AI validation'
2066
+ adjustedFinding.validationNotes = validationNotes || 'Severity adjusted by AI validation'
1969
2067
  } else {
1970
2068
  // Confirmed at original severity
1971
2069
  adjustedFinding.validationStatus = 'confirmed' as ValidationStatus
1972
- adjustedFinding.validationNotes = validation.validationNotes || validation.reason
2070
+ adjustedFinding.validationNotes = validationNotes
1973
2071
  }
1974
2072
 
1975
2073
  processed.push(adjustedFinding)
1976
2074
  } else {
1977
- // Finding was dismissed
1978
- console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber} - ${validation.reason}`)
2075
+ // Finding was dismissed - no need to log verbose reason (cost optimization)
2076
+ console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber}`)
1979
2077
  // Don't add to processed - finding is removed
1980
2078
  }
1981
2079
  }
@@ -2071,13 +2169,20 @@ function parseValidationResponse(response: string): ValidationResult[] {
2071
2169
  typeof item.index === 'number' &&
2072
2170
  typeof item.keep === 'boolean'
2073
2171
  )
2074
- .map(item => ({
2075
- index: item.index,
2076
- keep: item.keep,
2077
- reason: item.reason || '',
2078
- adjustedSeverity: item.adjustedSeverity || null,
2079
- validationNotes: item.validationNotes || undefined,
2080
- }))
2172
+ .map(item => {
2173
+ // Normalize notes field: prefer new 'notes', then fall back to legacy 'validationNotes', then 'reason'
2174
+ const notes = item.notes || item.validationNotes || item.reason || undefined
2175
+
2176
+ return {
2177
+ index: item.index,
2178
+ keep: item.keep,
2179
+ notes,
2180
+ adjustedSeverity: item.adjustedSeverity || null,
2181
+ // Keep legacy fields for backward compatibility
2182
+ reason: item.reason,
2183
+ validationNotes: item.validationNotes,
2184
+ }
2185
+ })
2081
2186
  } catch (error) {
2082
2187
  console.error('Failed to parse validation response:', error)
2083
2188
  return []
@@ -46,6 +46,8 @@ const DEFAULT_EXCLUSIONS: ExclusionConfig = {
46
46
  '**/sample/**',
47
47
  '**/samples/**',
48
48
  '**/playground/**',
49
+ '**/oculum.json', // Scanner output files
50
+ '**/*.scan.json', // Any scan result files
49
51
  ],
50
52
  fixturePatterns: [
51
53
  '**/fixtures/**',