npm - @oculum/scanner - Versions diffs - 1.0.3 → 1.0.5 - Mend

@oculum/scanner 1.0.3 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/layer1/entropy.d.ts.map +1 -1
package/dist/layer1/entropy.js +28 -0
package/dist/layer1/entropy.js.map +1 -1
package/dist/layer2/dangerous-functions.d.ts.map +1 -1
package/dist/layer2/dangerous-functions.js +35 -24
package/dist/layer2/dangerous-functions.js.map +1 -1
package/dist/layer3/anthropic.d.ts.map +1 -1
package/dist/layer3/anthropic.js +125 -29
package/dist/layer3/anthropic.js.map +1 -1
package/dist/utils/path-exclusions.d.ts.map +1 -1
package/dist/utils/path-exclusions.js +2 -0
package/dist/utils/path-exclusions.js.map +1 -1
package/package.json +2 -1
package/src/layer1/entropy.ts +34 -0
package/src/layer2/dangerous-functions.ts +36 -24
package/src/layer3/anthropic.ts +135 -30
package/src/utils/path-exclusions.ts +2 -0

package/src/layer2/dangerous-functions.ts CHANGED

@@ -825,8 +825,7 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
   // Context indicators of cosmetic use
   const cosmeticContextPatterns = [
-    // UI component files
-    /\/(components?|ui|widgets?|animations?|contexts?)\//i,
+    // UI component files - REMOVED, let severity classification handle these
     // Style-related variables/functions
     /\b(style|styles|css|className|animation|transition)/i,
     /\b(width|height|opacity|color|transform|rotate|scale|translate)/i,
@@ -840,11 +839,8 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
     /duration.*Math\.random/i,
     // UI state variations
     /\b(variant|theme|layout|position).*Math\.random/i,
-    // UI identifier variable names (toast, notification, element, component IDs)
-    /\b(toast|notification|element|component|widget|modal|dialog|popup).*id\b/i,
-    /\bid\s*=.*Math\.random/i,
-    /\bkey\s*=.*Math\.random/i,  // React keys
-    /\btempId|temporaryId|uniqueId\b/i,
+    // NOTE: Removed UI identifier patterns (key, id, tempId, etc.) - these should be
+    // classified with info/low severity by the severity classification logic, not skipped entirely
   ]
   if (cosmeticContextPatterns.some(p => p.test(context))) {
@@ -885,12 +881,20 @@ function isCosmeticMathRandom(lineContent: string, content: string, lineNumber:
  */
 function extractFunctionContext(content: string, lineNumber: number): string | null {
   const lines = content.split('\n')
-  const start = Math.max(0, lineNumber - 10)
+  const start = Math.max(0, lineNumber - 20)  // Increased from 10 to 20 for nested callbacks
   // Look backwards for function declaration
   for (let i = lineNumber; i >= start; i--) {
     const line = lines[i]
+    // Skip anonymous arrow functions in callbacks (e.g., .map((x) => ...), .replace(/x/g, (c) => ...))
+    // These are not the function context we're looking for
+    // Look for pattern: .methodName(..., (param) => or .methodName(...(param) =>
+    const hasMethodCallWithArrowCallback = /\.\w+\(.*\([^)]*\)\s*=>/.test(line)
+    if (hasMethodCallWithArrowCallback) {
+      continue  // Skip this line and keep looking
+    }
     // Match various function declaration patterns
     // 1. function functionName
     // 2. export function functionName
@@ -898,8 +902,9 @@ function extractFunctionContext(content: string, lineNumber: number): string | n
     // 4. const/let functionName = (arrow function)
     // 5. export const functionName =
-    // Traditional function declaration
-    const funcDeclMatch = line.match(/(?:export\s+)?function\s+(\w+)/i)
+    // Traditional function declaration (handles TypeScript type annotations)
+    // Matches: function name(...), export function name(...), function name<T>(...), etc.
+    const funcDeclMatch = line.match(/(?:export\s+)?(?:async\s+)?function\s+(\w+)/i)
     if (funcDeclMatch) {
       return funcDeclMatch[1].toLowerCase()
     }
@@ -1057,7 +1062,11 @@ function classifyVariableNameRisk(varName: string | null): 'high' | 'medium' | '
     'random', 'temp', 'temporary', 'generated', 'dummy',
     // UI identifiers
     'toast', 'notification', 'element', 'component', 'widget',
-    'modal', 'dialog', 'popup', 'unique', 'react'
+    'modal', 'dialog', 'popup', 'unique', 'react',
+    // Non-security randomness usage (backoff/sampling/experiments)
+    'jitter', 'retry', 'backoff', 'delay', 'timeout', 'latency',
+    'sample', 'sampling', 'probability', 'chance', 'rollout',
+    'experiment', 'abtest', 'cohort', 'bucket', 'variant'
   ]
   if (lowRiskPatterns.some(p => lower.includes(p))) {
     return 'low'
@@ -1116,6 +1125,8 @@ function analyzeMathRandomContext(
   const businessLogicPatterns = [
     /\b(business|order|invoice|customer|product|transaction)Id\b/i,
     /\b(reference|tracking|confirmation)Number\b/i,
+    /\b(backoff|retry|jitter|delay|timeout|latency)\b/i,
+    /\b(sample|sampling|probability|chance|rollout|experiment|abtest|cohort|bucket|variant)\b/i,
   ]
   const inBusinessLogicContext = businessLogicPatterns.some(p => p.test(context)) &&
     !inSecurityContext
@@ -1129,7 +1140,7 @@ function analyzeMathRandomContext(
   } else if (inUIContext) {
     contextDescription = 'UI/cosmetic usage'
   } else if (inBusinessLogicContext) {
-    contextDescription = 'business identifier generation'
+    contextDescription = 'non-security usage'
   }
   return {
@@ -1485,6 +1496,15 @@ export function detectDangerousFunctions(
             description = 'Math.random() used for generating fixture/seed data. Not security-critical in development contexts.'
             suggestedFix = 'Acceptable for seed data. Use crypto.randomUUID() if uniqueness guarantees needed.'
           }
+          // Short UI ID pattern - INFO (check before variable name to avoid false positives)
+          // e.g., "const key = Math.random().toString(36).substring(2, 9)" is a UI ID, not a security key
+          else if (toStringPattern.intent === 'short-ui-id') {
+            severity = 'info'
+            confidence = 'low'
+            explanation = ' (UI correlation ID)'
+            description = 'Math.random() used for short UI correlation IDs. Not security-critical, but collisions possible in high-volume scenarios.'
+            suggestedFix = 'For UI correlation, crypto.randomUUID() provides better uniqueness guarantees'
+          }
           // Security context - HIGH
           else if (nameRisk === 'high' || context.inSecurityContext || functionIntent === 'security') {
             severity = 'high'
@@ -1493,21 +1513,13 @@ export function detectDangerousFunctions(
             description = 'Math.random() is NOT cryptographically secure and MUST NOT be used for tokens, keys, passwords, or session IDs. This can lead to predictable values that attackers can exploit.'
             suggestedFix = 'Replace with crypto.randomBytes() or crypto.randomUUID() for security-sensitive operations'
           }
-          // Short UI ID pattern - INFO
-          else if (toStringPattern.intent === 'short-ui-id') {
-            severity = 'info'
-            confidence = 'low'
-            explanation = ' (UI correlation ID)'
-            description = 'Math.random() used for short UI correlation IDs. Not security-critical, but collisions possible in high-volume scenarios.'
-            suggestedFix = 'For UI correlation, crypto.randomUUID() provides better uniqueness guarantees'
-          }
-          // Business ID pattern - LOW
+          // Business/non-security pattern - LOW
           else if (nameRisk === 'low' || context.inBusinessLogicContext || toStringPattern.intent === 'business-id') {
             severity = 'low'
             confidence = 'low'
-            explanation = ' (business identifier)'
-            description = 'Math.random() is being used for non-security purposes (business IDs, tracking numbers). While not critical, Math.random() can produce collisions in high-volume scenarios.'
-            suggestedFix = 'Consider crypto.randomUUID() for better uniqueness guarantees and collision resistance'
+            explanation = ' (non-security usage)'
+            description = 'Math.random() is being used for non-security purposes (business IDs, sampling, jitter/backoff, experiments). While not critical, Math.random() can produce collisions or bias in high-volume scenarios.'
+            suggestedFix = 'Use crypto.randomUUID() for uniqueness-sensitive IDs. For sampling/backoff, consider a seeded PRNG if determinism is needed.'
           }
           // Unknown context - MEDIUM
           else {

package/src/layer3/anthropic.ts CHANGED

@@ -942,19 +942,23 @@ AI-generated structured outputs need validation before use in security-sensitive
    - Generic success messages
    - Placeholder comments in non-security code
-## Response Format
+## Response Format (OPTIMIZED FOR MINIMAL OUTPUT)
 For each candidate finding, return:
 \`\`\`json
 {
   "index": <number>,
   "keep": true | false,
-  "reason": "<brief explanation referencing specific code/context>",
-  "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,
-  "validationNotes": "<optional: additional context for the developer>"
+  "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,  // Only if keep=true
+  "notes": "<concise context for developer>"  // Only if keep=true, 1-2 sentences max
 }
 \`\`\`
+**CRITICAL**: To minimize costs:
+- For \`keep: false\` (rejected): ONLY include \`index\` and \`keep\` fields. NO explanation needed.
+- For \`keep: true\` (accepted): Include \`notes\` field with brief context (10-30 words). Be concise.
+- Omit \`adjustedSeverity\` if keeping original severity (null is wasteful).
 ## Severity Guidelines
 - **critical/high**: Realistically exploitable, should block deploys - ONLY for clear vulnerabilities
 - **medium/low**: Important but non-blocking, hardening opportunities - use sparingly
@@ -977,13 +981,44 @@ For each candidate finding, return:
    - No visible mitigating factors in context
    - Real-world attack scenario is plausible
-**REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.`
+**REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.
+## Response Format
+For EACH file, provide a JSON object with the file path and validation results.
+Return a JSON array where each element has:
+- "file": the file path (e.g., "src/routes/api.ts")
+- "validations": array of validation results for that file's candidates
+Example response format (OPTIMIZED):
+\`\`\`json
+[
+  {
+    "file": "src/auth.ts",
+    "validations": [
+      { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
+      { "index": 1, "keep": false }
+    ]
+  },
+  {
+    "file": "src/api.ts",
+    "validations": [
+      { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
+    ]
+  }
+]
+\`\`\`
+**REMEMBER**: Rejected findings (keep: false) need NO explanation. Keep notes brief (10-30 words).`
 interface ValidationResult {
   index: number
   keep: boolean
-  reason: string
+  // Optimized format: single notes field (replaces reason + validationNotes)
+  notes?: string  // Only for keep=true, concise explanation
   adjustedSeverity?: VulnerabilitySeverity | null
+  // Legacy fields for backward compatibility during parsing
+  reason?: string
   validationNotes?: string
 }
@@ -1179,7 +1214,44 @@ async function validateWithOpenAI(
             { role: 'system', content: HIGH_CONTEXT_VALIDATION_PROMPT },
             { role: 'user', content: validationRequest },
           ],
-          max_completion_tokens: 4096,
+          max_completion_tokens: 1500, // Reduced from 4096 - optimized format needs less output
+          response_format: {
+            type: 'json_schema',
+            json_schema: {
+              name: 'validation_response',
+              strict: true,
+              schema: {
+                type: 'object',
+                properties: {
+                  validations: {
+                    type: 'array',
+                    items: {
+                      type: 'object',
+                      properties: {
+                        file: { type: 'string' },
+                        validations: {
+                          type: 'array',
+                          items: {
+                            type: 'object',
+                            properties: {
+                              index: { type: 'number' },
+                              keep: { type: 'boolean' }
+                            },
+                            required: ['index', 'keep'],
+                            additionalProperties: false
+                          }
+                        }
+                      },
+                      required: ['file', 'validations'],
+                      additionalProperties: false
+                    }
+                  }
+                },
+                required: ['validations'],
+                additionalProperties: false
+              }
+            }
+          }
         })
       )
@@ -1222,9 +1294,25 @@ async function validateWithOpenAI(
         return batchFindings
       }
+      // Parse structured JSON response (with validations wrapper from response_format)
+      let parsedContent: any
+      try {
+        parsedContent = JSON.parse(content)
+        // Unwrap the validations array if present (from structured output)
+        if (parsedContent.validations && Array.isArray(parsedContent.validations)) {
+          parsedContent = parsedContent.validations
+        }
+      } catch (e) {
+        console.warn('[OpenAI] Failed to parse JSON response:', e)
+        parsedContent = content
+      }
       // Parse multi-file response
       const expectedFiles = fileDataList.map(({ filePath }) => filePath)
-      const validationResultsMap = parseMultiFileValidationResponse(content, expectedFiles)
+      const validationResultsMap = parseMultiFileValidationResponse(
+        typeof parsedContent === 'string' ? parsedContent : JSON.stringify(parsedContent),
+        expectedFiles
+      )
       // Apply results per file
       for (const { filePath, findings: fileFindings } of fileDataList) {
@@ -1486,7 +1574,7 @@ export async function validateFindingsWithAI(
       const response = await makeAnthropicRequestWithRetry(() =>
         client.messages.create({
           model: 'claude-3-5-haiku-20241022',
-          max_tokens: 4096, // Increased for multi-file responses
+          max_tokens: 1500, // Reduced from 4096 - optimized format needs less output
           system: [
             {
               type: 'text',
@@ -1801,14 +1889,14 @@ Example response format:
   {
     "file": "src/auth.ts",
     "validations": [
-      { "index": 0, "keep": true, "reason": "Valid finding", "adjustedSeverity": null, "validationNotes": "..." },
-      { "index": 1, "keep": false, "reason": "False positive because..." }
+      { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
+      { "index": 1, "keep": false }
     ]
   },
   {
     "file": "src/api.ts",
     "validations": [
-      { "index": 0, "keep": true, "reason": "...", "adjustedSeverity": "high", "validationNotes": "..." }
+      { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
     ]
   }
 ]
@@ -1903,13 +1991,20 @@ function parseMultiFileValidationResponse(
           typeof item.index === 'number' &&
           typeof item.keep === 'boolean'
         )
-        .map((item: any) => ({
-          index: item.index,
-          keep: item.keep,
-          reason: item.reason || '',
-          adjustedSeverity: item.adjustedSeverity || null,
-          validationNotes: item.validationNotes || undefined,
-        }))
+        .map((item: any) => {
+          // Normalize notes field: prefer new 'notes', fallback to legacy 'reason' or 'validationNotes'
+          const notes = item.notes || item.validationNotes || item.reason || undefined
+          return {
+            index: item.index,
+            keep: item.keep,
+            notes,
+            adjustedSeverity: item.adjustedSeverity || null,
+            // Keep legacy fields for backward compatibility
+            reason: item.reason,
+            validationNotes: item.validationNotes,
+          }
+        })
       resultMap.set(filePath, validations)
     }
@@ -1960,22 +2055,25 @@ function applyValidationResults(
         confidence: 'high',
       }
+      // Extract notes from optimized or legacy format
+      const validationNotes = validation.notes || validation.validationNotes || validation.reason || undefined
       if (validation.adjustedSeverity && validation.adjustedSeverity !== finding.severity) {
         // Severity was adjusted
         adjustedFinding.originalSeverity = finding.severity
         adjustedFinding.severity = validation.adjustedSeverity
         adjustedFinding.validationStatus = 'downgraded' as ValidationStatus
-        adjustedFinding.validationNotes = validation.validationNotes || validation.reason || 'Severity adjusted by AI validation'
+        adjustedFinding.validationNotes = validationNotes || 'Severity adjusted by AI validation'
       } else {
         // Confirmed at original severity
         adjustedFinding.validationStatus = 'confirmed' as ValidationStatus
-        adjustedFinding.validationNotes = validation.validationNotes || validation.reason
+        adjustedFinding.validationNotes = validationNotes
       }
       processed.push(adjustedFinding)
     } else {
-      // Finding was dismissed
-      console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber} - ${validation.reason}`)
+      // Finding was dismissed - no need to log verbose reason (cost optimization)
+      console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber}`)
       // Don't add to processed - finding is removed
     }
   }
@@ -2071,13 +2169,20 @@ function parseValidationResponse(response: string): ValidationResult[] {
         typeof item.index === 'number' &&
         typeof item.keep === 'boolean'
       )
-      .map(item => ({
-        index: item.index,
-        keep: item.keep,
-        reason: item.reason || '',
-        adjustedSeverity: item.adjustedSeverity || null,
-        validationNotes: item.validationNotes || undefined,
-      }))
+      .map(item => {
+        // Normalize notes field: prefer new 'notes', fallback to legacy 'reason' or 'validationNotes'
+        const notes = item.notes || item.validationNotes || item.reason || undefined
+        return {
+          index: item.index,
+          keep: item.keep,
+          notes,
+          adjustedSeverity: item.adjustedSeverity || null,
+          // Keep legacy fields for backward compatibility
+          reason: item.reason,
+          validationNotes: item.validationNotes,
+        }
+      })
   } catch (error) {
     console.error('Failed to parse validation response:', error)
     return []

package/src/utils/path-exclusions.ts CHANGED

@@ -46,6 +46,8 @@ const DEFAULT_EXCLUSIONS: ExclusionConfig = {
     '**/sample/**',
     '**/samples/**',
     '**/playground/**',
+    '**/oculum.json',           // Scanner output files
+    '**/*.scan.json',          // Any scan result files
   ],
   fixturePatterns: [
     '**/fixtures/**',