npm - @oculum/scanner - Versions diffs - 1.0.2 → 1.0.4 - Mend

@oculum/scanner 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

package/dist/index.d.ts +4 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +60 -5
package/dist/index.js.map +1 -1
package/dist/layer1/entropy.d.ts.map +1 -1
package/dist/layer1/entropy.js +6 -4
package/dist/layer1/entropy.js.map +1 -1
package/dist/layer1/index.d.ts +2 -2
package/dist/layer1/index.d.ts.map +1 -1
package/dist/layer1/index.js +14 -5
package/dist/layer1/index.js.map +1 -1
package/dist/layer2/dangerous-functions.d.ts.map +1 -1
package/dist/layer2/dangerous-functions.js +319 -11
package/dist/layer2/dangerous-functions.js.map +1 -1
package/dist/layer2/index.d.ts +2 -2
package/dist/layer2/index.d.ts.map +1 -1
package/dist/layer2/index.js +14 -5
package/dist/layer2/index.js.map +1 -1
package/dist/layer3/anthropic.d.ts +5 -1
package/dist/layer3/anthropic.d.ts.map +1 -1
package/dist/layer3/anthropic.js +175 -30
package/dist/layer3/anthropic.js.map +1 -1
package/dist/layer3/index.d.ts +3 -1
package/dist/layer3/index.d.ts.map +1 -1
package/dist/layer3/index.js +21 -0
package/dist/layer3/index.js.map +1 -1
package/dist/types.d.ts +25 -0
package/dist/types.d.ts.map +1 -1
package/dist/types.js +40 -0
package/dist/types.js.map +1 -1
package/dist/utils/context-helpers.d.ts +12 -0
package/dist/utils/context-helpers.d.ts.map +1 -1
package/dist/utils/context-helpers.js +40 -0
package/dist/utils/context-helpers.js.map +1 -1
package/package.json +4 -2
package/src/index.ts +75 -5
package/src/layer1/entropy.ts +6 -4
package/src/layer1/index.ts +23 -8
package/src/layer2/__tests__/math-random-enhanced.test.ts +405 -0
package/src/layer2/dangerous-functions.ts +368 -11
package/src/layer2/index.ts +20 -8
package/src/layer3/anthropic.ts +190 -31
package/src/layer3/index.ts +27 -2
package/src/types.ts +59 -0
package/src/utils/context-helpers.ts +40 -0

package/src/layer3/anthropic.ts CHANGED Viewed

@@ -733,6 +733,35 @@ Distinguish these patterns:
     - Cross-tenant storage: medium (data isolation risk)
   - Do NOT describe transient BYOK keys as "stored without encryption" - they are NOT stored
+**Math.random() for Security:**
+Distinguish legitimate uses from security-critical misuse:
+- **Seed/Data Generation Files**: Files in /seed/, /fixtures/, /factories/, datacreator.ts, *.fixture.* are for test data generation
+  - Math.random() in seed files is acceptable - these are never production security code
+  - REJECT findings from seed/data generation files entirely
+- **Educational Vulnerability Files**: Files named insecurity.ts, vulnerable.ts, or in /intentionally-vulnerable/ paths
+  - These are OWASP Juice Shop challenges or security training examples
+  - REJECT entirely - they're intentionally vulnerable for educational purposes
+- **UUID/Identifier Generation**: Functions named generateUUID(), createId(), correlationId(), etc.
+  - Use Math.random() for UI correlation, React keys, element IDs
+  - Short toString(36).substring(2, 9) patterns are for UI correlation, NOT security tokens
+  - REJECT unless function name explicitly indicates security (generateToken, createSessionId, generateSecret)
+- **CAPTCHA/Puzzle Generation**: Math.random() for CAPTCHA questions, puzzle difficulty, game mechanics
+  - These don't need cryptographic randomness - legitimate non-security use
+  - REJECT findings in CAPTCHA/puzzle generation functions
+- **Security-Sensitive Context**: Only keep as HIGH/CRITICAL when:
+  - Variable names indicate security: token, secret, key, auth, session, password
+  - Function names indicate security: generateToken, createSession, makeSecret
+  - Used in security-critical files: auth.ts, crypto.ts, session.ts
+  - Long toString() patterns without truncation (potential token generation)
+**Severity Ladder for Math.random():**
+- Seed/educational files: REJECT (not production code)
+- UUID/CAPTCHA functions: REJECT (legitimate use)
+- Short UI IDs (toString(36).substring(2, 9)): INFO (UI correlation, suggest crypto.randomUUID())
+- Business IDs: LOW (suggest crypto.randomUUID() for collision resistance)
+- Security contexts (tokens/secrets/keys): HIGH (cryptographic weakness)
+- Unknown context: MEDIUM (needs manual review)
 ### 3.6 DOM Sinks and Bootstrap Scripts
 Recognise LOW-RISK patterns:
 - Static scripts reading localStorage for theme/preferences
@@ -913,19 +942,23 @@ AI-generated structured outputs need validation before use in security-sensitive
    - Generic success messages
    - Placeholder comments in non-security code
-## Response Format
+## Response Format (OPTIMIZED FOR MINIMAL OUTPUT)
 For each candidate finding, return:
 \`\`\`json
 {
   "index": <number>,
   "keep": true | false,
-  "reason": "<brief explanation referencing specific code/context>",
-  "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,
-  "validationNotes": "<optional: additional context for the developer>"
+  "adjustedSeverity": "critical" | "high" | "medium" | "low" | "info" | null,  // Only if keep=true
+  "notes": "<concise context for developer>"  // Only if keep=true, 1-2 sentences max
 }
 \`\`\`
+**CRITICAL**: To minimize costs:
+- For \`keep: false\` (rejected): ONLY include \`index\` and \`keep\` fields. NO explanation needed.
+- For \`keep: true\` (accepted): Include \`notes\` field with brief context (10-30 words). Be concise.
+- Omit \`adjustedSeverity\` if keeping original severity (null is wasteful).
 ## Severity Guidelines
 - **critical/high**: Realistically exploitable, should block deploys - ONLY for clear vulnerabilities
 - **medium/low**: Important but non-blocking, hardening opportunities - use sparingly
@@ -948,13 +981,44 @@ For each candidate finding, return:
    - No visible mitigating factors in context
    - Real-world attack scenario is plausible
-**REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.`
+**REMEMBER**: You are the last line of defense against noise. A finding that reaches the user should be CLEARLY worth their time. When in doubt, REJECT.
+## Response Format
+For EACH file, provide a JSON object with the file path and validation results.
+Return a JSON array where each element has:
+- "file": the file path (e.g., "src/routes/api.ts")
+- "validations": array of validation results for that file's candidates
+Example response format (OPTIMIZED):
+\`\`\`json
+[
+  {
+    "file": "src/auth.ts",
+    "validations": [
+      { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
+      { "index": 1, "keep": false }
+    ]
+  },
+  {
+    "file": "src/api.ts",
+    "validations": [
+      { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
+    ]
+  }
+]
+\`\`\`
+**REMEMBER**: Rejected findings (keep: false) need NO explanation. Keep notes brief (10-30 words).`
 interface ValidationResult {
   index: number
   keep: boolean
-  reason: string
+  // Optimized format: single notes field (replaces reason + validationNotes)
+  notes?: string  // Only for keep=true, concise explanation
   adjustedSeverity?: VulnerabilitySeverity | null
+  // Legacy fields for backward compatibility during parsing
+  reason?: string
   validationNotes?: string
 }
@@ -1150,7 +1214,44 @@ async function validateWithOpenAI(
             { role: 'system', content: HIGH_CONTEXT_VALIDATION_PROMPT },
             { role: 'user', content: validationRequest },
           ],
-          max_completion_tokens: 4096,
+          max_completion_tokens: 1500, // Reduced from 4096 - optimized format needs less output
+          response_format: {
+            type: 'json_schema',
+            json_schema: {
+              name: 'validation_response',
+              strict: true,
+              schema: {
+                type: 'object',
+                properties: {
+                  validations: {
+                    type: 'array',
+                    items: {
+                      type: 'object',
+                      properties: {
+                        file: { type: 'string' },
+                        validations: {
+                          type: 'array',
+                          items: {
+                            type: 'object',
+                            properties: {
+                              index: { type: 'number' },
+                              keep: { type: 'boolean' }
+                            },
+                            required: ['index', 'keep'],
+                            additionalProperties: true
+                          }
+                        }
+                      },
+                      required: ['file', 'validations'],
+                      additionalProperties: false
+                    }
+                  }
+                },
+                required: ['validations'],
+                additionalProperties: false
+              }
+            }
+          }
         })
       )
@@ -1193,9 +1294,25 @@ async function validateWithOpenAI(
         return batchFindings
       }
+      // Parse structured JSON response (with validations wrapper from response_format)
+      let parsedContent: any
+      try {
+        parsedContent = JSON.parse(content)
+        // Unwrap the validations array if present (from structured output)
+        if (parsedContent.validations && Array.isArray(parsedContent.validations)) {
+          parsedContent = parsedContent.validations
+        }
+      } catch (e) {
+        console.warn('[OpenAI] Failed to parse JSON response:', e)
+        parsedContent = content
+      }
       // Parse multi-file response
       const expectedFiles = fileDataList.map(({ filePath }) => filePath)
-      const validationResultsMap = parseMultiFileValidationResponse(content, expectedFiles)
+      const validationResultsMap = parseMultiFileValidationResponse(
+        typeof parsedContent === 'string' ? parsedContent : JSON.stringify(parsedContent),
+        expectedFiles
+      )
       // Apply results per file
       for (const { filePath, findings: fileFindings } of fileDataList) {
@@ -1320,7 +1437,8 @@ async function validateWithOpenAI(
 export async function validateFindingsWithAI(
   findings: Vulnerability[],
   files: ScanFile[],
-  projectContext?: ProjectContext
+  projectContext?: ProjectContext,
+  onProgress?: (progress: { filesProcessed: number; totalFiles: number; status: string }) => void
 ): Promise<AIValidationResult> {
   // Initialize stats tracking
   const stats: ValidationStats = {
@@ -1393,11 +1511,23 @@ export async function validateFindingsWithAI(
   console.log(`[AI Validation] Phase 2: Processing ${fileEntries.length} files in ${totalFileBatches} API batch(es) (${FILES_PER_API_BATCH} files/batch)`)
+  // Track files processed for progress reporting
+  let filesValidated = 0
   // Process files in batches - each batch is ONE API call with multiple files
   for (let batchStart = 0; batchStart < fileEntries.length; batchStart += FILES_PER_API_BATCH) {
     const fileBatch = fileEntries.slice(batchStart, batchStart + FILES_PER_API_BATCH)
     const batchNum = Math.floor(batchStart / FILES_PER_API_BATCH) + 1
+    // Report progress before processing batch
+    if (onProgress) {
+      onProgress({
+        filesProcessed: filesValidated,
+        totalFiles: fileEntries.length,
+        status: `AI validating batch ${batchNum}/${totalFileBatches}`,
+      })
+    }
     console.log(`[AI Validation] API Batch ${batchNum}/${totalFileBatches}: ${fileBatch.length} files`)
     // Prepare file data for batch request
@@ -1444,7 +1574,7 @@ export async function validateFindingsWithAI(
       const response = await makeAnthropicRequestWithRetry(() =>
         client.messages.create({
           model: 'claude-3-5-haiku-20241022',
-          max_tokens: 4096, // Increased for multi-file responses
+          max_tokens: 1500, // Reduced from 4096 - optimized format needs less output
           system: [
             {
               type: 'text',
@@ -1578,6 +1708,18 @@ export async function validateFindingsWithAI(
     const batchDuration = Date.now() - batchStartTime
     totalBatchWaitTime += batchDuration
+    // Update files validated counter
+    filesValidated += fileBatch.length
+    // Report progress after batch completion
+    if (onProgress) {
+      onProgress({
+        filesProcessed: filesValidated,
+        totalFiles: fileEntries.length,
+        status: `AI validation complete for batch ${batchNum}/${totalFileBatches}`,
+      })
+    }
   }
   // Calculate cache hit rate
@@ -1747,14 +1889,14 @@ Example response format:
   {
     "file": "src/auth.ts",
     "validations": [
-      { "index": 0, "keep": true, "reason": "Valid finding", "adjustedSeverity": null, "validationNotes": "..." },
-      { "index": 1, "keep": false, "reason": "False positive because..." }
+      { "index": 0, "keep": true, "adjustedSeverity": "medium", "notes": "Protected by middleware" },
+      { "index": 1, "keep": false }
     ]
   },
   {
     "file": "src/api.ts",
     "validations": [
-      { "index": 0, "keep": true, "reason": "...", "adjustedSeverity": "high", "validationNotes": "..." }
+      { "index": 0, "keep": true, "notes": "User input flows to SQL query" }
     ]
   }
 ]
@@ -1849,13 +1991,20 @@ function parseMultiFileValidationResponse(
           typeof item.index === 'number' &&
           typeof item.keep === 'boolean'
         )
-        .map((item: any) => ({
-          index: item.index,
-          keep: item.keep,
-          reason: item.reason || '',
-          adjustedSeverity: item.adjustedSeverity || null,
-          validationNotes: item.validationNotes || undefined,
-        }))
+        .map((item: any) => {
+          // Normalize notes field: prefer new 'notes', fallback to legacy 'reason' or 'validationNotes'
+          const notes = item.notes || item.validationNotes || item.reason || undefined
+          return {
+            index: item.index,
+            keep: item.keep,
+            notes,
+            adjustedSeverity: item.adjustedSeverity || null,
+            // Keep legacy fields for backward compatibility
+            reason: item.reason,
+            validationNotes: item.validationNotes,
+          }
+        })
       resultMap.set(filePath, validations)
     }
@@ -1906,22 +2055,25 @@ function applyValidationResults(
         confidence: 'high',
       }
+      // Extract notes from optimized or legacy format
+      const validationNotes = validation.notes || validation.validationNotes || validation.reason || undefined
       if (validation.adjustedSeverity && validation.adjustedSeverity !== finding.severity) {
         // Severity was adjusted
         adjustedFinding.originalSeverity = finding.severity
         adjustedFinding.severity = validation.adjustedSeverity
         adjustedFinding.validationStatus = 'downgraded' as ValidationStatus
-        adjustedFinding.validationNotes = validation.validationNotes || validation.reason || 'Severity adjusted by AI validation'
+        adjustedFinding.validationNotes = validationNotes || 'Severity adjusted by AI validation'
       } else {
         // Confirmed at original severity
         adjustedFinding.validationStatus = 'confirmed' as ValidationStatus
-        adjustedFinding.validationNotes = validation.validationNotes || validation.reason
+        adjustedFinding.validationNotes = validationNotes
       }
       processed.push(adjustedFinding)
     } else {
-      // Finding was dismissed
-      console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber} - ${validation.reason}`)
+      // Finding was dismissed - no need to log verbose reason (cost optimization)
+      console.log(`[AI Validation] Rejected: ${finding.title} at ${finding.filePath}:${finding.lineNumber}`)
       // Don't add to processed - finding is removed
     }
   }
@@ -2017,13 +2169,20 @@ function parseValidationResponse(response: string): ValidationResult[] {
         typeof item.index === 'number' &&
         typeof item.keep === 'boolean'
       )
-      .map(item => ({
-        index: item.index,
-        keep: item.keep,
-        reason: item.reason || '',
-        adjustedSeverity: item.adjustedSeverity || null,
-        validationNotes: item.validationNotes || undefined,
-      }))
+      .map(item => {
+        // Normalize notes field: prefer new 'notes', fallback to legacy 'reason' or 'validationNotes'
+        const notes = item.notes || item.validationNotes || item.reason || undefined
+        return {
+          index: item.index,
+          keep: item.keep,
+          notes,
+          adjustedSeverity: item.adjustedSeverity || null,
+          // Keep legacy fields for backward compatibility
+          reason: item.reason,
+          validationNotes: item.validationNotes,
+        }
+      })
   } catch (error) {
     console.error('Failed to parse validation response:', error)
     return []

package/src/layer3/index.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  * Deep security analysis using Claude AI and package verification
  */
-import type { Vulnerability, ScanFile } from '../types'
+import type { Vulnerability, ScanFile, CancellationToken } from '../types'
 import { batchAnalyzeWithAI, type Layer3Context } from './anthropic'
 import { checkPackages } from './package-check'
@@ -33,6 +33,8 @@ export interface Layer3Options {
   maxFiles?: number
   /** Project context for auth-aware analysis */
   projectContext?: Layer3Context
+  /** Cancellation token for aborting scans */
+  cancellationToken?: CancellationToken
 }
 export async function runLayer3Scan(
@@ -42,17 +44,40 @@ export async function runLayer3Scan(
   const startTime = Date.now()
   const vulnerabilities: Vulnerability[] = []
   let aiAnalyzedCount = 0
   // Use provided maxFiles or default
   const maxAIFiles = options.maxFiles ?? MAX_AI_FILES
+  // Check for cancellation before package check
+  if (options.cancellationToken?.cancelled) {
+    return {
+      vulnerabilities: [],
+      filesScanned: files.length,
+      duration: Date.now() - startTime,
+      aiAnalyzed: 0,
+    }
+  }
   // 1. Check packages (always run, fast)
   const packageFiles = files.filter(f => f.path.endsWith('package.json'))
   for (const file of packageFiles) {
+    // Check for cancellation in package loop
+    if (options.cancellationToken?.cancelled) break
     const packageFindings = await checkPackages(file.content, file.path)
     vulnerabilities.push(...packageFindings)
   }
+  // Check for cancellation before AI analysis
+  if (options.cancellationToken?.cancelled) {
+    return {
+      vulnerabilities,
+      filesScanned: files.length,
+      duration: Date.now() - startTime,
+      aiAnalyzed: 0,
+    }
+  }
   // 2. AI Analysis (if enabled)
   if (options.enableAI !== false) {
     // Select files for AI analysis

package/src/types.ts CHANGED Viewed

@@ -56,6 +56,61 @@ export interface Vulnerability {
   originalSeverity?: VulnerabilitySeverity // For downgraded findings, the original severity
 }
+/**
+ * Cancellation token for aborting scans gracefully
+ * Allows users to stop long-running scans (Ctrl+C) and get partial results
+ *
+ * This interface only declares the shape of a token; the
+ * `createCancellationToken` factory in this file provides an implementation.
+ */
+export interface CancellationToken {
+  /** Whether cancellation has been requested; scan code reads this flag to decide whether to stop early */
+  cancelled: boolean
+  /** Reason for cancellation (e.g., "User pressed Ctrl+C"); optional, so it may be undefined */
+  reason?: string
+  /** Request cancellation, optionally passing a human-readable reason */
+  cancel(reason?: string): void
+  /** Register cleanup callback to run when cancelled */
+  onCancel(callback: () => void): void
+}
+/**
+ * Create a new cancellation token
+ *
+ * The returned token starts with `cancelled: false` and an undefined reason.
+ *
+ * - `cancel(reason)`: the first call sets `cancelled` to true, stores
+ *   `reason`, then invokes every registered callback synchronously in
+ *   registration order. An exception thrown by a callback is caught and
+ *   discarded, so the remaining callbacks still run. Later calls do nothing,
+ *   which means the reason from the first call is kept.
+ * - `onCancel(callback)`: queues the callback, or invokes it immediately when
+ *   the token is already cancelled. That immediate call is not wrapped in
+ *   try/catch, so an exception from it propagates to the caller of `onCancel`.
+ *
+ * @returns A fresh token backed by its own private callback list
+ *
+ * @example
+ * const token = createCancellationToken()
+ * process.on('SIGINT', () => token.cancel('User interrupted'))
+ * const result = await runScan(files, repo, { cancellationToken: token })
+ */
+export function createCancellationToken(): CancellationToken {
+  const cleanupCallbacks: Array<() => void> = []
+  const token: CancellationToken = {
+    cancelled: false,
+    reason: undefined,
+    cancel(reason?: string) {
+      if (!token.cancelled) {
+        token.cancelled = true
+        token.reason = reason
+        // Run cleanup callbacks in the order they were registered
+        cleanupCallbacks.forEach(cb => {
+          try {
+            cb()
+          } catch (e) {
+            // Ignore cleanup errors so the remaining callbacks still run
+          }
+        })
+      }
+    },
+    onCancel(callback) {
+      if (token.cancelled) {
+        callback() // Already cancelled, run immediately (errors propagate to the caller)
+      } else {
+        cleanupCallbacks.push(callback)
+      }
+    }
+  }
+  return token
+}
 export interface ScanFile {
   path: string
   content: string
@@ -107,6 +162,10 @@ export interface ScanResult {
     cacheReadTokens: number
     cacheHitRate: number
   }
+  // Cancellation metadata
+  cancelled?: boolean // true if scan was cancelled by user
+  cancelReason?: string // Reason for cancellation (e.g., "User pressed Ctrl+C")
 }
 export interface ScanProgress {

package/src/utils/context-helpers.ts CHANGED Viewed

@@ -201,6 +201,46 @@ export function isClientBundledFile(filePath: string): boolean {
   return clientPatterns.some(pattern => pattern.test(filePath))
 }
+/**
+ * Check if file is a seed or data generation file
+ * These files generate test/demo data and Math.random() usage is acceptable
+ * Used to reduce false positives for Math.random() detection
+ *
+ * Matching is case-insensitive and purely path-based; file contents are not
+ * inspected. A path matches when it contains one of the directory segments
+ * `/seed/`, `/seeds/`, `/fixtures/`, `/generator/`, `/generators/` or
+ * `/factories/`, contains `/seeder.` or `.fixture.`, contains `/data/`
+ * followed later by `creator`, or ends with `seed-database`, `datacreator`
+ * or `factory` plus a `.ts` / `.js` extension.
+ *
+ * The directory patterns require a leading `/` and only recognise forward
+ * slashes, so a relative path that starts directly with `seed/` and paths
+ * using backslash separators are not matched by them.
+ *
+ * @param filePath - Path of the file to classify
+ * @returns true when any pattern matches, false otherwise
+ */
+export function isSeedOrDataGenFile(filePath: string): boolean {
+  const patterns = [
+    /\/seed\//i,
+    /\/seeds\//i,
+    /seed-database\.(ts|js)$/i,
+    /\/seeder\./i,
+    /datacreator\.(ts|js)$/i,
+    /\/data\/.*creator/i,
+    /\/fixtures\//i,
+    /\.fixture\./i,
+    /\/generators?\//i,
+    /\/factories\//i,
+    /factory\.(ts|js)$/i,
+  ]
+  return patterns.some(p => p.test(filePath))
+}
+/**
+ * Check if file is educational/intentional vulnerability code
+ * These files (e.g., OWASP Juice Shop) contain intentional vulnerabilities for training
+ * Should be skipped entirely to avoid false positives
+ *
+ * This function only classifies the path; acting on the result (for example
+ * skipping the file) is left to the caller.
+ *
+ * Matching is case-insensitive and purely path-based. Every pattern begins
+ * with `/`, so a bare file name such as `insecurity.ts` with no directory
+ * prefix does not match, and backslash-separated paths are not recognised.
+ *
+ * @param filePath - Path of the file to classify
+ * @returns true when any pattern matches, false otherwise
+ */
+export function isEducationalVulnerabilityFile(filePath: string): boolean {
+  const patterns = [
+    /\/insecurity\.(ts|js)$/i,
+    /\/vulnerable\.(ts|js)$/i,
+    /\/intentionally-vulnerable/i,
+    /\/security-examples?\//i,
+    /\/vuln-examples?\//i,
+    /\/challenge-\d+/i,  // OWASP Juice Shop challenges
+    /\/exploit-examples?\//i,
+  ]
+  return patterns.some(p => p.test(filePath))
+}
 // ============================================================================
 // Code Line Context Detection
 // ============================================================================