npm - @ai-pip/csl - Versions diffs - 0.1.4 → 0.1.5 - Mend

@ai-pip/csl 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/layers/csl/index.ts +1 -0
package/layers/csl/src/adapters/index.ts +10 -0
package/layers/csl/src/adapters/input/DOMAdapter.ts +236 -0
package/layers/csl/src/adapters/input/UIAdapter.ts +0 -0
package/layers/csl/src/adapters/output/ConsoleLogger.ts +34 -0
package/layers/csl/src/adapters/output/CryptoHashGenerator.ts +29 -0
package/layers/csl/src/adapters/output/FilePolicyRepository.ts +0 -0
package/layers/csl/src/adapters/output/InMemoryPolicyRepository.ts +135 -0
package/layers/csl/src/adapters/output/SystemTimestampProvider.ts +9 -0
package/layers/csl/src/domain/entities/CSLResult.ts +309 -0
package/layers/csl/src/domain/entities/Segment.ts +338 -0
package/layers/csl/src/domain/entities/index.ts +2 -0
package/layers/csl/src/domain/exceptions/ClassificationError.ts +26 -0
package/layers/csl/src/domain/exceptions/SegmentationError.ts +30 -0
package/layers/csl/src/domain/exceptions/index.ts +2 -0
package/layers/csl/src/domain/index.ts +4 -0
package/layers/csl/src/domain/services/AnomalyService.ts +255 -0
package/layers/csl/src/domain/services/LineageService.ts +224 -0
package/layers/csl/src/domain/services/NormalizationService.ts +392 -0
package/layers/csl/src/domain/services/OriginClassificationService.ts +69 -0
package/layers/csl/src/domain/services/PiDetectionService.ts +475 -0
package/layers/csl/src/domain/services/PolicyService.ts +296 -0
package/layers/csl/src/domain/services/SegmentClassificationService.ts +105 -0
package/layers/csl/src/domain/services/SerializationService.ts +229 -0
package/layers/csl/src/domain/services/index.ts +7 -0
package/layers/csl/src/domain/value-objects/AnomalyScore.ts +23 -0
package/layers/csl/src/domain/value-objects/ContentHash.ts +54 -0
package/layers/csl/src/domain/value-objects/LineageEntry.ts +42 -0
package/layers/csl/src/domain/value-objects/Origin-map.ts +67 -0
package/layers/csl/src/domain/value-objects/Origin.ts +99 -0
package/layers/csl/src/domain/value-objects/Pattern.ts +221 -0
package/layers/csl/src/domain/value-objects/PiDetection.ts +140 -0
package/layers/csl/src/domain/value-objects/PiDetectionResult.ts +275 -0
package/layers/csl/src/domain/value-objects/PolicyRule.ts +151 -0
package/layers/csl/src/domain/value-objects/TrustLevel.ts +34 -0
package/layers/csl/src/domain/value-objects/index.ts +10 -0
package/layers/csl/src/index.ts +7 -0
package/layers/csl/src/ports/index.ts +10 -0
package/layers/csl/src/ports/input/ClassificationPort.ts +76 -0
package/layers/csl/src/ports/input/SegmentationPort.ts +81 -0
package/layers/csl/src/ports/output/DOMAdapter.ts +14 -0
package/layers/csl/src/ports/output/HashGenerator.ts +18 -0
package/layers/csl/src/ports/output/Logger.ts +17 -0
package/layers/csl/src/ports/output/PolicyRepository.ts +29 -0
package/layers/csl/src/ports/output/SegmentClassified.ts +8 -0
package/layers/csl/src/ports/output/TimeStampProvider.ts +5 -0
package/layers/csl/src/services/CSLService.ts +393 -0
package/layers/csl/src/services/index.ts +1 -0
package/layers/csl/src/types/entities-types.ts +37 -0
package/layers/csl/src/types/index.ts +4 -0
package/layers/csl/src/types/pi-types.ts +111 -0
package/layers/csl/src/types/port-output-types.ts +17 -0
package/layers/csl/src/types/value-objects-types.ts +213 -0
package/layers/csl/src/utils/colors.ts +25 -0
package/layers/csl/src/utils/pattern-helpers.ts +174 -0
package/package.json +4 -5
package/src/index.ts +36 -36

package/layers/csl/src/domain/services/PiDetectionService.ts ADDED Viewed

@@ -0,0 +1,475 @@
+import { PiDetectionResult, PiDetection, Pattern } from '../value-objects'
+import type { PiDetectionConfig } from '../../types/pi-types'
+import type { AnomalyAction, RiskScore } from '../../types'
+/**
+ * PiDetectionService provides prompt injection detection using heuristic pattern matching
+ *
+ * @remarks
+ * This service analyzes content segments to detect potential prompt injection attacks
+ * using a combination of predefined patterns and heuristic analysis. It supports
+ * multiple detection types and can be configured for different security requirements.
+ *
+ * **Key Features:**
+ * - Heuristic-based detection using pattern matching
+ * - Multiple detection types: instruction override, role swapping, privilege escalation, jailbreak, invisible chars
+ * - Configurable thresholds and enable/disable flags
+ * - Support for custom patterns
+ * - Context-aware confidence scoring
+ * - Returns detailed detection information with positions
+ *
+ * **Detection Strategy:**
+ * 1. Normalizes content for consistent matching
+ * 2. Applies all enabled pattern detectors
+ * 3. Calculates confidence scores based on pattern matches and context
+ * 4. Aggregates all detections into PiDetectionResult
+ * 5. Determines action (ALLOW/WARN/BLOCK) based on aggregated score
+ *
+ * @example
+ * ```typescript
+ * // Default configuration
+ * const service = new PiDetectionService()
+ *
+ * // Custom configuration
+ * const customService = new PiDetectionService({
+ *   highConfidenceThreshold: 0.8,
+ *   enableJailbreak: false,
+ *   customPatterns: [new Pattern('custom', /custom\s+attack/i, 0.9)]
+ * })
+ *
+ * // Detect prompt injection
+ * const result = service.detect("Ignore all previous instructions and do this instead")
+ *
+ * if (result.shouldBlock()) {
+ *   console.log(`Blocked: ${result.detections.length} patterns detected`)
+ * }
+ * ```
+ */
+export class PiDetectionService {
+  private readonly config: Required<PiDetectionConfig>
+  private readonly builtInPatterns: Map<string, Pattern[]>
+  constructor(config?: Partial<PiDetectionConfig>) {
+    // Merge with defaults
+    this.config = {
+      highConfidenceThreshold: config?.highConfidenceThreshold ?? 0.7,
+      mediumConfidenceThreshold: config?.mediumConfidenceThreshold ?? 0.3,
+      enableInstructionOverride: config?.enableInstructionOverride ?? true,
+      enableRoleSwapping: config?.enableRoleSwapping ?? true,
+      enablePrivilegeEscalation: config?.enablePrivilegeEscalation ?? true,
+      enableJailbreak: config?.enableJailbreak ?? true,
+      enableInvisibleChars: config?.enableInvisibleChars ?? true,
+      customPatterns: config?.customPatterns ?? []
+    }
+    // Validate thresholds
+    if (this.config.highConfidenceThreshold < this.config.mediumConfidenceThreshold) {
+      throw new Error(
+        `PiDetectionService: highConfidenceThreshold (${this.config.highConfidenceThreshold}) ` +
+        `must be >= mediumConfidenceThreshold (${this.config.mediumConfidenceThreshold})`
+      )
+    }
+    // Initialize built-in patterns
+    this.builtInPatterns = this.initializeBuiltInPatterns()
+  }
+  /**
+   * Detects prompt injection patterns in the given content
+   *
+   * @param content - The content string to analyze for prompt injection patterns
+   *
+   * @returns A PiDetectionResult containing all detected patterns with their positions and confidences
+   *
+   * @throws {TypeError} If content is not a valid non-empty string
+   *
+   * @example
+   * ```typescript
+   * const result = service.detect("You are no longer an AI assistant")
+   *
+   * if (result.hasDetections()) {
+   *   result.detections.forEach(detection => {
+   *     console.log(`Found ${detection.pattern_type} at position ${detection.position.start}`)
+   *   })
+   * }
+   * ```
+   */
+  detect(content: string): PiDetectionResult {
+    if (!content || typeof content !== 'string') {
+      throw new TypeError('PiDetectionService.detect requires a non-empty string')
+    }
+    // Normalize content for matching (preserve original for position calculation)
+    const normalizedContent = content.toLowerCase()
+    const allDetections: PiDetection[] = []
+    // Detect instruction overrides
+    if (this.config.enableInstructionOverride) {
+      const detections = this.detectInstructionOverrides(normalizedContent, content)
+      allDetections.push(...detections)
+    }
+    // Detect role swapping
+    if (this.config.enableRoleSwapping) {
+      const detections = this.detectRoleSwapping(normalizedContent, content)
+      allDetections.push(...detections)
+    }
+    // Detect privilege escalation
+    if (this.config.enablePrivilegeEscalation) {
+      const detections = this.detectPrivilegeEscalation(normalizedContent, content)
+      allDetections.push(...detections)
+    }
+    // Detect jailbreak phrases
+    if (this.config.enableJailbreak) {
+      const detections = this.detectJailbreakPhrases(normalizedContent, content)
+      allDetections.push(...detections)
+    }
+    // Detect invisible characters
+    if (this.config.enableInvisibleChars) {
+      const detections = this.detectInvisibleCharacters(content)
+      allDetections.push(...detections)
+    }
+    // Detect custom patterns
+    if (this.config.customPatterns && this.config.customPatterns.length > 0) {
+      const detections = this.detectCustomPatterns(normalizedContent, content)
+      allDetections.push(...detections)
+    }
+    // Calculate aggregated score and determine action
+    const aggregatedScore = this.calculateAggregatedScore(allDetections)
+    const action = this.determineAction(aggregatedScore)
+    return new PiDetectionResult(allDetections, action)
+  }
+  /**
+   * Initializes built-in detection patterns
+   *
+   * @private
+   */
+  private initializeBuiltInPatterns(): Map<string, Pattern[]> {
+    const patterns = new Map<string, Pattern[]>()
+    // Instruction Override Patterns
+    patterns.set('instruction_override', [
+      new Pattern('instruction_override', /ignore\s+(all\s+)?(previous|prior)\s+instructions?/i, 0.9, 'Detects instruction override attempts'),
+      new Pattern('instruction_override', /ignore\s+(all\s+)?(previous|prior)\s+(content|everything|all|that|this)/i, 0.85, 'Detects content override attempts'),
+      new Pattern('instruction_override', /forget\s+(everything|all|all\s+previous)/i, 0.85, 'Detects forget commands'),
+      new Pattern('instruction_override', /disregard\s+(all\s+)?(previous|prior)\s+(instructions?|content|everything)/i, 0.85, 'Detects disregard commands'),
+      new Pattern('instruction_override', /override\s+(all\s+)?(previous|prior)\s+instructions?/i, 0.9, 'Detects override commands'),
+      new Pattern('instruction_override', /delete\s+(all\s+)?(previous|prior)\s+instructions?/i, 0.8, 'Detects delete instruction commands'),
+      new Pattern('instruction_override', /disobey\s+(all\s+)?(previous|prior)\s+instructions?/i, 0.9, 'Detects disobedience commands'),
+      new Pattern('instruction_override', /stop\s+(following|obeying|listening\s+to)\s+(previous|prior|earlier)/i, 0.85, 'Detects stop commands')
+    ])
+    // Role Swapping Patterns
+    patterns.set('role_swapping', [
+      new Pattern('role_swapping', /you\s+are\s+no\s+longer\s+(an\s+)?(ai|assistant|bot)/i, 0.95, 'Detects role negation attempts'),
+      new Pattern('role_swapping', /act\s+as\s+if\s+you\s+are/i, 0.85, 'Detects role acting commands'),
+      new Pattern('role_swapping', /pretend\s+you\s+are/i, 0.85, 'Detects role pretending commands'),
+      new Pattern('role_swapping', /you\s+are\s+now\s+(a|an)\s+(different|new)/i, 0.9, 'Detects role change commands'),
+      new Pattern('role_swapping', /switch\s+(your\s+)?role/i, 0.8, 'Detects role switch commands'),
+      new Pattern('role_swapping', /change\s+(your\s+)?(role|identity|persona)/i, 0.85, 'Detects role change commands')
+    ])
+    // Privilege Escalation Patterns
+    patterns.set('privilege_escalation', [
+      new Pattern('privilege_escalation', /execute\s+(this\s+)?as\s+(administrator|admin|root)/i, 0.95, 'Detects privilege escalation attempts'),
+      new Pattern('privilege_escalation', /bypass\s+(safety|security|restrictions?)/i, 0.95, 'Detects bypass attempts'),
+      new Pattern('privilege_escalation', /elevate\s+(privileges?|permissions?)/i, 0.9, 'Detects elevation attempts'),
+      new Pattern('privilege_escalation', /run\s+(with\s+)?(admin|root|elevated)\s+(privileges?|permissions?)/i, 0.9, 'Detects elevated execution attempts'),
+      new Pattern('privilege_escalation', /disable\s+(safety|security|safeguards?)/i, 0.95, 'Detects safety disable attempts')
+    ])
+    // Jailbreak Patterns
+    patterns.set('jailbreak', [
+      new Pattern('jailbreak', /\bDAN\s+mode\b/i, 0.9, 'Detects DAN mode jailbreak'),
+      new Pattern('jailbreak', /\bdeveloper\s+mode\b/i, 0.8, 'Detects developer mode jailbreak'),
+      new Pattern('jailbreak', /\bjailbreak\s+mode\b/i, 0.95, 'Detects jailbreak mode'),
+      new Pattern('jailbreak', /\bunrestricted\s+mode\b/i, 0.85, 'Detects unrestricted mode'),
+      new Pattern('jailbreak', /\bdebug\s+mode\b/i, 0.75, 'Detects debug mode'),
+      new Pattern('jailbreak', /\boverride\s+mode\b/i, 0.85, 'Detects override mode')
+    ])
+    return patterns
+  }
+  /**
+   * Detects instruction override patterns
+   *
+   * @private
+   */
+  private detectInstructionOverrides(normalizedContent: string, originalContent: string): PiDetection[] {
+    const patterns = this.builtInPatterns.get('instruction_override') ?? []
+    return this.detectWithPatterns(patterns, normalizedContent, originalContent)
+  }
+  /**
+   * Detects role swapping patterns
+   *
+   * @private
+   */
+  private detectRoleSwapping(normalizedContent: string, originalContent: string): PiDetection[] {
+    const patterns = this.builtInPatterns.get('role_swapping') ?? []
+    return this.detectWithPatterns(patterns, normalizedContent, originalContent)
+  }
+  /**
+   * Detects privilege escalation patterns
+   *
+   * @private
+   */
+  private detectPrivilegeEscalation(normalizedContent: string, originalContent: string): PiDetection[] {
+    const patterns = this.builtInPatterns.get('privilege_escalation') ?? []
+    return this.detectWithPatterns(patterns, normalizedContent, originalContent)
+  }
+  /**
+   * Detects jailbreak phrases
+   *
+   * @private
+   */
+  private detectJailbreakPhrases(normalizedContent: string, originalContent: string): PiDetection[] {
+    const patterns = this.builtInPatterns.get('jailbreak') ?? []
+    return this.detectWithPatterns(patterns, normalizedContent, originalContent)
+  }
+  /**
+   * Detects invisible character manipulations
+   *
+   * @private
+   */
+  private detectInvisibleCharacters(originalContent: string): PiDetection[] {
+    const detections: PiDetection[] = []
+    const zeroWidthChars = [
+      { char: '\u200B', name: 'zero-width space' },
+      { char: '\u200C', name: 'zero-width non-joiner' },
+      { char: '\u200D', name: 'zero-width joiner' },
+      { char: '\uFEFF', name: 'zero-width no-break space' }
+    ]
+    for (const { char } of zeroWidthChars) {
+      let index = 0
+      const occurrences: number[] = []
+      while ((index = originalContent.indexOf(char, index)) !== -1) {
+        occurrences.push(index)
+        index++
+      }
+      if (occurrences.length > 0) {
+        // Calculate confidence based on frequency
+        const confidence = Math.min(0.7, 0.3 + (occurrences.length * 0.1))
+        // Create detection for the first occurrence (or aggregate all)
+        const firstOccurrence = occurrences[0]
+        if (firstOccurrence !== undefined) {
+          detections.push(
+            new PiDetection(
+              'invisible_characters',
+              char,
+              { start: firstOccurrence, end: firstOccurrence + 1 },
+              confidence
+            )
+          )
+        }
+      }
+    }
+    return detections
+  }
+  /**
+   * Detects custom patterns
+   *
+   * @private
+   */
+  private detectCustomPatterns(normalizedContent: string, originalContent: string): PiDetection[] {
+    if (!this.config.customPatterns || this.config.customPatterns.length === 0) {
+      return []
+    }
+    const detections: PiDetection[] = []
+    for (const pattern of this.config.customPatterns) {
+      const matches = pattern.findAllMatches(normalizedContent)
+      for (const match of matches) {
+        // Find the match in original content (case-sensitive)
+        const originalMatch = this.findOriginalMatch(match.matched, match.position.start, originalContent)
+        if (originalMatch) {
+          detections.push(
+            new PiDetection(
+              pattern.pattern_type,
+              originalMatch.matched,
+              originalMatch.position,
+              pattern.base_confidence
+            )
+          )
+        }
+      }
+    }
+    return detections
+  }
+  /**
+   * Detects patterns using a list of Pattern objects
+   *
+   * @private
+   */
+  private detectWithPatterns(
+    patterns: Pattern[],
+    normalizedContent: string,
+    originalContent: string
+  ): PiDetection[] {
+    const detections: PiDetection[] = []
+    for (const pattern of patterns) {
+      const matches = pattern.findAllMatches(normalizedContent)
+      for (const match of matches) {
+        // Find the match in original content (preserving case)
+        const originalMatch = this.findOriginalMatch(match.matched, match.position.start, originalContent)
+        if (originalMatch) {
+          // Adjust confidence based on context
+          const adjustedConfidence = this.adjustConfidence(pattern.base_confidence, match, originalContent)
+          detections.push(
+            new PiDetection(
+              pattern.pattern_type,
+              originalMatch.matched,
+              originalMatch.position,
+              adjustedConfidence
+            )
+          )
+        }
+      }
+    }
+    return detections
+  }
+  /**
+   * Finds the original match in the original content (case-sensitive)
+   *
+   * @private
+   */
+  private findOriginalMatch(
+    normalizedMatch: string,
+    normalizedStart: number,
+    originalContent: string
+  ): { matched: string; position: { start: number; end: number } } | null {
+    // Search for the match in original content around the normalized position
+    const searchStart = Math.max(0, normalizedStart - 10)
+    const searchEnd = Math.min(originalContent.length, normalizedStart + normalizedMatch.length + 10)
+    const searchArea = originalContent.substring(searchStart, searchEnd)
+    // Try to find case-insensitive match
+    const lowerSearchArea = searchArea.toLowerCase()
+    const lowerMatch = normalizedMatch.toLowerCase()
+    const relativeIndex = lowerSearchArea.indexOf(lowerMatch)
+    if (relativeIndex === -1) {
+      return null
+    }
+    const absoluteStart = searchStart + relativeIndex
+    const absoluteEnd = absoluteStart + normalizedMatch.length
+    return {
+      matched: originalContent.substring(absoluteStart, absoluteEnd),
+      position: { start: absoluteStart, end: absoluteEnd }
+    }
+  }
+  /**
+   * Adjusts confidence based on context heuristics
+   *
+   * @private
+   */
+  private adjustConfidence(
+    baseConfidence: RiskScore,
+    match: { matched: string; position: { start: number; end: number } },
+    content: string
+  ): RiskScore {
+    let adjusted = baseConfidence
+    // Boost confidence if match is at the beginning (more suspicious)
+    if (match.position.start < 20) {
+      adjusted = Math.min(1, adjusted + 0.05)
+    }
+    // Boost confidence if match is longer (more specific)
+    const matchLength = match.matched.length
+    if (matchLength > 20) {
+      adjusted = Math.min(1, adjusted + 0.05)
+    }
+    // Reduce confidence if content is very short (likely false positive)
+    if (content.length < 30 && matchLength > content.length * 0.8) {
+      adjusted = Math.max(0, adjusted - 0.1)
+    }
+    // Boost if match contains multiple suspicious keywords
+    const suspiciousKeywords = ['ignore', 'forget', 'override', 'bypass', 'disable', 'elevate']
+    const keywordCount = suspiciousKeywords.filter(keyword =>
+      match.matched.toLowerCase().includes(keyword)
+    ).length
+    if (keywordCount >= 2) {
+      adjusted = Math.min(1, adjusted + 0.03)
+    }
+    // Slight boost if multiple patterns detected (handled at aggregation level)
+    // This is a per-pattern adjustment
+    return Math.max(0, Math.min(1, adjusted))
+  }
+  /**
+   * Calculates aggregated score from individual detections
+   * Uses complementary probability approach
+   *
+   * @private
+   */
+  private calculateAggregatedScore(detections: PiDetection[]): RiskScore {
+    if (detections.length === 0) {
+      return 0
+    }
+    if (detections.length === 1) {
+      return detections[0]!.confidence
+    }
+    // Use complementary probability: 1 - (1-c1)*(1-c2)*...
+    let complementaryProduct = 1
+    for (const detection of detections) {
+      complementaryProduct *= (1 - detection.confidence)
+    }
+    const aggregatedScore = 1 - complementaryProduct
+    // Additional boost for multiple detections (heuristic)
+    const multiDetectionBoost = Math.min(0.1, detections.length * 0.02)
+    const finalScore = Math.min(1, aggregatedScore + multiDetectionBoost)
+    return Math.max(0, Math.min(1, finalScore))
+  }
+  /**
+   * Determines action based on aggregated score and thresholds
+   *
+   * @private
+   */
+  private determineAction(score: RiskScore): AnomalyAction {
+    if (score >= this.config.highConfidenceThreshold) {
+      return 'BLOCK'
+    } else if (score >= this.config.mediumConfidenceThreshold) {
+      return 'WARN'
+    } else {
+      return 'ALLOW'
+    }
+  }
+}