npm - @ai-pip/csl - Versions diffs - 0.1.4 → 0.1.5 - Mend

@ai-pip/csl 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57) hide show

package/layers/csl/index.ts +1 -0
package/layers/csl/src/adapters/index.ts +10 -0
package/layers/csl/src/adapters/input/DOMAdapter.ts +236 -0
package/layers/csl/src/adapters/input/UIAdapter.ts +0 -0
package/layers/csl/src/adapters/output/ConsoleLogger.ts +34 -0
package/layers/csl/src/adapters/output/CryptoHashGenerator.ts +29 -0
package/layers/csl/src/adapters/output/FilePolicyRepository.ts +0 -0
package/layers/csl/src/adapters/output/InMemoryPolicyRepository.ts +135 -0
package/layers/csl/src/adapters/output/SystemTimestampProvider.ts +9 -0
package/layers/csl/src/domain/entities/CSLResult.ts +309 -0
package/layers/csl/src/domain/entities/Segment.ts +338 -0
package/layers/csl/src/domain/entities/index.ts +2 -0
package/layers/csl/src/domain/exceptions/ClassificationError.ts +26 -0
package/layers/csl/src/domain/exceptions/SegmentationError.ts +30 -0
package/layers/csl/src/domain/exceptions/index.ts +2 -0
package/layers/csl/src/domain/index.ts +4 -0
package/layers/csl/src/domain/services/AnomalyService.ts +255 -0
package/layers/csl/src/domain/services/LineageService.ts +224 -0
package/layers/csl/src/domain/services/NormalizationService.ts +392 -0
package/layers/csl/src/domain/services/OriginClassificationService.ts +69 -0
package/layers/csl/src/domain/services/PiDetectionService.ts +475 -0
package/layers/csl/src/domain/services/PolicyService.ts +296 -0
package/layers/csl/src/domain/services/SegmentClassificationService.ts +105 -0
package/layers/csl/src/domain/services/SerializationService.ts +229 -0
package/layers/csl/src/domain/services/index.ts +7 -0
package/layers/csl/src/domain/value-objects/AnomalyScore.ts +23 -0
package/layers/csl/src/domain/value-objects/ContentHash.ts +54 -0
package/layers/csl/src/domain/value-objects/LineageEntry.ts +42 -0
package/layers/csl/src/domain/value-objects/Origin-map.ts +67 -0
package/layers/csl/src/domain/value-objects/Origin.ts +99 -0
package/layers/csl/src/domain/value-objects/Pattern.ts +221 -0
package/layers/csl/src/domain/value-objects/PiDetection.ts +140 -0
package/layers/csl/src/domain/value-objects/PiDetectionResult.ts +275 -0
package/layers/csl/src/domain/value-objects/PolicyRule.ts +151 -0
package/layers/csl/src/domain/value-objects/TrustLevel.ts +34 -0
package/layers/csl/src/domain/value-objects/index.ts +10 -0
package/layers/csl/src/index.ts +7 -0
package/layers/csl/src/ports/index.ts +10 -0
package/layers/csl/src/ports/input/ClassificationPort.ts +76 -0
package/layers/csl/src/ports/input/SegmentationPort.ts +81 -0
package/layers/csl/src/ports/output/DOMAdapter.ts +14 -0
package/layers/csl/src/ports/output/HashGenerator.ts +18 -0
package/layers/csl/src/ports/output/Logger.ts +17 -0
package/layers/csl/src/ports/output/PolicyRepository.ts +29 -0
package/layers/csl/src/ports/output/SegmentClassified.ts +8 -0
package/layers/csl/src/ports/output/TimeStampProvider.ts +5 -0
package/layers/csl/src/services/CSLService.ts +393 -0
package/layers/csl/src/services/index.ts +1 -0
package/layers/csl/src/types/entities-types.ts +37 -0
package/layers/csl/src/types/index.ts +4 -0
package/layers/csl/src/types/pi-types.ts +111 -0
package/layers/csl/src/types/port-output-types.ts +17 -0
package/layers/csl/src/types/value-objects-types.ts +213 -0
package/layers/csl/src/utils/colors.ts +25 -0
package/layers/csl/src/utils/pattern-helpers.ts +174 -0
package/package.json +4 -5
package/src/index.ts +36 -36

package/layers/csl/src/domain/services/NormalizationService.ts ADDED Viewed

@@ -0,0 +1,392 @@
/**
 * NormalizationService brings untrusted text into one canonical form before it is analysed.
 *
 * @remarks
 * The service is stateless: every method is static and none of them relies on `this`,
 * so a method can safely be passed around as a plain function
 * (for example `segments.map(NormalizationService.normalize)`).
 *
 * **Pipeline applied by {@link NormalizationService.normalize}:**
 * 1. Strip a leading byte order mark
 * 2. Decode HTML character references (one level, single pass)
 * 3. Remove invisible and bidirectional formatting characters
 * 4. Convert `\r\n` and `\r` line breaks to `\n`
 * 5. Remove control characters (everything in C0/C1 except `\n` and `\t`, plus DEL)
 * 6. Unicode normalization to NFC
 * 7. Collapse runs of spaces and tabs into a single space
 * 8. Trim leading and trailing whitespace
 *
 * Entities are decoded before the character filters run so that an encoded invisible or
 * control character (for example `&#x200B;`) cannot survive normalization. Whitespace is
 * collapsed after the filters so that removing a character never leaves a double space.
 *
 * @example
 * ```typescript
 * const normalized = NormalizationService.normalize('Hello\u200B\u200Cworld')
 * // Returns: 'Helloworld'
 *
 * // Or use individual methods
 * const unicodeNormalized = NormalizationService.normalizeUnicode('café')
 * ```
 */
export class NormalizationService {
  /**
   * Matches exactly one character reference: decimal (`&#60;`), hexadecimal
   * (`&#x3C;` / `&#X3C;`) or named (`&lt;`). Capture groups: 1 = decimal digits,
   * 2 = hexadecimal digits, 3 = entity name.
   */
  private static readonly ENTITY_PATTERN = /&(?:#(\d+)|#[xX]([0-9A-Fa-f]+)|([A-Za-z][A-Za-z0-9]*));/g

  /** Named entities that are decoded in every runtime, keyed by name without `&` and `;`. */
  private static readonly NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
    ['lt', '<'],
    ['gt', '>'],
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', ' '],
    ['copy', '©'],
    ['reg', '®'],
    ['trade', '™'],
    ['hellip', '…'],
    ['mdash', '—'],
    ['ndash', '–'],
  ])

  /**
   * Characters removed by `removeInvisibleCharacters`:
   * - U+00AD soft hyphen
   * - U+200B..U+200F zero-width space / non-joiner / joiner, LRM, RLM
   * - U+202A..U+202E bidirectional embeddings, overrides and pop
   * - U+202F narrow no-break space
   * - U+2060 word joiner
   * - U+2066..U+2069 bidirectional isolates (LRI, RLI, FSI, PDI)
   * - U+FEFF zero-width no-break space (BOM)
   */
  private static readonly INVISIBLE_CHAR_PATTERN = /[\u00AD\u200B-\u200F\u202A-\u202F\u2060\u2066-\u2069\uFEFF]/g

  /**
   * C0 controls except tab (U+0009) and line feed (U+000A), DEL (U+007F) and the
   * C1 controls (U+0080..U+009F). Written with Unicode escapes on purpose.
   */
  // eslint-disable-next-line no-control-regex
  private static readonly CONTROL_CHAR_PATTERN = /[\u0000-\u0008\u000B-\u001F\u007F-\u009F]/g

  /**
   * Runs the complete normalization pipeline described on the class.
   *
   * @param content - The content string to normalize
   * @returns Normalized content string; an empty string is returned unchanged
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const normalized = NormalizationService.normalize('Hello\u200Bworld')
   * // Returns: 'Helloworld'
   * ```
   */
  static normalize(content: string): string {
    NormalizationService.assertString(content, 'normalize')
    if (content.length === 0) {
      return content
    }
    let normalized = NormalizationService.removeBOM(content)
    // Decode first: anything an entity expands to must pass through the filters below.
    normalized = NormalizationService.decodeHtmlEntities(normalized)
    normalized = NormalizationService.removeInvisibleCharacters(normalized)
    // Line breaks before control characters: the control filter would otherwise delete a lone \r.
    normalized = NormalizationService.normalizeLineBreaks(normalized)
    normalized = NormalizationService.removeControlCharacters(normalized)
    // Compose only after the filters, so a base letter and a combining mark that were
    // separated by a removed character still end up as one precomposed character.
    normalized = NormalizationService.normalizeUnicode(normalized)
    // Collapse last: removing characters above can leave adjacent spaces behind.
    normalized = NormalizationService.normalizeWhitespace(normalized)
    return NormalizationService.trimWhitespace(normalized)
  }

  /**
   * Normalizes Unicode characters to NFC (Normalization Form Canonical Composition).
   *
   * @param content - Content to normalize
   * @returns Unicode-normalized content
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const normalized = NormalizationService.normalizeUnicode('cafe\u0301')
   * // Returns: 'café' with a single precomposed 'é'
   * ```
   */
  static normalizeUnicode(content: string): string {
    NormalizationService.assertString(content, 'normalizeUnicode')
    return content.normalize('NFC')
  }

  /**
   * Removes zero-width and bidirectional formatting characters that can hide or
   * visually reorder text.
   *
   * @param content - Content to clean
   * @returns Content without the characters listed on `INVISIBLE_CHAR_PATTERN`
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const cleaned = NormalizationService.removeInvisibleCharacters('Hello\u200Bworld')
   * // Returns: 'Helloworld'
   * ```
   */
  static removeInvisibleCharacters(content: string): string {
    NormalizationService.assertString(content, 'removeInvisibleCharacters')
    return content.replace(NormalizationService.INVISIBLE_CHAR_PATTERN, '')
  }

  /**
   * Removes a single BOM (U+FEFF) from the very beginning of content.
   *
   * @param content - Content that may start with a BOM
   * @returns Content without the leading BOM
   *
   * @throws {TypeError} If content is not a string
   */
  static removeBOM(content: string): string {
    NormalizationService.assertString(content, 'removeBOM')
    return content.charCodeAt(0) === 0xfeff ? content.slice(1) : content
  }

  /**
   * Decodes HTML character references to the characters they stand for.
   *
   * @remarks
   * - Decoding is a single left-to-right pass, so each reference is decoded exactly once:
   *   `&amp;lt;` becomes `&lt;`, never `<`.
   * - Markup in the input is left untouched; only the references themselves are replaced.
   * - Named references outside the built-in table are resolved through `DOMParser` when
   *   the runtime provides one; otherwise they are kept verbatim.
   * - Numeric references that are out of range or denote a surrogate (U+D800..U+DFFF)
   *   are kept verbatim instead of producing an ill-formed string.
   *
   * @param content - Content with HTML entities
   * @returns Content with decoded entities
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const decoded = NormalizationService.decodeHtmlEntities('&lt;script&gt;')
   * // Returns: '<script>'
   * ```
   */
  static decodeHtmlEntities(content: string): string {
    NormalizationService.assertString(content, 'decodeHtmlEntities')
    return content.replace(
      NormalizationService.ENTITY_PATTERN,
      (reference: string, decimal?: string, hexadecimal?: string, name?: string): string => {
        if (name !== undefined) {
          return (
            NormalizationService.NAMED_ENTITIES.get(name) ??
            NormalizationService.decodeNamedEntityWithDom(reference) ??
            reference
          )
        }
        const codePoint =
          decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hexadecimal ?? '', 16)
        return NormalizationService.decodeCodePoint(codePoint) ?? reference
      }
    )
  }

  /**
   * Replaces every run of spaces and/or tabs with one space.
   * A single tab is therefore also turned into a space; line breaks are not touched.
   *
   * @param content - Content with potentially excessive whitespace
   * @returns Content with normalized whitespace
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const normalized = NormalizationService.normalizeWhitespace('Hello    world')
   * // Returns: 'Hello world'
   * ```
   */
  static normalizeWhitespace(content: string): string {
    NormalizationService.assertString(content, 'normalizeWhitespace')
    return content.replace(/[ \t]+/g, ' ')
  }

  /**
   * Normalizes line breaks to Unix-style (\n).
   *
   * @param content - Content with mixed line breaks
   * @returns Content in which every `\r\n` and every remaining `\r` is a `\n`
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const normalized = NormalizationService.normalizeLineBreaks('Hello\r\nworld\r')
   * // Returns: 'Hello\nworld\n'
   * ```
   */
  static normalizeLineBreaks(content: string): string {
    NormalizationService.assertString(content, 'normalizeLineBreaks')
    // `\r\n?` consumes a Windows pair as one unit, so it yields one `\n`, not two.
    return content.replace(/\r\n?/g, '\n')
  }

  /**
   * Removes control characters except newline (\n) and tab (\t).
   *
   * @remarks
   * A carriage return is a control character and is removed here; call
   * `normalizeLineBreaks` first if `\r` line breaks should be preserved as `\n`.
   *
   * @param content - Content that may contain control characters
   * @returns Content without control characters
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const cleaned = NormalizationService.removeControlCharacters('Hello\x00world')
   * // Returns: 'Helloworld'
   * ```
   */
  static removeControlCharacters(content: string): string {
    NormalizationService.assertString(content, 'removeControlCharacters')
    return content.replace(NormalizationService.CONTROL_CHAR_PATTERN, '')
  }

  /**
   * Trims leading and trailing whitespace.
   *
   * @param content - Content to trim
   * @returns Trimmed content
   *
   * @throws {TypeError} If content is not a string
   *
   * @example
   * ```typescript
   * const trimmed = NormalizationService.trimWhitespace('  Hello  ')
   * // Returns: 'Hello'
   * ```
   */
  static trimWhitespace(content: string): string {
    NormalizationService.assertString(content, 'trimWhitespace')
    return content.trim()
  }

  /**
   * Reports whether content can be encoded as UTF-8 without loss.
   *
   * @remarks
   * A JavaScript string is a sequence of UTF-16 code units; the only thing that cannot be
   * represented in UTF-8 is an unpaired surrogate. The check scans for those directly, so
   * the answer is the same in every runtime and no intermediate buffers are allocated.
   *
   * @param content - Content to validate
   * @returns true if content is a string without unpaired surrogates, false otherwise
   *
   * @example
   * ```typescript
   * const isValid = NormalizationService.isValidUTF8('Hello world')
   * // Returns: true
   * ```
   */
  static isValidUTF8(content: string): boolean {
    if (typeof content !== 'string') {
      return false
    }
    for (let index = 0; index < content.length; index++) {
      const unit = content.charCodeAt(index)
      if (unit >= 0xd800 && unit <= 0xdbff) {
        // High surrogate: must be followed by a low surrogate (NaN at end of string fails too).
        const next = content.charCodeAt(index + 1)
        if (!(next >= 0xdc00 && next <= 0xdfff)) {
          return false
        }
        index++
      } else if (unit >= 0xdc00 && unit <= 0xdfff) {
        // Low surrogate that was not consumed as the second half of a pair.
        return false
      }
    }
    return true
  }

  /** Throws the TypeError shared by all public methods when `value` is not a string. */
  private static assertString(value: unknown, method: string): void {
    if (typeof value !== 'string') {
      throw new TypeError(`NormalizationService.${method}: content must be a string`)
    }
  }

  /** Converts a numeric reference to its character; undefined for out-of-range or surrogate values. */
  private static decodeCodePoint(codePoint: number): string | undefined {
    const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff
    if (!Number.isInteger(codePoint) || codePoint < 0 || codePoint > 0x10ffff || isSurrogate) {
      return undefined
    }
    return String.fromCodePoint(codePoint)
  }

  /**
   * Resolves one named reference through the runtime's HTML parser, if there is one.
   * Only the reference itself is parsed, never caller-supplied markup.
   */
  private static decodeNamedEntityWithDom(reference: string): string | undefined {
    if (typeof DOMParser === 'undefined') {
      return undefined
    }
    try {
      const text = new DOMParser().parseFromString(reference, 'text/html').documentElement.textContent
      // An unknown name comes back unchanged; report that as "not decoded".
      return text !== null && text !== '' && text !== reference ? text : undefined
    } catch {
      return undefined
    }
  }
}

package/layers/csl/src/domain/services/OriginClassificationService.ts ADDED Viewed

@@ -0,0 +1,69 @@
+import type { ClassificationPort } from '../../ports';
+import { ClassificationError } from '../exceptions';
+import { TrustLevel, type Origin, originMap } from '../value-objects';
/**
 * OriginClassificationService provides deterministic classification of content based on origin.
 *
 * @remarks
 * This service implements the ClassificationPort. Classification is a single lookup of
 * `origin.type` in `originMap`; the content of a segment is never inspected.
 *
 * **Key Characteristics:**
 * - Deterministic: the result depends only on `origin.type` and the contents of `originMap`
 * - No content analysis: only origin type matters
 * - Fast: simple map lookup
 *
 * **Future Extensibility:**
 * - Content analysis can be added as a separate layer
 * - Additional analysis can modify trust level after initial classification
 *
 * @example
 * ```typescript
 * const classificationService = new OriginClassificationService()
 *
 * // Classify user input
 * const userOrigin = new Origin(OriginType.USER)
 * const trustLevel = classificationService.classify(userOrigin)
 *
 * // Classify system content
 * const systemOrigin = new Origin(OriginType.SYSTEM_GENERATED)
 * const systemTrust = classificationService.classify(systemOrigin)
 * ```
 */
export class OriginClassificationService implements ClassificationPort {
    /**
     * Classifies a content segment based on its origin and returns the trust level.
     *
     * @param origin - The Origin value object representing the source of the content
     *
     * @returns A new TrustLevel built from the value `originMap` holds for `origin.type`
     *
     * @throws {ClassificationError} If the origin type is not mapped in originMap
     *
     * @example
     * ```typescript
     * const service = new OriginClassificationService()
     * const trustLevel = service.classify(new Origin(OriginType.DOM_VISIBLE))
     * ```
     */
    classify(origin: Origin): TrustLevel {
        const trustLevelType = originMap.get(origin.type);
        // Compare against undefined rather than testing truthiness: only a missing key is
        // an error, a mapped value that happens to be falsy must still be classified.
        if (trustLevelType === undefined) {
            throw new ClassificationError(
                `Origin type '${origin.type}' is not mapped in originMap. ` +
                `All OriginType values must have a corresponding TrustLevel mapping.`
            );
        }
        return new TrustLevel(trustLevelType);
    }
}