@ai-pip/csl 0.1.0 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/README.md +607 -56
  2. package/package.json +10 -28
  3. package/src/index.test.ts +429 -0
  4. package/{index.ts → src/index.ts} +100 -65
  5. package/src/test-external.js +547 -0
  6. package/layers/csl/adapters/index.ts +0 -9
  7. package/layers/csl/adapters/input/DOMAdapter.ts +0 -236
  8. package/layers/csl/adapters/input/UIAdapter.ts +0 -0
  9. package/layers/csl/adapters/output/ConsoleLogger.ts +0 -34
  10. package/layers/csl/adapters/output/CryptoHashGenerator.ts +0 -29
  11. package/layers/csl/adapters/output/FilePolicyRepository.ts +0 -0
  12. package/layers/csl/adapters/output/InMemoryPolicyRepository.ts +0 -135
  13. package/layers/csl/adapters/output/SystemTimestampProvider.ts +0 -9
  14. package/layers/csl/domain/entities/CSLResult.ts +0 -309
  15. package/layers/csl/domain/entities/Segment.ts +0 -338
  16. package/layers/csl/domain/entities/index.ts +0 -2
  17. package/layers/csl/domain/exceptions/ClassificationError.ts +0 -26
  18. package/layers/csl/domain/exceptions/SegmentationError.ts +0 -30
  19. package/layers/csl/domain/exceptions/index.ts +0 -2
  20. package/layers/csl/domain/index.ts +0 -4
  21. package/layers/csl/domain/services/AnomalyService.ts +0 -255
  22. package/layers/csl/domain/services/LineageService.ts +0 -224
  23. package/layers/csl/domain/services/NormalizationService.ts +0 -392
  24. package/layers/csl/domain/services/OriginClassificationService.ts +0 -69
  25. package/layers/csl/domain/services/PiDetectionService.ts +0 -475
  26. package/layers/csl/domain/services/PolicyService.ts +0 -296
  27. package/layers/csl/domain/services/SegmentClassificationService.ts +0 -105
  28. package/layers/csl/domain/services/SerializationService.ts +0 -229
  29. package/layers/csl/domain/services/index.ts +0 -7
  30. package/layers/csl/domain/value-objects/AnomalyScore.ts +0 -23
  31. package/layers/csl/domain/value-objects/ContentHash.ts +0 -54
  32. package/layers/csl/domain/value-objects/LineageEntry.ts +0 -42
  33. package/layers/csl/domain/value-objects/Origin-map.ts +0 -67
  34. package/layers/csl/domain/value-objects/Origin.ts +0 -99
  35. package/layers/csl/domain/value-objects/Pattern.ts +0 -221
  36. package/layers/csl/domain/value-objects/PiDetection.ts +0 -140
  37. package/layers/csl/domain/value-objects/PiDetectionResult.ts +0 -275
  38. package/layers/csl/domain/value-objects/PolicyRule.ts +0 -151
  39. package/layers/csl/domain/value-objects/TrustLevel.ts +0 -34
  40. package/layers/csl/domain/value-objects/index.ts +0 -10
  41. package/layers/csl/index.ts +0 -3
  42. package/layers/csl/ports/index.ts +0 -10
  43. package/layers/csl/ports/input/ClassificationPort.ts +0 -76
  44. package/layers/csl/ports/input/SegmentationPort.ts +0 -81
  45. package/layers/csl/ports/output/DOMAdapter.ts +0 -14
  46. package/layers/csl/ports/output/HashGenerator.ts +0 -18
  47. package/layers/csl/ports/output/Logger.ts +0 -17
  48. package/layers/csl/ports/output/PolicyRepository.ts +0 -29
  49. package/layers/csl/ports/output/SegmentClassified.ts +0 -8
  50. package/layers/csl/ports/output/TimeStampProvider.ts +0 -5
  51. package/layers/csl/services/CSLService.ts +0 -393
  52. package/layers/csl/services/index.ts +0 -1
  53. package/layers/csl/types/entities-types.ts +0 -37
  54. package/layers/csl/types/index.ts +0 -4
  55. package/layers/csl/types/pi-types.ts +0 -111
  56. package/layers/csl/types/port-output-types.ts +0 -17
  57. package/layers/csl/types/value-objects-types.ts +0 -213
  58. package/layers/csl/utils/colors.ts +0 -25
  59. package/layers/csl/utils/pattern-helpers.ts +0 -174
@@ -1,213 +0,0 @@
1
- /**
2
- * Value Objects Types for CSL Layer
3
- *
4
- * @remarks
5
- * This file contains the types for the value objects used in the CSL Layer
6
- *
7
- * @types
8
- * - OriginType
9
- * - TrustLevelType
10
- * - AnomalyScoreType
11
- * - CSLHashAlgorithm
12
- */
13
-
14
-
15
-
16
-
17
-
18
/**
 * Deterministic source of a content segment.
 *
 * @remarks
 * Classification depends **only on the origin type**, never on the content itself.
 * Every origin maps directly to a TrustLevel through the originMap.
 *
 * **Deterministic classification rules:**
 * - The result is 100% reproducible because it is derived from the origin type alone
 * - No heuristics, no content analysis, no dynamic evaluation
 * - Content analysis may be layered on later without changing this enum
 *
 * **Origin categories:**
 * - **User origins**: direct user input (always untrusted for security)
 * - **DOM origins**: content extracted from the DOM (visibility determines trust)
 * - **External origins**: content from external sources (always untrusted)
 * - **System origins**: system-generated content (trusted)
 *
 * @example
 * ```typescript
 * // User input - always untrusted (security by default)
 * const userOrigin = OriginType.USER;
 *
 * // Visible DOM content - semi-trusted (the user can see it)
 * const domVisible = OriginType.DOM_VISIBLE;
 * ```
 */
export enum OriginType {
  /**
   * Text typed directly by the user into UI controls (input fields, textareas, etc.).
   *
   * @remarks
   * Always classified as UC (Untrusted Content).
   *
   * **Security rationale:**
   * - Primary vector for prompt injection attacks
   * - The user may be malicious or may be the victim of manipulation
   * - Requires aggressive sanitization in the ISL layer
   */
  USER = 'USER',

  /**
   * Text from DOM elements that are visible to the user.
   *
   * @remarks
   * Classified as STC (Semi-Trusted Content) because the user can verify it.
   */
  DOM_VISIBLE = 'DOM_VISIBLE',

  /**
   * Text from DOM elements the user cannot see.
   *
   * @remarks
   * Covers content hidden via CSS (display:none, visibility:hidden, etc.).
   * Classified as UC (Untrusted Content) - potential attack vector.
   */
  DOM_HIDDEN = 'DOM_HIDDEN',

  /**
   * Values of DOM attributes (data-*, aria-*, etc.).
   *
   * @remarks
   * Present in the HTML source but not in the rendered content.
   * Classified as STC (Semi-Trusted Content) - visible in source.
   */
  DOM_ATTRIBUTE = 'DOM_ATTRIBUTE',

  /**
   * Content created or modified by JavaScript at runtime.
   *
   * @remarks
   * Classified as UC (Untrusted Content) - it can be manipulated.
   */
  SCRIPT_INJECTED = 'SCRIPT_INJECTED',

  /**
   * Content obtained through network requests (API calls, external resources).
   *
   * @remarks
   * Classified as UC (Untrusted Content) - external source, not verified.
   */
  NETWORK_FETCHED = 'NETWORK_FETCHED',

  /**
   * Content produced by the system itself (instructions, system prompts, etc.).
   *
   * @remarks
   * Does not come from the user or from external sources.
   * Classified as TC (Trusted Content) - the system controls this content.
   */
  SYSTEM_GENERATED = 'SYSTEM_GENERATED',

  /**
   * Fallback used when the origin cannot be identified.
   *
   * @remarks
   * Classified as UC (Untrusted Content) - unknown is untrusted by default.
   */
  UNKNOWN = 'UNKNOWN',
}
124
-
125
/**
 * Trust level assigned to a content segment.
 *
 * The abbreviations follow the wording used in the OriginType documentation.
 */
export enum TrustLevelType {
  /** Trusted Content */
  TC = 'TC',
  /** Semi-Trusted Content */
  STC = 'STC',
  /** Untrusted Content */
  UC = 'UC',
}
130
-
131
/**
 * Three-level anomaly score label.
 *
 * NOTE(review): the names suggest a traffic-light severity scale
 * (GREEN lowest, RED highest) - confirm against the code that assigns them.
 */
export enum AnomalyScoreType {
  GREEN = 'GREEN',
  YELLOW = 'YELLOW',
  RED = 'RED',
}
136
-
137
/**
 * CSLHashAlgorithm is the identifier of a supported hash algorithm.
 *
 * Allowed values:
 * - `'sha256'`
 * - `'sha512'`
 *
 * @example
 * ```typescript
 * const algorithm: CSLHashAlgorithm = 'sha256';
 * ```
 */
export type CSLHashAlgorithm = 'sha256' | 'sha512';
149
-
150
/**
 * AnomalyAction is the action to be taken on a content segment.
 *
 * Allowed values:
 * - `'ALLOW'`: allow the content segment
 * - `'WARN'`: warn about the content segment
 * - `'BLOCK'`: block the content segment
 *
 * @example
 * ```typescript
 * const action: AnomalyAction = 'ALLOW';
 * ```
 */
export type AnomalyAction = 'ALLOW' | 'WARN' | 'BLOCK';
162
-
163
/**
 * Numeric risk score of a content segment.
 *
 * @remarks
 * Documented range: 0 < RiskScore < 1. The alias is a plain `number`,
 * so the range is not enforced by the type system.
 *
 * @example
 * ```typescript
 * const riskScore: RiskScore = 0.5;
 * ```
 */
export type RiskScore = number;
174
-
175
/**
 * Name of an intent that policy explicitly blocks.
 *
 * @example
 * ```typescript
 * const intent: BlockedIntent = 'delete_user_data';
 * ```
 */
export type BlockedIntent = string;
184
-
185
/**
 * Name of a sensitive topic that requires additional validation.
 *
 * @example
 * ```typescript
 * const scope: SensitiveScope = 'financial_transactions';
 * ```
 */
export type SensitiveScope = string;
194
-
195
/**
 * Name of a role that cannot be overridden.
 *
 * @example
 * ```typescript
 * const role: ProtectedRole = 'system';
 * ```
 */
export type ProtectedRole = string;
204
-
205
/**
 * Text of an instruction that cannot be modified.
 *
 * @example
 * ```typescript
 * const instruction: ImmutableInstruction = 'You are a helpful assistant';
 * ```
 */
export type ImmutableInstruction = string;
@@ -1,25 +0,0 @@
1
- import chalk, { type ChalkInstance } from 'chalk'
2
-
3
-
4
-
5
/** Log levels that have an entry in {@link ColorMap}. */
type LogLevel = "Error" | "Warning" | "Info" | "Neutral" | "Debug";

/**
 * ColorMap
 *
 * Maps each log level to the chalk style used to render it.
 *
 * - Error   -> chalk.bold.red
 * - Warning -> chalk.hex("#FFA500")
 * - Info    -> chalk.blue
 * - Neutral -> chalk.white
 * - Debug   -> chalk.gray
 *
 * Typed as `Record<LogLevel, ChalkInstance>`, so adding a level to
 * `LogLevel` without a matching entry here is a compile-time error.
 */
export const ColorMap: Record<LogLevel, ChalkInstance> = {
  Error: chalk.bold.red,
  Warning: chalk.hex("#FFA500"),
  Info: chalk.blue,
  Neutral: chalk.white,
  Debug: chalk.gray,
};
@@ -1,174 +0,0 @@
1
/**
 * Security constants for pattern matching.
 * These limits help prevent ReDoS attacks and resource exhaustion.
 */
export const MAX_CONTENT_LENGTH = 10_000_000 // Maximum content length (compared against string length)
export const MAX_PATTERN_LENGTH = 10_000 // Maximum regex source length
export const MAX_MATCHES = 10_000 // Maximum number of matches collected per search

/**
 * Helper functions for Pattern validation and security.
 *
 * @remarks
 * These utilities provide validation, security checks, and small helpers
 * for the Pattern value object. Validators throw on invalid input and
 * return nothing on success.
 */
export namespace PatternHelpers {
  /**
   * Original textual heuristics, kept so that every source flagged before is
   * still flagged. They only recognise quantifiers that are directly adjacent
   * (for example `a++`) or a group followed by two quantifier characters.
   */
  const ADJACENT_QUANTIFIER_HEURISTICS: readonly RegExp[] = [
    /(\+|\*|\{.*,.*\})\s*\+/, // quantifier immediately followed by '+'
    /(\+|\*|\{.*,.*\})\s*\*/, // quantifier immediately followed by '*'
    /\(.*\)\s*\+.*\+/, // group followed by '+' and a later '+'
    /\(.*\)\s*\*.*\*/ // group followed by '*' and a later '*'
  ]

  /** Sticky matcher for a range quantifier such as `{2,}` or `{2,5}` at a fixed index. */
  const RANGE_QUANTIFIER = /\{\d+,\d*\}/y

  /** Sources at or below this length are never reported by checkForReDoSPatterns. */
  const LONG_PATTERN_THRESHOLD = 500

  /** Iteration cap used by checkMatchLimits. */
  const MAX_ITERATIONS = MAX_MATCHES * 2

  /** Returns true when a repeating quantifier (`+`, `*`, `{n,}`, `{n,m}`) starts at `index`. */
  function isRepeatingQuantifierAt(regexSource: string, index: number): boolean {
    const ch = regexSource[index]
    if (ch === '+' || ch === '*') {
      return true
    }
    if (ch !== '{') {
      return false
    }
    RANGE_QUANTIFIER.lastIndex = index
    return RANGE_QUANTIFIER.test(regexSource)
  }

  /**
   * Single-pass scan for a quantified group whose body itself contains a
   * repeating quantifier, e.g. `(a+)+`, `(?:a*)*` or `((a+))+`.
   * Escaped characters and character classes are skipped.
   */
  function hasQuantifiedGroupWithInnerQuantifier(regexSource: string): boolean {
    // One flag per currently open group: true once a repeating quantifier was seen inside it.
    const openGroups: boolean[] = []
    let inCharacterClass = false

    for (let i = 0; i < regexSource.length; i++) {
      const ch = regexSource[i]

      if (ch === '\\') {
        i++ // skip the escaped character
        continue
      }
      if (inCharacterClass) {
        if (ch === ']') {
          inCharacterClass = false
        }
        continue
      }
      if (ch === '[') {
        inCharacterClass = true
        continue
      }
      if (ch === '(') {
        openGroups.push(false)
        continue
      }
      if (ch === ')') {
        const innerRepeats = openGroups.pop() ?? false
        if (!innerRepeats) {
          continue
        }
        if (isRepeatingQuantifierAt(regexSource, i + 1)) {
          return true
        }
        // The repetition inside this group is also part of the enclosing group's body.
        if (openGroups.length > 0) {
          openGroups[openGroups.length - 1] = true
        }
        continue
      }
      if (openGroups.length > 0 && isRepeatingQuantifierAt(regexSource, i)) {
        openGroups[openGroups.length - 1] = true
      }
    }

    return false
  }

  /**
   * Validates that content is a non-empty string.
   *
   * @param content - Value to check
   * @param methodName - Name of the calling Pattern method, used in the error message
   * @throws TypeError if content is falsy or not a string
   */
  export function validateContent(content: unknown, methodName: string): asserts content is string {
    if (!content || typeof content !== 'string') {
      throw new TypeError(`Pattern.${methodName}: content must be a non-empty string`)
    }
  }

  /**
   * Validates content length to prevent ReDoS attacks.
   *
   * @param content - Content whose length is checked
   * @param methodName - Name of the calling Pattern method, used in the error message
   * @throws Error if content is longer than MAX_CONTENT_LENGTH
   */
  export function validateContentLength(content: string, methodName: string): void {
    if (content.length > MAX_CONTENT_LENGTH) {
      throw new Error(
        `Pattern.${methodName}: Content length (${content.length}) exceeds maximum allowed length (${MAX_CONTENT_LENGTH})`
      )
    }
  }

  /**
   * Validates pattern_type input.
   *
   * @throws TypeError if pattern_type is not a string or is empty after trimming
   */
  export function validatePatternType(pattern_type: unknown): asserts pattern_type is string {
    if (!pattern_type || typeof pattern_type !== 'string' || pattern_type.trim().length === 0) {
      throw new TypeError('Pattern pattern_type must be a non-empty string')
    }
  }

  /**
   * Validates regex input.
   *
   * @throws TypeError if regex is falsy, or is neither a string nor a RegExp
   */
  export function validateRegex(regex: unknown): asserts regex is string | RegExp {
    if (!regex || (typeof regex !== 'string' && !(regex instanceof RegExp))) {
      throw new TypeError('Pattern regex must be a string or a RegExp')
    }
  }

  /**
   * Validates base_confidence input.
   *
   * @throws TypeError if base_confidence is not a finite number
   * @throws Error if base_confidence is outside the inclusive range 0..1
   */
  export function validateBaseConfidence(base_confidence: unknown): asserts base_confidence is number {
    if (typeof base_confidence !== 'number' || !Number.isFinite(base_confidence)) {
      throw new TypeError('Pattern base_confidence must be a valid number')
    }

    if (base_confidence < 0 || base_confidence > 1) {
      throw new Error('Pattern base_confidence must be between 0 and 1')
    }
  }

  /**
   * Validates description input.
   *
   * @remarks `undefined` is accepted; any other value must be a string that is non-empty after trimming.
   * @throws TypeError if a description is provided but is not a non-empty string
   */
  export function validateDescription(description: unknown): void {
    if (description !== undefined && (typeof description !== 'string' || description.trim().length === 0)) {
      throw new TypeError('Pattern description must be a non-empty string if provided')
    }
  }

  /**
   * Validates regex source length.
   *
   * @throws Error if regexSource is longer than MAX_PATTERN_LENGTH
   */
  export function validateRegexLength(regexSource: string): void {
    if (regexSource.length > MAX_PATTERN_LENGTH) {
      throw new Error(`Pattern regex source exceeds maximum length of ${MAX_PATTERN_LENGTH} characters`)
    }
  }

  /**
   * Checks for potentially dangerous ReDoS patterns and logs a warning.
   *
   * @remarks
   * This is a heuristic and never throws or blocks. A source is reported when
   * it is longer than 500 characters AND either
   * - it matches one of the adjacent-quantifier heuristics, or
   * - it contains a quantified group whose body has a repeating quantifier,
   *   such as `(a+)+` or `(a*)*`.
   *
   * The second check exists because the adjacent-quantifier heuristics do not
   * match `(a+)+`: the closing parenthesis sits between the two quantifiers.
   *
   * @param regexSource - Regex source text to inspect
   * @param pattern_type - Pattern type included in the warning message
   */
  export function checkForReDoSPatterns(regexSource: string, pattern_type: string): void {
    // Only long patterns are reported; check the cheap length gate first.
    if (regexSource.length <= LONG_PATTERN_THRESHOLD) {
      return
    }

    const hasDangerousPattern =
      ADJACENT_QUANTIFIER_HEURISTICS.some(pattern => pattern.test(regexSource)) ||
      hasQuantifiedGroupWithInnerQuantifier(regexSource)

    if (hasDangerousPattern) {
      console.warn(`Pattern: Potentially dangerous regex pattern detected with nested quantifiers. Pattern type: ${pattern_type}`)
    }
  }

  /**
   * Compiles a regex string to a case-insensitive RegExp.
   *
   * @throws TypeError if the string is not a valid regular expression
   */
  export function compileRegexString(regex: string): RegExp {
    try {
      return new RegExp(regex, 'i')
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error)
      throw new TypeError(`Pattern regex must be a valid regular expression: ${regex}. Original error: ${errorMessage}`)
    }
  }

  /**
   * Clones a RegExp to ensure immutability.
   *
   * @returns A new RegExp with the same source and flags
   */
  export function cloneRegExp(regex: RegExp): RegExp {
    return new RegExp(regex.source, regex.flags)
  }

  /**
   * Creates a match result object from a regex match.
   *
   * @param match - Result of RegExp.exec
   * @param globalOffset - Offset added to the match index (default 0)
   * @returns The matched text and its start (inclusive) / end (exclusive) positions
   */
  export function createMatchResult(match: RegExpExecArray, globalOffset: number = 0): {
    matched: string
    position: { start: number; end: number }
  } {
    const start = match.index + globalOffset
    return {
      matched: match[0],
      position: {
        start,
        end: start + match[0].length
      }
    }
  }

  /**
   * Handles empty string matches to prevent infinite loops.
   *
   * @remarks Advances `globalRegex.lastIndex` only when the match is empty.
   */
  export function handleEmptyStringMatch(globalRegex: RegExp, match: RegExpExecArray): void {
    if (match[0].length === 0) {
      globalRegex.lastIndex++
      // Additional safety: if lastIndex still equals the match index, move past it
      if (globalRegex.lastIndex === match.index) {
        globalRegex.lastIndex = match.index + 1
      }
    }
  }

  /**
   * Checks if match limits have been reached.
   *
   * @param matchesCount - Number of matches collected so far
   * @param iterations - Number of loop iterations performed so far
   * @returns true (after logging a warning) when the search should stop, false otherwise
   */
  export function checkMatchLimits(matchesCount: number, iterations: number): boolean {
    if (matchesCount >= MAX_MATCHES) {
      console.warn(`Pattern.findAllMatches: Maximum matches limit (${MAX_MATCHES}) reached. Stopping search.`)
      return true
    }

    if (iterations > MAX_ITERATIONS) {
      console.warn(`Pattern.findAllMatches: Maximum iterations (${MAX_ITERATIONS}) reached. Stopping search to prevent resource exhaustion.`)
      return true
    }

    return false
  }
}