@ai-pip/csl 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/layers/csl/index.ts +1 -0
  2. package/layers/csl/src/adapters/index.ts +10 -0
  3. package/layers/csl/src/adapters/input/DOMAdapter.ts +236 -0
  4. package/layers/csl/src/adapters/input/UIAdapter.ts +0 -0
  5. package/layers/csl/src/adapters/output/ConsoleLogger.ts +34 -0
  6. package/layers/csl/src/adapters/output/CryptoHashGenerator.ts +29 -0
  7. package/layers/csl/src/adapters/output/FilePolicyRepository.ts +0 -0
  8. package/layers/csl/src/adapters/output/InMemoryPolicyRepository.ts +135 -0
  9. package/layers/csl/src/adapters/output/SystemTimestampProvider.ts +9 -0
  10. package/layers/csl/src/domain/entities/CSLResult.ts +309 -0
  11. package/layers/csl/src/domain/entities/Segment.ts +338 -0
  12. package/layers/csl/src/domain/entities/index.ts +2 -0
  13. package/layers/csl/src/domain/exceptions/ClassificationError.ts +26 -0
  14. package/layers/csl/src/domain/exceptions/SegmentationError.ts +30 -0
  15. package/layers/csl/src/domain/exceptions/index.ts +2 -0
  16. package/layers/csl/src/domain/index.ts +4 -0
  17. package/layers/csl/src/domain/services/AnomalyService.ts +255 -0
  18. package/layers/csl/src/domain/services/LineageService.ts +224 -0
  19. package/layers/csl/src/domain/services/NormalizationService.ts +392 -0
  20. package/layers/csl/src/domain/services/OriginClassificationService.ts +69 -0
  21. package/layers/csl/src/domain/services/PiDetectionService.ts +475 -0
  22. package/layers/csl/src/domain/services/PolicyService.ts +296 -0
  23. package/layers/csl/src/domain/services/SegmentClassificationService.ts +105 -0
  24. package/layers/csl/src/domain/services/SerializationService.ts +229 -0
  25. package/layers/csl/src/domain/services/index.ts +7 -0
  26. package/layers/csl/src/domain/value-objects/AnomalyScore.ts +23 -0
  27. package/layers/csl/src/domain/value-objects/ContentHash.ts +54 -0
  28. package/layers/csl/src/domain/value-objects/LineageEntry.ts +42 -0
  29. package/layers/csl/src/domain/value-objects/Origin-map.ts +67 -0
  30. package/layers/csl/src/domain/value-objects/Origin.ts +99 -0
  31. package/layers/csl/src/domain/value-objects/Pattern.ts +221 -0
  32. package/layers/csl/src/domain/value-objects/PiDetection.ts +140 -0
  33. package/layers/csl/src/domain/value-objects/PiDetectionResult.ts +275 -0
  34. package/layers/csl/src/domain/value-objects/PolicyRule.ts +151 -0
  35. package/layers/csl/src/domain/value-objects/TrustLevel.ts +34 -0
  36. package/layers/csl/src/domain/value-objects/index.ts +10 -0
  37. package/layers/csl/src/index.ts +7 -0
  38. package/layers/csl/src/ports/index.ts +10 -0
  39. package/layers/csl/src/ports/input/ClassificationPort.ts +76 -0
  40. package/layers/csl/src/ports/input/SegmentationPort.ts +81 -0
  41. package/layers/csl/src/ports/output/DOMAdapter.ts +14 -0
  42. package/layers/csl/src/ports/output/HashGenerator.ts +18 -0
  43. package/layers/csl/src/ports/output/Logger.ts +17 -0
  44. package/layers/csl/src/ports/output/PolicyRepository.ts +29 -0
  45. package/layers/csl/src/ports/output/SegmentClassified.ts +8 -0
  46. package/layers/csl/src/ports/output/TimeStampProvider.ts +5 -0
  47. package/layers/csl/src/services/CSLService.ts +393 -0
  48. package/layers/csl/src/services/index.ts +1 -0
  49. package/layers/csl/src/types/entities-types.ts +37 -0
  50. package/layers/csl/src/types/index.ts +4 -0
  51. package/layers/csl/src/types/pi-types.ts +111 -0
  52. package/layers/csl/src/types/port-output-types.ts +17 -0
  53. package/layers/csl/src/types/value-objects-types.ts +213 -0
  54. package/layers/csl/src/utils/colors.ts +25 -0
  55. package/layers/csl/src/utils/pattern-helpers.ts +174 -0
  56. package/package.json +4 -5
  57. package/src/index.ts +36 -36
@@ -0,0 +1 @@
1
+ export * from './src';
@@ -0,0 +1,10 @@
1
+ // input
2
+ export * from './input/DOMAdapter';
3
+
4
+
5
+
6
+ // output
7
+ export * from './output/ConsoleLogger'
8
+ export * from './output/SystemTimestampProvider'
9
+ export * from './output/CryptoHashGenerator'
10
+ export * from './output/InMemoryPolicyRepository'
@@ -0,0 +1,236 @@
1
+ import { Origin, Segment } from '../../domain'
2
+ import { OriginType } from '../../types';
3
+
4
+ /**
5
+ * DOMAdapter adapts DOM elements to domain entities (Segment[]).
6
+ *
7
+ * @remarks
8
+ * This adapter is responsible for extracting content from the DOM and converting it
9
+ * into Segment entities that can be processed by the CSL pipeline. It analyzes DOM
10
+ * elements to determine their visibility and assigns appropriate Origin types.
11
+ *
12
+ * **Key Responsibilities:**
13
+ * - Extracts text content from DOM elements using TreeWalker
14
+ * - Determines element visibility (visible vs hidden)
15
+ * - Filters out non-processable nodes (script, style, etc.)
16
+ * - Creates Segment entities with appropriate Origin types
17
+ * - Assigns DOM_VISIBLE or DOM_HIDDEN based on CSS visibility
18
+ *
19
+ * **Origin Assignment:**
20
+ * - **DOM_VISIBLE**: Content from elements that are visible to the user
21
+ * - **DOM_HIDDEN**: Content from elements hidden via CSS (display:none, visibility:hidden, etc.)
22
+ *
23
+ * **Filtered Elements:**
24
+ * The adapter automatically filters out content from:
25
+ * - `<script>`, `<style>`, `<meta>`, `<link>`, `<noscript>`, `<iframe>`
26
+ * - Elements with `contenteditable="false"`
27
+ *
28
+ * **Usage in Pipeline:**
29
+ * This adapter is used by SegmentationService to extract segments from the DOM
30
+ * before they go through the classification, normalization, and analysis pipeline.
31
+ *
32
+ * @example
33
+ * ```typescript
34
+ * const domAdapter = new DOMAdapter()
35
+ *
36
+ * // Adapt entire document
37
+ * const segments = domAdapter.adapt(document)
38
+ *
39
+ * // Adapt specific element
40
+ * const formElement = document.getElementById('user-form')
41
+ * const segments = domAdapter.adapt(formElement)
42
+ *
43
+ * // Segments are ready for classification
44
+ * segments.forEach(segment => {
45
+ * console.log(segment.origin.type) // 'DOM_VISIBLE' or 'DOM_HIDDEN'
46
+ * })
47
+ * ```
48
+ */
49
export class DOMAdapter {
  /**
   * Lower-cased tag names whose direct text children are never turned into segments.
   */
  private static readonly NON_PROCESSABLE_TAGS: ReadonlySet<string> = new Set([
    "script",
    "style",
    "meta",
    "link",
    "noscript",
    "iframe",
  ]);

  /**
   * Adapts a DOM element or document to an array of Segment entities.
   *
   * @remarks
   * Walks every text node below the root with a TreeWalker. A text node becomes a
   * Segment when its trimmed text is non-empty, it has a parent element, and that
   * parent passes {@link DOMAdapter.isProcessableNode}. Each segment is built with:
   * - `id`: a fresh `crypto.randomUUID()` value
   * - `content`: the trimmed text of the node
   * - `origin`: `DOM_VISIBLE` or `DOM_HIDDEN`, decided by {@link DOMAdapter.isVisible}
   * - `mime`: always `"text/plain"`
   * - `timestamp`: `Date.now()` at creation time
   * - `source`: the identifier returned by {@link DOMAdapter.resolveSource}
   *
   * @param root - The Document or HTMLElement to extract content from.
   * If a Document is provided, `document.body` is processed.
   * If an HTMLElement is provided, that element is processed.
   *
   * @returns The extracted segments in document order. Returns an empty array when a
   * Document without a `<body>` is passed.
   *
   * @example
   * ```typescript
   * const domAdapter = new DOMAdapter()
   *
   * // Extract segments from the entire document
   * const all = domAdapter.adapt(document)
   * console.log(`Extracted ${all.length} segments`)
   *
   * // Extract segments from a specific container
   * const container = document.querySelector<HTMLElement>('.content')
   * if (container) {
   *   const scoped = domAdapter.adapt(container)
   *   scoped.forEach(segment => console.log(segment.origin.type))
   * }
   * ```
   */
  adapt(root: Document | HTMLElement): Segment[] {
    const segments: Segment[] = [];

    const container = root instanceof Document ? root.body : root;

    // `Document.body` is null at runtime for documents without a <body>;
    // createTreeWalker would throw on a null root, so report "nothing to extract".
    if (!container) return segments;

    const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);

    let node: Node | null;
    while ((node = walker.nextNode())) {
      const text = node.textContent?.trim();
      if (!text) continue;

      // Ignore nodes inside script/style/meta/etc.
      if (!this.isProcessableNode(node)) continue;

      const parent = node.parentElement;
      if (!parent) continue;

      const origin = new Origin(
        this.isVisible(parent) ? OriginType.DOM_VISIBLE : OriginType.DOM_HIDDEN
      );

      segments.push(
        new Segment({
          id: crypto.randomUUID(),
          content: text,
          origin,
          mime: "text/plain",
          timestamp: Date.now(),
          source: this.resolveSource(parent),
        })
      );
    }

    return segments;
  }

  /**
   * Determines if a text node should be processed, based on its direct parent element.
   *
   * @remarks
   * A node is rejected when:
   * - it has no parent element,
   * - its parent is a `<script>`, `<style>`, `<meta>`, `<link>`, `<noscript>` or
   *   `<iframe>` element, or
   * - its parent carries the attribute `contenteditable="false"`.
   *
   * Only the direct parent is inspected; ancestors further up are not consulted.
   *
   * @param node - The text node to check
   *
   * @returns `true` if the node should be processed, `false` otherwise
   */
  private isProcessableNode(node: Node): boolean {
    const parent = node.parentElement;
    if (!parent) return false;

    // Ignore content from non-textual or potentially dangerous elements
    if (DOMAdapter.NON_PROCESSABLE_TAGS.has(parent.tagName.toLowerCase())) {
      return false;
    }

    // Ignore contenteditable="false" elements
    return parent.getAttribute("contenteditable") !== "false";
  }

  /**
   * Determines if an HTML element is visible to the user.
   *
   * @remarks
   * An element is reported as hidden when any of the following holds:
   * - its own computed `visibility` is `hidden`;
   * - it, or any of its ancestors, has a computed `display` of `none`;
   * - it, or any of its ancestors, has a computed `opacity` of `0`;
   * - it, or any of its ancestors, carries the `hidden` attribute.
   *
   * `visibility` is an inherited property that a descendant may override back to
   * `visible`, so the element's own computed value is authoritative. `display` and
   * `opacity` are not inherited, yet they hide the whole subtree, which is why the
   * ancestor chain has to be walked: checking only the element itself would report
   * text nested inside a hidden container as visible.
   *
   * This calls `getComputedStyle` once per ancestor, so the cost grows with the
   * depth of the element in the tree.
   *
   * @param element - The HTMLElement to check for visibility
   *
   * @returns `true` if the element is visible, `false` if it's hidden
   */
  private isVisible(element: HTMLElement): boolean {
    const ownStyle = getComputedStyle(element);
    if (ownStyle.visibility === "hidden") return false;

    let current: HTMLElement | null = element;
    while (current) {
      const style = current === element ? ownStyle : getComputedStyle(current);
      if (
        style.display === "none" ||
        style.opacity === "0" ||
        current.hasAttribute("hidden")
      ) {
        return false;
      }
      current = current.parentElement;
    }

    return true;
  }

  /**
   * Resolves a source identifier for an element to track where content came from.
   *
   * @remarks
   * The first non-empty value in this priority order is returned:
   * 1. Element ID
   * 2. `name` attribute
   * 3. `class` attribute
   * 4. Tag name (fallback, as reported by `tagName`)
   *
   * The `class` attribute is read with `getAttribute` rather than `className`
   * because `className` is not a string on SVG elements, which can be the parent
   * of a text node at runtime.
   *
   * @param element - The element to get the source identifier from
   *
   * @returns A string identifier for the element, used as the Segment source
   */
  private resolveSource(element: HTMLElement): string {
    // `||` (not `??`) is intentional: empty strings must fall through to the next candidate.
    return (
      element.id ||
      element.getAttribute("name") ||
      element.getAttribute("class") ||
      element.tagName
    );
  }
}
File without changes
@@ -0,0 +1,34 @@
1
+ import type { LoggerPort } from "../../ports";
2
+ import { ColorMap } from "../../utils/colors";
3
+
4
+
5
/**
 * LoggerPort implementation that writes timestamped, colorized lines to the console.
 *
 * @remarks
 * Every line has the shape `[<ISO timestamp>] <emoji> <LEVEL>: <message>` and is
 * passed through the matching `ColorMap` formatter before being printed.
 * `info` and `debug` are printed with `console.log`; `warn` and `error` are printed
 * with `console.warn` and `console.error` so that they reach the warning/error
 * channel of the host console (stderr in Node.js) instead of being mixed into
 * regular output.
 */
export class ConsoleLogger implements LoggerPort {
  /** Returns the current time as an ISO-8601 string. */
  private timestamp(): string {
    return new Date().toISOString();
  }

  /**
   * Builds the uncolored log line.
   *
   * @param level - Level label printed after the emoji (e.g. `"INFO"`)
   * @param emoji - Emoji printed before the level label
   * @param msg - The message text
   * @returns The formatted line, prefixed with the current timestamp
   */
  private buildMessage(level: string, emoji: string, msg: string): string {
    return `[${this.timestamp()}] ${emoji} ${level}: ${msg}`;
  }

  /**
   * Logs an informational message.
   *
   * @param msg - The message text
   */
  info(msg: string): void {
    console.log(ColorMap.Info(this.buildMessage("INFO", "ℹ️", msg)));
  }

  /**
   * Logs a warning through `console.warn`.
   *
   * @param msg - The message text
   */
  warn(msg: string): void {
    console.warn(ColorMap.Warning(this.buildMessage("WARN", "⚠️", msg)));
  }

  /**
   * Logs an error through `console.error`.
   *
   * @param msg - The message text
   */
  error(msg: string): void {
    console.error(ColorMap.Error(this.buildMessage("ERROR", "❌", msg)));
  }

  /**
   * Logs a debug message followed by the call site it was issued from.
   *
   * @remarks
   * The call site is taken from the third line of a freshly captured stack trace:
   * line 0 is the error header, line 1 is this method, line 2 is the caller.
   * When no stack (or no such line) is available an empty location is printed.
   *
   * @param msg - The message text
   */
  debug(msg: string): void {
    const callSite = new Error("Debug trace").stack?.split("\n")[2]?.trim() ?? "";
    const location = ColorMap.Neutral(`(${callSite})`);

    console.log(ColorMap.Debug(this.buildMessage("DEBUG", "🐛", `${msg} ${location}`)));
  }
}
@@ -0,0 +1,29 @@
1
+ import type { HashGeneratorPort, HashAlgorithm } from "../../ports";
2
+ import crypto from 'node:crypto'
3
+
4
+
5
/**
 * HashGeneratorPort implementation backed by Node's `node:crypto` module.
 *
 * @remarks
 * All digests are produced as hex strings. Every method is `async`, so a failure
 * inside `node:crypto` (for example an unsupported algorithm name) surfaces as a
 * rejected promise rather than a synchronous exception. Digest comparisons use a
 * constant-time check so that verification time does not depend on how many leading
 * characters of the supplied digest are correct.
 */
export class CryptoHashGenerator implements HashGeneratorPort {
  /** Algorithm used whenever a caller does not pass one explicitly. */
  private readonly DEFAULT_ALGORITHM: HashAlgorithm = 'sha512';

  /**
   * Computes the hex digest of `content`.
   *
   * @param content - The text to hash
   * @param algorithm - Hash algorithm name; defaults to `sha512`
   * @returns The digest as a hex string
   * @throws Rejects with the error raised by `node:crypto` when the digest cannot be computed
   */
  async generate(content: string, algorithm: HashAlgorithm = this.DEFAULT_ALGORITHM): Promise<string> {
    return crypto.createHash(algorithm).update(content).digest('hex');
  }

  /**
   * Checks whether `hash` is the hex digest of `content`.
   *
   * @param content - The text to hash
   * @param hash - The hex digest to verify against
   * @param algorithm - Hash algorithm name; defaults to `sha512`
   * @returns `true` when the digests are identical, `false` otherwise
   * @throws Rejects with the error raised by `node:crypto` when the digest cannot be computed
   */
  async compare(content: string, hash: string, algorithm: HashAlgorithm = this.DEFAULT_ALGORITHM): Promise<boolean> {
    const expected = crypto.createHash(algorithm).update(content).digest('hex');
    return this.digestsMatch(expected, hash);
  }

  /**
   * Computes the hex HMAC of `content` under `secretKey`.
   *
   * @param content - The text to authenticate
   * @param secretKey - The HMAC key
   * @param algorithm - Hash algorithm name; defaults to `sha512`
   * @returns The HMAC as a hex string
   * @throws Rejects with the error raised by `node:crypto` when the HMAC cannot be computed
   */
  async generateHMAC(content: string, secretKey: string, algorithm: HashAlgorithm = this.DEFAULT_ALGORITHM): Promise<string> {
    return crypto.createHmac(algorithm, secretKey).update(content).digest('hex');
  }

  /**
   * Checks whether `hash` is the hex HMAC of `content` under `secretKey`.
   *
   * @param content - The text to authenticate
   * @param hash - The hex HMAC to verify against
   * @param secretKey - The HMAC key
   * @param algorithm - Hash algorithm name; defaults to `sha512`
   * @returns `true` when the HMACs are identical, `false` otherwise
   * @throws Rejects with the error raised by `node:crypto` when the HMAC cannot be computed
   */
  async compareHMAC(content: string, hash: string, secretKey: string, algorithm: HashAlgorithm = this.DEFAULT_ALGORITHM): Promise<boolean> {
    const expected = crypto.createHmac(algorithm, secretKey).update(content).digest('hex');
    return this.digestsMatch(expected, hash);
  }

  /**
   * Compares two digest strings in constant time.
   *
   * @param expected - The digest computed locally
   * @param candidate - The digest supplied by the caller
   * @returns `true` only when both strings are byte-for-byte identical
   */
  private digestsMatch(expected: string, candidate: string): boolean {
    // A non-string candidate can never match; answer `false` instead of letting Buffer.from throw.
    if (typeof candidate !== 'string') return false;

    const expectedBytes = Buffer.from(expected, 'utf8');
    const candidateBytes = Buffer.from(candidate, 'utf8');

    // timingSafeEqual throws on length mismatch, so the length check must come first.
    return (
      expectedBytes.length === candidateBytes.length &&
      crypto.timingSafeEqual(expectedBytes, candidateBytes)
    );
  }
}
@@ -0,0 +1,135 @@
1
+ import { PolicyRule } from '../../domain/value-objects'
2
+ import type { PolicyRepositoryPort } from '../../ports/output/PolicyRepository'
3
+
4
+ /**
5
+ * InMemoryPolicyRepository is an in-memory implementation of PolicyRepositoryPort.
6
+ *
7
+ * @remarks
8
+ * This adapter stores policies in memory, making it suitable for:
9
+ * - Testing and development
10
+ * - Single-process applications
11
+ * - Temporary policy storage
12
+ *
13
+ * **Limitations:**
14
+ * - Policies are lost when the process terminates
15
+ * - Not suitable for distributed systems
16
+ * - No persistence layer
17
+ *
18
+ * @example
19
+ * ```typescript
20
+ * const repository = new InMemoryPolicyRepository()
21
+ * const policy = new PolicyRule(...)
22
+ * await repository.savePolicy(policy)
23
+ *
24
+ * const activePolicy = await repository.getActivePolicy()
25
+ * ```
26
+ */
27
/**
 * PolicyRepositoryPort implementation that keeps every policy in a process-local Map.
 *
 * Policies are keyed by their `version`; one of the stored versions may be marked
 * as the active one. Nothing is persisted: all state lives in this instance.
 */
export class InMemoryPolicyRepository implements PolicyRepositoryPort {
  /** Stored policies, keyed by policy version. */
  private readonly policies: Map<string, PolicyRule> = new Map()

  /** Version of the policy currently marked active, or null when none is. */
  private activePolicyVersion: string | null = null

  /**
   * Stores a policy under its version, replacing any policy already stored there.
   * When no policy is active yet, the saved one becomes the active policy.
   *
   * @param policy - The policy rule to store
   * @throws TypeError if `policy` is not a PolicyRule instance
   */
  async savePolicy(policy: PolicyRule): Promise<void> {
    const isPolicyRule = Boolean(policy) && policy instanceof PolicyRule
    if (!isPolicyRule) {
      throw new TypeError('InMemoryPolicyRepository.savePolicy: policy must be a PolicyRule instance')
    }

    this.policies.set(policy.version, policy)

    // Only claim the "active" slot when nobody holds it yet.
    if (this.activePolicyVersion === null) {
      this.activePolicyVersion = policy.version
    }
  }

  /**
   * Marks an already stored version as the active policy.
   *
   * @param version - The version to activate
   * @throws Error if no policy is stored under `version`
   */
  async setActivePolicy(version: string): Promise<void> {
    const isKnown = this.policies.has(version)
    if (isKnown) {
      this.activePolicyVersion = version
      return
    }

    throw new Error(`InMemoryPolicyRepository: Policy version '${version}' not found`)
  }

  /**
   * Returns the policy currently marked active.
   *
   * @returns The active PolicyRule
   * @throws Error if no version is active, or the active version is no longer stored
   */
  async getActivePolicy(): Promise<PolicyRule> {
    const activeVersion = this.activePolicyVersion
    if (activeVersion === null) {
      throw new Error('InMemoryPolicyRepository: No active policy is set')
    }

    const found = this.policies.get(activeVersion)
    if (found) {
      return found
    }

    throw new Error(`InMemoryPolicyRepository: Active policy version '${activeVersion}' not found`)
  }

  /**
   * Looks up a policy by version.
   *
   * @param version - The version to look up
   * @returns The stored PolicyRule, or null when nothing is stored under `version`
   * @throws TypeError if `version` is not a non-empty string
   */
  async getPolicyByVersion(version: string): Promise<PolicyRule | null> {
    const isNonEmptyString = typeof version === 'string' && version.length > 0
    if (!isNonEmptyString) {
      throw new TypeError('InMemoryPolicyRepository.getPolicyByVersion: version must be a non-empty string')
    }

    return this.policies.get(version) ?? null
  }

  /**
   * Lists every stored policy.
   *
   * @returns A new array holding all stored PolicyRule instances, in insertion order
   */
  async getAllPolicies(): Promise<PolicyRule[]> {
    return [...this.policies.values()]
  }

  /**
   * Removes every stored policy and unsets the active version.
   */
  clear(): void {
    this.activePolicyVersion = null
    this.policies.clear()
  }

  /**
   * Reports which version is currently active.
   *
   * @returns The active policy version, or null if none is set
   */
  getActiveVersion(): string | null {
    return this.activePolicyVersion
  }
}
@@ -0,0 +1,9 @@
1
+ import type { TimeStampProviderPort } from "../../ports";
2
+
3
+
4
+
5
/**
 * TimeStampProviderPort implementation that reads the system clock.
 *
 * @remarks
 * The class name is missing the second "p" of "Timestamp". The name is kept because
 * it is part of the published API; prefer the correctly spelled
 * `SystemTimestampProvider` alias exported below in new code.
 */
export class SystemTimestamProvider implements TimeStampProviderPort {
  /**
   * Returns the current time.
   *
   * @returns The value of `Date.now()` (milliseconds since the Unix epoch)
   */
  now(): number {
    return Date.now();
  }
}

/**
 * Correctly spelled alias of {@link SystemTimestamProvider}; both names refer to the
 * same class, so instances are interchangeable.
 */
export const SystemTimestampProvider = SystemTimestamProvider;
export type SystemTimestampProvider = SystemTimestamProvider;