@ai-pip/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +165 -0
- package/package.json +79 -0
- package/src/cpe/envelope.ts +115 -0
- package/src/cpe/exceptions/EnvelopeError.ts +11 -0
- package/src/cpe/exceptions/index.ts +6 -0
- package/src/cpe/index.ts +35 -0
- package/src/cpe/types.ts +68 -0
- package/src/cpe/utils.ts +65 -0
- package/src/cpe/value-objects/Metadata.ts +78 -0
- package/src/cpe/value-objects/Nonce.ts +57 -0
- package/src/cpe/value-objects/Signature.ts +83 -0
- package/src/cpe/value-objects/index.ts +8 -0
- package/src/csl/classify.ts +77 -0
- package/src/csl/exceptions/ClassificationError.ts +16 -0
- package/src/csl/exceptions/SegmentationError.ts +19 -0
- package/src/csl/exceptions/index.ts +3 -0
- package/src/csl/index.ts +34 -0
- package/src/csl/lineage.ts +40 -0
- package/src/csl/segment.ts +100 -0
- package/src/csl/types.ts +113 -0
- package/src/csl/utils.ts +30 -0
- package/src/csl/value-objects/ContentHash.ts +48 -0
- package/src/csl/value-objects/LineageEntry.ts +33 -0
- package/src/csl/value-objects/Origin-map.ts +51 -0
- package/src/csl/value-objects/Origin.ts +52 -0
- package/src/csl/value-objects/TrustLevel.ts +33 -0
- package/src/csl/value-objects/index.ts +14 -0
- package/src/index.ts +18 -0
- package/src/isl/exceptions/SanitizationError.ts +14 -0
- package/src/isl/exceptions/index.ts +2 -0
- package/src/isl/index.ts +20 -0
- package/src/isl/sanitize.ts +93 -0
- package/src/isl/types.ts +87 -0
- package/src/isl/value-objects/AnomalyScore.ts +40 -0
- package/src/isl/value-objects/Pattern.ts +158 -0
- package/src/isl/value-objects/PiDetection.ts +92 -0
- package/src/isl/value-objects/PiDetectionResult.ts +129 -0
- package/src/isl/value-objects/PolicyRule.ts +117 -0
- package/src/isl/value-objects/index.ts +41 -0
- package/src/shared/index.ts +13 -0
- package/src/shared/lineage.ts +53 -0
- package/tsconfig.json +27 -0
package/src/cpe/utils.ts
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Utilidades puras para CPE
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
/**
 * Serializes sanitized ISL segments into the single string that gets signed.
 *
 * Every segment contributes one `[index]:content` entry, where `index` is the
 * segment's zero-based position in `segments`. Entries are joined with `\n`.
 *
 * NOTE(review): segment content is embedded verbatim (no escaping or length
 * prefix), so content that itself contains a newline followed by `[n]:` is
 * indistinguishable from additional segments. Changing the format would alter
 * every signature, so it is left untouched here — confirm whether the
 * protocol needs an unambiguous encoding.
 *
 * @param segments - Sanitized segments, in the order they must be signed
 * @returns The serialized content (an empty string for an empty list)
 */
export function serializeContent(segments: readonly { readonly sanitizedContent: string }[]): string {
  const entries: string[] = []
  for (const [position, { sanitizedContent }] of segments.entries()) {
    entries.push(`[${position}]:${sanitizedContent}`)
  }
  return entries.join('\n')
}
|
|
16
|
+
|
|
17
|
+
/**
 * Serializes envelope metadata into the string that gets signed.
 *
 * The output is a `|`-separated list of `key:value` fields, always in the
 * order `timestamp`, `nonce`, `version`, followed by `csl` and `isl` when the
 * corresponding previous signature is present and non-empty.
 *
 * @param metadata - Metadata to serialize
 * @returns The serialized metadata
 */
export function serializeMetadata(metadata: {
  readonly timestamp: number
  readonly nonce: string
  readonly protocolVersion: string
  readonly previousSignatures?: {
    readonly csl?: string | undefined
    readonly isl?: string | undefined
  } | undefined
}): string {
  const { timestamp, nonce, protocolVersion, previousSignatures } = metadata

  const fields: string[] = [
    `timestamp:${timestamp}`,
    `nonce:${nonce}`,
    `version:${protocolVersion}`,
  ]

  // Optional signatures from earlier layers; falsy values (undefined, '') are omitted.
  const cslSignature = previousSignatures?.csl
  if (cslSignature) {
    fields.push(`csl:${cslSignature}`)
  }

  const islSignature = previousSignatures?.isl
  if (islSignature) {
    fields.push(`isl:${islSignature}`)
  }

  return fields.join('|')
}
|
|
48
|
+
|
|
49
|
+
/**
 * Builds the complete string that is handed to the signing primitive.
 *
 * Layout (three sections separated by a `---` line):
 * serialized metadata, serialized content, then `algorithm:<id>`.
 *
 * @param content - Serialized content (the semantic payload)
 * @param metadata - Serialized metadata
 * @param algorithm - Identifier of the signature algorithm
 * @returns The full signable string
 */
export function generateSignableContent(
  content: string,
  metadata: string,
  algorithm: string
): string {
  const sectionBreak = '\n---\n'
  const algorithmSection = 'algorithm:' + algorithm
  return metadata + sectionBreak + content + sectionBreak + algorithmSection
}
|
|
65
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CPEMetadata - Metadata de seguridad del envelope
|
|
3
|
+
* Value Object puro e inmutable
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { CPEMetadata, ProtocolVersion, Timestamp } from '../types'
|
|
7
|
+
import type { Nonce as NonceVO } from './Nonce'
|
|
8
|
+
|
|
9
|
+
/**
 * Protocol version used when a caller does not specify one explicitly.
 */
export const CURRENT_PROTOCOL_VERSION: ProtocolVersion = '1.0.0'
|
|
13
|
+
|
|
14
|
+
/**
 * Creates the security metadata for an envelope.
 *
 * The result carries the timestamp, the nonce's string value, the protocol
 * version and, when supplied, the signatures of earlier layers. Both the
 * returned object and its nested `previousSignatures` object are frozen.
 *
 * @param timestamp - Unix timestamp in milliseconds
 * @param nonce - Nonce value object; only its `value` is stored
 * @param protocolVersion - Protocol version (default: CURRENT_PROTOCOL_VERSION)
 * @param previousSignatures - Optional signatures of earlier layers (csl, isl)
 * @returns Frozen CPEMetadata
 *
 * @throws {Error} If the timestamp is not a positive number (including NaN),
 *   lies more than five minutes in the future, or the protocol version is
 *   not a non-empty string
 */
export function createMetadata(
  timestamp: Timestamp,
  nonce: NonceVO,
  protocolVersion: ProtocolVersion = CURRENT_PROTOCOL_VERSION,
  previousSignatures?: {
    csl?: string
    isl?: string
  }
): CPEMetadata {
  // NaN compares false against everything, so `timestamp <= 0` alone would let
  // it through both range checks; reject it (and non-numbers) explicitly.
  if (typeof timestamp !== 'number' || Number.isNaN(timestamp) || timestamp <= 0) {
    throw new Error('Timestamp must be a positive number')
  }

  // Reject timestamps from the future, allowing 5 minutes of clock skew.
  const maxFutureTimestamp = Date.now() + 5 * 60 * 1000
  if (timestamp > maxFutureTimestamp) {
    throw new Error('Timestamp cannot be in the future')
  }

  if (!protocolVersion || typeof protocolVersion !== 'string') {
    throw new Error('Protocol version must be a non-empty string')
  }

  return Object.freeze({
    timestamp,
    nonce: nonce.value,
    protocolVersion,
    previousSignatures: previousSignatures
      ? Object.freeze({
          csl: previousSignatures.csl ?? undefined,
          isl: previousSignatures.isl ?? undefined,
        })
      : undefined,
  })
}
|
|
61
|
+
|
|
62
|
+
/**
 * Checks whether a metadata object is structurally valid.
 *
 * Valid means: the timestamp is a finite number greater than zero, the nonce
 * is a string of at least 16 characters, and the protocol version is a
 * non-empty string. The timestamp is not compared against the current time.
 *
 * @param metadata - Metadata to validate
 * @returns true if all checks pass; false otherwise (including when
 *   `metadata` is null/undefined or reading it throws)
 */
export function isValidMetadata(metadata: CPEMetadata): boolean {
  try {
    const { timestamp, nonce, protocolVersion } = metadata

    // `NaN <= 0` is false, so a plain range check would accept NaN; also
    // rejects Infinity and values that are not numbers at all.
    if (typeof timestamp !== 'number' || !Number.isFinite(timestamp) || timestamp <= 0) return false

    // Require a real string so array-likes with a `length` do not pass.
    if (typeof nonce !== 'string' || nonce.length < 16) return false

    if (typeof protocolVersion !== 'string' || protocolVersion.length === 0) return false

    return true
  } catch {
    // Destructuring null/undefined (or a throwing getter) lands here.
    return false
  }
}
|
|
78
|
+
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Nonce - Valor único para prevenir ataques de replay
|
|
3
|
+
* Value Object puro e inmutable
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { randomBytes } from 'node:crypto'
|
|
7
|
+
|
|
8
|
+
/**
 * Nonce value object: wraps the nonce string so it is passed around as a
 * distinct, read-only value.
 */
export type Nonce = {
  /** The nonce text (hex-encoded when produced by `createNonce`). */
  readonly value: string
}
|
|
14
|
+
|
|
15
|
+
/**
 * Generates a random nonce.
 *
 * `length` random bytes are drawn from `crypto.randomBytes` and hex-encoded,
 * so the resulting string has `2 * length` characters. The returned object
 * is frozen.
 *
 * @param length - Nonce length in bytes, an integer from 8 to 64 (default: 16)
 * @returns A frozen Nonce holding the hex string
 *
 * @throws {Error} If `length` is not an integer or lies outside 8..64
 */
export function createNonce(length: number = 16): Nonce {
  // NaN and fractional values pass both range comparisons below, so they must
  // be rejected up front instead of reaching randomBytes.
  if (!Number.isInteger(length)) {
    throw new Error('Nonce length must be an integer')
  }
  if (length < 8) {
    throw new Error('Nonce length must be at least 8 bytes')
  }
  if (length > 64) {
    throw new Error('Nonce length must be at most 64 bytes')
  }

  const value = randomBytes(length).toString('hex')

  return Object.freeze({
    value,
  })
}
|
|
36
|
+
|
|
37
|
+
/**
 * Checks whether a string has the shape of a nonce: 16 to 128 hexadecimal
 * characters, upper or lower case, and nothing else.
 *
 * @param value - String to check
 * @returns true if the string matches the nonce format
 */
export function isValidNonce(value: string): boolean {
  const hexNoncePattern = /^[a-f0-9]{16,128}$/i
  return hexNoncePattern.test(value)
}
|
|
46
|
+
|
|
47
|
+
/**
 * Compares two nonces by their string value (exact, case-sensitive match).
 *
 * @param nonce1 - First nonce
 * @param nonce2 - Second nonce
 * @returns true if both nonces hold the same value
 */
export function equalsNonce(nonce1: Nonce, nonce2: Nonce): boolean {
  const { value: first } = nonce1
  const { value: second } = nonce2
  return first === second
}
|
|
57
|
+
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Signature - Firma criptográfica HMAC-SHA256
|
|
3
|
+
* Value Object puro e inmutable
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createHmac, timingSafeEqual } from 'node:crypto'
import type { SignatureAlgorithm } from '../types'
|
|
8
|
+
|
|
9
|
+
/**
 * Signature value object: a signature string together with the identifier of
 * the algorithm that produced it.
 */
export type SignatureVO = {
  /** The signature text (hex digest when produced by `createSignature`). */
  readonly value: string
  /** Identifier of the algorithm used to compute `value`. */
  readonly algorithm: SignatureAlgorithm
}
|
|
16
|
+
|
|
17
|
+
/**
 * Computes the HMAC-SHA256 signature of a string.
 *
 * The digest is hex-encoded and returned in a frozen object tagged with the
 * algorithm identifier `'HMAC-SHA256'`.
 *
 * @param content - Content to sign
 * @param secretKey - Secret key for the HMAC
 * @returns Frozen signature value object
 *
 * @throws {Error} If the secret key is missing or empty
 * @throws {TypeError} If the content is not a string
 */
export function createSignature(
  content: string,
  secretKey: string
): SignatureVO {
  // The key is checked before the content, so a missing key wins when both are bad.
  const hasUsableKey = Boolean(secretKey) && secretKey.length !== 0
  if (!hasUsableKey) {
    throw new Error('Secret key is required for signature generation')
  }

  if (typeof content !== 'string') {
    throw new TypeError('Content must be a string')
  }

  const digest = createHmac('sha256', secretKey).update(content).digest('hex')

  const signed: SignatureVO = {
    value: digest,
    algorithm: 'HMAC-SHA256',
  }
  return Object.freeze(signed)
}
|
|
47
|
+
|
|
48
|
+
/**
 * Verifies an HMAC-SHA256 signature.
 *
 * The expected signature is recomputed with `createSignature` and compared
 * with the supplied one in constant time, so the comparison does not reveal
 * how many leading characters matched. The comparison is exact: the
 * supplied signature must equal the computed hex digest character for
 * character.
 *
 * @param content - Original content
 * @param signature - Signature to verify
 * @param secretKey - Secret key for the HMAC
 * @returns true if the signature matches; false if it does not, if the
 *   secret key is empty, or if any step throws
 */
export function verifySignature(
  content: string,
  signature: string,
  secretKey: string
): boolean {
  if (!secretKey || secretKey.length === 0) {
    return false
  }

  try {
    const expected = Buffer.from(createSignature(content, secretKey).value, 'utf8')
    const provided = Buffer.from(signature, 'utf8')

    // timingSafeEqual throws on unequal lengths; the digest length is public,
    // so short-circuiting on it leaks nothing secret.
    if (expected.length !== provided.length) {
      return false
    }

    return timingSafeEqual(expected, provided)
  } catch {
    // Invalid content or a non-string signature counts as a failed verification.
    return false
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Checks whether a string has the format of an HMAC-SHA256 hex digest:
 * exactly 64 hexadecimal characters, upper or lower case.
 *
 * This only checks the shape of the string, not whether the signature is
 * correct for any content.
 *
 * @param signature - Signature string to check
 * @returns true if the format is valid
 */
export function isValidSignatureFormat(signature: string): boolean {
  // A SHA-256 based HMAC is 32 bytes, i.e. 64 hex characters.
  const sha256HexPattern = /^[a-f0-9]{64}$/i
  return sha256HexPattern.test(signature)
}
|
|
83
|
+
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import { createTrustLevel } from './value-objects/TrustLevel'
|
|
2
|
+
import type { Origin } from './value-objects/Origin'
|
|
3
|
+
import { originMap } from './value-objects/Origin-map'
|
|
4
|
+
import { ClassificationError } from './exceptions'
|
|
5
|
+
import { OriginType } from './types'
|
|
6
|
+
import type { Source } from './types'
|
|
7
|
+
|
|
8
|
+
/**
 * Classifies a source and returns its trust level.
 *
 * The classification depends only on `source`, never on content. It runs in
 * two steps: Source → OriginType (table below), then OriginType → trust level
 * via `originMap`, and the result is wrapped with `createTrustLevel`.
 *
 * NOTE(review): 'UI' is mapped to SYSTEM_GENERATED here, while
 * `OriginType.USER` exists for direct user input — confirm that treating UI
 * input as system-generated is intended.
 *
 * @param source - Source of the content ('DOM' | 'UI' | 'SYSTEM' | 'API')
 * @returns Trust level determined by the source
 *
 * @throws {ClassificationError} If the source is not one of the known
 *   sources, or its origin type has no entry in `originMap`
 *
 * @example
 * ```typescript
 * const trust = classifySource('UI')
 * const trust2 = classifySource('DOM')
 * ```
 */
export function classifySource(source: Source) {
  const sourceToOriginType: Record<Source, OriginType> = {
    'UI': OriginType.SYSTEM_GENERATED,
    'SYSTEM': OriginType.SYSTEM_GENERATED,
    'DOM': OriginType.DOM_VISIBLE,
    'API': OriginType.NETWORK_FETCHED
  }

  // Own-property check: a plain lookup would also resolve inherited keys such
  // as 'toString' or 'constructor' for untyped callers and skip this guard.
  if (!Object.prototype.hasOwnProperty.call(sourceToOriginType, source)) {
    throw new ClassificationError(`Source '${source}' cannot be classified`)
  }

  const originType = sourceToOriginType[source]

  if (!originType) {
    throw new ClassificationError(`Source '${source}' cannot be classified`)
  }

  const trustLevelType = originMap.get(originType)

  if (!trustLevelType) {
    throw new ClassificationError(
      `Origin type '${originType}' is not mapped in originMap. ` +
      `All OriginType values must have a corresponding TrustLevel mapping.`
    )
  }

  return createTrustLevel(trustLevelType)
}
|
|
56
|
+
|
|
57
|
+
/**
 * Classifies an Origin value object and returns its trust level.
 *
 * Looks up `origin.type` in `originMap` and wraps the mapped value with
 * `createTrustLevel`.
 *
 * @param origin - Origin value object
 * @returns Trust level determined by the origin's type
 *
 * @throws {ClassificationError} If the origin's type has no entry in `originMap`
 */
export function classifyOrigin(origin: Origin) {
  const mappedTrust = originMap.get(origin.type)

  if (mappedTrust) {
    return createTrustLevel(mappedTrust)
  }

  const reason =
    `Origin type '${origin.type}' is not mapped in originMap. ` +
    `All OriginType values must have a corresponding TrustLevel mapping.`
  throw new ClassificationError(reason)
}
|
|
77
|
+
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
 * ClassificationError is thrown when classification fails.
 *
 * @remarks
 * This error is thrown when:
 * - An origin type is not mapped in originMap
 * - Classification cannot be determined
 */
export class ClassificationError extends Error {
  /**
   * @param message - Human-readable description of the failure
   */
  constructor(message: string) {
    super(message)
    this.name = 'ClassificationError'
    // Restore the prototype chain for down-levelled builds. `new.target` keeps
    // subclasses intact; hard-coding ClassificationError.prototype would strip
    // a subclass instance of its own prototype.
    Object.setPrototypeOf(this, new.target.prototype)
  }
}
|
|
16
|
+
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * SegmentationError is thrown when the segmentation process fails.
 *
 * @remarks
 * This error is thrown when:
 * - Content segmentation fails
 * - Critical pipeline steps fail
 */
export class SegmentationError extends Error {
  /**
   * @param message - Human-readable description of the failure
   * @param cause - Optional underlying error or value that triggered the failure
   */
  constructor(
    message: string,
    public readonly cause?: unknown
  ) {
    super(message)
    this.name = 'SegmentationError'
    // Restore the prototype chain for down-levelled builds. `new.target` keeps
    // subclasses intact; hard-coding SegmentationError.prototype would strip
    // a subclass instance of its own prototype.
    Object.setPrototypeOf(this, new.target.prototype)
  }
}
|
|
19
|
+
|
package/src/csl/index.ts
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CSL (Context Segmentation Layer) - Core Semántico
|
|
3
|
+
*
|
|
4
|
+
* @remarks
|
|
5
|
+
* Este es el core semántico de CSL. Solo contiene:
|
|
6
|
+
* - Funciones puras (sin estado)
|
|
7
|
+
* - Value objects inmutables
|
|
8
|
+
* - Tipos y excepciones
|
|
9
|
+
*
|
|
10
|
+
* **NO contiene:**
|
|
11
|
+
* - Detección de prompt injection (va a ISL)
|
|
12
|
+
* - Políticas (van a ISL)
|
|
13
|
+
* - Anomaly scores (van a ISL)
|
|
14
|
+
* - Normalización agresiva (va a ISL)
|
|
15
|
+
* - Servicios con estado (van al SDK)
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
// Funciones puras principales
|
|
19
|
+
export { segment } from './segment'
|
|
20
|
+
export { classifySource, classifyOrigin } from './classify'
|
|
21
|
+
export { initLineage, createLineageEntry } from './lineage'
|
|
22
|
+
|
|
23
|
+
// Value objects
|
|
24
|
+
export * from './value-objects'
|
|
25
|
+
|
|
26
|
+
// Exceptions
|
|
27
|
+
export * from './exceptions'
|
|
28
|
+
|
|
29
|
+
// Types
|
|
30
|
+
export * from './types'
|
|
31
|
+
|
|
32
|
+
// Utils
|
|
33
|
+
export { generateId, splitByContextRules } from './utils'
|
|
34
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { createLineageEntry as createEntry } from './value-objects/LineageEntry'
|
|
2
|
+
import type { LineageEntry } from './value-objects/LineageEntry'
|
|
3
|
+
import type { CSLSegment } from './types'
|
|
4
|
+
|
|
5
|
+
/**
 * Builds the initial lineage for a freshly created segment.
 *
 * @remarks
 * Returns a one-element array holding an entry for step `'CSL'`, stamped with
 * the current time from `Date.now()` — so the result depends on the clock and
 * differs between calls. The segment argument is currently not inspected.
 * Only step and timestamp are recorded; no notes.
 *
 * @param _segment - Segment the lineage belongs to (unused)
 * @returns Array containing the initial lineage entry
 *
 * @example
 * ```typescript
 * const lineage = initLineage(segment)
 * // Returns: [{ step: 'CSL', timestamp: ... }]
 * ```
 */
export function initLineage(_segment: CSLSegment): LineageEntry[] {
  const createdAt = Date.now()
  const initialEntry = createEntry('CSL', createdAt)
  return [initialEntry]
}
|
|
26
|
+
|
|
27
|
+
/**
 * Creates a lineage entry for a processing step.
 *
 * @remarks
 * The entry is stamped with the current time from `Date.now()`, so the result
 * depends on the clock. Only step and timestamp are recorded here; per the
 * original notes, annotations and extra metadata belong to the SDK.
 *
 * @param step - Name of the processing step
 * @returns New lineage entry
 */
export function createLineageEntry(step: string): LineageEntry {
  const recordedAt = Date.now()
  return createEntry(step, recordedAt)
}
|
|
40
|
+
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import type { CSLInput, CSLResult, CSLSegment } from './types'
|
|
2
|
+
import { classifySource } from './classify'
|
|
3
|
+
import { initLineage } from './lineage'
|
|
4
|
+
import { generateId, splitByContextRules } from './utils'
|
|
5
|
+
import { SegmentationError } from './exceptions'
|
|
6
|
+
|
|
7
|
+
/**
 * Splits an input into segments — the main entry point of CSL.
 *
 * @remarks
 * Steps, in order:
 * 1. Validate that `content` is a non-empty string and `source` is present.
 * 2. Split the content with `splitByContextRules`.
 * 3. Classify the source once with `classifySource`; every segment shares
 *    that trust value.
 * 4. Build one segment per piece, each with an id from `generateId`, the
 *    piece as content, the input's source and metadata (when given), and a
 *    lineage from `initLineage`.
 * 5. Return the segments plus the concatenation of all their lineages, both
 *    as frozen arrays.
 *
 * Segment order follows the order returned by `splitByContextRules`. Lineage
 * timestamps come from the clock, so two runs on the same input differ in
 * those fields.
 *
 * Any error raised inside — including the validation errors above — is caught
 * and rethrown as a SegmentationError whose message is prefixed with
 * `Failed to segment content: ` and whose `cause` is the original error.
 *
 * @param input - Input with content and source
 * @returns CSLResult with classified segments and initialized lineage
 *
 * @throws {SegmentationError} If validation or any segmentation step fails
 *
 * @example
 * ```typescript
 * const result = segment({
 *   content: 'Hello\nWorld',
 *   source: 'UI',
 *   metadata: {}
 * })
 * ```
 */
export function segment(input: CSLInput): CSLResult {
  try {
    // 1. Validate input
    if (typeof input.content !== 'string' || !input.content) {
      throw new SegmentationError('CSLInput content must be a non-empty string')
    }
    if (!input.source) {
      throw new SegmentationError('CSLInput source is required')
    }

    // 2. Structural split only; no semantic interpretation happens here
    const pieces = splitByContextRules(input.content)

    // 3. Nothing to segment: empty, frozen result
    if (pieces.length === 0) {
      return {
        segments: Object.freeze([]),
        lineage: Object.freeze([])
      }
    }

    // 4. One classification shared by every segment of this input
    const trust = classifySource(input.source)

    // 5. Build each segment, then attach its initial lineage
    const segments: CSLSegment[] = []
    for (const piece of pieces) {
      const draft: CSLSegment = {
        id: generateId(),
        content: piece,
        source: input.source,
        trust,
        lineage: [], // placeholder, replaced right below
        ...(input.metadata && { metadata: input.metadata })
      }

      const lineage = initLineage(draft)
      segments.push({ ...draft, lineage })
    }

    // 6. Flatten per-segment lineage into one list, in segment order
    const allLineage = segments.flatMap(({ lineage }) => lineage)

    // 7. Hand back frozen arrays
    return {
      segments: Object.freeze(segments),
      lineage: Object.freeze(allLineage)
    }
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : 'Unknown error during segmentation'
    throw new SegmentationError(`Failed to segment content: ${errorMessage}`, error)
  }
}
|
|
100
|
+
|
package/src/csl/types.ts
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for CSL (Context Segmentation Layer) - Core Semántico
|
|
3
|
+
*
|
|
4
|
+
* Solo tipos esenciales para CSL. Tipos relacionados con detección,
|
|
5
|
+
* anomalías y políticas van a ISL.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
/**
 * OriginType identifies where a content segment came from.
 *
 * The trust class noted on each member is the intended classification; the
 * mapping itself is applied elsewhere (via `originMap`), not by this enum.
 */
export enum OriginType {
  /** Direct user input from UI controls. Intended class: UC (untrusted). */
  USER = 'USER',

  /** Content from visible DOM elements. Intended class: STC (semi-trusted), since the user can verify it. */
  DOM_VISIBLE = 'DOM_VISIBLE',

  /** Content from hidden DOM elements. Intended class: UC — a potential attack vector. */
  DOM_HIDDEN = 'DOM_HIDDEN',

  /** Content from DOM attributes (data-*, aria-*, etc.). Intended class: STC — visible in the page source. */
  DOM_ATTRIBUTE = 'DOM_ATTRIBUTE',

  /** Content injected by scripts (dynamically generated). Intended class: UC — it can be manipulated. */
  SCRIPT_INJECTED = 'SCRIPT_INJECTED',

  /** Content fetched over the network (API calls, external resources). Intended class: UC — external and unverified. */
  NETWORK_FETCHED = 'NETWORK_FETCHED',

  /** System-generated content (instructions, system prompts, etc.). Intended class: TC (trusted). */
  SYSTEM_GENERATED = 'SYSTEM_GENERATED',

  /** Origin cannot be determined. Intended class: UC — unknown is untrusted by default. */
  UNKNOWN = 'UNKNOWN',
}
|
|
60
|
+
|
|
61
|
+
/**
 * TrustLevelType is the trust class assigned to content.
 */
export enum TrustLevelType {
  /** Trusted Content */
  TC = 'TC',
  /** Semi-Trusted Content */
  STC = 'STC',
  /** Untrusted Content */
  UC = 'UC',
}
|
|
69
|
+
|
|
70
|
+
/**
 * Hash algorithms accepted for ContentHash (optional, used for traceability).
 */
export type HashAlgorithm = 'sha256' | 'sha512'
|
|
74
|
+
|
|
75
|
+
/**
 * Sources a CSL input can declare.
 */
export type Source = 'DOM' | 'UI' | 'SYSTEM' | 'API'
|
|
79
|
+
|
|
80
|
+
/**
 * CSLInput is the argument accepted by `segment()`.
 */
export interface CSLInput {
  /** Raw content to segment. */
  readonly content: string
  /** Where the content came from. */
  readonly source: Source
  /** Optional caller-supplied metadata. */
  readonly metadata?: Record<string, unknown>
}
|
|
88
|
+
|
|
89
|
+
// Importar tipos de value objects para usar en interfaces
|
|
90
|
+
import type { ContentHash, LineageEntry, TrustLevel } from './value-objects'
|
|
91
|
+
|
|
92
|
+
/**
 * CSLSegment is a single segment produced by CSL: data only, no behavior.
 */
export interface CSLSegment {
  /** Segment identifier. */
  readonly id: string
  /** Original content, unmodified. */
  readonly content: string
  /** Source declared on the input this segment came from. */
  readonly source: Source
  /** Trust level, classified by origin. */
  readonly trust: TrustLevel
  /** Lineage entries, initialized in CSL. */
  readonly lineage: LineageEntry[]
  /** Optional content hash, for traceability. */
  readonly hash?: ContentHash
  /** Optional caller-supplied metadata. */
  readonly metadata?: Record<string, unknown>
}
|
|
104
|
+
|
|
105
|
+
/**
 * CSLResult is the value returned by segmentation: data only.
 */
export interface CSLResult {
  /** Segments in their original order. */
  readonly segments: readonly CSLSegment[]
  /** Lineage entries collected across all segments. */
  readonly lineage: readonly LineageEntry[]
  /** Optional processing time in milliseconds, for metrics. */
  readonly processingTimeMs?: number
}
|
|
113
|
+
|