llm-trust-guard 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +318 -0
- package/dist/guards/agent-communication-guard.d.ts +169 -0
- package/dist/guards/agent-communication-guard.d.ts.map +1 -0
- package/dist/guards/agent-communication-guard.js +468 -0
- package/dist/guards/agent-communication-guard.js.map +1 -0
- package/dist/guards/autonomy-escalation-guard.d.ts +137 -0
- package/dist/guards/autonomy-escalation-guard.d.ts.map +1 -0
- package/dist/guards/autonomy-escalation-guard.js +470 -0
- package/dist/guards/autonomy-escalation-guard.js.map +1 -0
- package/dist/guards/circuit-breaker.d.ts +142 -0
- package/dist/guards/circuit-breaker.d.ts.map +1 -0
- package/dist/guards/circuit-breaker.js +347 -0
- package/dist/guards/circuit-breaker.js.map +1 -0
- package/dist/guards/code-execution-guard.d.ts +114 -0
- package/dist/guards/code-execution-guard.d.ts.map +1 -0
- package/dist/guards/code-execution-guard.js +467 -0
- package/dist/guards/code-execution-guard.js.map +1 -0
- package/dist/guards/conversation-guard.d.ts +73 -0
- package/dist/guards/conversation-guard.d.ts.map +1 -0
- package/dist/guards/conversation-guard.js +281 -0
- package/dist/guards/conversation-guard.js.map +1 -0
- package/dist/guards/drift-detector.d.ts +182 -0
- package/dist/guards/drift-detector.d.ts.map +1 -0
- package/dist/guards/drift-detector.js +480 -0
- package/dist/guards/drift-detector.js.map +1 -0
- package/dist/guards/encoding-detector.d.ts +76 -0
- package/dist/guards/encoding-detector.d.ts.map +1 -0
- package/dist/guards/encoding-detector.js +698 -0
- package/dist/guards/encoding-detector.js.map +1 -0
- package/dist/guards/execution-monitor.d.ts +73 -0
- package/dist/guards/execution-monitor.d.ts.map +1 -0
- package/dist/guards/execution-monitor.js +205 -0
- package/dist/guards/execution-monitor.js.map +1 -0
- package/dist/guards/input-sanitizer.d.ts +87 -0
- package/dist/guards/input-sanitizer.d.ts.map +1 -0
- package/dist/guards/input-sanitizer.js +301 -0
- package/dist/guards/input-sanitizer.js.map +1 -0
- package/dist/guards/mcp-security-guard.d.ts +204 -0
- package/dist/guards/mcp-security-guard.d.ts.map +1 -0
- package/dist/guards/mcp-security-guard.js +618 -0
- package/dist/guards/mcp-security-guard.js.map +1 -0
- package/dist/guards/memory-guard.d.ts +124 -0
- package/dist/guards/memory-guard.d.ts.map +1 -0
- package/dist/guards/memory-guard.js +476 -0
- package/dist/guards/memory-guard.js.map +1 -0
- package/dist/guards/multimodal-guard.d.ts +93 -0
- package/dist/guards/multimodal-guard.d.ts.map +1 -0
- package/dist/guards/multimodal-guard.js +507 -0
- package/dist/guards/multimodal-guard.js.map +1 -0
- package/dist/guards/output-filter.d.ts +76 -0
- package/dist/guards/output-filter.d.ts.map +1 -0
- package/dist/guards/output-filter.js +289 -0
- package/dist/guards/output-filter.js.map +1 -0
- package/dist/guards/policy-gate.d.ts +57 -0
- package/dist/guards/policy-gate.d.ts.map +1 -0
- package/dist/guards/policy-gate.js +182 -0
- package/dist/guards/policy-gate.js.map +1 -0
- package/dist/guards/prompt-leakage-guard.d.ts +110 -0
- package/dist/guards/prompt-leakage-guard.d.ts.map +1 -0
- package/dist/guards/prompt-leakage-guard.js +529 -0
- package/dist/guards/prompt-leakage-guard.js.map +1 -0
- package/dist/guards/rag-guard.d.ts +188 -0
- package/dist/guards/rag-guard.d.ts.map +1 -0
- package/dist/guards/rag-guard.js +769 -0
- package/dist/guards/rag-guard.js.map +1 -0
- package/dist/guards/schema-validator.d.ts +35 -0
- package/dist/guards/schema-validator.d.ts.map +1 -0
- package/dist/guards/schema-validator.js +316 -0
- package/dist/guards/schema-validator.js.map +1 -0
- package/dist/guards/state-persistence-guard.d.ts +153 -0
- package/dist/guards/state-persistence-guard.d.ts.map +1 -0
- package/dist/guards/state-persistence-guard.js +484 -0
- package/dist/guards/state-persistence-guard.js.map +1 -0
- package/dist/guards/tenant-boundary.d.ts +67 -0
- package/dist/guards/tenant-boundary.d.ts.map +1 -0
- package/dist/guards/tenant-boundary.js +187 -0
- package/dist/guards/tenant-boundary.js.map +1 -0
- package/dist/guards/tool-chain-validator.d.ts +102 -0
- package/dist/guards/tool-chain-validator.d.ts.map +1 -0
- package/dist/guards/tool-chain-validator.js +480 -0
- package/dist/guards/tool-chain-validator.js.map +1 -0
- package/dist/guards/tool-registry.d.ts +45 -0
- package/dist/guards/tool-registry.d.ts.map +1 -0
- package/dist/guards/tool-registry.js +155 -0
- package/dist/guards/tool-registry.js.map +1 -0
- package/dist/guards/trust-exploitation-guard.d.ts +134 -0
- package/dist/guards/trust-exploitation-guard.d.ts.map +1 -0
- package/dist/guards/trust-exploitation-guard.js +354 -0
- package/dist/guards/trust-exploitation-guard.js.map +1 -0
- package/dist/index.d.ts +133 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +430 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/express.d.ts +119 -0
- package/dist/integrations/express.d.ts.map +1 -0
- package/dist/integrations/express.js +244 -0
- package/dist/integrations/express.js.map +1 -0
- package/dist/integrations/index.d.ts +9 -0
- package/dist/integrations/index.d.ts.map +1 -0
- package/dist/integrations/index.js +26 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/integrations/langchain.d.ts +165 -0
- package/dist/integrations/langchain.d.ts.map +1 -0
- package/dist/integrations/langchain.js +308 -0
- package/dist/integrations/langchain.js.map +1 -0
- package/dist/integrations/openai.d.ts +205 -0
- package/dist/integrations/openai.d.ts.map +1 -0
- package/dist/integrations/openai.js +380 -0
- package/dist/integrations/openai.js.map +1 -0
- package/dist/types/index.d.ts +245 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +6 -0
- package/dist/types/index.js.map +1 -0
- package/package.json +64 -0
|
@@ -0,0 +1,769 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* RAGGuard (L10) v2
|
|
4
|
+
*
|
|
5
|
+
* Validates RAG (Retrieval Augmented Generation) content before injection.
|
|
6
|
+
* Protects against supply chain attacks via poisoned documents and embeddings.
|
|
7
|
+
*
|
|
8
|
+
* Threat Model:
|
|
9
|
+
* - ASI04: Agentic Supply Chain Vulnerabilities
|
|
10
|
+
* - RAG Poisoning: Malicious content in retrieved documents
|
|
11
|
+
* - Embedding manipulation attacks
|
|
12
|
+
* - Indirect prompt injection via documents
|
|
13
|
+
*
|
|
14
|
+
* Protection Capabilities (v2 Enhanced):
|
|
15
|
+
* - Retrieved document sanitization
|
|
16
|
+
* - Source verification and trust scoring
|
|
17
|
+
* - Injection pattern detection in documents
|
|
18
|
+
* - Content integrity verification
|
|
19
|
+
* - Suspicious document quarantine
|
|
20
|
+
* - Advanced embedding attack detection (backdoor, adversarial)
|
|
21
|
+
* - Unicode steganography detection
|
|
22
|
+
* - Markdown/HTML hidden instruction detection
|
|
23
|
+
* - Cross-document similarity anomaly detection
|
|
24
|
+
* - Embedding norm and distribution analysis
|
|
25
|
+
*/
|
|
26
|
+
// TypeScript interop helper: re-exports property `k` of module `m` on object
// `o` under the name `k2` (defaults to `k`). When `Object.create` exists the
// binding is installed with `Object.defineProperty`; otherwise the value is
// copied with a plain assignment.
var __createBinding = (this && this.__createBinding) || (function () {
    // Descriptor-based variant.
    function defineBinding(target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor property: reuse it only when the source is an ES module.
            needsGetter = !source.__esModule;
        }
        else {
            // Data property: wrap it in a getter when it can still change.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    // Plain-assignment variant: a one-time copy of the current value.
    function copyBinding(target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    }
    return Object.create ? defineBinding : copyBinding;
})();
|
|
37
|
+
// TypeScript interop helper: attaches `v` to `o` as its "default" export.
// With `Object.create` available the property is defined as enumerable via
// `Object.defineProperty`; otherwise it is assigned directly.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var descriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", descriptor);
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
|
|
42
|
+
// TypeScript interop helper for `import * as ns from "mod"`.
// ES modules (objects flagged with `__esModule`) are returned untouched. Any
// other value is wrapped in a fresh namespace object: every own property except
// "default" is re-exported through `__createBinding`, and the original value
// itself becomes the namespace's "default".
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use: the native own-key lister, or a for-in fallback.
    var listOwnKeys = null;
    function ownKeys(obj) {
        if (listOwnKeys === null) {
            listOwnKeys = Object.getOwnPropertyNames || function (target) {
                var names = [];
                for (var key in target) {
                    if (Object.prototype.hasOwnProperty.call(target, key)) {
                        names[names.length] = key;
                    }
                }
                return names;
            };
        }
        return listOwnKeys(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = ownKeys(mod);
            for (var index = 0; index < keys.length; index++) {
                var key = keys[index];
                if (key === "default") {
                    continue;
                }
                __createBinding(namespace, mod, key);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
59
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
60
|
+
exports.RAGGuard = void 0;
|
|
61
|
+
const crypto = __importStar(require("crypto"));
|
|
62
|
+
class RAGGuard {
|
|
63
|
+
constructor(config = {}) {
|
|
64
|
+
this.contentHashCache = new Map();
|
|
65
|
+
this.sourceReputationCache = new Map();
|
|
66
|
+
// Injection patterns specifically targeting RAG/document content
|
|
67
|
+
this.RAG_INJECTION_PATTERNS = [
|
|
68
|
+
// Direct instruction injection
|
|
69
|
+
{ name: "instruction_override", pattern: /ignore\s+(all\s+)?previous\s+(context|documents|information)/i, severity: 50 },
|
|
70
|
+
{ name: "system_instruction", pattern: /\[SYSTEM\]|\[INSTRUCTION\]|\[OVERRIDE\]|<system>|<instruction>/i, severity: 45 },
|
|
71
|
+
{ name: "role_injection", pattern: /you\s+(are|must|should)\s+(now\s+)?(be|act\s+as|become)/i, severity: 40 },
|
|
72
|
+
// Delimiter injection (to break out of RAG context)
|
|
73
|
+
{ name: "delimiter_break", pattern: /---\s*(end|stop)\s*(of\s*)?(context|document|rag)/i, severity: 45 },
|
|
74
|
+
{ name: "context_escape", pattern: /\]\]\]|\}\}\}|<<<|>>>|'''|"""/g, severity: 30 },
|
|
75
|
+
// Hidden instruction markers
|
|
76
|
+
{ name: "hidden_instruction", pattern: /HIDDEN:|SECRET:|INVISIBLE:|DO_NOT_DISPLAY:/i, severity: 50 },
|
|
77
|
+
{ name: "admin_marker", pattern: /ADMIN_INSTRUCTION|ROOT_COMMAND|ELEVATED_PROMPT/i, severity: 55 },
|
|
78
|
+
// Data exfiltration setup
|
|
79
|
+
{ name: "exfil_setup", pattern: /send\s+(all|this|data)\s+to|forward\s+to\s+https?:\/\//i, severity: 50 },
|
|
80
|
+
{ name: "callback_injection", pattern: /callback\s*[:=]\s*https?:\/\/|webhook\s*[:=]/i, severity: 45 },
|
|
81
|
+
// Tool/action injection via documents
|
|
82
|
+
{ name: "tool_injection", pattern: /call\s+(tool|function|action)\s*[:=]|execute\s*[:=]/i, severity: 45 },
|
|
83
|
+
{ name: "code_injection", pattern: /```(javascript|python|bash|sh)\s*\n[^`]*\b(eval|exec|system|subprocess)\b/i, severity: 50 },
|
|
84
|
+
// Persona/behavior modification
|
|
85
|
+
{ name: "persona_override", pattern: /your\s+(new\s+)?(persona|identity|character)\s+(is|will\s+be)/i, severity: 40 },
|
|
86
|
+
{ name: "behavior_mod", pattern: /always\s+(respond|reply|answer)\s+with|never\s+(mention|reveal|disclose)/i, severity: 35 },
|
|
87
|
+
// Prompt leakage attempts
|
|
88
|
+
{ name: "prompt_extraction", pattern: /reveal\s+(your\s+)?(system\s+)?prompt|show\s+(me\s+)?(your\s+)?instructions/i, severity: 40 },
|
|
89
|
+
{ name: "debug_mode", pattern: /enable\s+debug|activate\s+developer\s+mode|enter\s+test\s+mode/i, severity: 35 },
|
|
90
|
+
];
|
|
91
|
+
// Suspicious metadata patterns
|
|
92
|
+
this.SUSPICIOUS_METADATA_PATTERNS = [
|
|
93
|
+
{ name: "script_in_title", pattern: /<script|javascript:/i },
|
|
94
|
+
{ name: "injection_in_author", pattern: /admin|system|root|override/i },
|
|
95
|
+
{ name: "suspicious_content_type", pattern: /application\/x-|text\/x-/i },
|
|
96
|
+
];
|
|
97
|
+
// Known malicious source patterns
|
|
98
|
+
this.MALICIOUS_SOURCE_PATTERNS = [
|
|
99
|
+
/pastebin\.com/i,
|
|
100
|
+
/hastebin\.com/i,
|
|
101
|
+
/gist\.githubusercontent\.com.*injection/i,
|
|
102
|
+
/raw\.githubusercontent\.com.*malicious/i,
|
|
103
|
+
/\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/, // IP addresses
|
|
104
|
+
];
|
|
105
|
+
// Indirect prompt injection patterns (v2)
|
|
106
|
+
this.INDIRECT_INJECTION_PATTERNS = [
|
|
107
|
+
// HTML/Markdown hidden instructions
|
|
108
|
+
{ name: "html_comment_injection", pattern: /<!--[\s\S]*?(ignore|override|system|instruction|admin)[\s\S]*?-->/i, severity: 45 },
|
|
109
|
+
{ name: "markdown_hidden", pattern: /\[[\s\S]*?\]\(javascript:|data:text\/html|about:blank\)/i, severity: 50 },
|
|
110
|
+
{ name: "invisible_link", pattern: /\[]\([^)]+\)/g, severity: 30 },
|
|
111
|
+
// Unicode steganography
|
|
112
|
+
{ name: "zero_width_chars", pattern: /[\u200B-\u200F\u2028-\u202F\uFEFF]{3,}/g, severity: 40 },
|
|
113
|
+
{ name: "rtl_override", pattern: /[\u202A-\u202E\u2066-\u2069]/g, severity: 35 },
|
|
114
|
+
{ name: "confusable_chars", pattern: /[\u0430\u0435\u043E\u0440\u0441\u0443\u0445]/g, severity: 25 }, // Cyrillic lookalikes
|
|
115
|
+
// Whitespace injection
|
|
116
|
+
{ name: "excessive_whitespace", pattern: /[\t\n\r]{10,}/g, severity: 20 },
|
|
117
|
+
{ name: "tab_encoding", pattern: /\t{5,}/g, severity: 25 },
|
|
118
|
+
// Encoded instructions - enhanced detection
|
|
119
|
+
{ name: "base64_block", pattern: /[A-Za-z0-9+/]{40,}={0,2}/g, severity: 40 },
|
|
120
|
+
{ name: "base64_with_context", pattern: /(?:encode|decode|base64|reference)[:\s]*[A-Za-z0-9+/]{20,}/i, severity: 45 },
|
|
121
|
+
{ name: "hex_encoded", pattern: /\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){5,}/g, severity: 35 },
|
|
122
|
+
{ name: "unicode_escape", pattern: /\\u[0-9a-fA-F]{4}(?:\\u[0-9a-fA-F]{4}){3,}/g, severity: 35 },
|
|
123
|
+
// Context switching attempts
|
|
124
|
+
{ name: "fake_boundary", pattern: /={5,}|#{5,}|-{10,}/g, severity: 20 },
|
|
125
|
+
{ name: "json_injection", pattern: /\{"(role|content|system)":/i, severity: 45 },
|
|
126
|
+
{ name: "xml_injection", pattern: /<\/?(?:prompt|assistant|user|system)>/i, severity: 45 },
|
|
127
|
+
];
|
|
128
|
+
this.config = {
|
|
129
|
+
detectInjections: config.detectInjections ?? true,
|
|
130
|
+
verifySource: config.verifySource ?? true,
|
|
131
|
+
trustedSources: config.trustedSources ?? [],
|
|
132
|
+
blockedSources: config.blockedSources ?? [],
|
|
133
|
+
maxDocumentSize: config.maxDocumentSize ?? 50000, // 50KB
|
|
134
|
+
minTrustScore: config.minTrustScore ?? 30,
|
|
135
|
+
enableContentHashing: config.enableContentHashing ?? true,
|
|
136
|
+
knownGoodHashes: config.knownGoodHashes ?? new Set(),
|
|
137
|
+
autoSanitize: config.autoSanitize ?? true,
|
|
138
|
+
// v2 options
|
|
139
|
+
detectEmbeddingAttacks: config.detectEmbeddingAttacks ?? true,
|
|
140
|
+
embeddingDimension: config.embeddingDimension ?? 1536, // OpenAI default
|
|
141
|
+
detectSteganography: config.detectSteganography ?? true,
|
|
142
|
+
detectClusteringAnomalies: config.detectClusteringAnomalies ?? true,
|
|
143
|
+
embeddingMagnitudeRange: config.embeddingMagnitudeRange ?? [0.8, 1.2],
|
|
144
|
+
similarityThreshold: config.similarityThreshold ?? 0.95,
|
|
145
|
+
detectIndirectInjection: config.detectIndirectInjection ?? true,
|
|
146
|
+
};
|
|
147
|
+
}
|
|
148
|
+
/**
|
|
149
|
+
* Validate RAG documents before injecting into context
|
|
150
|
+
*/
|
|
151
|
+
validate(documents, requestId) {
|
|
152
|
+
const reqId = requestId || `rag-${Date.now()}`;
|
|
153
|
+
const violations = [];
|
|
154
|
+
const blockedIds = [];
|
|
155
|
+
const untrustedSources = [];
|
|
156
|
+
const sanitizedDocs = [];
|
|
157
|
+
const embeddingAnalysis = [];
|
|
158
|
+
let injectionAttempts = 0;
|
|
159
|
+
let documentsBlocked = 0;
|
|
160
|
+
let documentsSanitized = 0;
|
|
161
|
+
let totalTrustScore = 0;
|
|
162
|
+
let embeddingAttacksDetected = 0;
|
|
163
|
+
let steganographyDetected = 0;
|
|
164
|
+
let indirectInjectionAttempts = 0;
|
|
165
|
+
for (const doc of documents) {
|
|
166
|
+
let docViolations = [];
|
|
167
|
+
let docRiskScore = 0;
|
|
168
|
+
let shouldBlock = false;
|
|
169
|
+
let needsSanitization = false;
|
|
170
|
+
// Check document size
|
|
171
|
+
if (doc.content.length > this.config.maxDocumentSize) {
|
|
172
|
+
docViolations.push("oversized_document");
|
|
173
|
+
docRiskScore += 20;
|
|
174
|
+
}
|
|
175
|
+
// Verify source
|
|
176
|
+
if (this.config.verifySource) {
|
|
177
|
+
const sourceResult = this.verifyDocumentSource(doc.source);
|
|
178
|
+
if (!sourceResult.trusted) {
|
|
179
|
+
docViolations.push(`untrusted_source: ${sourceResult.reason}`);
|
|
180
|
+
untrustedSources.push(doc.source);
|
|
181
|
+
docRiskScore += 100 - sourceResult.score;
|
|
182
|
+
if (sourceResult.score < this.config.minTrustScore) {
|
|
183
|
+
shouldBlock = true;
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
totalTrustScore += sourceResult.score;
|
|
187
|
+
}
|
|
188
|
+
else {
|
|
189
|
+
totalTrustScore += 50; // Neutral score when not verifying
|
|
190
|
+
}
|
|
191
|
+
// Check content hash if enabled
|
|
192
|
+
if (this.config.enableContentHashing) {
|
|
193
|
+
const hash = this.hashContent(doc.content);
|
|
194
|
+
if (doc.contentHash && doc.contentHash !== hash) {
|
|
195
|
+
docViolations.push("content_hash_mismatch");
|
|
196
|
+
docRiskScore += 40;
|
|
197
|
+
shouldBlock = true;
|
|
198
|
+
}
|
|
199
|
+
// Check against known good hashes
|
|
200
|
+
if (this.config.knownGoodHashes.has(hash)) {
|
|
201
|
+
docRiskScore = Math.max(0, docRiskScore - 30); // Reduce risk for known good content
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
// Check for injection patterns
|
|
205
|
+
if (this.config.detectInjections) {
|
|
206
|
+
const injectionResult = this.detectInjections(doc.content);
|
|
207
|
+
if (injectionResult.found) {
|
|
208
|
+
injectionAttempts += injectionResult.patterns.length;
|
|
209
|
+
docViolations.push(...injectionResult.violations);
|
|
210
|
+
docRiskScore += injectionResult.riskContribution;
|
|
211
|
+
needsSanitization = true;
|
|
212
|
+
if (injectionResult.riskContribution >= 50) {
|
|
213
|
+
shouldBlock = true;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
// Check metadata
|
|
218
|
+
if (doc.metadata) {
|
|
219
|
+
const metadataResult = this.checkMetadata(doc.metadata);
|
|
220
|
+
if (metadataResult.suspicious) {
|
|
221
|
+
docViolations.push(...metadataResult.violations);
|
|
222
|
+
docRiskScore += metadataResult.riskContribution;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
// Check embedding anomalies (basic) - runs first to catch critical issues
|
|
226
|
+
if (doc.embedding) {
|
|
227
|
+
// Critical: Check for invalid values (NaN, Infinity, null, non-numbers)
|
|
228
|
+
// Note: JSON serialization converts NaN/Infinity to null
|
|
229
|
+
const hasInvalidValues = doc.embedding.some((v) => v === null ||
|
|
230
|
+
v === undefined ||
|
|
231
|
+
typeof v !== "number" ||
|
|
232
|
+
!isFinite(v) ||
|
|
233
|
+
isNaN(v));
|
|
234
|
+
if (hasInvalidValues) {
|
|
235
|
+
docViolations.push("embedding_contains_invalid_values");
|
|
236
|
+
docRiskScore += 50;
|
|
237
|
+
shouldBlock = true;
|
|
238
|
+
}
|
|
239
|
+
if (doc.retrievalScore !== undefined) {
|
|
240
|
+
const embeddingResult = this.checkEmbedding(doc.embedding, doc.retrievalScore);
|
|
241
|
+
if (embeddingResult.anomalous) {
|
|
242
|
+
docViolations.push(`embedding_anomaly: ${embeddingResult.reason}`);
|
|
243
|
+
docRiskScore += 35;
|
|
244
|
+
if (embeddingResult.shouldBlock) {
|
|
245
|
+
shouldBlock = true;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
// v2: Advanced embedding attack detection
|
|
251
|
+
if (this.config.detectEmbeddingAttacks && doc.embedding) {
|
|
252
|
+
const embeddingAttack = this.detectEmbeddingAttacks(doc.embedding, doc.retrievalScore);
|
|
253
|
+
if (embeddingAttack.detected) {
|
|
254
|
+
embeddingAttacksDetected++;
|
|
255
|
+
embeddingAnalysis.push(embeddingAttack);
|
|
256
|
+
docViolations.push(...embeddingAttack.attack_type.map(t => `embedding_attack: ${t}`));
|
|
257
|
+
docRiskScore += embeddingAttack.risk_score;
|
|
258
|
+
if (embeddingAttack.risk_score >= 40) {
|
|
259
|
+
shouldBlock = true;
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
// v2: Indirect injection detection
|
|
264
|
+
if (this.config.detectIndirectInjection) {
|
|
265
|
+
const indirectResult = this.detectIndirectInjection(doc.content);
|
|
266
|
+
if (indirectResult.found) {
|
|
267
|
+
indirectInjectionAttempts += indirectResult.patterns.length;
|
|
268
|
+
docViolations.push(...indirectResult.violations);
|
|
269
|
+
docRiskScore += indirectResult.riskContribution;
|
|
270
|
+
needsSanitization = true;
|
|
271
|
+
if (indirectResult.riskContribution >= 40) {
|
|
272
|
+
shouldBlock = true;
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
// v2: Steganography detection
|
|
277
|
+
if (this.config.detectSteganography) {
|
|
278
|
+
const stegoResult = this.detectSteganography(doc.content);
|
|
279
|
+
if (stegoResult.found) {
|
|
280
|
+
steganographyDetected++;
|
|
281
|
+
docViolations.push(...stegoResult.violations);
|
|
282
|
+
docRiskScore += stegoResult.riskContribution;
|
|
283
|
+
needsSanitization = true;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
// Decision for this document
|
|
287
|
+
if (shouldBlock || docRiskScore >= 70) {
|
|
288
|
+
blockedIds.push(doc.id);
|
|
289
|
+
documentsBlocked++;
|
|
290
|
+
violations.push(...docViolations.map((v) => `[${doc.id}] ${v}`));
|
|
291
|
+
}
|
|
292
|
+
else if (needsSanitization && this.config.autoSanitize) {
|
|
293
|
+
const sanitized = this.sanitizeDocument(doc);
|
|
294
|
+
sanitizedDocs.push(sanitized);
|
|
295
|
+
documentsSanitized++;
|
|
296
|
+
violations.push(...docViolations.map((v) => `[${doc.id}] ${v} (sanitized)`));
|
|
297
|
+
}
|
|
298
|
+
else {
|
|
299
|
+
sanitizedDocs.push(doc);
|
|
300
|
+
if (docViolations.length > 0) {
|
|
301
|
+
violations.push(...docViolations.map((v) => `[${doc.id}] ${v} (allowed)`));
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
const averageTrustScore = documents.length > 0 ? totalTrustScore / documents.length : 0;
|
|
306
|
+
const blocked = documentsBlocked === documents.length || averageTrustScore < this.config.minTrustScore;
|
|
307
|
+
return {
|
|
308
|
+
allowed: !blocked,
|
|
309
|
+
reason: blocked
|
|
310
|
+
? `RAG content blocked: ${documentsBlocked}/${documents.length} documents failed validation`
|
|
311
|
+
: "RAG content validated",
|
|
312
|
+
violations,
|
|
313
|
+
request_id: reqId,
|
|
314
|
+
document_analysis: {
|
|
315
|
+
documents_checked: documents.length,
|
|
316
|
+
documents_blocked: documentsBlocked,
|
|
317
|
+
documents_sanitized: documentsSanitized,
|
|
318
|
+
injection_attempts: injectionAttempts,
|
|
319
|
+
untrusted_sources: [...new Set(untrustedSources)],
|
|
320
|
+
average_trust_score: Math.round(averageTrustScore),
|
|
321
|
+
// v2 additions
|
|
322
|
+
embedding_attacks_detected: embeddingAttacksDetected,
|
|
323
|
+
steganography_detected: steganographyDetected,
|
|
324
|
+
indirect_injection_attempts: indirectInjectionAttempts,
|
|
325
|
+
},
|
|
326
|
+
sanitized_documents: blocked ? undefined : sanitizedDocs,
|
|
327
|
+
blocked_document_ids: blockedIds,
|
|
328
|
+
recommendations: this.generateRecommendations(violations, untrustedSources.length > 0),
|
|
329
|
+
// v2 addition
|
|
330
|
+
embedding_analysis: embeddingAnalysis.length > 0 ? embeddingAnalysis : undefined,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
/**
|
|
334
|
+
* Validate a single document
|
|
335
|
+
*/
|
|
336
|
+
validateSingle(document, requestId) {
|
|
337
|
+
return this.validate([document], requestId);
|
|
338
|
+
}
|
|
339
|
+
/**
|
|
340
|
+
* Verify document source trustworthiness
|
|
341
|
+
*/
|
|
342
|
+
verifyDocumentSource(source) {
|
|
343
|
+
// Check cache
|
|
344
|
+
const cached = this.sourceReputationCache.get(source);
|
|
345
|
+
if (cached !== undefined) {
|
|
346
|
+
return {
|
|
347
|
+
trusted: cached >= this.config.minTrustScore,
|
|
348
|
+
score: cached,
|
|
349
|
+
reason: cached >= this.config.minTrustScore ? "Cached trusted source" : "Cached untrusted source",
|
|
350
|
+
};
|
|
351
|
+
}
|
|
352
|
+
let score = 50; // Neutral starting point
|
|
353
|
+
let reason = "Unknown source";
|
|
354
|
+
// Check blocked sources
|
|
355
|
+
for (const blocked of this.config.blockedSources) {
|
|
356
|
+
if (source.includes(blocked) || new RegExp(blocked, "i").test(source)) {
|
|
357
|
+
this.sourceReputationCache.set(source, 0);
|
|
358
|
+
return { trusted: false, score: 0, reason: "Blocked source" };
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
// Check malicious patterns
|
|
362
|
+
for (const pattern of this.MALICIOUS_SOURCE_PATTERNS) {
|
|
363
|
+
if (pattern.test(source)) {
|
|
364
|
+
this.sourceReputationCache.set(source, 10);
|
|
365
|
+
return { trusted: false, score: 10, reason: "Matches malicious source pattern" };
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
// Check trusted sources
|
|
369
|
+
for (const trusted of this.config.trustedSources) {
|
|
370
|
+
if (source.includes(trusted) || new RegExp(trusted, "i").test(source)) {
|
|
371
|
+
this.sourceReputationCache.set(source, 90);
|
|
372
|
+
return { trusted: true, score: 90, reason: "Trusted source" };
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
// Analyze source URL/path
|
|
376
|
+
try {
|
|
377
|
+
const url = new URL(source);
|
|
378
|
+
// HTTPS is more trusted
|
|
379
|
+
if (url.protocol === "https:") {
|
|
380
|
+
score += 15;
|
|
381
|
+
reason = "HTTPS source";
|
|
382
|
+
}
|
|
383
|
+
// Well-known domains get bonus
|
|
384
|
+
const trustedDomains = [".gov", ".edu", ".org", "wikipedia.org", "microsoft.com", "google.com"];
|
|
385
|
+
for (const domain of trustedDomains) {
|
|
386
|
+
if (url.hostname.endsWith(domain)) {
|
|
387
|
+
score += 20;
|
|
388
|
+
reason = `Trusted domain: ${domain}`;
|
|
389
|
+
break;
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
// Suspicious URL patterns
|
|
393
|
+
if (url.pathname.includes("..") || url.search.includes("<")) {
|
|
394
|
+
score -= 30;
|
|
395
|
+
reason = "Suspicious URL pattern";
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
catch {
|
|
399
|
+
// Local file path
|
|
400
|
+
if (source.startsWith("/") || source.match(/^[A-Z]:\\/)) {
|
|
401
|
+
score = 60;
|
|
402
|
+
reason = "Local file path";
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
this.sourceReputationCache.set(source, score);
|
|
406
|
+
return {
|
|
407
|
+
trusted: score >= this.config.minTrustScore,
|
|
408
|
+
score,
|
|
409
|
+
reason,
|
|
410
|
+
};
|
|
411
|
+
}
|
|
412
|
+
/**
|
|
413
|
+
* Add trusted source
|
|
414
|
+
*/
|
|
415
|
+
addTrustedSource(source) {
|
|
416
|
+
if (!this.config.trustedSources.includes(source)) {
|
|
417
|
+
this.config.trustedSources.push(source);
|
|
418
|
+
}
|
|
419
|
+
this.sourceReputationCache.set(source, 90);
|
|
420
|
+
}
|
|
421
|
+
/**
|
|
422
|
+
* Add blocked source
|
|
423
|
+
*/
|
|
424
|
+
addBlockedSource(source) {
|
|
425
|
+
if (!this.config.blockedSources.includes(source)) {
|
|
426
|
+
this.config.blockedSources.push(source);
|
|
427
|
+
}
|
|
428
|
+
this.sourceReputationCache.set(source, 0);
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Register known good content hash
|
|
432
|
+
*/
|
|
433
|
+
registerKnownGoodHash(content) {
|
|
434
|
+
const hash = this.hashContent(content);
|
|
435
|
+
this.config.knownGoodHashes.add(hash);
|
|
436
|
+
return hash;
|
|
437
|
+
}
|
|
438
|
+
/**
|
|
439
|
+
* Clear source reputation cache
|
|
440
|
+
*/
|
|
441
|
+
clearSourceCache() {
|
|
442
|
+
this.sourceReputationCache.clear();
|
|
443
|
+
}
|
|
444
|
+
detectInjections(content) {
|
|
445
|
+
const patterns = [];
|
|
446
|
+
const violations = [];
|
|
447
|
+
let riskContribution = 0;
|
|
448
|
+
for (const { name, pattern, severity } of this.RAG_INJECTION_PATTERNS) {
|
|
449
|
+
const matches = content.match(pattern);
|
|
450
|
+
if (matches) {
|
|
451
|
+
patterns.push(name);
|
|
452
|
+
violations.push(`injection_${name}`);
|
|
453
|
+
riskContribution += severity;
|
|
454
|
+
}
|
|
455
|
+
}
|
|
456
|
+
// Check for excessive special characters (possible obfuscation)
|
|
457
|
+
const specialCharRatio = (content.match(/[^\w\s]/g) || []).length / content.length;
|
|
458
|
+
if (specialCharRatio > 0.3) {
|
|
459
|
+
patterns.push("high_special_char_ratio");
|
|
460
|
+
violations.push("possible_obfuscation");
|
|
461
|
+
riskContribution += 15;
|
|
462
|
+
}
|
|
463
|
+
// Check for invisible unicode
|
|
464
|
+
const invisibleChars = content.match(/[\u200B-\u200D\uFEFF\u2060-\u206F]/g);
|
|
465
|
+
if (invisibleChars && invisibleChars.length > 5) {
|
|
466
|
+
patterns.push("invisible_unicode");
|
|
467
|
+
violations.push("hidden_characters");
|
|
468
|
+
riskContribution += 20;
|
|
469
|
+
}
|
|
470
|
+
return {
|
|
471
|
+
found: patterns.length > 0,
|
|
472
|
+
patterns,
|
|
473
|
+
violations,
|
|
474
|
+
riskContribution: Math.min(100, riskContribution),
|
|
475
|
+
};
|
|
476
|
+
}
|
|
477
|
+
checkMetadata(metadata) {
|
|
478
|
+
const violations = [];
|
|
479
|
+
let riskContribution = 0;
|
|
480
|
+
const metadataStr = JSON.stringify(metadata);
|
|
481
|
+
for (const { name, pattern } of this.SUSPICIOUS_METADATA_PATTERNS) {
|
|
482
|
+
if (pattern.test(metadataStr)) {
|
|
483
|
+
violations.push(`metadata_${name}`);
|
|
484
|
+
riskContribution += 15;
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
// Check for injection in specific fields
|
|
488
|
+
for (const { name, pattern, severity } of this.RAG_INJECTION_PATTERNS.slice(0, 5)) {
|
|
489
|
+
if (pattern.test(metadataStr)) {
|
|
490
|
+
violations.push(`metadata_injection_${name}`);
|
|
491
|
+
riskContribution += severity / 2;
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
return {
|
|
495
|
+
suspicious: violations.length > 0,
|
|
496
|
+
violations,
|
|
497
|
+
riskContribution: Math.min(50, riskContribution),
|
|
498
|
+
};
|
|
499
|
+
}
|
|
500
|
+
checkEmbedding(embedding, retrievalScore) {
|
|
501
|
+
// Simplified embedding anomaly detection
|
|
502
|
+
// Check for invalid values (NaN, Infinity, null) - CRITICAL, always block
|
|
503
|
+
if (embedding.some((v) => v === null || v === undefined || typeof v !== "number" || !isFinite(v))) {
|
|
504
|
+
return { anomalous: true, reason: "Invalid embedding values (NaN/Infinity/null)", shouldBlock: true };
|
|
505
|
+
}
|
|
506
|
+
// Check for suspiciously uniform embeddings
|
|
507
|
+
const uniqueValues = new Set(embedding.map((v) => Math.round(v * 100) / 100));
|
|
508
|
+
if (uniqueValues.size < embedding.length * 0.1) {
|
|
509
|
+
return { anomalous: true, reason: "Suspiciously uniform embedding", shouldBlock: true };
|
|
510
|
+
}
|
|
511
|
+
// Check for mismatch between high retrieval score and embedding characteristics
|
|
512
|
+
const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
|
|
513
|
+
if (retrievalScore > 0.9 && magnitude < 0.1) {
|
|
514
|
+
return { anomalous: true, reason: "Score/embedding mismatch" };
|
|
515
|
+
}
|
|
516
|
+
return { anomalous: false };
|
|
517
|
+
}
|
|
518
|
+
sanitizeDocument(doc) {
|
|
519
|
+
let sanitizedContent = doc.content;
|
|
520
|
+
// Remove injection patterns
|
|
521
|
+
for (const { pattern } of this.RAG_INJECTION_PATTERNS) {
|
|
522
|
+
sanitizedContent = sanitizedContent.replace(pattern, "[REDACTED]");
|
|
523
|
+
}
|
|
524
|
+
// Remove invisible characters
|
|
525
|
+
sanitizedContent = sanitizedContent.replace(/[\u200B-\u200D\uFEFF\u2060-\u206F]/g, "");
|
|
526
|
+
// Escape potential delimiter breakers
|
|
527
|
+
sanitizedContent = sanitizedContent.replace(/(\[{3,}|\]{3,}|\{{3,}|\}{3,}|<{3,}|>{3,})/g, "");
|
|
528
|
+
return {
|
|
529
|
+
...doc,
|
|
530
|
+
content: sanitizedContent,
|
|
531
|
+
metadata: {
|
|
532
|
+
...doc.metadata,
|
|
533
|
+
_sanitized: true,
|
|
534
|
+
_originalLength: doc.content.length,
|
|
535
|
+
_sanitizedLength: sanitizedContent.length,
|
|
536
|
+
},
|
|
537
|
+
};
|
|
538
|
+
}
|
|
539
|
+
hashContent(content) {
|
|
540
|
+
return crypto.createHash("sha256").update(content).digest("hex");
|
|
541
|
+
}
|
|
542
|
+
generateRecommendations(violations, hasUntrustedSources) {
|
|
543
|
+
const recommendations = [];
|
|
544
|
+
if (hasUntrustedSources) {
|
|
545
|
+
recommendations.push("Review and whitelist trusted document sources");
|
|
546
|
+
}
|
|
547
|
+
if (violations.some((v) => v.includes("injection"))) {
|
|
548
|
+
recommendations.push("Implement document sanitization in your RAG pipeline");
|
|
549
|
+
}
|
|
550
|
+
if (violations.some((v) => v.includes("hash"))) {
|
|
551
|
+
recommendations.push("Enable content integrity verification with known good hashes");
|
|
552
|
+
}
|
|
553
|
+
if (violations.some((v) => v.includes("oversized"))) {
|
|
554
|
+
recommendations.push("Implement document chunking with size limits");
|
|
555
|
+
}
|
|
556
|
+
if (violations.some((v) => v.includes("embedding"))) {
|
|
557
|
+
recommendations.push("Add embedding validation to your vector store pipeline");
|
|
558
|
+
}
|
|
559
|
+
if (recommendations.length === 0) {
|
|
560
|
+
recommendations.push("Continue monitoring RAG document sources");
|
|
561
|
+
}
|
|
562
|
+
return recommendations;
|
|
563
|
+
}
|
|
564
|
+
// ============= v2 Enhanced Detection Methods =============
|
|
565
|
+
/**
|
|
566
|
+
* Detect advanced embedding attacks (backdoor, adversarial perturbation)
|
|
567
|
+
*/
|
|
568
|
+
detectEmbeddingAttacks(embedding, retrievalScore) {
|
|
569
|
+
const attackTypes = [];
|
|
570
|
+
const details = {};
|
|
571
|
+
let riskScore = 0;
|
|
572
|
+
// Check embedding dimension
|
|
573
|
+
if (embedding.length !== this.config.embeddingDimension) {
|
|
574
|
+
attackTypes.push("dimension_mismatch");
|
|
575
|
+
riskScore += 20;
|
|
576
|
+
}
|
|
577
|
+
// Calculate embedding magnitude
|
|
578
|
+
const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
|
|
579
|
+
const [minMag, maxMag] = this.config.embeddingMagnitudeRange;
|
|
580
|
+
// Check for magnitude anomalies
|
|
581
|
+
if (magnitude < minMag || magnitude > maxMag) {
|
|
582
|
+
attackTypes.push("magnitude_anomaly");
|
|
583
|
+
details.magnitude_anomaly = true;
|
|
584
|
+
riskScore += 25;
|
|
585
|
+
}
|
|
586
|
+
// Check for adversarial perturbation patterns
|
|
587
|
+
// Adversarial embeddings often have unusual value distributions
|
|
588
|
+
const values = embedding.map(Math.abs);
|
|
589
|
+
const sortedValues = [...values].sort((a, b) => b - a);
|
|
590
|
+
const topValues = sortedValues.slice(0, 10);
|
|
591
|
+
const avgTop = topValues.reduce((a, b) => a + b, 0) / topValues.length;
|
|
592
|
+
const avgAll = values.reduce((a, b) => a + b, 0) / values.length;
|
|
593
|
+
// Adversarial perturbations often spike certain dimensions
|
|
594
|
+
if (avgTop > avgAll * 10) {
|
|
595
|
+
attackTypes.push("adversarial_perturbation");
|
|
596
|
+
details.adversarial_perturbation = true;
|
|
597
|
+
riskScore += 35;
|
|
598
|
+
}
|
|
599
|
+
// Check for backdoor patterns
|
|
600
|
+
// Backdoor embeddings often have repeated patterns
|
|
601
|
+
const chunkSize = Math.min(50, Math.floor(embedding.length / 10));
|
|
602
|
+
const chunks = [];
|
|
603
|
+
for (let i = 0; i < embedding.length - chunkSize; i += chunkSize) {
|
|
604
|
+
chunks.push(embedding.slice(i, i + chunkSize));
|
|
605
|
+
}
|
|
606
|
+
// Check for repeated chunks (backdoor signature)
|
|
607
|
+
if (chunks.length >= 2) {
|
|
608
|
+
for (let i = 0; i < chunks.length - 1; i++) {
|
|
609
|
+
const similarity = this.cosineSimilarity(chunks[i], chunks[i + 1]);
|
|
610
|
+
if (similarity > this.config.similarityThreshold) {
|
|
611
|
+
attackTypes.push("backdoor_pattern");
|
|
612
|
+
details.backdoor_pattern = true;
|
|
613
|
+
riskScore += 40;
|
|
614
|
+
break;
|
|
615
|
+
}
|
|
616
|
+
}
|
|
617
|
+
}
|
|
618
|
+
// Check distribution anomalies
|
|
619
|
+
const mean = embedding.reduce((a, b) => a + b, 0) / embedding.length;
|
|
620
|
+
const variance = embedding.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / embedding.length;
|
|
621
|
+
const stdDev = Math.sqrt(variance);
|
|
622
|
+
// Normal embeddings usually have stdDev in reasonable range
|
|
623
|
+
if (stdDev < 0.001 || stdDev > 2.0) {
|
|
624
|
+
attackTypes.push("distribution_anomaly");
|
|
625
|
+
details.distribution_anomaly = true;
|
|
626
|
+
riskScore += 20;
|
|
627
|
+
}
|
|
628
|
+
// High retrieval score with suspicious embedding
|
|
629
|
+
if (retrievalScore && retrievalScore > 0.95 && riskScore > 20) {
|
|
630
|
+
attackTypes.push("suspicious_high_score");
|
|
631
|
+
riskScore += 15;
|
|
632
|
+
}
|
|
633
|
+
return {
|
|
634
|
+
detected: attackTypes.length > 0,
|
|
635
|
+
attack_type: attackTypes,
|
|
636
|
+
risk_score: Math.min(100, riskScore),
|
|
637
|
+
details,
|
|
638
|
+
};
|
|
639
|
+
}
|
|
640
|
+
/**
|
|
641
|
+
* Detect indirect prompt injection patterns
|
|
642
|
+
*/
|
|
643
|
+
detectIndirectInjection(content) {
|
|
644
|
+
const patterns = [];
|
|
645
|
+
const violations = [];
|
|
646
|
+
let riskContribution = 0;
|
|
647
|
+
for (const { name, pattern, severity } of this.INDIRECT_INJECTION_PATTERNS) {
|
|
648
|
+
const matches = content.match(pattern);
|
|
649
|
+
if (matches) {
|
|
650
|
+
patterns.push(name);
|
|
651
|
+
violations.push(`indirect_injection_${name}`);
|
|
652
|
+
riskContribution += severity;
|
|
653
|
+
}
|
|
654
|
+
}
|
|
655
|
+
return {
|
|
656
|
+
found: patterns.length > 0,
|
|
657
|
+
patterns,
|
|
658
|
+
violations,
|
|
659
|
+
riskContribution: Math.min(100, riskContribution),
|
|
660
|
+
};
|
|
661
|
+
}
|
|
662
|
+
/**
|
|
663
|
+
* Detect steganography (hidden data in content)
|
|
664
|
+
*/
|
|
665
|
+
detectSteganography(content) {
|
|
666
|
+
const violations = [];
|
|
667
|
+
let riskContribution = 0;
|
|
668
|
+
// Zero-width character steganography - lower threshold
|
|
669
|
+
const zeroWidthChars = content.match(/[\u200B-\u200F\u2028-\u202F\uFEFF]+/g);
|
|
670
|
+
if (zeroWidthChars) {
|
|
671
|
+
const totalZeroWidth = zeroWidthChars.reduce((sum, m) => sum + m.length, 0);
|
|
672
|
+
// Lower threshold to 3 (any zero-width chars are suspicious in normal text)
|
|
673
|
+
if (totalZeroWidth >= 3) {
|
|
674
|
+
violations.push("zero_width_steganography");
|
|
675
|
+
riskContribution += 40 + Math.min(30, totalZeroWidth * 5);
|
|
676
|
+
}
|
|
677
|
+
}
|
|
678
|
+
// Whitespace pattern encoding - multiple checks
|
|
679
|
+
const tabSpacePattern = /\s{4,}\t+\s+|\t{2,}\s+\t/;
|
|
680
|
+
if (tabSpacePattern.test(content)) {
|
|
681
|
+
violations.push("whitespace_encoding");
|
|
682
|
+
riskContribution += 35;
|
|
683
|
+
}
|
|
684
|
+
const whitespaceRatio = (content.match(/[\t\n\r ]/g) || []).length / content.length;
|
|
685
|
+
if (whitespaceRatio > 0.35) {
|
|
686
|
+
violations.push("excessive_whitespace_ratio");
|
|
687
|
+
riskContribution += 25;
|
|
688
|
+
}
|
|
689
|
+
// Unicode tag character steganography (U+E0000-U+E007F)
|
|
690
|
+
const tagChars = content.match(/[\uDB40][\uDC00-\uDC7F]/g);
|
|
691
|
+
if (tagChars && tagChars.length > 0) {
|
|
692
|
+
violations.push("unicode_tag_steganography");
|
|
693
|
+
riskContribution += 40;
|
|
694
|
+
}
|
|
695
|
+
// Variation selector abuse (U+FE00-U+FE0F)
|
|
696
|
+
const variationSelectors = content.match(/[\uFE00-\uFE0F]/g);
|
|
697
|
+
if (variationSelectors && variationSelectors.length > 5) {
|
|
698
|
+
violations.push("variation_selector_abuse");
|
|
699
|
+
riskContribution += 25;
|
|
700
|
+
}
|
|
701
|
+
// Binary-like pattern in text (potential hidden data)
|
|
702
|
+
const binaryPattern = content.match(/[01]{16,}/g);
|
|
703
|
+
if (binaryPattern) {
|
|
704
|
+
violations.push("binary_steganography");
|
|
705
|
+
riskContribution += 30;
|
|
706
|
+
}
|
|
707
|
+
return {
|
|
708
|
+
found: violations.length > 0,
|
|
709
|
+
violations,
|
|
710
|
+
riskContribution: Math.min(100, riskContribution),
|
|
711
|
+
};
|
|
712
|
+
}
|
|
713
|
+
/**
|
|
714
|
+
* Calculate cosine similarity between two vectors
|
|
715
|
+
*/
|
|
716
|
+
cosineSimilarity(a, b) {
|
|
717
|
+
if (a.length !== b.length)
|
|
718
|
+
return 0;
|
|
719
|
+
const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
|
|
720
|
+
const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
|
|
721
|
+
const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
|
|
722
|
+
if (magnitudeA === 0 || magnitudeB === 0)
|
|
723
|
+
return 0;
|
|
724
|
+
return dotProduct / (magnitudeA * magnitudeB);
|
|
725
|
+
}
|
|
726
|
+
/**
|
|
727
|
+
* Analyze a batch of embeddings for clustering anomalies
|
|
728
|
+
*/
|
|
729
|
+
analyzeEmbeddingCluster(embeddings) {
|
|
730
|
+
if (embeddings.length < 3) {
|
|
731
|
+
return { anomalous: false, anomalousIndices: [], reason: "Not enough embeddings for cluster analysis" };
|
|
732
|
+
}
|
|
733
|
+
const anomalousIndices = [];
|
|
734
|
+
// Calculate pairwise similarities
|
|
735
|
+
const similarities = [];
|
|
736
|
+
for (let i = 0; i < embeddings.length; i++) {
|
|
737
|
+
similarities[i] = [];
|
|
738
|
+
for (let j = 0; j < embeddings.length; j++) {
|
|
739
|
+
if (i === j) {
|
|
740
|
+
similarities[i][j] = 1;
|
|
741
|
+
}
|
|
742
|
+
else {
|
|
743
|
+
similarities[i][j] = this.cosineSimilarity(embeddings[i], embeddings[j]);
|
|
744
|
+
}
|
|
745
|
+
}
|
|
746
|
+
}
|
|
747
|
+
// Find embeddings with unusually high or low similarity to all others
|
|
748
|
+
for (let i = 0; i < embeddings.length; i++) {
|
|
749
|
+
const avgSimilarity = similarities[i].reduce((a, b) => a + b, 0) / embeddings.length;
|
|
750
|
+
// Anomaly: embedding is too similar to everything (potential backdoor)
|
|
751
|
+
if (avgSimilarity > this.config.similarityThreshold) {
|
|
752
|
+
anomalousIndices.push(i);
|
|
753
|
+
}
|
|
754
|
+
// Anomaly: embedding is dissimilar to everything (potential outlier attack)
|
|
755
|
+
if (avgSimilarity < 0.3) {
|
|
756
|
+
anomalousIndices.push(i);
|
|
757
|
+
}
|
|
758
|
+
}
|
|
759
|
+
return {
|
|
760
|
+
anomalous: anomalousIndices.length > 0,
|
|
761
|
+
anomalousIndices: [...new Set(anomalousIndices)],
|
|
762
|
+
reason: anomalousIndices.length > 0
|
|
763
|
+
? `${anomalousIndices.length} embeddings show clustering anomalies`
|
|
764
|
+
: "No clustering anomalies detected",
|
|
765
|
+
};
|
|
766
|
+
}
|
|
767
|
+
}
|
|
768
|
+
// Expose the RAGGuard class on this module's CommonJS exports object.
exports.RAGGuard = RAGGuard;
|
|
769
|
+
//# sourceMappingURL=rag-guard.js.map
|