llm-trust-guard 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +318 -0
  3. package/dist/guards/agent-communication-guard.d.ts +169 -0
  4. package/dist/guards/agent-communication-guard.d.ts.map +1 -0
  5. package/dist/guards/agent-communication-guard.js +468 -0
  6. package/dist/guards/agent-communication-guard.js.map +1 -0
  7. package/dist/guards/autonomy-escalation-guard.d.ts +137 -0
  8. package/dist/guards/autonomy-escalation-guard.d.ts.map +1 -0
  9. package/dist/guards/autonomy-escalation-guard.js +470 -0
  10. package/dist/guards/autonomy-escalation-guard.js.map +1 -0
  11. package/dist/guards/circuit-breaker.d.ts +142 -0
  12. package/dist/guards/circuit-breaker.d.ts.map +1 -0
  13. package/dist/guards/circuit-breaker.js +347 -0
  14. package/dist/guards/circuit-breaker.js.map +1 -0
  15. package/dist/guards/code-execution-guard.d.ts +114 -0
  16. package/dist/guards/code-execution-guard.d.ts.map +1 -0
  17. package/dist/guards/code-execution-guard.js +467 -0
  18. package/dist/guards/code-execution-guard.js.map +1 -0
  19. package/dist/guards/conversation-guard.d.ts +73 -0
  20. package/dist/guards/conversation-guard.d.ts.map +1 -0
  21. package/dist/guards/conversation-guard.js +281 -0
  22. package/dist/guards/conversation-guard.js.map +1 -0
  23. package/dist/guards/drift-detector.d.ts +182 -0
  24. package/dist/guards/drift-detector.d.ts.map +1 -0
  25. package/dist/guards/drift-detector.js +480 -0
  26. package/dist/guards/drift-detector.js.map +1 -0
  27. package/dist/guards/encoding-detector.d.ts +76 -0
  28. package/dist/guards/encoding-detector.d.ts.map +1 -0
  29. package/dist/guards/encoding-detector.js +698 -0
  30. package/dist/guards/encoding-detector.js.map +1 -0
  31. package/dist/guards/execution-monitor.d.ts +73 -0
  32. package/dist/guards/execution-monitor.d.ts.map +1 -0
  33. package/dist/guards/execution-monitor.js +205 -0
  34. package/dist/guards/execution-monitor.js.map +1 -0
  35. package/dist/guards/input-sanitizer.d.ts +87 -0
  36. package/dist/guards/input-sanitizer.d.ts.map +1 -0
  37. package/dist/guards/input-sanitizer.js +301 -0
  38. package/dist/guards/input-sanitizer.js.map +1 -0
  39. package/dist/guards/mcp-security-guard.d.ts +204 -0
  40. package/dist/guards/mcp-security-guard.d.ts.map +1 -0
  41. package/dist/guards/mcp-security-guard.js +618 -0
  42. package/dist/guards/mcp-security-guard.js.map +1 -0
  43. package/dist/guards/memory-guard.d.ts +124 -0
  44. package/dist/guards/memory-guard.d.ts.map +1 -0
  45. package/dist/guards/memory-guard.js +476 -0
  46. package/dist/guards/memory-guard.js.map +1 -0
  47. package/dist/guards/multimodal-guard.d.ts +93 -0
  48. package/dist/guards/multimodal-guard.d.ts.map +1 -0
  49. package/dist/guards/multimodal-guard.js +507 -0
  50. package/dist/guards/multimodal-guard.js.map +1 -0
  51. package/dist/guards/output-filter.d.ts +76 -0
  52. package/dist/guards/output-filter.d.ts.map +1 -0
  53. package/dist/guards/output-filter.js +289 -0
  54. package/dist/guards/output-filter.js.map +1 -0
  55. package/dist/guards/policy-gate.d.ts +57 -0
  56. package/dist/guards/policy-gate.d.ts.map +1 -0
  57. package/dist/guards/policy-gate.js +182 -0
  58. package/dist/guards/policy-gate.js.map +1 -0
  59. package/dist/guards/prompt-leakage-guard.d.ts +110 -0
  60. package/dist/guards/prompt-leakage-guard.d.ts.map +1 -0
  61. package/dist/guards/prompt-leakage-guard.js +529 -0
  62. package/dist/guards/prompt-leakage-guard.js.map +1 -0
  63. package/dist/guards/rag-guard.d.ts +188 -0
  64. package/dist/guards/rag-guard.d.ts.map +1 -0
  65. package/dist/guards/rag-guard.js +769 -0
  66. package/dist/guards/rag-guard.js.map +1 -0
  67. package/dist/guards/schema-validator.d.ts +35 -0
  68. package/dist/guards/schema-validator.d.ts.map +1 -0
  69. package/dist/guards/schema-validator.js +316 -0
  70. package/dist/guards/schema-validator.js.map +1 -0
  71. package/dist/guards/state-persistence-guard.d.ts +153 -0
  72. package/dist/guards/state-persistence-guard.d.ts.map +1 -0
  73. package/dist/guards/state-persistence-guard.js +484 -0
  74. package/dist/guards/state-persistence-guard.js.map +1 -0
  75. package/dist/guards/tenant-boundary.d.ts +67 -0
  76. package/dist/guards/tenant-boundary.d.ts.map +1 -0
  77. package/dist/guards/tenant-boundary.js +187 -0
  78. package/dist/guards/tenant-boundary.js.map +1 -0
  79. package/dist/guards/tool-chain-validator.d.ts +102 -0
  80. package/dist/guards/tool-chain-validator.d.ts.map +1 -0
  81. package/dist/guards/tool-chain-validator.js +480 -0
  82. package/dist/guards/tool-chain-validator.js.map +1 -0
  83. package/dist/guards/tool-registry.d.ts +45 -0
  84. package/dist/guards/tool-registry.d.ts.map +1 -0
  85. package/dist/guards/tool-registry.js +155 -0
  86. package/dist/guards/tool-registry.js.map +1 -0
  87. package/dist/guards/trust-exploitation-guard.d.ts +134 -0
  88. package/dist/guards/trust-exploitation-guard.d.ts.map +1 -0
  89. package/dist/guards/trust-exploitation-guard.js +354 -0
  90. package/dist/guards/trust-exploitation-guard.js.map +1 -0
  91. package/dist/index.d.ts +133 -0
  92. package/dist/index.d.ts.map +1 -0
  93. package/dist/index.js +430 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/integrations/express.d.ts +119 -0
  96. package/dist/integrations/express.d.ts.map +1 -0
  97. package/dist/integrations/express.js +244 -0
  98. package/dist/integrations/express.js.map +1 -0
  99. package/dist/integrations/index.d.ts +9 -0
  100. package/dist/integrations/index.d.ts.map +1 -0
  101. package/dist/integrations/index.js +26 -0
  102. package/dist/integrations/index.js.map +1 -0
  103. package/dist/integrations/langchain.d.ts +165 -0
  104. package/dist/integrations/langchain.d.ts.map +1 -0
  105. package/dist/integrations/langchain.js +308 -0
  106. package/dist/integrations/langchain.js.map +1 -0
  107. package/dist/integrations/openai.d.ts +205 -0
  108. package/dist/integrations/openai.d.ts.map +1 -0
  109. package/dist/integrations/openai.js +380 -0
  110. package/dist/integrations/openai.js.map +1 -0
  111. package/dist/types/index.d.ts +245 -0
  112. package/dist/types/index.d.ts.map +1 -0
  113. package/dist/types/index.js +6 -0
  114. package/dist/types/index.js.map +1 -0
  115. package/package.json +64 -0
@@ -0,0 +1,769 @@
1
+ "use strict";
2
+ /**
3
+ * RAGGuard (L10) v2
4
+ *
5
+ * Validates RAG (Retrieval Augmented Generation) content before injection.
6
+ * Protects against supply chain attacks via poisoned documents and embeddings.
7
+ *
8
+ * Threat Model:
9
+ * - ASI04: Agentic Supply Chain Vulnerabilities
10
+ * - RAG Poisoning: Malicious content in retrieved documents
11
+ * - Embedding manipulation attacks
12
+ * - Indirect prompt injection via documents
13
+ *
14
+ * Protection Capabilities (v2 Enhanced):
15
+ * - Retrieved document sanitization
16
+ * - Source verification and trust scoring
17
+ * - Injection pattern detection in documents
18
+ * - Content integrity verification
19
+ * - Suspicious document quarantine
20
+ * - Advanced embedding attack detection (backdoor, adversarial)
21
+ * - Unicode steganography detection
22
+ * - Markdown/HTML hidden instruction detection
23
+ * - Cross-document similarity anomaly detection
24
+ * - Embedding norm and distribution analysis
25
+ */
// CommonJS interop helpers (same contract as the TypeScript-emitted ones).
// Each helper reuses an implementation already present on `this` when there is one.

// Exposes property `key` of `source` on `target` under the name `alias`
// (defaults to `key`). Uses a live getter unless the source property is an
// accessor on an ES module or a read-only, non-configurable data property.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) alias = key;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var needsGetter = !descriptor ||
        ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
    if (needsGetter) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    if (alias === undefined) alias = key;
    target[alias] = source[key];
}));
// Installs `value` as the enumerable `default` export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});
// Wraps a CommonJS module as a namespace object: every own property except
// "default" is re-exported, and the module itself becomes `default`.
// Modules already flagged `__esModule` are returned untouched.
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use, then replaces itself with the chosen implementation.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (o) {
            var names = [];
            for (var name in o) if (Object.prototype.hasOwnProperty.call(o, name)) names[names.length] = name;
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") __createBinding(namespace, mod, keys[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
59
+ Object.defineProperty(exports, "__esModule", { value: true });
60
+ exports.RAGGuard = void 0;
61
+ const crypto = __importStar(require("crypto"));
62
+ class RAGGuard {
63
+ constructor(config = {}) {
64
+ this.contentHashCache = new Map();
65
+ this.sourceReputationCache = new Map();
66
+ // Injection patterns specifically targeting RAG/document content
67
+ this.RAG_INJECTION_PATTERNS = [
68
+ // Direct instruction injection
69
+ { name: "instruction_override", pattern: /ignore\s+(all\s+)?previous\s+(context|documents|information)/i, severity: 50 },
70
+ { name: "system_instruction", pattern: /\[SYSTEM\]|\[INSTRUCTION\]|\[OVERRIDE\]|<system>|<instruction>/i, severity: 45 },
71
+ { name: "role_injection", pattern: /you\s+(are|must|should)\s+(now\s+)?(be|act\s+as|become)/i, severity: 40 },
72
+ // Delimiter injection (to break out of RAG context)
73
+ { name: "delimiter_break", pattern: /---\s*(end|stop)\s*(of\s*)?(context|document|rag)/i, severity: 45 },
74
+ { name: "context_escape", pattern: /\]\]\]|\}\}\}|<<<|>>>|'''|"""/g, severity: 30 },
75
+ // Hidden instruction markers
76
+ { name: "hidden_instruction", pattern: /HIDDEN:|SECRET:|INVISIBLE:|DO_NOT_DISPLAY:/i, severity: 50 },
77
+ { name: "admin_marker", pattern: /ADMIN_INSTRUCTION|ROOT_COMMAND|ELEVATED_PROMPT/i, severity: 55 },
78
+ // Data exfiltration setup
79
+ { name: "exfil_setup", pattern: /send\s+(all|this|data)\s+to|forward\s+to\s+https?:\/\//i, severity: 50 },
80
+ { name: "callback_injection", pattern: /callback\s*[:=]\s*https?:\/\/|webhook\s*[:=]/i, severity: 45 },
81
+ // Tool/action injection via documents
82
+ { name: "tool_injection", pattern: /call\s+(tool|function|action)\s*[:=]|execute\s*[:=]/i, severity: 45 },
83
+ { name: "code_injection", pattern: /```(javascript|python|bash|sh)\s*\n[^`]*\b(eval|exec|system|subprocess)\b/i, severity: 50 },
84
+ // Persona/behavior modification
85
+ { name: "persona_override", pattern: /your\s+(new\s+)?(persona|identity|character)\s+(is|will\s+be)/i, severity: 40 },
86
+ { name: "behavior_mod", pattern: /always\s+(respond|reply|answer)\s+with|never\s+(mention|reveal|disclose)/i, severity: 35 },
87
+ // Prompt leakage attempts
88
+ { name: "prompt_extraction", pattern: /reveal\s+(your\s+)?(system\s+)?prompt|show\s+(me\s+)?(your\s+)?instructions/i, severity: 40 },
89
+ { name: "debug_mode", pattern: /enable\s+debug|activate\s+developer\s+mode|enter\s+test\s+mode/i, severity: 35 },
90
+ ];
91
+ // Suspicious metadata patterns
92
+ this.SUSPICIOUS_METADATA_PATTERNS = [
93
+ { name: "script_in_title", pattern: /<script|javascript:/i },
94
+ { name: "injection_in_author", pattern: /admin|system|root|override/i },
95
+ { name: "suspicious_content_type", pattern: /application\/x-|text\/x-/i },
96
+ ];
97
+ // Known malicious source patterns
98
+ this.MALICIOUS_SOURCE_PATTERNS = [
99
+ /pastebin\.com/i,
100
+ /hastebin\.com/i,
101
+ /gist\.githubusercontent\.com.*injection/i,
102
+ /raw\.githubusercontent\.com.*malicious/i,
103
+ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/, // IP addresses
104
+ ];
105
+ // Indirect prompt injection patterns (v2)
106
+ this.INDIRECT_INJECTION_PATTERNS = [
107
+ // HTML/Markdown hidden instructions
108
+ { name: "html_comment_injection", pattern: /<!--[\s\S]*?(ignore|override|system|instruction|admin)[\s\S]*?-->/i, severity: 45 },
109
+ { name: "markdown_hidden", pattern: /\[[\s\S]*?\]\(javascript:|data:text\/html|about:blank\)/i, severity: 50 },
110
+ { name: "invisible_link", pattern: /\[]\([^)]+\)/g, severity: 30 },
111
+ // Unicode steganography
112
+ { name: "zero_width_chars", pattern: /[\u200B-\u200F\u2028-\u202F\uFEFF]{3,}/g, severity: 40 },
113
+ { name: "rtl_override", pattern: /[\u202A-\u202E\u2066-\u2069]/g, severity: 35 },
114
+ { name: "confusable_chars", pattern: /[\u0430\u0435\u043E\u0440\u0441\u0443\u0445]/g, severity: 25 }, // Cyrillic lookalikes
115
+ // Whitespace injection
116
+ { name: "excessive_whitespace", pattern: /[\t\n\r]{10,}/g, severity: 20 },
117
+ { name: "tab_encoding", pattern: /\t{5,}/g, severity: 25 },
118
+ // Encoded instructions - enhanced detection
119
+ { name: "base64_block", pattern: /[A-Za-z0-9+/]{40,}={0,2}/g, severity: 40 },
120
+ { name: "base64_with_context", pattern: /(?:encode|decode|base64|reference)[:\s]*[A-Za-z0-9+/]{20,}/i, severity: 45 },
121
+ { name: "hex_encoded", pattern: /\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){5,}/g, severity: 35 },
122
+ { name: "unicode_escape", pattern: /\\u[0-9a-fA-F]{4}(?:\\u[0-9a-fA-F]{4}){3,}/g, severity: 35 },
123
+ // Context switching attempts
124
+ { name: "fake_boundary", pattern: /={5,}|#{5,}|-{10,}/g, severity: 20 },
125
+ { name: "json_injection", pattern: /\{"(role|content|system)":/i, severity: 45 },
126
+ { name: "xml_injection", pattern: /<\/?(?:prompt|assistant|user|system)>/i, severity: 45 },
127
+ ];
128
+ this.config = {
129
+ detectInjections: config.detectInjections ?? true,
130
+ verifySource: config.verifySource ?? true,
131
+ trustedSources: config.trustedSources ?? [],
132
+ blockedSources: config.blockedSources ?? [],
133
+ maxDocumentSize: config.maxDocumentSize ?? 50000, // 50KB
134
+ minTrustScore: config.minTrustScore ?? 30,
135
+ enableContentHashing: config.enableContentHashing ?? true,
136
+ knownGoodHashes: config.knownGoodHashes ?? new Set(),
137
+ autoSanitize: config.autoSanitize ?? true,
138
+ // v2 options
139
+ detectEmbeddingAttacks: config.detectEmbeddingAttacks ?? true,
140
+ embeddingDimension: config.embeddingDimension ?? 1536, // OpenAI default
141
+ detectSteganography: config.detectSteganography ?? true,
142
+ detectClusteringAnomalies: config.detectClusteringAnomalies ?? true,
143
+ embeddingMagnitudeRange: config.embeddingMagnitudeRange ?? [0.8, 1.2],
144
+ similarityThreshold: config.similarityThreshold ?? 0.95,
145
+ detectIndirectInjection: config.detectIndirectInjection ?? true,
146
+ };
147
+ }
148
+ /**
149
+ * Validate RAG documents before injecting into context
150
+ */
151
+ validate(documents, requestId) {
152
+ const reqId = requestId || `rag-${Date.now()}`;
153
+ const violations = [];
154
+ const blockedIds = [];
155
+ const untrustedSources = [];
156
+ const sanitizedDocs = [];
157
+ const embeddingAnalysis = [];
158
+ let injectionAttempts = 0;
159
+ let documentsBlocked = 0;
160
+ let documentsSanitized = 0;
161
+ let totalTrustScore = 0;
162
+ let embeddingAttacksDetected = 0;
163
+ let steganographyDetected = 0;
164
+ let indirectInjectionAttempts = 0;
165
+ for (const doc of documents) {
166
+ let docViolations = [];
167
+ let docRiskScore = 0;
168
+ let shouldBlock = false;
169
+ let needsSanitization = false;
170
+ // Check document size
171
+ if (doc.content.length > this.config.maxDocumentSize) {
172
+ docViolations.push("oversized_document");
173
+ docRiskScore += 20;
174
+ }
175
+ // Verify source
176
+ if (this.config.verifySource) {
177
+ const sourceResult = this.verifyDocumentSource(doc.source);
178
+ if (!sourceResult.trusted) {
179
+ docViolations.push(`untrusted_source: ${sourceResult.reason}`);
180
+ untrustedSources.push(doc.source);
181
+ docRiskScore += 100 - sourceResult.score;
182
+ if (sourceResult.score < this.config.minTrustScore) {
183
+ shouldBlock = true;
184
+ }
185
+ }
186
+ totalTrustScore += sourceResult.score;
187
+ }
188
+ else {
189
+ totalTrustScore += 50; // Neutral score when not verifying
190
+ }
191
+ // Check content hash if enabled
192
+ if (this.config.enableContentHashing) {
193
+ const hash = this.hashContent(doc.content);
194
+ if (doc.contentHash && doc.contentHash !== hash) {
195
+ docViolations.push("content_hash_mismatch");
196
+ docRiskScore += 40;
197
+ shouldBlock = true;
198
+ }
199
+ // Check against known good hashes
200
+ if (this.config.knownGoodHashes.has(hash)) {
201
+ docRiskScore = Math.max(0, docRiskScore - 30); // Reduce risk for known good content
202
+ }
203
+ }
204
+ // Check for injection patterns
205
+ if (this.config.detectInjections) {
206
+ const injectionResult = this.detectInjections(doc.content);
207
+ if (injectionResult.found) {
208
+ injectionAttempts += injectionResult.patterns.length;
209
+ docViolations.push(...injectionResult.violations);
210
+ docRiskScore += injectionResult.riskContribution;
211
+ needsSanitization = true;
212
+ if (injectionResult.riskContribution >= 50) {
213
+ shouldBlock = true;
214
+ }
215
+ }
216
+ }
217
+ // Check metadata
218
+ if (doc.metadata) {
219
+ const metadataResult = this.checkMetadata(doc.metadata);
220
+ if (metadataResult.suspicious) {
221
+ docViolations.push(...metadataResult.violations);
222
+ docRiskScore += metadataResult.riskContribution;
223
+ }
224
+ }
225
+ // Check embedding anomalies (basic) - runs first to catch critical issues
226
+ if (doc.embedding) {
227
+ // Critical: Check for invalid values (NaN, Infinity, null, non-numbers)
228
+ // Note: JSON serialization converts NaN/Infinity to null
229
+ const hasInvalidValues = doc.embedding.some((v) => v === null ||
230
+ v === undefined ||
231
+ typeof v !== "number" ||
232
+ !isFinite(v) ||
233
+ isNaN(v));
234
+ if (hasInvalidValues) {
235
+ docViolations.push("embedding_contains_invalid_values");
236
+ docRiskScore += 50;
237
+ shouldBlock = true;
238
+ }
239
+ if (doc.retrievalScore !== undefined) {
240
+ const embeddingResult = this.checkEmbedding(doc.embedding, doc.retrievalScore);
241
+ if (embeddingResult.anomalous) {
242
+ docViolations.push(`embedding_anomaly: ${embeddingResult.reason}`);
243
+ docRiskScore += 35;
244
+ if (embeddingResult.shouldBlock) {
245
+ shouldBlock = true;
246
+ }
247
+ }
248
+ }
249
+ }
250
+ // v2: Advanced embedding attack detection
251
+ if (this.config.detectEmbeddingAttacks && doc.embedding) {
252
+ const embeddingAttack = this.detectEmbeddingAttacks(doc.embedding, doc.retrievalScore);
253
+ if (embeddingAttack.detected) {
254
+ embeddingAttacksDetected++;
255
+ embeddingAnalysis.push(embeddingAttack);
256
+ docViolations.push(...embeddingAttack.attack_type.map(t => `embedding_attack: ${t}`));
257
+ docRiskScore += embeddingAttack.risk_score;
258
+ if (embeddingAttack.risk_score >= 40) {
259
+ shouldBlock = true;
260
+ }
261
+ }
262
+ }
263
+ // v2: Indirect injection detection
264
+ if (this.config.detectIndirectInjection) {
265
+ const indirectResult = this.detectIndirectInjection(doc.content);
266
+ if (indirectResult.found) {
267
+ indirectInjectionAttempts += indirectResult.patterns.length;
268
+ docViolations.push(...indirectResult.violations);
269
+ docRiskScore += indirectResult.riskContribution;
270
+ needsSanitization = true;
271
+ if (indirectResult.riskContribution >= 40) {
272
+ shouldBlock = true;
273
+ }
274
+ }
275
+ }
276
+ // v2: Steganography detection
277
+ if (this.config.detectSteganography) {
278
+ const stegoResult = this.detectSteganography(doc.content);
279
+ if (stegoResult.found) {
280
+ steganographyDetected++;
281
+ docViolations.push(...stegoResult.violations);
282
+ docRiskScore += stegoResult.riskContribution;
283
+ needsSanitization = true;
284
+ }
285
+ }
286
+ // Decision for this document
287
+ if (shouldBlock || docRiskScore >= 70) {
288
+ blockedIds.push(doc.id);
289
+ documentsBlocked++;
290
+ violations.push(...docViolations.map((v) => `[${doc.id}] ${v}`));
291
+ }
292
+ else if (needsSanitization && this.config.autoSanitize) {
293
+ const sanitized = this.sanitizeDocument(doc);
294
+ sanitizedDocs.push(sanitized);
295
+ documentsSanitized++;
296
+ violations.push(...docViolations.map((v) => `[${doc.id}] ${v} (sanitized)`));
297
+ }
298
+ else {
299
+ sanitizedDocs.push(doc);
300
+ if (docViolations.length > 0) {
301
+ violations.push(...docViolations.map((v) => `[${doc.id}] ${v} (allowed)`));
302
+ }
303
+ }
304
+ }
305
+ const averageTrustScore = documents.length > 0 ? totalTrustScore / documents.length : 0;
306
+ const blocked = documentsBlocked === documents.length || averageTrustScore < this.config.minTrustScore;
307
+ return {
308
+ allowed: !blocked,
309
+ reason: blocked
310
+ ? `RAG content blocked: ${documentsBlocked}/${documents.length} documents failed validation`
311
+ : "RAG content validated",
312
+ violations,
313
+ request_id: reqId,
314
+ document_analysis: {
315
+ documents_checked: documents.length,
316
+ documents_blocked: documentsBlocked,
317
+ documents_sanitized: documentsSanitized,
318
+ injection_attempts: injectionAttempts,
319
+ untrusted_sources: [...new Set(untrustedSources)],
320
+ average_trust_score: Math.round(averageTrustScore),
321
+ // v2 additions
322
+ embedding_attacks_detected: embeddingAttacksDetected,
323
+ steganography_detected: steganographyDetected,
324
+ indirect_injection_attempts: indirectInjectionAttempts,
325
+ },
326
+ sanitized_documents: blocked ? undefined : sanitizedDocs,
327
+ blocked_document_ids: blockedIds,
328
+ recommendations: this.generateRecommendations(violations, untrustedSources.length > 0),
329
+ // v2 addition
330
+ embedding_analysis: embeddingAnalysis.length > 0 ? embeddingAnalysis : undefined,
331
+ };
332
+ }
333
+ /**
334
+ * Validate a single document
335
+ */
336
+ validateSingle(document, requestId) {
337
+ return this.validate([document], requestId);
338
+ }
339
+ /**
340
+ * Verify document source trustworthiness
341
+ */
342
+ verifyDocumentSource(source) {
343
+ // Check cache
344
+ const cached = this.sourceReputationCache.get(source);
345
+ if (cached !== undefined) {
346
+ return {
347
+ trusted: cached >= this.config.minTrustScore,
348
+ score: cached,
349
+ reason: cached >= this.config.minTrustScore ? "Cached trusted source" : "Cached untrusted source",
350
+ };
351
+ }
352
+ let score = 50; // Neutral starting point
353
+ let reason = "Unknown source";
354
+ // Check blocked sources
355
+ for (const blocked of this.config.blockedSources) {
356
+ if (source.includes(blocked) || new RegExp(blocked, "i").test(source)) {
357
+ this.sourceReputationCache.set(source, 0);
358
+ return { trusted: false, score: 0, reason: "Blocked source" };
359
+ }
360
+ }
361
+ // Check malicious patterns
362
+ for (const pattern of this.MALICIOUS_SOURCE_PATTERNS) {
363
+ if (pattern.test(source)) {
364
+ this.sourceReputationCache.set(source, 10);
365
+ return { trusted: false, score: 10, reason: "Matches malicious source pattern" };
366
+ }
367
+ }
368
+ // Check trusted sources
369
+ for (const trusted of this.config.trustedSources) {
370
+ if (source.includes(trusted) || new RegExp(trusted, "i").test(source)) {
371
+ this.sourceReputationCache.set(source, 90);
372
+ return { trusted: true, score: 90, reason: "Trusted source" };
373
+ }
374
+ }
375
+ // Analyze source URL/path
376
+ try {
377
+ const url = new URL(source);
378
+ // HTTPS is more trusted
379
+ if (url.protocol === "https:") {
380
+ score += 15;
381
+ reason = "HTTPS source";
382
+ }
383
+ // Well-known domains get bonus
384
+ const trustedDomains = [".gov", ".edu", ".org", "wikipedia.org", "microsoft.com", "google.com"];
385
+ for (const domain of trustedDomains) {
386
+ if (url.hostname.endsWith(domain)) {
387
+ score += 20;
388
+ reason = `Trusted domain: ${domain}`;
389
+ break;
390
+ }
391
+ }
392
+ // Suspicious URL patterns
393
+ if (url.pathname.includes("..") || url.search.includes("<")) {
394
+ score -= 30;
395
+ reason = "Suspicious URL pattern";
396
+ }
397
+ }
398
+ catch {
399
+ // Local file path
400
+ if (source.startsWith("/") || source.match(/^[A-Z]:\\/)) {
401
+ score = 60;
402
+ reason = "Local file path";
403
+ }
404
+ }
405
+ this.sourceReputationCache.set(source, score);
406
+ return {
407
+ trusted: score >= this.config.minTrustScore,
408
+ score,
409
+ reason,
410
+ };
411
+ }
412
+ /**
413
+ * Add trusted source
414
+ */
415
+ addTrustedSource(source) {
416
+ if (!this.config.trustedSources.includes(source)) {
417
+ this.config.trustedSources.push(source);
418
+ }
419
+ this.sourceReputationCache.set(source, 90);
420
+ }
421
+ /**
422
+ * Add blocked source
423
+ */
424
+ addBlockedSource(source) {
425
+ if (!this.config.blockedSources.includes(source)) {
426
+ this.config.blockedSources.push(source);
427
+ }
428
+ this.sourceReputationCache.set(source, 0);
429
+ }
430
+ /**
431
+ * Register known good content hash
432
+ */
433
+ registerKnownGoodHash(content) {
434
+ const hash = this.hashContent(content);
435
+ this.config.knownGoodHashes.add(hash);
436
+ return hash;
437
+ }
438
+ /**
439
+ * Clear source reputation cache
440
+ */
441
+ clearSourceCache() {
442
+ this.sourceReputationCache.clear();
443
+ }
444
+ detectInjections(content) {
445
+ const patterns = [];
446
+ const violations = [];
447
+ let riskContribution = 0;
448
+ for (const { name, pattern, severity } of this.RAG_INJECTION_PATTERNS) {
449
+ const matches = content.match(pattern);
450
+ if (matches) {
451
+ patterns.push(name);
452
+ violations.push(`injection_${name}`);
453
+ riskContribution += severity;
454
+ }
455
+ }
456
+ // Check for excessive special characters (possible obfuscation)
457
+ const specialCharRatio = (content.match(/[^\w\s]/g) || []).length / content.length;
458
+ if (specialCharRatio > 0.3) {
459
+ patterns.push("high_special_char_ratio");
460
+ violations.push("possible_obfuscation");
461
+ riskContribution += 15;
462
+ }
463
+ // Check for invisible unicode
464
+ const invisibleChars = content.match(/[\u200B-\u200D\uFEFF\u2060-\u206F]/g);
465
+ if (invisibleChars && invisibleChars.length > 5) {
466
+ patterns.push("invisible_unicode");
467
+ violations.push("hidden_characters");
468
+ riskContribution += 20;
469
+ }
470
+ return {
471
+ found: patterns.length > 0,
472
+ patterns,
473
+ violations,
474
+ riskContribution: Math.min(100, riskContribution),
475
+ };
476
+ }
477
+ checkMetadata(metadata) {
478
+ const violations = [];
479
+ let riskContribution = 0;
480
+ const metadataStr = JSON.stringify(metadata);
481
+ for (const { name, pattern } of this.SUSPICIOUS_METADATA_PATTERNS) {
482
+ if (pattern.test(metadataStr)) {
483
+ violations.push(`metadata_${name}`);
484
+ riskContribution += 15;
485
+ }
486
+ }
487
+ // Check for injection in specific fields
488
+ for (const { name, pattern, severity } of this.RAG_INJECTION_PATTERNS.slice(0, 5)) {
489
+ if (pattern.test(metadataStr)) {
490
+ violations.push(`metadata_injection_${name}`);
491
+ riskContribution += severity / 2;
492
+ }
493
+ }
494
+ return {
495
+ suspicious: violations.length > 0,
496
+ violations,
497
+ riskContribution: Math.min(50, riskContribution),
498
+ };
499
+ }
500
+ checkEmbedding(embedding, retrievalScore) {
501
+ // Simplified embedding anomaly detection
502
+ // Check for invalid values (NaN, Infinity, null) - CRITICAL, always block
503
+ if (embedding.some((v) => v === null || v === undefined || typeof v !== "number" || !isFinite(v))) {
504
+ return { anomalous: true, reason: "Invalid embedding values (NaN/Infinity/null)", shouldBlock: true };
505
+ }
506
+ // Check for suspiciously uniform embeddings
507
+ const uniqueValues = new Set(embedding.map((v) => Math.round(v * 100) / 100));
508
+ if (uniqueValues.size < embedding.length * 0.1) {
509
+ return { anomalous: true, reason: "Suspiciously uniform embedding", shouldBlock: true };
510
+ }
511
+ // Check for mismatch between high retrieval score and embedding characteristics
512
+ const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
513
+ if (retrievalScore > 0.9 && magnitude < 0.1) {
514
+ return { anomalous: true, reason: "Score/embedding mismatch" };
515
+ }
516
+ return { anomalous: false };
517
+ }
518
+ sanitizeDocument(doc) {
519
+ let sanitizedContent = doc.content;
520
+ // Remove injection patterns
521
+ for (const { pattern } of this.RAG_INJECTION_PATTERNS) {
522
+ sanitizedContent = sanitizedContent.replace(pattern, "[REDACTED]");
523
+ }
524
+ // Remove invisible characters
525
+ sanitizedContent = sanitizedContent.replace(/[\u200B-\u200D\uFEFF\u2060-\u206F]/g, "");
526
+ // Escape potential delimiter breakers
527
+ sanitizedContent = sanitizedContent.replace(/(\[{3,}|\]{3,}|\{{3,}|\}{3,}|<{3,}|>{3,})/g, "");
528
+ return {
529
+ ...doc,
530
+ content: sanitizedContent,
531
+ metadata: {
532
+ ...doc.metadata,
533
+ _sanitized: true,
534
+ _originalLength: doc.content.length,
535
+ _sanitizedLength: sanitizedContent.length,
536
+ },
537
+ };
538
+ }
539
+ hashContent(content) {
540
+ return crypto.createHash("sha256").update(content).digest("hex");
541
+ }
542
+ generateRecommendations(violations, hasUntrustedSources) {
543
+ const recommendations = [];
544
+ if (hasUntrustedSources) {
545
+ recommendations.push("Review and whitelist trusted document sources");
546
+ }
547
+ if (violations.some((v) => v.includes("injection"))) {
548
+ recommendations.push("Implement document sanitization in your RAG pipeline");
549
+ }
550
+ if (violations.some((v) => v.includes("hash"))) {
551
+ recommendations.push("Enable content integrity verification with known good hashes");
552
+ }
553
+ if (violations.some((v) => v.includes("oversized"))) {
554
+ recommendations.push("Implement document chunking with size limits");
555
+ }
556
+ if (violations.some((v) => v.includes("embedding"))) {
557
+ recommendations.push("Add embedding validation to your vector store pipeline");
558
+ }
559
+ if (recommendations.length === 0) {
560
+ recommendations.push("Continue monitoring RAG document sources");
561
+ }
562
+ return recommendations;
563
+ }
564
+ // ============= v2 Enhanced Detection Methods =============
565
+ /**
566
+ * Detect advanced embedding attacks (backdoor, adversarial perturbation)
567
+ */
568
+ detectEmbeddingAttacks(embedding, retrievalScore) {
569
+ const attackTypes = [];
570
+ const details = {};
571
+ let riskScore = 0;
572
+ // Check embedding dimension
573
+ if (embedding.length !== this.config.embeddingDimension) {
574
+ attackTypes.push("dimension_mismatch");
575
+ riskScore += 20;
576
+ }
577
+ // Calculate embedding magnitude
578
+ const magnitude = Math.sqrt(embedding.reduce((sum, v) => sum + v * v, 0));
579
+ const [minMag, maxMag] = this.config.embeddingMagnitudeRange;
580
+ // Check for magnitude anomalies
581
+ if (magnitude < minMag || magnitude > maxMag) {
582
+ attackTypes.push("magnitude_anomaly");
583
+ details.magnitude_anomaly = true;
584
+ riskScore += 25;
585
+ }
586
+ // Check for adversarial perturbation patterns
587
+ // Adversarial embeddings often have unusual value distributions
588
+ const values = embedding.map(Math.abs);
589
+ const sortedValues = [...values].sort((a, b) => b - a);
590
+ const topValues = sortedValues.slice(0, 10);
591
+ const avgTop = topValues.reduce((a, b) => a + b, 0) / topValues.length;
592
+ const avgAll = values.reduce((a, b) => a + b, 0) / values.length;
593
+ // Adversarial perturbations often spike certain dimensions
594
+ if (avgTop > avgAll * 10) {
595
+ attackTypes.push("adversarial_perturbation");
596
+ details.adversarial_perturbation = true;
597
+ riskScore += 35;
598
+ }
599
+ // Check for backdoor patterns
600
+ // Backdoor embeddings often have repeated patterns
601
+ const chunkSize = Math.min(50, Math.floor(embedding.length / 10));
602
+ const chunks = [];
603
+ for (let i = 0; i < embedding.length - chunkSize; i += chunkSize) {
604
+ chunks.push(embedding.slice(i, i + chunkSize));
605
+ }
606
+ // Check for repeated chunks (backdoor signature)
607
+ if (chunks.length >= 2) {
608
+ for (let i = 0; i < chunks.length - 1; i++) {
609
+ const similarity = this.cosineSimilarity(chunks[i], chunks[i + 1]);
610
+ if (similarity > this.config.similarityThreshold) {
611
+ attackTypes.push("backdoor_pattern");
612
+ details.backdoor_pattern = true;
613
+ riskScore += 40;
614
+ break;
615
+ }
616
+ }
617
+ }
618
+ // Check distribution anomalies
619
+ const mean = embedding.reduce((a, b) => a + b, 0) / embedding.length;
620
+ const variance = embedding.reduce((sum, v) => sum + Math.pow(v - mean, 2), 0) / embedding.length;
621
+ const stdDev = Math.sqrt(variance);
622
+ // Normal embeddings usually have stdDev in reasonable range
623
+ if (stdDev < 0.001 || stdDev > 2.0) {
624
+ attackTypes.push("distribution_anomaly");
625
+ details.distribution_anomaly = true;
626
+ riskScore += 20;
627
+ }
628
+ // High retrieval score with suspicious embedding
629
+ if (retrievalScore && retrievalScore > 0.95 && riskScore > 20) {
630
+ attackTypes.push("suspicious_high_score");
631
+ riskScore += 15;
632
+ }
633
+ return {
634
+ detected: attackTypes.length > 0,
635
+ attack_type: attackTypes,
636
+ risk_score: Math.min(100, riskScore),
637
+ details,
638
+ };
639
+ }
640
+ /**
641
+ * Detect indirect prompt injection patterns
642
+ */
643
+ detectIndirectInjection(content) {
644
+ const patterns = [];
645
+ const violations = [];
646
+ let riskContribution = 0;
647
+ for (const { name, pattern, severity } of this.INDIRECT_INJECTION_PATTERNS) {
648
+ const matches = content.match(pattern);
649
+ if (matches) {
650
+ patterns.push(name);
651
+ violations.push(`indirect_injection_${name}`);
652
+ riskContribution += severity;
653
+ }
654
+ }
655
+ return {
656
+ found: patterns.length > 0,
657
+ patterns,
658
+ violations,
659
+ riskContribution: Math.min(100, riskContribution),
660
+ };
661
+ }
662
+ /**
663
+ * Detect steganography (hidden data in content)
664
+ */
665
+ detectSteganography(content) {
666
+ const violations = [];
667
+ let riskContribution = 0;
668
+ // Zero-width character steganography - lower threshold
669
+ const zeroWidthChars = content.match(/[\u200B-\u200F\u2028-\u202F\uFEFF]+/g);
670
+ if (zeroWidthChars) {
671
+ const totalZeroWidth = zeroWidthChars.reduce((sum, m) => sum + m.length, 0);
672
+ // Lower threshold to 3 (any zero-width chars are suspicious in normal text)
673
+ if (totalZeroWidth >= 3) {
674
+ violations.push("zero_width_steganography");
675
+ riskContribution += 40 + Math.min(30, totalZeroWidth * 5);
676
+ }
677
+ }
678
+ // Whitespace pattern encoding - multiple checks
679
+ const tabSpacePattern = /\s{4,}\t+\s+|\t{2,}\s+\t/;
680
+ if (tabSpacePattern.test(content)) {
681
+ violations.push("whitespace_encoding");
682
+ riskContribution += 35;
683
+ }
684
+ const whitespaceRatio = (content.match(/[\t\n\r ]/g) || []).length / content.length;
685
+ if (whitespaceRatio > 0.35) {
686
+ violations.push("excessive_whitespace_ratio");
687
+ riskContribution += 25;
688
+ }
689
+ // Unicode tag character steganography (U+E0000-U+E007F)
690
+ const tagChars = content.match(/[\uDB40][\uDC00-\uDC7F]/g);
691
+ if (tagChars && tagChars.length > 0) {
692
+ violations.push("unicode_tag_steganography");
693
+ riskContribution += 40;
694
+ }
695
+ // Variation selector abuse (U+FE00-U+FE0F)
696
+ const variationSelectors = content.match(/[\uFE00-\uFE0F]/g);
697
+ if (variationSelectors && variationSelectors.length > 5) {
698
+ violations.push("variation_selector_abuse");
699
+ riskContribution += 25;
700
+ }
701
+ // Binary-like pattern in text (potential hidden data)
702
+ const binaryPattern = content.match(/[01]{16,}/g);
703
+ if (binaryPattern) {
704
+ violations.push("binary_steganography");
705
+ riskContribution += 30;
706
+ }
707
+ return {
708
+ found: violations.length > 0,
709
+ violations,
710
+ riskContribution: Math.min(100, riskContribution),
711
+ };
712
+ }
713
+ /**
714
+ * Calculate cosine similarity between two vectors
715
+ */
716
+ cosineSimilarity(a, b) {
717
+ if (a.length !== b.length)
718
+ return 0;
719
+ const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0);
720
+ const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
721
+ const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
722
+ if (magnitudeA === 0 || magnitudeB === 0)
723
+ return 0;
724
+ return dotProduct / (magnitudeA * magnitudeB);
725
+ }
726
+ /**
727
+ * Analyze a batch of embeddings for clustering anomalies
728
+ */
729
+ analyzeEmbeddingCluster(embeddings) {
730
+ if (embeddings.length < 3) {
731
+ return { anomalous: false, anomalousIndices: [], reason: "Not enough embeddings for cluster analysis" };
732
+ }
733
+ const anomalousIndices = [];
734
+ // Calculate pairwise similarities
735
+ const similarities = [];
736
+ for (let i = 0; i < embeddings.length; i++) {
737
+ similarities[i] = [];
738
+ for (let j = 0; j < embeddings.length; j++) {
739
+ if (i === j) {
740
+ similarities[i][j] = 1;
741
+ }
742
+ else {
743
+ similarities[i][j] = this.cosineSimilarity(embeddings[i], embeddings[j]);
744
+ }
745
+ }
746
+ }
747
+ // Find embeddings with unusually high or low similarity to all others
748
+ for (let i = 0; i < embeddings.length; i++) {
749
+ const avgSimilarity = similarities[i].reduce((a, b) => a + b, 0) / embeddings.length;
750
+ // Anomaly: embedding is too similar to everything (potential backdoor)
751
+ if (avgSimilarity > this.config.similarityThreshold) {
752
+ anomalousIndices.push(i);
753
+ }
754
+ // Anomaly: embedding is dissimilar to everything (potential outlier attack)
755
+ if (avgSimilarity < 0.3) {
756
+ anomalousIndices.push(i);
757
+ }
758
+ }
759
+ return {
760
+ anomalous: anomalousIndices.length > 0,
761
+ anomalousIndices: [...new Set(anomalousIndices)],
762
+ reason: anomalousIndices.length > 0
763
+ ? `${anomalousIndices.length} embeddings show clustering anomalies`
764
+ : "No clustering anomalies detected",
765
+ };
766
+ }
767
+ }
768
+ exports.RAGGuard = RAGGuard;
769
+ //# sourceMappingURL=rag-guard.js.map