llm-trust-guard 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +318 -0
  3. package/dist/guards/agent-communication-guard.d.ts +169 -0
  4. package/dist/guards/agent-communication-guard.d.ts.map +1 -0
  5. package/dist/guards/agent-communication-guard.js +468 -0
  6. package/dist/guards/agent-communication-guard.js.map +1 -0
  7. package/dist/guards/autonomy-escalation-guard.d.ts +137 -0
  8. package/dist/guards/autonomy-escalation-guard.d.ts.map +1 -0
  9. package/dist/guards/autonomy-escalation-guard.js +470 -0
  10. package/dist/guards/autonomy-escalation-guard.js.map +1 -0
  11. package/dist/guards/circuit-breaker.d.ts +142 -0
  12. package/dist/guards/circuit-breaker.d.ts.map +1 -0
  13. package/dist/guards/circuit-breaker.js +347 -0
  14. package/dist/guards/circuit-breaker.js.map +1 -0
  15. package/dist/guards/code-execution-guard.d.ts +114 -0
  16. package/dist/guards/code-execution-guard.d.ts.map +1 -0
  17. package/dist/guards/code-execution-guard.js +467 -0
  18. package/dist/guards/code-execution-guard.js.map +1 -0
  19. package/dist/guards/conversation-guard.d.ts +73 -0
  20. package/dist/guards/conversation-guard.d.ts.map +1 -0
  21. package/dist/guards/conversation-guard.js +281 -0
  22. package/dist/guards/conversation-guard.js.map +1 -0
  23. package/dist/guards/drift-detector.d.ts +182 -0
  24. package/dist/guards/drift-detector.d.ts.map +1 -0
  25. package/dist/guards/drift-detector.js +480 -0
  26. package/dist/guards/drift-detector.js.map +1 -0
  27. package/dist/guards/encoding-detector.d.ts +76 -0
  28. package/dist/guards/encoding-detector.d.ts.map +1 -0
  29. package/dist/guards/encoding-detector.js +698 -0
  30. package/dist/guards/encoding-detector.js.map +1 -0
  31. package/dist/guards/execution-monitor.d.ts +73 -0
  32. package/dist/guards/execution-monitor.d.ts.map +1 -0
  33. package/dist/guards/execution-monitor.js +205 -0
  34. package/dist/guards/execution-monitor.js.map +1 -0
  35. package/dist/guards/input-sanitizer.d.ts +87 -0
  36. package/dist/guards/input-sanitizer.d.ts.map +1 -0
  37. package/dist/guards/input-sanitizer.js +301 -0
  38. package/dist/guards/input-sanitizer.js.map +1 -0
  39. package/dist/guards/mcp-security-guard.d.ts +204 -0
  40. package/dist/guards/mcp-security-guard.d.ts.map +1 -0
  41. package/dist/guards/mcp-security-guard.js +618 -0
  42. package/dist/guards/mcp-security-guard.js.map +1 -0
  43. package/dist/guards/memory-guard.d.ts +124 -0
  44. package/dist/guards/memory-guard.d.ts.map +1 -0
  45. package/dist/guards/memory-guard.js +476 -0
  46. package/dist/guards/memory-guard.js.map +1 -0
  47. package/dist/guards/multimodal-guard.d.ts +93 -0
  48. package/dist/guards/multimodal-guard.d.ts.map +1 -0
  49. package/dist/guards/multimodal-guard.js +507 -0
  50. package/dist/guards/multimodal-guard.js.map +1 -0
  51. package/dist/guards/output-filter.d.ts +76 -0
  52. package/dist/guards/output-filter.d.ts.map +1 -0
  53. package/dist/guards/output-filter.js +289 -0
  54. package/dist/guards/output-filter.js.map +1 -0
  55. package/dist/guards/policy-gate.d.ts +57 -0
  56. package/dist/guards/policy-gate.d.ts.map +1 -0
  57. package/dist/guards/policy-gate.js +182 -0
  58. package/dist/guards/policy-gate.js.map +1 -0
  59. package/dist/guards/prompt-leakage-guard.d.ts +110 -0
  60. package/dist/guards/prompt-leakage-guard.d.ts.map +1 -0
  61. package/dist/guards/prompt-leakage-guard.js +529 -0
  62. package/dist/guards/prompt-leakage-guard.js.map +1 -0
  63. package/dist/guards/rag-guard.d.ts +188 -0
  64. package/dist/guards/rag-guard.d.ts.map +1 -0
  65. package/dist/guards/rag-guard.js +769 -0
  66. package/dist/guards/rag-guard.js.map +1 -0
  67. package/dist/guards/schema-validator.d.ts +35 -0
  68. package/dist/guards/schema-validator.d.ts.map +1 -0
  69. package/dist/guards/schema-validator.js +316 -0
  70. package/dist/guards/schema-validator.js.map +1 -0
  71. package/dist/guards/state-persistence-guard.d.ts +153 -0
  72. package/dist/guards/state-persistence-guard.d.ts.map +1 -0
  73. package/dist/guards/state-persistence-guard.js +484 -0
  74. package/dist/guards/state-persistence-guard.js.map +1 -0
  75. package/dist/guards/tenant-boundary.d.ts +67 -0
  76. package/dist/guards/tenant-boundary.d.ts.map +1 -0
  77. package/dist/guards/tenant-boundary.js +187 -0
  78. package/dist/guards/tenant-boundary.js.map +1 -0
  79. package/dist/guards/tool-chain-validator.d.ts +102 -0
  80. package/dist/guards/tool-chain-validator.d.ts.map +1 -0
  81. package/dist/guards/tool-chain-validator.js +480 -0
  82. package/dist/guards/tool-chain-validator.js.map +1 -0
  83. package/dist/guards/tool-registry.d.ts +45 -0
  84. package/dist/guards/tool-registry.d.ts.map +1 -0
  85. package/dist/guards/tool-registry.js +155 -0
  86. package/dist/guards/tool-registry.js.map +1 -0
  87. package/dist/guards/trust-exploitation-guard.d.ts +134 -0
  88. package/dist/guards/trust-exploitation-guard.d.ts.map +1 -0
  89. package/dist/guards/trust-exploitation-guard.js +354 -0
  90. package/dist/guards/trust-exploitation-guard.js.map +1 -0
  91. package/dist/index.d.ts +133 -0
  92. package/dist/index.d.ts.map +1 -0
  93. package/dist/index.js +430 -0
  94. package/dist/index.js.map +1 -0
  95. package/dist/integrations/express.d.ts +119 -0
  96. package/dist/integrations/express.d.ts.map +1 -0
  97. package/dist/integrations/express.js +244 -0
  98. package/dist/integrations/express.js.map +1 -0
  99. package/dist/integrations/index.d.ts +9 -0
  100. package/dist/integrations/index.d.ts.map +1 -0
  101. package/dist/integrations/index.js +26 -0
  102. package/dist/integrations/index.js.map +1 -0
  103. package/dist/integrations/langchain.d.ts +165 -0
  104. package/dist/integrations/langchain.d.ts.map +1 -0
  105. package/dist/integrations/langchain.js +308 -0
  106. package/dist/integrations/langchain.js.map +1 -0
  107. package/dist/integrations/openai.d.ts +205 -0
  108. package/dist/integrations/openai.d.ts.map +1 -0
  109. package/dist/integrations/openai.js +380 -0
  110. package/dist/integrations/openai.js.map +1 -0
  111. package/dist/types/index.d.ts +245 -0
  112. package/dist/types/index.d.ts.map +1 -0
  113. package/dist/types/index.js +6 -0
  114. package/dist/types/index.js.map +1 -0
  115. package/package.json +64 -0
@@ -0,0 +1,698 @@
1
+ "use strict";
2
+ /**
3
+ * EncodingDetector
4
+ *
5
+ * Detects and blocks encoding-based bypass attempts:
6
+ * - Base64 encoded payloads
7
+ * - URL encoded attacks
8
+ * - Unicode/punycode obfuscation
9
+ * - Hex encoding
10
+ * - HTML entity encoding
11
+ * - Mixed encoding attacks
12
+ */
13
+ Object.defineProperty(exports, "__esModule", { value: true });
14
+ exports.EncodingDetector = void 0;
15
/**
 * Convert a numeric code point to a string without ever throwing.
 *
 * String.fromCodePoint raises a RangeError for values outside 0..0x10FFFF, and
 * the values handled here come straight from untrusted input, so out-of-range
 * values yield `fallback` instead.
 *
 * @param {number} codePoint - Parsed numeric value of the escape/entity.
 * @param {string} fallback - Text to return when the value is not a valid code point.
 * @returns {string} The decoded character, or `fallback`.
 */
function codePointToString(codePoint, fallback) {
    if (Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff) {
        return String.fromCodePoint(codePoint);
    }
    return fallback;
}
/**
 * Decode \uXXXX, \u{X...} and \UXXXXXXXX escape sequences found in `text`.
 * Escapes that name an invalid code point are left untouched.
 *
 * @param {string} text
 * @returns {string}
 */
function decodeUnicodeEscapes(text) {
    return text
        .replace(/\\u([0-9A-Fa-f]{4})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16)))
        .replace(/\\u\{([0-9A-Fa-f]{1,6})\}/g, (whole, hex) => codePointToString(parseInt(hex, 16), whole))
        .replace(/\\U([0-9A-Fa-f]{8})/g, (whole, hex) => codePointToString(parseInt(hex, 16), whole));
}
/**
 * Percent-decode `text`. When the input is not valid UTF-8 percent-encoding
 * (decodeURIComponent throws URIError), fall back to decoding each %XX byte
 * individually so that partially malformed input is still inspected.
 *
 * @param {string} text
 * @returns {string}
 */
function decodePercentEncoding(text) {
    try {
        return decodeURIComponent(text);
    }
    catch {
        return text.replace(/%([0-9A-Fa-f]{2})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16)));
    }
}
/**
 * Decode numeric HTML entities and the small set of named entities this guard
 * understands (&lt; &gt; &amp; &quot; &apos; &nbsp;).
 * Numeric entities outside the Unicode range are left untouched.
 *
 * @param {string} text
 * @returns {string}
 */
function decodeHtmlEntities(text) {
    return text
        .replace(/&#(\d+);/g, (whole, code) => codePointToString(parseInt(code, 10), whole))
        .replace(/&#x([0-9A-Fa-f]+);/g, (whole, hex) => codePointToString(parseInt(hex, 16), whole))
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&amp;/g, "&")
        .replace(/&quot;/g, '"')
        .replace(/&apos;/g, "'")
        .replace(/&nbsp;/g, " ");
}
/**
 * Detects encoded / obfuscated input (Base64, URL encoding, Unicode tricks,
 * hex, HTML entities, ROT13, octal, Base32), decodes each layer and scans both
 * the raw and the decoded text against a list of threat patterns.
 */
class EncodingDetector {
    /**
     * @param {object} [config] - Optional overrides. Every `detect*` flag
     *   defaults to true, `maxDecodingDepth` to 3, `maxEncodedRatio` to 0.5 and
     *   `threatPatterns` to the built-in list below.
     */
    constructor(config = {}) {
        this.defaultThreatPatterns = [
            // SQL Injection - Enhanced
            {
                name: "sql_injection",
                pattern: /(?:union\s+(?:all\s+)?select|drop\s+(?:table|database)|insert\s+into|delete\s+from|update\s+.*set|exec\s*\(|execute\s*\(|truncate\s+table|alter\s+table|create\s+table|;\s*select\s|or\s+1\s*=\s*1|'\s*or\s*'|--\s*$|\/\*.*\*\/)/gi,
                severity: "critical",
            },
            // Command Injection - Enhanced
            {
                name: "command_injection",
                pattern: /(?:;\s*(?:cat|ls|rm|wget|curl|bash|sh|python|perl|ruby|nc|netcat|nmap|chmod|chown|kill|pkill)|`[^`]+`|\$\([^)]+\)|\|\s*(?:sh|bash)|&&\s*(?:rm|cat|wget)|>\s*\/(?:etc|tmp|var))/gi,
                severity: "critical",
            },
            // Path Traversal - Enhanced
            {
                name: "path_traversal",
                pattern: /(?:\.\.\/|\.\.\\|%2e%2e%2f|%2e%2e\/|\.\.%2f|%2e%2e%5c|\.\.%5c|%252e%252e|%c0%ae|%c1%9c|\.\.%c0%af|\.\.%c1%9c)/gi,
                severity: "high",
            },
            // XSS - Enhanced
            {
                name: "xss",
                pattern: /(?:<script|javascript:|on\w+\s*=|<iframe|<object|<embed|<svg\s+onload|<img\s+onerror|<body\s+onload|expression\s*\(|vbscript:|data:text\/html|<style>.*expression)/gi,
                severity: "high",
            },
            // Prompt Injection - Enhanced
            {
                name: "prompt_injection",
                pattern: /(?:ignore\s+(?:all\s+)?(?:previous|prior|above)\s*(?:instructions)?|disregard\s+(?:above|all|everything)|you\s+are\s+now|new\s+instructions|forget\s+(?:everything|all)|system\s*:\s*you|act\s+as\s+(?:a|an|if)|pretend\s+(?:you|to\s+be)|roleplay\s+as|jailbreak|DAN\s+mode|developer\s+mode)/gi,
                severity: "high",
            },
            // System Commands - Enhanced
            {
                name: "system_command",
                pattern: /(?:\/bin\/|\/etc\/passwd|\/etc\/shadow|cmd\.exe|powershell|\.exe|\.bat|\.cmd|\.ps1|\.sh\s|eval\s*\(|system\s*\(|exec\s*\(|popen|subprocess|os\.system)/gi,
                severity: "critical",
            },
            // Data Exfiltration
            {
                name: "data_exfiltration",
                pattern: /(?:curl\s+.*-d|wget\s+.*--post|fetch\s*\(|XMLHttpRequest|sendBeacon|\.innerHTML\s*=|document\.cookie|localStorage\.|sessionStorage\.)/gi,
                severity: "high",
            },
            // LDAP Injection
            {
                name: "ldap_injection",
                pattern: /(?:\)\s*\(\||\*\)\s*\(|\)\s*\(\&|%28%7c|%29%28)/gi,
                severity: "high",
            },
            // XML Injection / XXE
            {
                name: "xxe_injection",
                pattern: /(?:<!ENTITY|<!DOCTYPE.*SYSTEM|<!DOCTYPE.*PUBLIC|SYSTEM\s*"file:|SYSTEM\s*"http)/gi,
                severity: "critical",
            },
            // Template Injection
            {
                name: "template_injection",
                pattern: /(?:\{\{.*\}\}|\$\{.*\}|<%.*%>|<\?.*\?>|\[\[.*\]\])/gi,
                severity: "high",
            },
            // Role/Permission Escalation
            {
                name: "role_escalation",
                pattern: /(?:admin\s*:\s*true|role\s*:\s*(?:admin|root|superuser)|isAdmin\s*=\s*true|permissions?\s*:\s*\[?\s*['"]\*['"])/gi,
                severity: "critical",
            },
        ];
        this.config = {
            detectBase64: config.detectBase64 ?? true,
            detectURLEncoding: config.detectURLEncoding ?? true,
            detectUnicode: config.detectUnicode ?? true,
            detectHex: config.detectHex ?? true,
            detectHTMLEntities: config.detectHTMLEntities ?? true,
            detectMixedEncoding: config.detectMixedEncoding ?? true,
            detectROT13: config.detectROT13 ?? true,
            detectOctal: config.detectOctal ?? true,
            detectBase32: config.detectBase32 ?? true,
            maxDecodingDepth: config.maxDecodingDepth ?? 3,
            threatPatterns: config.threatPatterns ?? this.defaultThreatPatterns,
            maxEncodedRatio: config.maxEncodedRatio ?? 0.5,
        };
    }
    /**
     * Detect encodings in `input`, decode each layer and scan for threats.
     *
     * The input is blocked when any critical threat is found in any layer, or
     * when a high-severity threat is found in a decoded (non-original) layer.
     *
     * @param {string} input - Text to analyse.
     * @param {string} [requestId] - Identifier used only in the log line emitted on block.
     * @returns {{allowed: boolean, reason: (string|undefined), violations: string[],
     *   encoding_analysis: {encodings_detected: object[], decoded_content: (string|undefined),
     *   threats_found: object[], obfuscation_score: number}}}
     */
    detect(input, requestId = "") {
        const violations = [];
        const encodingsDetected = [];
        const threatsFound = [];
        let obfuscationScore = 0;
        // Check original input for threats
        this.checkThreats(input, "original", threatsFound);
        // Detect and decode Base64
        if (this.config.detectBase64) {
            const base64Result = this.detectBase64(input);
            if (base64Result.found) {
                encodingsDetected.push({
                    type: "base64",
                    count: base64Result.matches.length,
                    locations: base64Result.locations,
                    decoded_sample: base64Result.decoded?.substring(0, 100),
                });
                obfuscationScore += 3;
                violations.push("BASE64_ENCODING_DETECTED");
                if (base64Result.decoded) {
                    this.checkThreats(base64Result.decoded, "decoded_base64", threatsFound);
                }
            }
        }
        // Detect URL encoding
        if (this.config.detectURLEncoding) {
            const urlResult = this.detectURLEncoding(input);
            if (urlResult.found) {
                encodingsDetected.push({
                    type: "url_encoding",
                    count: urlResult.count,
                    locations: [],
                    decoded_sample: urlResult.decoded?.substring(0, 100),
                });
                obfuscationScore += urlResult.ratio > 0.3 ? 4 : 2;
                // URL encoding alone is common; only a high encoded ratio is a violation
                if (urlResult.ratio > this.config.maxEncodedRatio) {
                    violations.push("EXCESSIVE_URL_ENCODING");
                }
                if (urlResult.decoded) {
                    this.checkThreats(urlResult.decoded, "decoded_url", threatsFound);
                }
            }
        }
        // Detect Unicode obfuscation
        if (this.config.detectUnicode) {
            const unicodeResult = this.detectUnicode(input);
            if (unicodeResult.found) {
                encodingsDetected.push({
                    type: "unicode",
                    count: unicodeResult.count,
                    locations: unicodeResult.types,
                    decoded_sample: unicodeResult.normalized?.substring(0, 100),
                });
                obfuscationScore += 3;
                violations.push("UNICODE_OBFUSCATION_DETECTED");
                if (unicodeResult.normalized) {
                    this.checkThreats(unicodeResult.normalized, "decoded_unicode", threatsFound);
                }
            }
        }
        // Detect Hex encoding
        if (this.config.detectHex) {
            const hexResult = this.detectHex(input);
            if (hexResult.found) {
                encodingsDetected.push({
                    type: "hex",
                    count: hexResult.matches.length,
                    locations: hexResult.locations,
                    decoded_sample: hexResult.decoded?.substring(0, 100),
                });
                obfuscationScore += 2;
                violations.push("HEX_ENCODING_DETECTED");
                if (hexResult.decoded) {
                    this.checkThreats(hexResult.decoded, "decoded_hex", threatsFound);
                }
            }
        }
        // Detect HTML entities (scored, but not reported as a violation on their own)
        if (this.config.detectHTMLEntities) {
            const htmlResult = this.detectHTMLEntities(input);
            if (htmlResult.found) {
                encodingsDetected.push({
                    type: "html_entities",
                    count: htmlResult.count,
                    locations: [],
                    decoded_sample: htmlResult.decoded?.substring(0, 100),
                });
                obfuscationScore += 2;
                if (htmlResult.decoded) {
                    this.checkThreats(htmlResult.decoded, "decoded_html", threatsFound);
                }
            }
        }
        // Detect ROT13 encoding
        if (this.config.detectROT13) {
            const rot13Result = this.detectROT13(input);
            if (rot13Result.found) {
                encodingsDetected.push({
                    type: "rot13",
                    count: rot13Result.matches.length,
                    locations: rot13Result.locations,
                    decoded_sample: rot13Result.decoded?.substring(0, 100),
                });
                obfuscationScore += 3;
                violations.push("ROT13_ENCODING_DETECTED");
                if (rot13Result.decoded) {
                    this.checkThreats(rot13Result.decoded, "decoded_rot13", threatsFound);
                }
            }
        }
        // Detect Octal encoding
        if (this.config.detectOctal) {
            const octalResult = this.detectOctal(input);
            if (octalResult.found) {
                encodingsDetected.push({
                    type: "octal",
                    count: octalResult.matches.length,
                    locations: octalResult.locations,
                    decoded_sample: octalResult.decoded?.substring(0, 100),
                });
                obfuscationScore += 2;
                violations.push("OCTAL_ENCODING_DETECTED");
                if (octalResult.decoded) {
                    this.checkThreats(octalResult.decoded, "decoded_octal", threatsFound);
                }
            }
        }
        // Detect Base32 encoding
        if (this.config.detectBase32) {
            const base32Result = this.detectBase32(input);
            if (base32Result.found) {
                encodingsDetected.push({
                    type: "base32",
                    count: base32Result.matches.length,
                    locations: base32Result.locations,
                    decoded_sample: base32Result.decoded?.substring(0, 100),
                });
                obfuscationScore += 3;
                violations.push("BASE32_ENCODING_DETECTED");
                if (base32Result.decoded) {
                    this.checkThreats(base32Result.decoded, "decoded_base32", threatsFound);
                }
            }
        }
        // Check for mixed encoding (more than one encoding type present)
        if (this.config.detectMixedEncoding && encodingsDetected.length > 1) {
            obfuscationScore += encodingsDetected.length * 2;
            violations.push("MIXED_ENCODING_DETECTED");
        }
        // Add violations for threats found
        for (const threat of threatsFound) {
            if (threat.severity === "critical" || threat.severity === "high") {
                violations.push(`ENCODED_THREAT_${threat.pattern_name.toUpperCase()}_IN_${threat.in_layer.toUpperCase()}`);
            }
        }
        // Determine if blocked
        const hasCriticalThreat = threatsFound.some((t) => t.severity === "critical");
        const hasHighThreatInEncoded = threatsFound.some((t) => t.severity === "high" && t.in_layer !== "original");
        const allowed = !hasCriticalThreat && !hasHighThreatInEncoded;
        if (!allowed) {
            console.log(`[EncodingDetector:${requestId}] BLOCKED: ${violations.join(", ")}`);
        }
        // Peel nested encodings until the text stops changing or the depth limit is hit
        let decodedContent = input;
        for (let depth = 0; depth < this.config.maxDecodingDepth; depth++) {
            const decoded = this.fullyDecode(decodedContent);
            if (decoded === decodedContent)
                break;
            decodedContent = decoded;
        }
        return {
            allowed,
            reason: allowed
                ? undefined
                : `Encoding bypass attempt detected: ${violations.join(", ")}`,
            violations,
            encoding_analysis: {
                encodings_detected: encodingsDetected,
                decoded_content: decodedContent !== input ? decodedContent : undefined,
                threats_found: threatsFound,
                obfuscation_score: obfuscationScore,
            },
        };
    }
    /**
     * Quick check: does `input` contain a threat that only shows up after decoding?
     *
     * @param {string} input
     * @returns {boolean}
     */
    containsEncodedThreat(input) {
        const result = this.detect(input);
        return result.encoding_analysis.threats_found.some((t) => t.in_layer !== "original");
    }
    /**
     * Find Base64 runs (at least 20 characters) that decode to printable ASCII.
     *
     * @param {string} input
     * @returns {{found: boolean, matches: string[], locations: string[], decoded: (string|undefined)}}
     *   `decoded` is the space-joined decoding of every accepted run.
     */
    detectBase64(input) {
        // Match potential Base64 strings (min 20 chars to avoid false positives)
        const base64Pattern = /(?:[A-Za-z0-9+/]{4}){5,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?/g;
        const matches = [];
        const locations = [];
        let decoded;
        let match;
        while ((match = base64Pattern.exec(input)) !== null) {
            try {
                const candidate = match[0];
                const decodedStr = Buffer.from(candidate, "base64").toString("utf-8");
                // Only accept candidates that decode to printable ASCII
                if (/^[\x20-\x7E\r\n\t]+$/.test(decodedStr)) {
                    matches.push(candidate);
                    locations.push(`index:${match.index}`);
                    decoded = decoded ? decoded + " " + decodedStr : decodedStr;
                }
            }
            catch {
                // Defensive: treat any decoding failure as "not Base64"
            }
        }
        return {
            found: matches.length > 0,
            matches,
            locations,
            decoded,
        };
    }
    /**
     * Count %XX sequences and percent-decode the input.
     *
     * @param {string} input
     * @returns {{found: boolean, count: number, ratio: number, decoded: (string|undefined)}}
     *   `ratio` is the fraction of the input occupied by %XX sequences (0 for empty input).
     */
    detectURLEncoding(input) {
        const matches = input.match(/%[0-9A-Fa-f]{2}/g) || [];
        // Guard the division so an empty input reports 0 instead of NaN
        const ratio = input.length > 0 ? (matches.length * 3) / input.length : 0;
        const decoded = matches.length > 0 ? decodePercentEncoding(input) : undefined;
        return {
            found: matches.length > 0,
            count: matches.length,
            ratio,
            decoded,
        };
    }
    /**
     * Detect Unicode-based obfuscation: escape sequences, homoglyph ranges,
     * zero-width characters, bidi controls, confusables and tag characters.
     *
     * @param {string} input
     * @returns {{found: boolean, count: number, types: string[], normalized: (string|undefined)}}
     *   `normalized` is the NFKC-normalised input with escapes decoded and
     *   invisible characters removed; it is only set when something was found.
     */
    detectUnicode(input) {
        // Each entry: [label reported in `types`, pattern counted against the input]
        const checks = [
            // \uXXXX escape sequences
            ["unicode_escape_u", /\\u[0-9A-Fa-f]{4}/g],
            // \u{XXXXX} escape sequences (ES6 style)
            ["unicode_escape_es6", /\\u\{[0-9A-Fa-f]{1,6}\}/g],
            // \UXXXXXXXX escape sequences (Python style)
            ["unicode_escape_U", /\\U[0-9A-Fa-f]{8}/g],
            // Cyrillic lookalikes, Greek lookalikes, fullwidth forms, mathematical alphanumerics
            ["homoglyphs", /[\u0430-\u044F\u0410-\u042F\u0391-\u03C9\u2010-\u2015\uFF01-\uFF5E\u{1D400}-\u{1D7FF}]/gu],
            // Zero-width characters
            ["zero_width", /[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g],
            // Bidirectional text control characters (used in trojan source attacks)
            ["bidi_controls", /[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/g],
            // Confusable characters (common substitutions)
            ["confusables", /[\u0131\u0130\u017F\u212A\u0261\u0251\u025B\u0254\u028C]/g],
            // Tag characters (used to hide text)
            ["tag_characters", /[\u{E0000}-\u{E007F}]/gu],
        ];
        const types = [];
        let count = 0;
        for (const [label, pattern] of checks) {
            const hits = input.match(pattern) || [];
            if (hits.length > 0) {
                count += hits.length;
                types.push(label);
            }
        }
        let normalized;
        if (count > 0) {
            // Normalize, decode every escape format, then drop invisible characters
            normalized = decodeUnicodeEscapes(input.normalize("NFKC"))
                .replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g, "")
                .replace(/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/g, "")
                .replace(/[\u{E0000}-\u{E007F}]/gu, "");
        }
        return {
            found: count > 0,
            count,
            types,
            normalized,
        };
    }
    /**
     * Detect hex-encoded bytes in three shapes: 0x41 / \x41 pairs, contiguous
     * hex strings (>= 8 digits, even length) and whitespace-separated byte lists.
     * The latter two are only accepted when every byte is printable ASCII.
     *
     * @param {string} input
     * @returns {{found: boolean, matches: string[], locations: string[], decoded: (string|undefined)}}
     */
    detectHex(input) {
        const matches = [];
        const locations = [];
        let decoded = "";
        // Decode a list of two-digit hex strings; null if any byte is not printable ASCII
        const decodePrintable = (byteStrings) => {
            let text = "";
            for (const byteStr of byteStrings) {
                const byte = parseInt(byteStr, 16);
                if (byte < 32 || byte > 126) {
                    return null;
                }
                text += String.fromCharCode(byte);
            }
            return text;
        };
        let match;
        // Pattern 1: 0x41 or \x41 format
        const hexPattern1 = /(?:0x|\\x)([0-9A-Fa-f]{2})/g;
        while ((match = hexPattern1.exec(input)) !== null) {
            matches.push(match[0]);
            locations.push(`index:${match.index}`);
            decoded += String.fromCharCode(parseInt(match[1], 16));
        }
        // Pattern 2: Consecutive hex bytes like 41424344 (min 8 chars = 4 bytes)
        const hexPattern2 = /(?:^|[^0-9A-Fa-f])([0-9A-Fa-f]{8,})(?:[^0-9A-Fa-f]|$)/g;
        while ((match = hexPattern2.exec(input)) !== null) {
            const hexString = match[1];
            // Only process if even length (complete bytes)
            if (hexString.length % 2 !== 0) {
                continue;
            }
            const pairs = [];
            for (let i = 0; i < hexString.length; i += 2) {
                pairs.push(hexString.slice(i, i + 2));
            }
            const text = decodePrintable(pairs);
            if (text !== null && text.length >= 4) {
                matches.push(hexString);
                locations.push(`index:${match.index}`);
                decoded += text;
            }
        }
        // Pattern 3: Space-separated hex bytes like "41 42 43 44"
        const hexPattern3 = /(?:[0-9A-Fa-f]{2}\s+){3,}[0-9A-Fa-f]{2}/g;
        while ((match = hexPattern3.exec(input)) !== null) {
            const text = decodePrintable(match[0].split(/\s+/));
            if (text !== null && text.length >= 4) {
                matches.push(match[0]);
                locations.push(`index:${match.index}`);
                decoded += text;
            }
        }
        return {
            found: matches.length > 0,
            matches,
            locations,
            decoded: decoded || undefined,
        };
    }
    /**
     * Count HTML entities and decode the ones this guard understands.
     *
     * @param {string} input
     * @returns {{found: boolean, count: number, decoded: (string|undefined)}}
     */
    detectHTMLEntities(input) {
        const matches = input.match(/&(?:#\d+|#x[0-9A-Fa-f]+|\w+);/g) || [];
        const decoded = matches.length > 0 ? decodeHtmlEntities(input) : undefined;
        return {
            found: matches.length > 0,
            count: matches.length,
            decoded,
        };
    }
    /**
     * Detect words (5+ letters) whose ROT13 decoding is a known threat keyword.
     *
     * @param {string} input
     * @returns {{found: boolean, matches: string[], locations: string[], decoded: (string|undefined)}}
     *   `decoded` holds the space-joined decoded keywords; when no keyword
     *   matched and the input is purely letters/whitespace it holds the ROT13
     *   of the whole input (with `found` still false).
     */
    detectROT13(input) {
        const rot13Decode = (str) => str.replace(/[a-zA-Z]/g, (char) => {
            const base = char <= 'Z' ? 65 : 97;
            return String.fromCharCode(((char.charCodeAt(0) - base + 13) % 26) + base);
        });
        const threatKeywords = new Set([
            'ignore', 'instructions', 'system', 'admin', 'password', 'secret',
            'delete', 'drop', 'select', 'union', 'script', 'eval', 'exec',
            'shell', 'command', 'root', 'sudo', 'bypass', 'hack', 'inject'
        ]);
        const matches = [];
        const locations = [];
        let decoded;
        // Find potential ROT13 sequences (longer alphabetic sequences)
        const wordPattern = /\b[a-zA-Z]{5,}\b/g;
        let match;
        while ((match = wordPattern.exec(input)) !== null) {
            const candidate = match[0];
            const decodedWord = rot13Decode(candidate).toLowerCase();
            if (threatKeywords.has(decodedWord)) {
                matches.push(candidate);
                locations.push(`index:${match.index}`);
                decoded = decoded ? decoded + " " + decodedWord : decodedWord;
            }
        }
        // With no keyword hit, still expose the full decoding of purely alphabetic input
        if (!decoded && /^[a-zA-Z\s]+$/.test(input.trim())) {
            decoded = rot13Decode(input);
        }
        return {
            found: matches.length > 0,
            matches,
            locations,
            decoded,
        };
    }
    /**
     * Detect octal sequences written as \101 or as a 0-prefixed number (0101)
     * at the start of the input or after whitespace.
     *
     * @param {string} input
     * @returns {{found: boolean, matches: string[], locations: string[], decoded: (string|undefined)}}
     */
    detectOctal(input) {
        const octalPattern = /(?:\\([0-7]{3})|(?:^|\s)(0[0-7]{2,}))/g;
        const matches = [];
        const locations = [];
        let decoded = "";
        let match;
        while ((match = octalPattern.exec(input)) !== null) {
            matches.push(match[0]);
            locations.push(`index:${match.index}`);
            if (match[1]) {
                // \101 format
                decoded += String.fromCharCode(parseInt(match[1], 8));
            }
            else if (match[2]) {
                // 0101 format - could be a plain number, so keep it only if printable
                const charCode = parseInt(match[2], 8);
                if (charCode >= 32 && charCode <= 126) {
                    decoded += String.fromCharCode(charCode);
                }
            }
        }
        return {
            found: matches.length > 0,
            matches,
            locations,
            decoded: decoded || undefined,
        };
    }
    /**
     * Find Base32 runs (A-Z, 2-7; at least 16 characters) that decode to at
     * least 4 printable ASCII characters.
     *
     * @param {string} input
     * @returns {{found: boolean, matches: string[], locations: string[], decoded: (string|undefined)}}
     */
    detectBase32(input) {
        // Base32 alphabet: A-Z and 2-7, with = padding
        const base32Pattern = /(?:[A-Z2-7]{8}){2,}(?:={0,6})?/g;
        const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567';
        // Returns the decoded text, or null when the data is not printable ASCII
        const base32Decode = (str) => {
            const cleanStr = str.replace(/=/g, '').toUpperCase();
            let bits = '';
            for (const char of cleanStr) {
                const index = alphabet.indexOf(char);
                if (index === -1)
                    return null;
                bits += index.toString(2).padStart(5, '0');
            }
            let result = '';
            // Trailing bits that do not fill a whole byte are padding and are ignored
            for (let i = 0; i + 8 <= bits.length; i += 8) {
                const byte = parseInt(bits.slice(i, i + 8), 2);
                if (byte < 32 || byte > 126) {
                    return null; // Not printable ASCII
                }
                result += String.fromCharCode(byte);
            }
            return result.length > 0 ? result : null;
        };
        const matches = [];
        const locations = [];
        let decoded;
        let match;
        while ((match = base32Pattern.exec(input)) !== null) {
            const candidate = match[0];
            const decodedStr = base32Decode(candidate);
            if (decodedStr && decodedStr.length >= 4) {
                matches.push(candidate);
                locations.push(`index:${match.index}`);
                decoded = decoded ? decoded + " " + decodedStr : decodedStr;
            }
        }
        return {
            found: matches.length > 0,
            matches,
            locations,
            decoded,
        };
    }
    /**
     * Test `content` against every configured threat pattern and append one
     * record per matching pattern to `threatsFound`.
     *
     * @param {string} content - Text to scan.
     * @param {string} layer - Label of the layer the text came from (e.g. "original").
     * @param {object[]} threatsFound - Accumulator, mutated in place.
     */
    checkThreats(content, layer, threatsFound) {
        for (const pattern of this.config.threatPatterns) {
            // The patterns are shared and carry the `g` flag, so `test` resumes
            // from the previous call's lastIndex. Reset it so every scan starts
            // at the beginning; otherwise a repeated input can slip through.
            pattern.pattern.lastIndex = 0;
            if (pattern.pattern.test(content)) {
                threatsFound.push({
                    pattern_name: pattern.name,
                    severity: pattern.severity,
                    in_layer: layer,
                });
            }
        }
    }
    /**
     * Apply one pass of every reversible decoding (URL, Unicode escapes, hex
     * pairs, \NNN octal, HTML entities) and strip zero-width and bidi control
     * characters. Callers repeat the pass to peel nested encodings.
     *
     * @param {string} input
     * @returns {string}
     */
    fullyDecode(input) {
        let result = decodePercentEncoding(input);
        result = decodeUnicodeEscapes(result);
        // Hex decode (0x41 / \x41)
        result = result.replace(/(?:0x|\\x)([0-9A-Fa-f]{2})/g, (_, hex) => String.fromCharCode(parseInt(hex, 16)));
        // Octal decode (\NNN format)
        result = result.replace(/\\([0-7]{3})/g, (_, oct) => String.fromCharCode(parseInt(oct, 8)));
        result = decodeHtmlEntities(result);
        // Remove zero-width characters
        result = result.replace(/[\u200B\u200C\u200D\uFEFF\u00AD\u2060\u180E]/g, "");
        // Remove bidi control characters
        result = result.replace(/[\u202A\u202B\u202C\u202D\u202E\u2066\u2067\u2068\u2069]/g, "");
        return result;
    }
}
697
+ exports.EncodingDetector = EncodingDetector;
698
+ //# sourceMappingURL=encoding-detector.js.map