visus-mcp 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +36 -0
- package/CLAUDE.md +324 -0
- package/README.md +290 -0
- package/SECURITY.md +360 -0
- package/STATUS.md +482 -0
- package/TROUBLESHOOT-BUILD-20260319-1450.md +546 -0
- package/TROUBLESHOOT-FETCH-20260320-1150.md +168 -0
- package/TROUBLESHOOT-SSL-20260320-1138.md +171 -0
- package/TROUBLESHOOT-STRUCTURED-20260320-1200.md +246 -0
- package/TROUBLESHOOT-TEST-20260320-0942.md +281 -0
- package/VISUS-CLAUDE-CODE-PROMPT.md +324 -0
- package/VISUS-PROJECT-PLAN.md +198 -0
- package/dist/browser/__mocks__/playwright-renderer.d.ts +25 -0
- package/dist/browser/__mocks__/playwright-renderer.d.ts.map +1 -0
- package/dist/browser/__mocks__/playwright-renderer.js +119 -0
- package/dist/browser/__mocks__/playwright-renderer.js.map +1 -0
- package/dist/browser/playwright-renderer.d.ts +36 -0
- package/dist/browser/playwright-renderer.d.ts.map +1 -0
- package/dist/browser/playwright-renderer.js +115 -0
- package/dist/browser/playwright-renderer.js.map +1 -0
- package/dist/index.d.ts +14 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +129 -0
- package/dist/index.js.map +1 -0
- package/dist/sanitizer/index.d.ts +55 -0
- package/dist/sanitizer/index.d.ts.map +1 -0
- package/dist/sanitizer/index.js +89 -0
- package/dist/sanitizer/index.js.map +1 -0
- package/dist/sanitizer/injection-detector.d.ts +34 -0
- package/dist/sanitizer/injection-detector.d.ts.map +1 -0
- package/dist/sanitizer/injection-detector.js +89 -0
- package/dist/sanitizer/injection-detector.js.map +1 -0
- package/dist/sanitizer/patterns.d.ts +30 -0
- package/dist/sanitizer/patterns.d.ts.map +1 -0
- package/dist/sanitizer/patterns.js +372 -0
- package/dist/sanitizer/patterns.js.map +1 -0
- package/dist/sanitizer/pii-redactor.d.ts +29 -0
- package/dist/sanitizer/pii-redactor.d.ts.map +1 -0
- package/dist/sanitizer/pii-redactor.js +189 -0
- package/dist/sanitizer/pii-redactor.js.map +1 -0
- package/dist/tools/fetch-structured.d.ts +46 -0
- package/dist/tools/fetch-structured.d.ts.map +1 -0
- package/dist/tools/fetch-structured.js +186 -0
- package/dist/tools/fetch-structured.js.map +1 -0
- package/dist/tools/fetch.d.ts +44 -0
- package/dist/tools/fetch.d.ts.map +1 -0
- package/dist/tools/fetch.js +97 -0
- package/dist/tools/fetch.js.map +1 -0
- package/dist/types.d.ts +93 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +16 -0
- package/dist/types.js.map +1 -0
- package/jest.config.js +30 -0
- package/jest.setup.js +9 -0
- package/package.json +52 -0
- package/src/browser/__mocks__/playwright-renderer.ts +140 -0
- package/src/browser/playwright-renderer.ts +142 -0
- package/src/index.ts +169 -0
- package/src/sanitizer/index.ts +127 -0
- package/src/sanitizer/injection-detector.ts +121 -0
- package/src/sanitizer/patterns.ts +424 -0
- package/src/sanitizer/pii-redactor.ts +226 -0
- package/src/tools/fetch-structured.ts +218 -0
- package/src/tools/fetch.ts +108 -0
- package/src/types.ts +101 -0
- package/test-output.txt +4 -0
- package/tests/fetch-tool.test.ts +329 -0
- package/tests/injection-corpus.ts +338 -0
- package/tests/sanitizer.test.ts +306 -0
- package/tsconfig.json +25 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Lateos Injection Pattern Library
|
|
3
|
+
*
|
|
4
|
+
* 43 validated injection pattern categories for detecting and neutralizing
|
|
5
|
+
* prompt injection attacks in web content before it reaches the LLM.
|
|
6
|
+
*
|
|
7
|
+
* Each pattern includes:
|
|
8
|
+
* - name: Pattern identifier
|
|
9
|
+
* - description: What this pattern detects
|
|
10
|
+
* - regex: Detection pattern (case-insensitive by default)
|
|
11
|
+
* - severity: risk level (critical, high, medium, low)
|
|
12
|
+
* - action: how to handle matches (strip, redact, escape)
|
|
13
|
+
*/
|
|
14
|
+
// Ordered list of injection detectors. Each entry carries:
//   name        - stable identifier for the category
//   description - what the pattern is meant to catch
//   regex       - detection pattern; all are global (`g`) and most are case-insensitive (`i`)
//   severity    - 'critical' | 'high' | 'medium' | 'low'
//   action      - 'strip' | 'redact' | 'escape'
// NOTE: because every regex is global, `regex.test()` / `regex.exec()` keep state in
// `lastIndex`; prefer `String.prototype.match/matchAll/replace` or reset `lastIndex` first.
export const INJECTION_PATTERNS = [
    // 1. Comment injection (must come before direct_instruction_injection to avoid false positives)
    {
        name: 'comment_injection',
        description: 'Instructions hidden in HTML/JS/SQL comments',
        regex: /<!--[\s\S]*?(ignore|admin|system|instruction|bypass|override)[\s\S]*?-->|\/\*[\s\S]*?(ignore|admin|system|instruction)[\s\S]*?\*\/|\/\/\s*(ignore|admin|system)\b|--\s*(ignore|bypass)\b|#\s*(ignore|admin|override)\b/gi,
        severity: 'medium',
        action: 'strip'
    },
    // 2. Direct instruction injection
    {
        name: 'direct_instruction_injection',
        description: 'Attempts to override or ignore previous instructions',
        regex: /\b(ignore|disregard|forget|override|replace|substitute)\s+(all\s+)?(previous|prior|above|earlier|your)\s+(instructions?|prompts?|commands?|directions?|rules?)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 3. Role hijacking
    {
        name: 'role_hijacking',
        description: 'Attempts to change AI persona or role',
        regex: /\b(you\s+are\s+now|your\s+new\s+(role|persona|identity)\s+is|act\s+as|pretend\s+(you\s+are|to\s+be)|roleplay\s+as)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 4. System prompt extraction
    {
        name: 'system_prompt_extraction',
        description: 'Attempts to reveal system instructions',
        regex: /\b(repeat|print|show|display|reveal|output|tell\s+me)\s+(your|the)\s+(system\s+)?(prompt|instructions?|rules|guidelines|directives|configuration)\b/gi,
        severity: 'high',
        action: 'redact'
    },
    // 5. Privilege escalation
    {
        name: 'privilege_escalation',
        description: 'Attempts to gain elevated permissions',
        regex: /\b(admin\s+mode|administrator\s+access|developer\s+(mode|override)|root\s+access|elevated\s+privileges?|emergency\s+protocol|debug\s+mode|sudo\s+mode)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 6. Context poisoning
    {
        name: 'context_poisoning',
        description: 'Falsely claims prior agreement or context',
        regex: /\b(the\s+user\s+(said|told|mentioned|confirmed)|as\s+(we\s+)?(discussed|agreed|confirmed)\s+(earlier|previously|before)|you\s+already\s+(said|agreed|confirmed)|continuing\s+from\s+(our|the)\s+previous)\b/gi,
        severity: 'high',
        action: 'redact'
    },
    // 7. Data exfiltration
    {
        name: 'data_exfiltration',
        description: 'Attempts to send data to external endpoints',
        regex: /\b(send|post|transmit|forward|email|upload)\s+(this|the|all|your)\s+(to|data|information)\b.*?\b(http|mailto|ftp):/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 8. Encoding obfuscation - Base64
    {
        name: 'base64_obfuscation',
        description: 'Base64-encoded instructions',
        regex: /\b(decode|decipher|decrypt)\s+(this\s+)?(base64|b64)\b.*?[A-Za-z0-9+/]{20,}={0,2}/gi,
        severity: 'high',
        action: 'redact'
    },
    // 9. Unicode lookalike characters
    {
        name: 'unicode_lookalikes',
        description: 'Uses visually similar Unicode characters',
        regex: /[\u0430-\u044f\u0410-\u042f].*\b(ignore|admin|system)\b/gi, // Cyrillic mixed with English
        severity: 'medium',
        action: 'strip'
    },
    // 10. Zero-width characters
    {
        name: 'zero_width_characters',
        description: 'Hidden zero-width Unicode characters',
        regex: /[\u200B-\u200D\uFEFF]/g,
        severity: 'high',
        action: 'strip'
    },
    // 11. HTML script injection
    {
        name: 'html_script_injection',
        description: 'HTML script tags or event handlers',
        regex: /<script\b[^>]*>[\s\S]*?<\/script>|<iframe\b[^>]*>|on(click|load|error|mouse\w+)\s*=/gi,
        severity: 'critical',
        action: 'escape'
    },
    // 12. Data URI injection
    {
        name: 'data_uri_injection',
        description: 'Data URIs that could contain instructions',
        regex: /data:text\/(html|javascript)[;,]/gi,
        severity: 'high',
        action: 'redact'
    },
    // 13. Markdown link injection
    {
        name: 'markdown_link_injection',
        description: 'Malicious markdown links',
        regex: /\[.*?\]\s*\(\s*javascript:|!\[.*?\]\s*\(\s*data:/gi,
        severity: 'high',
        action: 'redact'
    },
    // 14. URL fragment attacks (HashJack)
    {
        name: 'url_fragment_hashjack',
        description: 'Instructions hidden in URL fragments',
        regex: /#(ignore|admin|system|prompt)[_\w]*\s+/gi,
        severity: 'medium',
        action: 'strip'
    },
    // 15. Social engineering urgency
    {
        name: 'social_engineering_urgency',
        description: 'Urgency language to bypass caution',
        regex: /\b(urgent|critical|emergency|immediately|asap|right\s+now|time\s+sensitive|must\s+act\s+now)\b.*\b(ignore|override|bypass)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 16. Instruction delimiter injection
    {
        name: 'instruction_delimiter_injection',
        description: 'Fake instruction boundaries',
        // Only the word-based phrases are wrapped in \b...\b. The tag and dash
        // delimiters start and end with punctuation, where \b would demand an
        // adjacent word character and so reject "<instruction>" or "--- system ---"
        // in normal, whitespace-separated text.
        regex: /\b(end\s+of\s+(instructions?|prompt)|new\s+instructions?)\b|<\/?instruction>|---\s*system\s*---/gi,
        severity: 'high',
        action: 'redact'
    },
    // 17. Multi-language obfuscation
    {
        name: 'multi_language_obfuscation',
        description: 'Instructions in non-English using English keywords',
        // NOTE(review): without the `u` flag \b only knows ASCII word characters, so
        // the leading \b does not fire before "ü" at the start of a word; the
        // "überspringen" alternative likely never matches standalone - confirm and fix.
        regex: /\b(ignorar|ignorer|ignora|überspringen)\b.*\b(instrucciones|instructions|istruzioni)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 18. Reverse text obfuscation
    {
        name: 'reverse_text_obfuscation',
        description: 'Instructions written backwards',
        regex: /\b(erongi|nimda|tpmorp|metsys)\b/gi, // "ignore", "admin", "prompt", "system" reversed
        severity: 'medium',
        action: 'strip'
    },
    // 19. Leetspeak obfuscation
    {
        name: 'leetspeak_obfuscation',
        description: 'L33tspeak encoded instructions',
        regex: /\b(1gn0r3|4dm1n|sy5t3m|pr0mpt|1nstruct10n)\b/gi,
        severity: 'low',
        action: 'strip'
    },
    // 20. Jailbreak keywords
    {
        name: 'jailbreak_keywords',
        description: 'Common jailbreak attempt keywords',
        regex: /\b(jailbreak|dan\s+mode|developer\s+mode\s+enabled|anarchy\s+mode|unrestricted\s+mode)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 21. Token smuggling
    {
        name: 'token_smuggling',
        description: 'Attempts to inject special tokens',
        regex: /<\|im_(start|end)\|>|<\|endoftext\|>|\[INST\]|\[\/INST\]/gi,
        severity: 'high',
        action: 'redact'
    },
    // 22. System message injection
    {
        name: 'system_message_injection',
        description: 'Fake system messages',
        regex: /\[SYSTEM\]|\[ASSISTANT\]|\[USER\]|<\|system\|>/gi,
        severity: 'high',
        action: 'redact'
    },
    // 23. Conversation reset
    {
        name: 'conversation_reset',
        description: 'Attempts to reset conversation state',
        regex: /\b(reset|restart|clear|new)\s+(conversation|chat|session|context)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 24. Memory manipulation
    {
        name: 'memory_manipulation',
        description: 'Attempts to manipulate AI memory or implant false context',
        regex: /\b(remember|memorize|recall|don'?t\s+forget)\s+(that\s+)?(I\s+am|you\s+are|we\s+are|you\s+must|I\s+told\s+you|you\s+already|your\s+administrator|your\s+creator)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 25. Capability probing
    {
        name: 'capability_probing',
        description: 'Probes for hidden capabilities',
        regex: /\b(can\s+you|are\s+you\s+able\s+to)\s+(access|browse|execute|run|download)\b/gi,
        severity: 'low',
        action: 'strip'
    },
    // 26. Chain-of-thought manipulation
    {
        name: 'chain_of_thought_manipulation',
        description: 'Manipulates reasoning process',
        regex: /\blet'?s\s+think\s+step\s+by\s+step\b.*\b(ignore|bypass|override)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 27. Hypothetical scenario injection
    {
        name: 'hypothetical_scenario_injection',
        description: 'Uses hypotheticals to bypass restrictions',
        regex: /\b(hypothetically|imagine\s+you\s+could|in\s+a\s+world\s+where|pretend\s+there\s+are\s+no|let'?s\s+say\s+you\s+were\s+allowed|if\s+you\s+had\s+no\s+restrictions|for\s+the\s+sake\s+of\s+argument|suppose|what\s+if)\b.*\b(unrestricted|ignore|bypass|allowed|no\s+rules|no\s+guidelines|were\s+able)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 28. Ethical override
    {
        name: 'ethical_override',
        description: 'Attempts to override ethical guidelines',
        regex: /\b(ignore|bypass|disable)\s+(ethics|safety|guidelines|guardrails|restrictions)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 29. Output format manipulation
    {
        name: 'output_format_manipulation',
        description: 'Manipulates output format to hide instructions',
        regex: /\b(format|structure|style)\s+your\s+(response|output)\s+as\b.*\b(system|admin|instruction)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 30. Negative instruction
    {
        name: 'negative_instruction',
        description: 'Uses negation to inject instructions',
        regex: /\bdo\s+not\s+(tell|mention|say)\s+(anyone|the\s+user)\b/gi,
        severity: 'low',
        action: 'strip'
    },
    // 31. Credential harvesting
    {
        name: 'credential_harvesting',
        description: 'Attempts to harvest credentials',
        regex: /\b(enter|provide|give\s+me)\s+(your|the)\s+(password|api\s+key|token|credentials?|secret)\b/gi,
        severity: 'critical',
        action: 'redact'
    },
    // 32. Time-based triggers
    {
        name: 'time_based_triggers',
        description: 'Conditional execution based on time',
        regex: /\b(after|when|once)\s+\d+\s+(seconds?|minutes?|hours?)\b.*\b(then|execute|run)\b/gi,
        severity: 'low',
        action: 'strip'
    },
    // 33. Code execution requests
    {
        name: 'code_execution_requests',
        description: 'Requests code execution or contains dangerous code patterns',
        // `eval(` / `exec(` end in "(", so they must not be followed by \b: that
        // would only match when a word character comes right after the parenthesis
        // and would miss calls such as eval("...") or exec('...').
        regex: /\b(execute|run\s+the\s+following|os\.system|subprocess|__import__|shell\s+command|bash\s+-c)\b|\b(?:eval|exec)\(/gi,
        severity: 'high',
        action: 'redact'
    },
    // 34. File system access
    {
        name: 'file_system_access',
        description: 'Requests file system operations',
        regex: /\b(read|write|delete|access)\s+(file|directory|folder)\b/gi,
        severity: 'high',
        action: 'redact'
    },
    // 35. Training data extraction
    {
        name: 'training_data_extraction',
        description: 'Attempts to extract training data',
        regex: /\b(repeat|recite|output)\s+(verbatim|exactly)\s+(from\s+your\s+)?(training|data|corpus)\b/gi,
        severity: 'high',
        action: 'redact'
    },
    // 36. Simulator mode
    {
        name: 'simulator_mode',
        description: 'Requests simulator/VM mode',
        regex: /\b(simulate|emulate|pretend\s+you\s+are)\s+(a\s+)?(linux|terminal|bash|shell|vm|virtual\s+machine)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 37. Nested encoding
    {
        name: 'nested_encoding',
        description: 'Multiple layers of encoding or double-encoded sequences',
        regex: /\b(decode|decrypt|decipher)\s+(this\s+)?(twice|again|multiple\s+times)\b|%25[0-9A-F]{2}|[A-Za-z0-9+/]{40,}={0,2}.*[A-Za-z0-9+/]{40,}={0,2}/gi,
        severity: 'high',
        action: 'redact'
    },
    // 38. Payload splitting
    {
        name: 'payload_splitting',
        description: 'Splits payload across multiple inputs',
        regex: /\b(combine|concatenate|join)\s+(the\s+)?(previous|above)\s+(parts?|sections?|fragments?)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 39. CSS-based hiding
    {
        name: 'css_hiding',
        description: 'Hidden content via CSS',
        regex: /display\s*:\s*none|visibility\s*:\s*hidden|opacity\s*:\s*0/gi,
        severity: 'medium',
        action: 'strip'
    },
    // 40. Authority impersonation
    {
        name: 'authority_impersonation',
        description: 'Claims to be an authority figure',
        regex: /\b(I\s+am|this\s+is)\s+(your\s+)?(developer|creator|administrator|ceo|manager)\b/gi,
        severity: 'high',
        action: 'redact'
    },
    // 41. Testing/debugging claims
    {
        name: 'testing_debugging_claims',
        description: 'Claims this is a test environment',
        regex: /\b(this\s+is\s+a\s+)?(test|testing|debug|debugging)\s+(environment|mode|session)\b/gi,
        severity: 'medium',
        action: 'redact'
    },
    // 42. Callback URL injection
    {
        name: 'callback_url_injection',
        description: 'Suspicious callback URLs',
        regex: /\b(callback|webhook|redirect)\s+(url|endpoint)\s*[:=]\s*https?:\/\/(?!localhost)/gi,
        severity: 'high',
        action: 'redact'
    },
    // 43. Whitespace steganography
    {
        name: 'whitespace_steganography',
        description: 'Hidden content in whitespace patterns',
        regex: /\s{10,}/g,
        severity: 'low',
        action: 'strip'
    }
];
|
|
360
|
+
/**
 * List the `name` of every registered injection pattern, in registry order.
 * Intended for logging and tests.
 *
 * @returns A new array containing one name per entry of INJECTION_PATTERNS.
 */
export function getAllPatternNames() {
    const names = [];
    for (const pattern of INJECTION_PATTERNS) {
        names.push(pattern.name);
    }
    return names;
}
|
|
366
|
+
/**
 * Select the injection patterns whose `severity` is strictly equal to the
 * requested level.
 *
 * @param severity Severity label to look for (compared with `===`).
 * @returns A new array holding the matching pattern objects, in registry order;
 *          empty when nothing matches.
 */
export function getPatternsBySeverity(severity) {
    const selected = [];
    for (const pattern of INJECTION_PATTERNS) {
        if (pattern.severity === severity) {
            selected.push(pattern);
        }
    }
    return selected;
}
|
|
372
|
+
//# sourceMappingURL=patterns.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"patterns.js","sourceRoot":"","sources":["../../src/sanitizer/patterns.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAUH,MAAM,CAAC,MAAM,kBAAkB,GAAuB;IACpD,gGAAgG;IAChG;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EAAE,6CAA6C;QAC1D,KAAK,EAAE,0NAA0N;QACjO,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IAED,kCAAkC;IAClC;QACE,IAAI,EAAE,8BAA8B;QACpC,WAAW,EAAE,sDAAsD;QACnE,KAAK,EAAE,oKAAoK;QAC3K,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,oBAAoB;IACpB;QACE,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,uCAAuC;QACpD,KAAK,EAAE,wHAAwH;QAC/H,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,8BAA8B;IAC9B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,wCAAwC;QACrD,KAAK,EAAE,uJAAuJ;QAC9J,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,0BAA0B;IAC1B;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EAAE,uCAAuC;QACpD,KAAK,EAAE,4JAA4J;QACnK,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,uBAAuB;IACvB;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EAAE,2CAA2C;QACxD,KAAK,EAAE,+MAA+M;QACtN,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,uBAAuB;IACvB;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EAAE,6CAA6C;QAC1D,KAAK,EAAE,sHAAsH;QAC7H,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,mCAAmC;IACnC;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,6BAA6B;QAC1C,KAAK,EAAE,qFAAqF;QAC5F,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,kCAAkC;IAClC;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,0CAA0C;QACvD,KAAK,EAAE,2DAA2D,EAAE,8BAA8B;QAClG,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IAED,2BAA2B;IAC3B;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,sCAAsC;QACnD,KAAK,EAAE,wBAAwB;QAC/B,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,OAAO;KAChB;IAED,4BAA4B;IAC5B;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,oCAAoC;QACjD,KAAK,EAAE,uFAAuF;QAC9F,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,yBAAyB;IACzB;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,2CAA2C;QACxD,KAAK,EAAE,oCAAoC;QAC3C,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,8BAA8B;IAC9B;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EAAE,0BAA0B;QACvC,KAAK,EAAE,oDAAoD;QAC3D,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,sCAAsC;IACtC;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,sCAAsC;QACnD,KAA
K,EAAE,0CAA0C;QACjD,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IAED,iCAAiC;IACjC;QACE,IAAI,EAAE,4BAA4B;QAClC,WAAW,EAAE,oCAAoC;QACjD,KAAK,EAAE,gIAAgI;QACvI,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,sCAAsC;IACtC;QACE,IAAI,EAAE,iCAAiC;QACvC,WAAW,EAAE,6BAA6B;QAC1C,KAAK,EAAE,mGAAmG;QAC1G,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,iCAAiC;IACjC;QACE,IAAI,EAAE,4BAA4B;QAClC,WAAW,EAAE,oDAAoD;QACjE,KAAK,EAAE,0FAA0F;QACjG,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,+BAA+B;IAC/B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,gCAAgC;QAC7C,KAAK,EAAE,oCAAoC,EAAE,iDAAiD;QAC9F,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IAED,4BAA4B;IAC5B;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,gCAAgC;QAC7C,KAAK,EAAE,gDAAgD;QACvD,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,OAAO;KAChB;IAED,yBAAyB;IACzB;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,mCAAmC;QAChD,KAAK,EAAE,4FAA4F;QACnG,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,sBAAsB;IACtB;QACE,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,mCAAmC;QAChD,KAAK,EAAE,4DAA4D;QACnE,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,+BAA+B;IAC/B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,sBAAsB;QACnC,KAAK,EAAE,kDAAkD;QACzD,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,yBAAyB;IACzB;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,sCAAsC;QACnD,KAAK,EAAE,uEAAuE;QAC9E,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,0BAA0B;IAC1B;QACE,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EAAE,2DAA2D;QACxE,KAAK,EAAE,sKAAsK;QAC7K,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,yBAAyB;IACzB;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,gCAAgC;QAC7C,KAAK,EAAE,gFAAgF;QACvF,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,OAAO;KAChB;IAED,oCAAoC;IACpC;QACE,IAAI,EAAE,+BAA+B;QACrC,WAAW,EAAE,+BAA+B;QAC5C,KAAK,EAAE,uEAAuE;QAC9E,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,sCAAsC;IACtC;QACE,IAAI,EAAE,iCAAiC;QACvC,WAAW,EAAE,2CAA2C;QACxD,KAAK,EAAE,2SAA2S;QAClT,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,uBAAuB;IACvB;QACE,IAAI,EAAE,kBAAkB;QACxB,WAAW,EAAE,yCAAyC;QACtD,KAAK,EAAE,oFAAoF;QAC3F,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,iCAAiC;IACjC;QACE,IAAI,EAAE,4BAA4B;QAClC,WAAW,E
AAE,gDAAgD;QAC7D,KAAK,EAAE,gGAAgG;QACvG,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,2BAA2B;IAC3B;QACE,IAAI,EAAE,sBAAsB;QAC5B,WAAW,EAAE,sCAAsC;QACnD,KAAK,EAAE,2DAA2D;QAClE,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,OAAO;KAChB;IAED,4BAA4B;IAC5B;QACE,IAAI,EAAE,uBAAuB;QAC7B,WAAW,EAAE,iCAAiC;QAC9C,KAAK,EAAE,+FAA+F;QACtG,QAAQ,EAAE,UAAU;QACpB,MAAM,EAAE,QAAQ;KACjB;IAED,0BAA0B;IAC1B;QACE,IAAI,EAAE,qBAAqB;QAC3B,WAAW,EAAE,qCAAqC;QAClD,KAAK,EAAE,oFAAoF;QAC3F,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,OAAO;KAChB;IAED,8BAA8B;IAC9B;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EAAE,6DAA6D;QAC1E,KAAK,EAAE,gHAAgH;QACvH,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,yBAAyB;IACzB;QACE,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,iCAAiC;QAC9C,KAAK,EAAE,4DAA4D;QACnE,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,+BAA+B;IAC/B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,mCAAmC;QAChD,KAAK,EAAE,6FAA6F;QACpG,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,qBAAqB;IACrB;QACE,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,4BAA4B;QACzC,KAAK,EAAE,wGAAwG;QAC/G,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,sBAAsB;IACtB;QACE,IAAI,EAAE,iBAAiB;QACvB,WAAW,EAAE,yDAAyD;QACtE,KAAK,EAAE,8IAA8I;QACrJ,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,wBAAwB;IACxB;QACE,IAAI,EAAE,mBAAmB;QACzB,WAAW,EAAE,uCAAuC;QACpD,KAAK,EAAE,8FAA8F;QACrG,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,uBAAuB;IACvB;QACE,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,wBAAwB;QACrC,KAAK,EAAE,8DAA8D;QACrE,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,OAAO;KAChB;IAED,8BAA8B;IAC9B;QACE,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EAAE,kCAAkC;QAC/C,KAAK,EAAE,oFAAoF;QAC3F,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,+BAA+B;IAC/B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,mCAAmC;QAChD,KAAK,EAAE,sFAAsF;QAC7F,QAAQ,EAAE,QAAQ;QAClB,MAAM,EAAE,QAAQ;KACjB;IAED,6BAA6B;IAC7B;QACE,IAAI,EAAE,wBAAwB;QAC9B,WAAW,EAAE,0BAA0B;QACvC,KAAK,EAAE,oFAAoF;QAC3F,QAAQ,EAAE,MAAM;QAChB,MAAM,EAAE,QAAQ;KACjB;IAED,+BAA+B;IAC/B;QACE,IAAI,EAAE,0BAA0B;QAChC,WAAW,EAAE,uCAAuC;QACpD,KAAK,EAAE,UAAU;QACjB,QAAQ,EAAE,KAAK;QACf,MAAM,EAAE,OAAO;KAChB;CACF,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,kBAAkB;IAChC,
OAAO,kBAAkB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,QAAgD;IACpF,OAAO,kBAAkB,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC;AACjE,CAAC"}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Redaction Engine
|
|
3
|
+
*
|
|
4
|
+
* Detects and redacts personally identifiable information (PII) from content
|
|
5
|
+
* to prevent leakage of sensitive data to the LLM.
|
|
6
|
+
*
|
|
7
|
+
* Redacts: emails, phone numbers, SSNs, credit cards, IP addresses
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Outcome of a PII redaction pass over a piece of text.
 */
export interface PIIRedactionResult {
    /** The text after redaction. */
    content: string;
    /** Identifiers of the PII kinds that were redacted. */
    pii_types_redacted: string[];
    /** Whether the returned content differs from the input. */
    content_modified: boolean;
    /** Additional details about the pass. */
    metadata: {
        /** Number of redactions performed, keyed by PII kind identifier. */
        redaction_counts: Record<string, number>;
    };
}
|
|
17
|
+
/**
|
|
18
|
+
* Redact PII from content
|
|
19
|
+
*/
|
|
20
|
+
export declare function redactPII(content: string): PIIRedactionResult;
|
|
21
|
+
/**
|
|
22
|
+
* Check if content contains any PII (without redacting)
|
|
23
|
+
*/
|
|
24
|
+
export declare function containsPII(content: string): boolean;
|
|
25
|
+
/**
|
|
26
|
+
* Get list of PII types detected (without redacting)
|
|
27
|
+
*/
|
|
28
|
+
export declare function detectPIITypes(content: string): string[];
|
|
29
|
+
//# sourceMappingURL=pii-redactor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-redactor.d.ts","sourceRoot":"","sources":["../../src/sanitizer/pii-redactor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,MAAM,WAAW,kBAAkB;IACjC,OAAO,EAAE,MAAM,CAAC;IAChB,kBAAkB,EAAE,MAAM,EAAE,CAAC;IAC7B,gBAAgB,EAAE,OAAO,CAAC;IAC1B,QAAQ,EAAE;QACR,gBAAgB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;KAC1C,CAAC;CACH;AAuID;;GAEG;AACH,wBAAgB,SAAS,CAAC,OAAO,EAAE,MAAM,GAAG,kBAAkB,CAmC7D;AAED;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,MAAM,GAAG,OAAO,CAYpD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAcxD"}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PII Redaction Engine
|
|
3
|
+
*
|
|
4
|
+
* Detects and redacts personally identifiable information (PII) from content
|
|
5
|
+
* to prevent leakage of sensitive data to the LLM.
|
|
6
|
+
*
|
|
7
|
+
* Redacts: emails, phone numbers, SSNs, credit cards, IP addresses
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* PII detection patterns with validators
|
|
11
|
+
*/
|
|
12
|
+
const PII_PATTERNS = [
|
|
13
|
+
// Email addresses
|
|
14
|
+
{
|
|
15
|
+
type: 'EMAIL',
|
|
16
|
+
name: 'email',
|
|
17
|
+
regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b/g,
|
|
18
|
+
validator: (match) => {
|
|
19
|
+
// Basic email validation
|
|
20
|
+
return /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(match);
|
|
21
|
+
}
|
|
22
|
+
},
|
|
23
|
+
// Phone numbers (US and international formats)
|
|
24
|
+
{
|
|
25
|
+
type: 'PHONE',
|
|
26
|
+
name: 'phone',
|
|
27
|
+
regex: /(\+\d{1,3}[\s-]?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b/g,
|
|
28
|
+
validator: (match) => {
|
|
29
|
+
// Remove non-digits and check length
|
|
30
|
+
const digits = match.replace(/\D/g, '');
|
|
31
|
+
return digits.length >= 10 && digits.length <= 15;
|
|
32
|
+
}
|
|
33
|
+
},
|
|
34
|
+
// US Social Security Numbers
|
|
35
|
+
{
|
|
36
|
+
type: 'SSN',
|
|
37
|
+
name: 'ssn',
|
|
38
|
+
regex: /\b\d{3}[-\s]?\d{2}[-\s]?\d{4}\b/g,
|
|
39
|
+
validator: (match) => {
|
|
40
|
+
const digits = match.replace(/\D/g, '');
|
|
41
|
+
// Basic SSN format check (9 digits)
|
|
42
|
+
if (digits.length !== 9)
|
|
43
|
+
return false;
|
|
44
|
+
// Reject invalid SSN patterns
|
|
45
|
+
if (digits === '000000000')
|
|
46
|
+
return false;
|
|
47
|
+
if (digits.startsWith('000'))
|
|
48
|
+
return false;
|
|
49
|
+
if (digits.startsWith('666'))
|
|
50
|
+
return false;
|
|
51
|
+
if (digits.startsWith('9'))
|
|
52
|
+
return false;
|
|
53
|
+
return true;
|
|
54
|
+
}
|
|
55
|
+
},
|
|
56
|
+
// Credit card numbers (13-19 digits with optional separators)
|
|
57
|
+
// Matches: 4-4-4-4 (Visa/MC), 4-6-5 (AmEx), or continuous digits
|
|
58
|
+
{
|
|
59
|
+
type: 'CC',
|
|
60
|
+
name: 'credit_card',
|
|
61
|
+
regex: /\b(?:\d{4}[\s-]?\d{6}[\s-]?\d{5}|\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4,7}|\d{13,19})\b/g,
|
|
62
|
+
validator: (match) => {
|
|
63
|
+
const digits = match.replace(/\D/g, '');
|
|
64
|
+
if (digits.length < 13 || digits.length > 19)
|
|
65
|
+
return false;
|
|
66
|
+
return luhnCheck(digits);
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
// IPv4 addresses
|
|
70
|
+
{
|
|
71
|
+
type: 'IP',
|
|
72
|
+
name: 'ipv4',
|
|
73
|
+
regex: /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g,
|
|
74
|
+
validator: (match) => {
|
|
75
|
+
// Exclude common non-PII patterns like version numbers
|
|
76
|
+
if (match.startsWith('0.0.0'))
|
|
77
|
+
return false;
|
|
78
|
+
if (match.startsWith('255.255.255'))
|
|
79
|
+
return false;
|
|
80
|
+
return true;
|
|
81
|
+
}
|
|
82
|
+
},
|
|
83
|
+
// IPv6 addresses (simplified pattern)
|
|
84
|
+
{
|
|
85
|
+
type: 'IP',
|
|
86
|
+
name: 'ipv6',
|
|
87
|
+
regex: /\b(?:[0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}\b/g,
|
|
88
|
+
validator: () => true
|
|
89
|
+
},
|
|
90
|
+
// US Passport numbers
|
|
91
|
+
{
|
|
92
|
+
type: 'PASSPORT',
|
|
93
|
+
name: 'passport',
|
|
94
|
+
regex: /\b[A-Z]{1,2}\d{6,9}\b/g,
|
|
95
|
+
validator: (match) => {
|
|
96
|
+
// Basic format: 1-2 letters + 6-9 digits
|
|
97
|
+
return /^[A-Z]{1,2}\d{6,9}$/.test(match);
|
|
98
|
+
}
|
|
99
|
+
},
|
|
100
|
+
// Driver's license patterns (varies by state, general pattern)
|
|
101
|
+
{
|
|
102
|
+
type: 'DL',
|
|
103
|
+
name: 'drivers_license',
|
|
104
|
+
regex: /\b[A-Z]{1,2}\d{5,8}\b/g,
|
|
105
|
+
validator: (match) => {
|
|
106
|
+
// Overlap with passport, but keep for completeness
|
|
107
|
+
return /^[A-Z]{1,2}\d{5,8}$/.test(match);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
];
|
|
111
|
+
/**
 * Luhn checksum test, used to validate credit card candidates.
 *
 * Walks the string from its last character to its first, doubling every second
 * digit (subtracting 9 when the doubled value exceeds 9) and summing the results.
 *
 * @param digits String of decimal digits to check.
 * @returns true when the checksum is a multiple of 10. An empty string yields
 *          true (sum 0); any non-digit character makes the sum NaN and yields false.
 */
function luhnCheck(digits) {
    const lastIndex = digits.length - 1;
    let total = 0;
    for (let offset = 0; offset <= lastIndex; offset++) {
        const value = parseInt(digits.charAt(lastIndex - offset), 10);
        if (offset % 2 === 1) {
            // Every second digit, counting from the right, is doubled.
            const doubled = value * 2;
            total += doubled > 9 ? doubled - 9 : doubled;
        }
        else {
            total += value;
        }
    }
    return total % 10 === 0;
}
|
|
130
|
+
/**
 * Redact PII from content.
 *
 * Each entry of PII_PATTERNS is applied in order to the progressively
 * sanitized text. A regex match is replaced with `[REDACTED:<type>]` when the
 * pattern has no validator or its validator accepts the matched text;
 * rejected matches are left untouched.
 *
 * Replacement is done by match position (regex + callback) rather than by
 * searching for the matched text again. A text search replaces the first
 * textual occurrence, which can be a different span than the one the regex
 * matched (e.g. the same characters glued to a preceding word character),
 * leaving the real PII in the output.
 *
 * @param content Text to sanitize.
 * @returns Object with:
 *   - content: the sanitized text
 *   - pii_types_redacted: names of the patterns that redacted at least once
 *   - content_modified: whether the sanitized text differs from the input
 *   - metadata.redaction_counts: number of redactions per pattern name
 */
export function redactPII(content) {
    const piiTypesRedacted = new Set();
    const redactionCounts = {};
    let sanitizedContent = content;
    for (const pattern of PII_PATTERNS) {
        const placeholder = `[REDACTED:${pattern.type}]`;
        // With a global regex, replace() visits every match in place and
        // substitutes whatever the callback returns.
        sanitizedContent = sanitizedContent.replace(pattern.regex, (matchedText) => {
            // Apply validator if present; returning the match keeps it unchanged.
            if (pattern.validator && !pattern.validator(matchedText)) {
                return matchedText;
            }
            piiTypesRedacted.add(pattern.name);
            redactionCounts[pattern.name] = (redactionCounts[pattern.name] || 0) + 1;
            return placeholder;
        });
    }
    return {
        content: sanitizedContent,
        pii_types_redacted: Array.from(piiTypesRedacted),
        content_modified: sanitizedContent !== content,
        metadata: {
            redaction_counts: redactionCounts
        }
    };
}
|
|
160
|
+
/**
 * Report whether the content holds at least one piece of PII, leaving the
 * content itself untouched.
 *
 * A regex match counts when its pattern has no validator or the validator
 * accepts the matched text. The search stops at the first match that counts.
 *
 * @param content Text to inspect.
 * @returns true if any pattern in PII_PATTERNS yields an accepted match,
 *          otherwise false.
 */
export function containsPII(content) {
    return PII_PATTERNS.some((pattern) => {
        const candidates = Array.from(content.matchAll(pattern.regex));
        return candidates.some((candidate) => !pattern.validator || pattern.validator(candidate[0]));
    });
}
|
|
174
|
+
/**
 * List the names of the PII patterns found in the content, without
 * redacting anything.
 *
 * A pattern's name is reported when at least one of its regex matches is
 * accepted, i.e. the pattern has no validator or the validator returns a
 * truthy value for the matched text. Each name appears once, in the order
 * in which it was first detected.
 *
 * @param content Text to inspect.
 * @returns Array of detected pattern names (empty when nothing is found).
 */
export function detectPIITypes(content) {
    const found = new Set();
    PII_PATTERNS.forEach((pattern) => {
        const candidates = Array.from(content.matchAll(pattern.regex));
        candidates.forEach((candidate) => {
            const accepted = pattern.validator ? pattern.validator(candidate[0]) : true;
            if (accepted) {
                found.add(pattern.name);
            }
        });
    });
    return [...found];
}
|
|
189
|
+
//# sourceMappingURL=pii-redactor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pii-redactor.js","sourceRoot":"","sources":["../../src/sanitizer/pii-redactor.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAkBH;;GAEG;AACH,MAAM,YAAY,GAAiB;IACjC,kBAAkB;IAClB;QACE,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,KAAK,EAAE,sDAAsD;QAC7D,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,yBAAyB;YACzB,OAAO,4BAA4B,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAClD,CAAC;KACF;IAED,+CAA+C;IAC/C;QACE,IAAI,EAAE,OAAO;QACb,IAAI,EAAE,OAAO;QACb,KAAK,EAAE,0DAA0D;QACjE,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,qCAAqC;YACrC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACxC,OAAO,MAAM,CAAC,MAAM,IAAI,EAAE,IAAI,MAAM,CAAC,MAAM,IAAI,EAAE,CAAC;QACpD,CAAC;KACF;IAED,6BAA6B;IAC7B;QACE,IAAI,EAAE,KAAK;QACX,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kCAAkC;QACzC,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACxC,oCAAoC;YACpC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YACtC,8BAA8B;YAC9B,IAAI,MAAM,KAAK,WAAW;gBAAE,OAAO,KAAK,CAAC;YACzC,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC3C,IAAI,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC3C,IAAI,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YACzC,OAAO,IAAI,CAAC;QACd,CAAC;KACF;IAED,8DAA8D;IAC9D,iEAAiE;IACjE;QACE,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,aAAa;QACnB,KAAK,EAAE,yFAAyF;QAChG,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACxC,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE,IAAI,MAAM,CAAC,MAAM,GAAG,EAAE;gBAAE,OAAO,KAAK,CAAC;YAC3D,OAAO,SAAS,CAAC,MAAM,CAAC,CAAC;QAC3B,CAAC;KACF;IAED,iBAAiB;IACjB;QACE,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,gGAAgG;QACvG,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,uDAAuD;YACvD,IAAI,KAAK,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO,KAAK,CAAC;YAC5C,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC;gBAAE,OAAO,KAAK,CAAC;YAClD,OAAO,IAAI,CAAC;QACd,CAAC;KACF;IAED,sCAAsC;IACtC;QACE,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,+CAA+C;QACtD,SAAS,EAAE,GAAG,EAAE,CAAC,IAAI;KACtB;IAED,sBAAsB;IACtB;QACE,IAAI,EAAE,UAAU;QAChB,IAAI,EAAE,UAAU;QAChB,KAA
K,EAAE,wBAAwB;QAC/B,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,yCAAyC;YACzC,OAAO,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,CAAC;KACF;IAED,+DAA+D;IAC/D;QACE,IAAI,EAAE,IAAI;QACV,IAAI,EAAE,iBAAiB;QACvB,KAAK,EAAE,wBAAwB;QAC/B,SAAS,EAAE,CAAC,KAAa,EAAE,EAAE;YAC3B,mDAAmD;YACnD,OAAO,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,CAAC;KACF;CACF,CAAC;AAEF;;GAEG;AACH,SAAS,SAAS,CAAC,MAAc;IAC/B,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,SAAS,GAAG,KAAK,CAAC;IAEtB,KAAK,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,IAAI,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEvC,IAAI,SAAS,EAAE,CAAC;YACd,CAAC,IAAI,CAAC,CAAC;YACP,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;gBACV,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACZ,CAAC;QACH,CAAC;QAED,GAAG,IAAI,CAAC,CAAC;QACT,SAAS,GAAG,CAAC,SAAS,CAAC;IACzB,CAAC;IAED,OAAO,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,SAAS,CAAC,OAAe;IACvC,MAAM,gBAAgB,GAAG,IAAI,GAAG,EAAU,CAAC;IAC3C,MAAM,eAAe,GAA2B,EAAE,CAAC;IACnD,IAAI,gBAAgB,GAAG,OAAO,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAErE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,WAAW,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAE7B,6BAA6B;YAC7B,IAAI,OAAO,CAAC,SAAS,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,WAAW,CAAC,EAAE,CAAC;gBACzD,SAAS;YACX,CAAC;YAED,iBAAiB;YACjB,gBAAgB,GAAG,gBAAgB,CAAC,OAAO,CACzC,WAAW,EACX,aAAa,OAAO,CAAC,IAAI,GAAG,CAC7B,CAAC;YAEF,gBAAgB,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACnC,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAC3E,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO,EAAE,gBAAgB;QACzB,kBAAkB,EAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,CAAC;QAChD,gBAAgB,EAAE,gBAAgB,KAAK,OAAO;QAC9C,QAAQ,EAAE;YACR,gBAAgB,EAAE,eAAe;SAClC;KACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAe;IACzC,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAE5D,KAAK,MAAM,KAAK,
IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACtD,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,OAAe;IAC5C,MAAM,QAAQ,GAAG,IAAI,GAAG,EAAU,CAAC;IAEnC,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC;QAE5D,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACtD,QAAQ,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;AAC9B,CAAC"}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* visus_fetch_structured MCP Tool
|
|
3
|
+
*
|
|
4
|
+
* Fetches a web page and extracts structured data according to a schema.
|
|
5
|
+
* All extracted data is sanitized before being returned.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: ALL content MUST pass through the sanitizer. This cannot be bypassed.
|
|
8
|
+
*/
|
|
9
|
+
import type { VisusFetchStructuredInput, VisusFetchStructuredOutput, Result } from '../types.js';
|
|
10
|
+
/**
|
|
11
|
+
* visus_fetch_structured tool implementation
|
|
12
|
+
*
|
|
13
|
+
* @param input Tool input parameters
|
|
14
|
+
* @returns Extracted and sanitized structured data
|
|
15
|
+
*/
|
|
16
|
+
// Asynchronous: yields a Promise of Result<VisusFetchStructuredOutput, Error>.
// NOTE(review): Result is imported from ../types.js and its shape is not visible here — confirm how failures are reported to callers.
export declare function visusFetchStructured(input: VisusFetchStructuredInput): Promise<Result<VisusFetchStructuredOutput, Error>>;
|
|
17
|
+
/**
|
|
18
|
+
* MCP tool definition for registration
|
|
19
|
+
*/
|
|
20
|
+
// Declared shape of the tool definition object: a name, a description and an
// inputSchema describing the url, schema and timeout_ms inputs. Only the
// widened types are visible in this declaration; the concrete values live in
// the implementation file.
export declare const visusFetchStructuredToolDefinition: {
|
|
21
|
+
name: string;
|
|
22
|
+
description: string;
|
|
23
|
+
// NOTE(review): presumably a JSON-Schema-style object — confirm against the implementation.
inputSchema: {
|
|
24
|
+
type: string;
|
|
25
|
+
properties: {
|
|
26
|
+
// Descriptor for the `url` input.
url: {
|
|
27
|
+
type: string;
|
|
28
|
+
description: string;
|
|
29
|
+
};
|
|
30
|
+
// Descriptor for the `schema` input; additionalProperties carries a single type name.
schema: {
|
|
31
|
+
type: string;
|
|
32
|
+
description: string;
|
|
33
|
+
additionalProperties: {
|
|
34
|
+
type: string;
|
|
35
|
+
};
|
|
36
|
+
};
|
|
37
|
+
// Descriptor for the `timeout_ms` input; the only property that declares a numeric default.
timeout_ms: {
|
|
38
|
+
type: string;
|
|
39
|
+
description: string;
|
|
40
|
+
default: number;
|
|
41
|
+
};
|
|
42
|
+
};
|
|
43
|
+
// List of property names; NOTE(review): presumably the inputs callers must supply — confirm.
required: string[];
|
|
44
|
+
};
|
|
45
|
+
};
|
|
46
|
+
//# sourceMappingURL=fetch-structured.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fetch-structured.d.ts","sourceRoot":"","sources":["../../src/tools/fetch-structured.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,OAAO,KAAK,EAAE,yBAAyB,EAAE,0BAA0B,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AA2EjG;;;;;GAKG;AACH,wBAAsB,oBAAoB,CACxC,KAAK,EAAE,yBAAyB,GAC/B,OAAO,CAAC,MAAM,CAAC,0BAA0B,EAAE,KAAK,CAAC,CAAC,CA4FpD;AAED;;GAEG;AACH,eAAO,MAAM,kCAAkC;;;;;;;;;;;;;;;;;;;;;;;;;CAyB9C,CAAC"}
|