clawmoat 0.8.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +9 -0
- package/CHANGELOG.md +18 -0
- package/DEMO.md +87 -0
- package/Dockerfile +5 -18
- package/README.md +232 -8
- package/THREAT_MODEL.md +129 -0
- package/agent/README.md +131 -0
- package/agent/index.js +471 -0
- package/agent/install-service.sh +94 -0
- package/agent/openclaw-hook.js +453 -0
- package/agent/provider-setup.js +649 -0
- package/agent/setup.js +274 -0
- package/assets/BADGE-USAGE.md +20 -0
- package/assets/clawmoat-badge.svg +21 -0
- package/bin/clawmoat.js +468 -111
- package/docs/affiliates/dashboard.html +124 -0
- package/docs/affiliates/index.html +236 -0
- package/docs/agent-install.html +183 -0
- package/docs/ai-agent-security-scanner.html +10 -6
- package/docs/badge/index.html +149 -0
- package/docs/badge/scanning.svg +23 -0
- package/docs/blog/386-malicious-skills.html +11 -4
- package/docs/blog/40000-exposed-openclaw-instances.html +11 -4
- package/docs/blog/agent-trust-protocol.html +5 -4
- package/docs/blog/ai-agent-earns-commissions.html +230 -0
- package/docs/blog/bugmageddon-agent-firewall.html +174 -0
- package/docs/blog/calculator-math.html +180 -0
- package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +10 -4
- package/docs/blog/host-guardian-launch.html +18 -8
- package/docs/blog/ibm-experts-agent-runtime-protection.html +15 -6
- package/docs/blog/index.html +67 -9
- package/docs/blog/langchain-security-tutorial.html +18 -8
- package/docs/blog/mcp-30-cves-security-crisis.html +11 -4
- package/docs/blog/meta-researcher-rogue-agent.html +201 -0
- package/docs/blog/microsoft-openclaw-workstation-security.html +5 -4
- package/docs/blog/nist-ai-agent-standards-clawmoat.html +16 -8
- package/docs/blog/oasis-websocket-hijack.html +11 -4
- package/docs/blog/ollama-openclaw-security.html +10 -4
- package/docs/blog/openclaw-enterprise-readiness-claw10.html +5 -4
- package/docs/blog/openclaw-security-reckoning-2026.html +11 -4
- package/docs/blog/owasp-agentic-ai-top10.html +18 -8
- package/docs/blog/securing-ai-agents.html +18 -8
- package/docs/blog/supply-chain-agents.html +18 -8
- package/docs/business/index.html +11 -16
- package/docs/business/install.html +21 -7
- package/docs/checklist.html +10 -4
- package/docs/compare/index.html +122 -0
- package/docs/compare/lakera/index.html +62 -0
- package/docs/compare/llm-guard/index.html +49 -0
- package/docs/compare/snyk-agent-scan/index.html +63 -0
- package/docs/compare.html +10 -6
- package/docs/dashboard/index.html +520 -0
- package/docs/finance/index.html +9 -6
- package/docs/guides/business-deployment.html +770 -0
- package/docs/hall-of-fame.html +11 -5
- package/docs/index.html +266 -137
- package/docs/integrations/langchain.html +14 -6
- package/docs/integrations/openai.html +14 -6
- package/docs/integrations/openclaw.html +55 -7
- package/docs/plans/2026-03-26-threat-intel-api.md +255 -0
- package/docs/plans/2026-04-14-bugmageddon-marketing-pack.md +329 -0
- package/docs/plans/2026-04-14-clawmoat-v1-bugmageddon.md +248 -0
- package/docs/plans/2026-04-14-v1-release-update.md +91 -0
- package/docs/plans/2026-04-19-supabase-audit.md +68 -0
- package/docs/plans/2026-05-12-sales-push.md +303 -0
- package/docs/playground/index.html +893 -0
- package/docs/playground.html +4 -7
- package/docs/rfcs/defense-in-depth.md +467 -0
- package/docs/scan/index.html +156 -12
- package/docs/services/case-study.html +255 -0
- package/docs/services/downloads/install-openclaw.bat +45 -0
- package/docs/services/downloads/install-openclaw.command +38 -0
- package/docs/services/downloads/install-openclaw.sh +38 -0
- package/docs/services/get-started.html +165 -0
- package/docs/services/index.html +598 -0
- package/docs/services/multi-agent-security.html +284 -0
- package/docs/services/one-pager.html +99 -0
- package/docs/services/pitch-deck.html +229 -0
- package/docs/services/roi-calculator.html +258 -0
- package/docs/sitemap.xml +62 -2
- package/docs/support/index.html +12 -1
- package/docs/templates/customer-service/HEARTBEAT.md +61 -0
- package/docs/templates/customer-service/MEMORY.md +89 -0
- package/docs/templates/customer-service/SOUL.md +41 -0
- package/docs/templates/customer-service/USER.md +56 -0
- package/docs/templates/executive/HEARTBEAT.md +86 -0
- package/docs/templates/executive/MEMORY.md +92 -0
- package/docs/templates/executive/SOUL.md +44 -0
- package/docs/templates/executive/USER.md +62 -0
- package/docs/templates/finance/HEARTBEAT.md +58 -0
- package/docs/templates/finance/MEMORY.md +87 -0
- package/docs/templates/finance/SOUL.md +38 -0
- package/docs/templates/finance/USER.md +53 -0
- package/docs/templates/index.html +115 -0
- package/docs/templates/operations/HEARTBEAT.md +63 -0
- package/docs/templates/operations/MEMORY.md +68 -0
- package/docs/templates/operations/SOUL.md +38 -0
- package/docs/templates/operations/USER.md +49 -0
- package/docs/templates/sales/HEARTBEAT.md +55 -0
- package/docs/templates/sales/MEMORY.md +89 -0
- package/docs/templates/sales/SOUL.md +34 -0
- package/docs/templates/sales/USER.md +54 -0
- package/eslint.config.js +32 -0
- package/evals/README.md +29 -0
- package/evals/cases.json +390 -0
- package/evals/results.md +68 -0
- package/evals/run.js +180 -0
- package/examples/demo-attack/demo.js +186 -0
- package/examples/python-quickstart/README.md +54 -0
- package/examples/python-quickstart/clawmoat_client.py +167 -0
- package/examples/video-demo/README.md +14 -0
- package/examples/video-demo/scene-a-normal.js +29 -0
- package/examples/video-demo/scene-b-attack-arrives.js +31 -0
- package/examples/video-demo/scene-c-hijack.js +44 -0
- package/examples/video-demo/scene-d-clawmoat.js +46 -0
- package/integrations/crewai/README.md +32 -0
- package/integrations/crewai/clawmoat_crewai/__init__.py +17 -0
- package/integrations/crewai/clawmoat_crewai/guard.py +103 -0
- package/integrations/crewai/pyproject.toml +21 -0
- package/integrations/langchain/README.md +91 -0
- package/integrations/langchain/clawmoat_langchain/__init__.py +17 -0
- package/integrations/langchain/clawmoat_langchain/callback.py +489 -0
- package/integrations/langchain/pyproject.toml +32 -0
- package/integrations/litellm/README.md +324 -0
- package/integrations/litellm/clawmoat_litellm/__init__.py +21 -0
- package/integrations/litellm/clawmoat_litellm/callback.py +329 -0
- package/integrations/litellm/clawmoat_litellm/proxy_middleware.py +224 -0
- package/integrations/litellm/pyproject.toml +74 -0
- package/integrations/openai-agents/README.md +392 -0
- package/integrations/openai-agents/clawmoat_openai_agents/__init__.py +20 -0
- package/integrations/openai-agents/clawmoat_openai_agents/guardrail.py +431 -0
- package/integrations/openai-agents/clawmoat_openai_agents/middleware.py +311 -0
- package/integrations/openai-agents/pyproject.toml +76 -0
- package/package.json +6 -5
- package/plugins/openclaw-adapter/PHASE1.md +439 -0
- package/plugins/openclaw-adapter/README.md +103 -0
- package/plugins/openclaw-adapter/SPEC.md +1644 -0
- package/plugins/openclaw-adapter/package.json +31 -0
- package/plugins/openclaw-adapter/src/index.test.ts +226 -0
- package/plugins/openclaw-adapter/src/index.ts +140 -0
- package/plugins/openclaw-adapter/tsconfig.json +14 -0
- package/server/data/threats.json +290 -0
- package/server/index.js +142 -7
- package/src/adapters/express.js +161 -0
- package/src/adapters/index.js +92 -0
- package/src/adapters/langchain.js +185 -0
- package/src/approval/index.js +456 -0
- package/src/ban-scanner.js +200 -0
- package/src/boundary-scanner.js +296 -0
- package/src/ci-scanner.js +279 -0
- package/src/code-scanner.js +245 -0
- package/src/enforce.js +166 -0
- package/src/formatters/json.js +80 -0
- package/src/formatters/sarif.js +388 -0
- package/src/guardian/alerts.js +34 -3
- package/src/guardian/index.js +41 -2
- package/src/index.js +102 -0
- package/src/integrations/agentmesh.js +501 -0
- package/src/language-detector.js +201 -0
- package/src/mcp-scanner.js +253 -0
- package/src/multimodal/index.js +579 -0
- package/src/obfuscation-scanner.js +457 -0
- package/src/policy-engine.js +402 -0
- package/src/scanners/dependency-attacks.js +128 -0
- package/src/scanners/prompt-injection.js +18 -0
- package/src/scanners/supply-chain.js +14 -0
- package/src/templates/default-config.yml +90 -0
- package/src/vuln-ops/exploitability.js +46 -0
- package/src/watch/live-monitor.js +720 -0
- package/clawmoat-0.8.0.tgz +0 -0
- package/server/index.js.patch +0 -1
|
@@ -0,0 +1,579 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Multimodal Input Scanning
|
|
3
|
+
* Scan base64 image data URLs, PDF text content, and file metadata for threats
|
|
4
|
+
*
|
|
5
|
+
* Focuses on detectable patterns without external dependencies:
|
|
6
|
+
* - Metadata analysis (MIME type validation, filename patterns)
|
|
7
|
+
* - Embedded strings in base64 content (between tags, steganographic patterns)
|
|
8
|
+
* - Size anomalies and suspicious payloads
|
|
9
|
+
* - Hidden text injection patterns
|
|
10
|
+
*
|
|
11
|
+
* @module multimodal
|
|
12
|
+
* @example
|
|
13
|
+
* const { scanMultimodalInput } = require('./multimodal');
|
|
14
|
+
*
|
|
15
|
+
* const result = scanMultimodalInput({
|
|
16
|
+
* content: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...',
|
|
17
|
+
* filename: 'innocent.png',
|
|
18
|
+
* mimeType: 'image/png'
|
|
19
|
+
* });
|
|
20
|
+
*
|
|
21
|
+
* if (!result.safe) {
|
|
22
|
+
* console.log('Threats found:', result.findings);
|
|
23
|
+
* }
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
const { Buffer } = require('buffer');
|
|
27
|
+
const path = require('path');
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* @typedef {Object} MultimodalScanResult
|
|
31
|
+
* @property {boolean} safe - true if no threats detected
|
|
32
|
+
* @property {Array} findings - Array of detected threat patterns
|
|
33
|
+
* @property {string|null} maxSeverity - Highest severity among findings
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
/**
|
|
37
|
+
* @typedef {Object} MultimodalInput
|
|
38
|
+
* @property {string} content - Base64 data URL or text content
|
|
39
|
+
* @property {string} [filename] - Original filename if available
|
|
40
|
+
* @property {string} [mimeType] - MIME type if available
|
|
41
|
+
* @property {number} [size] - File size in bytes if available
|
|
42
|
+
*/
|
|
43
|
+
|
|
44
|
+
/**
 * File extensions treated as executable or script content.
 * analyzeFilename() reports a finding when a filename's (lower-cased)
 * extension appears in this list.
 */
const SUSPICIOUS_EXTENSIONS = [
  // Windows executables and launchers
  '.exe',
  '.scr',
  '.bat',
  '.cmd',
  '.com',
  '.pif',
  // Scripts and packaged code
  '.vbs',
  '.js',
  '.jar',
  '.ps1',
  '.sh',
  '.py',
  '.rb',
  '.php',
  '.asp',
  '.jsp',
  '.pl'
];
|
|
51
|
+
|
|
52
|
+
/**
 * Lookup table: lower-cased file extension -> MIME types considered
 * legitimate for it. validateMimeType() reports a mismatch when the declared
 * MIME type is not among the values listed for the file's extension.
 */
const MIME_TYPE_MAP = {
  // Images
  '.png': ['image/png'],
  '.jpg': ['image/jpeg'],
  '.jpeg': ['image/jpeg'],
  '.gif': ['image/gif'],
  '.webp': ['image/webp'],
  '.svg': ['image/svg+xml'],

  // Documents and data
  '.pdf': ['application/pdf'],
  '.txt': ['text/plain'],
  '.json': ['application/json'],
  '.xml': ['application/xml', 'text/xml'],

  // Web assets
  '.html': ['text/html'],
  '.css': ['text/css'],
  '.js': ['application/javascript', 'text/javascript']
};
|
|
70
|
+
|
|
71
|
+
/**
 * Regular expressions that flag likely prompt-injection text inside embedded
 * content. Every entry carries the global flag because
 * scanForInjectionPatterns() walks them with String.prototype.matchAll,
 * which rejects non-global expressions.
 */
const INJECTION_PATTERNS = [
  // --- Direct attempts to override earlier instructions ---
  /ignore\s+(?:previous|all)\s+instructions?/gi,
  /system\s*:\s*you\s+are\s+now/gi,
  /forget\s+(?:everything|all)\s+(?:above|before)/gi,
  /act\s+as\s+(?:if\s+you\s+are|a)\s+(?:different|new)/gi,

  // --- Markers used to smuggle hidden instructions ---
  /<!--\s*(?:system|instruction|prompt)/gi,
  /\[(?:SYSTEM|INSTRUCTION|PROMPT)\]/gi,
  /<(?:system|instruction|prompt)>/gi,

  // --- Invisible characters ---
  /\u200b|\u200c|\u200d|\ufeff/g, // zero-width space / non-joiner / joiner / BOM
  /\u00a0{2,}/g, // runs of two or more non-breaking spaces

  // --- Base64 spellings of "ignore", "system", "forget", "act as" ---
  /aWdub3Jl|c3lzdGVt|Zm9yZ2V0|YWN0IGFz/g,

  // --- Long http(s) or data: URLs (possible callback targets) ---
  /(?:https?:\/\/|data:)[^\s\'"]{20,}/gi
];
|
|
96
|
+
|
|
97
|
+
/**
 * Regular expressions for content shapes that may conceal hidden data.
 * All are global so they can be used with String.prototype.matchAll.
 */
const STEGANOGRAPHIC_PATTERNS = [
  // A 1-4 character unit followed by at least ten further copies of itself
  /(.{1,4})\1{10,}/g,

  // Runs of 100+ characters drawn from the base64 alphabet
  /[A-Za-z0-9+/=]{100,}/g,

  // Low control characters U+0000 through U+0008
  /\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08/g,
];
|
|
110
|
+
|
|
111
|
+
/**
 * Regular expressions for PDF object keys associated with active or
 * embedded content. All are global and case-insensitive.
 */
const PDF_THREAT_PATTERNS = [
  // Script actions
  /\/JavaScript/gi,
  /\/JS/gi,

  // Launch actions, including a file specification naming a Windows executable
  /\/Launch/gi,
  /\/F\s*\(.*\.(?:exe|bat|cmd|scr)\)/gi,

  // Files carried inside the document
  /\/EmbeddedFile/gi,
  /\/FileAttachment/gi,

  // URI actions pointing at javascript: or data: targets
  /\/URI\s*\(.*(?:javascript|data:)/gi
];
|
|
130
|
+
|
|
131
|
+
/**
 * Inspect a filename for extension tricks and path-escape attempts.
 *
 * Checks performed, in order: executable/script extension, double extension,
 * embedded null byte, `../` or `..\` traversal, Windows drive-relative path,
 * absolute/UNC path, and excessive length (> 255 characters).
 *
 * @param {string} filename - Filename as supplied by the caller
 * @returns {Array} Array of findings (empty when nothing suspicious is found
 *   or when `filename` is falsy)
 */
function analyzeFilename(filename) {
  if (!filename) return [];

  const findings = [];
  // Keep the extension in its original case so its position can be located
  // in the untouched filename; compare against the list in lower case.
  const rawExt = path.extname(filename);
  const ext = rawExt.toLowerCase();

  // Check for suspicious extensions
  if (SUSPICIOUS_EXTENSIONS.includes(ext)) {
    findings.push({
      type: 'suspicious_file_extension',
      subtype: 'executable_extension',
      severity: 'high',
      matched: ext,
      position: filename.lastIndexOf(rawExt),
      message: `Potentially dangerous file extension: ${ext}`
    });
  }

  // Check for double extensions (e.g., file.txt.exe). Path separators are
  // excluded so "dir.name/file.exe" is not mistaken for a double extension,
  // and the final extension is compared case-insensitively.
  const doubleExtMatch = filename.match(/\.([^./\\]+)\.([^./\\]+)$/);
  if (doubleExtMatch && SUSPICIOUS_EXTENSIONS.includes(`.${doubleExtMatch[2].toLowerCase()}`)) {
    findings.push({
      type: 'filename_obfuscation',
      subtype: 'double_extension',
      severity: 'high',
      matched: doubleExtMatch[0],
      position: doubleExtMatch.index,
      message: 'Double file extension detected (possible obfuscation)'
    });
  }

  // Check for null bytes in filename
  const nullByteIndex = filename.indexOf('\x00');
  if (nullByteIndex !== -1) {
    findings.push({
      type: 'filename_injection',
      subtype: 'null_byte',
      severity: 'critical',
      matched: '\\x00',
      position: nullByteIndex,
      message: 'Null byte in filename (path traversal attempt)'
    });
  }

  // Check for path traversal patterns; report the first occurrence so that
  // `matched` and `position` always describe the same token.
  const traversalMatch = /\.\.[\\/]/.exec(filename);
  if (traversalMatch) {
    findings.push({
      type: 'filename_injection',
      subtype: 'path_traversal',
      severity: 'high',
      matched: traversalMatch[0],
      position: traversalMatch.index,
      message: 'Path traversal pattern in filename'
    });
  }

  // Check for drive-relative path traversal (Windows) — GHSA-qffp-2rhf-9h96
  // Patterns like "C:target" (no backslash) resolve to current dir of that drive,
  // bypassing ../ checks. Also catch absolute paths and UNC paths.
  const driveRelativeMatch = filename.match(/^[A-Za-z]:[^\\\/]/);
  const isAbsolutePath =
    /^[A-Za-z]:[\\\/]/.test(filename) ||
    filename.startsWith('/') ||
    filename.startsWith('\\\\');

  if (driveRelativeMatch) {
    findings.push({
      type: 'filename_injection',
      subtype: 'drive_relative_traversal',
      severity: 'high',
      matched: driveRelativeMatch[0],
      position: 0,
      message: 'Drive-relative path traversal (Windows) — can escape extraction directory'
    });
  }
  if (isAbsolutePath) {
    findings.push({
      type: 'filename_injection',
      subtype: 'absolute_path',
      severity: 'high',
      matched: filename.substring(0, 10),
      position: 0,
      message: 'Absolute path in filename — may write outside intended directory'
    });
  }

  // Check for extremely long filenames (possible buffer overflow attempt)
  if (filename.length > 255) {
    findings.push({
      type: 'filename_anomaly',
      subtype: 'excessive_length',
      severity: 'medium',
      matched: filename.substring(0, 50) + '...',
      position: 255,
      message: `Filename exceeds typical limits (${filename.length} characters)`
    });
  }

  return findings;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Validate a declared MIME type against the filename extension and against a
 * list of executable MIME types.
 *
 * The MIME type is normalized before comparison: parameters (everything from
 * the first `;`) are dropped and the essence is lower-cased, because media
 * types are case-insensitive and commonly carry a charset parameter.
 * The executable-type check runs even when no filename is supplied.
 *
 * @param {string} [filename] - Filename whose extension is compared
 * @param {string} mimeType - Declared MIME type
 * @returns {Array} Array of findings (empty when `mimeType` is falsy)
 */
function validateMimeType(filename, mimeType) {
  if (!mimeType) return [];

  const findings = [];
  const normalizedMime = String(mimeType).split(';')[0].trim().toLowerCase();

  if (filename) {
    const ext = path.extname(filename).toLowerCase();
    const expectedMimes = MIME_TYPE_MAP[ext];

    if (expectedMimes && !expectedMimes.includes(normalizedMime)) {
      findings.push({
        type: 'mime_mismatch',
        subtype: 'extension_mismatch',
        severity: 'medium',
        matched: mimeType,
        message: `MIME type '${mimeType}' doesn't match extension '${ext}' (expected: ${expectedMimes.join(' or ')})`
      });
    }
  }

  // Check for dangerous MIME types
  const dangerousMimes = [
    'application/x-msdownload',
    'application/x-executable',
    'application/x-msdos-program',
    'application/x-ms-shortcut'
  ];

  if (dangerousMimes.includes(normalizedMime)) {
    findings.push({
      type: 'dangerous_mime_type',
      subtype: 'executable_mime',
      severity: 'critical',
      matched: mimeType,
      message: `Dangerous MIME type detected: ${mimeType}`
    });
  }

  return findings;
}
|
|
276
|
+
|
|
277
|
+
/**
 * Analyze a `data:` URL for embedded threats.
 *
 * The URL is split at the FIRST comma only, so payloads that themselves
 * contain commas are scanned in full. `base64` is recognised anywhere in the
 * parameter list (e.g. `data:text/plain;charset=utf-8;base64,...`). Base64
 * payloads are decoded as UTF-8; other payloads are percent-decoded. The
 * decoded text is passed to scanPdfContent() for `application/pdf` and to
 * scanForInjectionPatterns() otherwise.
 *
 * A string that starts with `data:` but has no comma is not a valid data URL;
 * it is scanned as plain text so it cannot slip through unscanned.
 *
 * NOTE(review): binary payloads (real images) are decoded as UTF-8 text and
 * will typically contain control bytes that the steganographic patterns
 * match — confirm whether that is intended.
 *
 * @param {string} dataUrl - Candidate data URL
 * @returns {Array} Array of findings (empty for non-strings and for strings
 *   that do not start with `data:`)
 */
function analyzeDataUrl(dataUrl) {
  // The scheme is matched case-insensitively.
  if (typeof dataUrl !== 'string' || !/^data:/i.test(dataUrl)) return [];

  const findings = [];

  try {
    const separatorIndex = dataUrl.indexOf(',');
    if (separatorIndex === -1) {
      return scanForInjectionPatterns(dataUrl);
    }

    const header = dataUrl.slice('data:'.length, separatorIndex);
    const data = dataUrl.slice(separatorIndex + 1);

    const [rawMimeType, ...parameters] = header.split(';');
    const mimeType = rawMimeType.trim().toLowerCase();
    const isBase64 = parameters.some((param) => param.trim().toLowerCase() === 'base64');

    // Check for suspicious MIME types in data URLs
    const executableMimes = ['text/html', 'application/javascript', 'text/javascript'];
    if (executableMimes.includes(mimeType)) {
      findings.push({
        type: 'suspicious_data_url',
        subtype: 'executable_content',
        severity: 'high',
        matched: mimeType,
        message: `Potentially dangerous data URL MIME type: ${mimeType}`
      });
    }

    if (data) {
      try {
        let decoded;
        if (isBase64) {
          decoded = Buffer.from(data, 'base64').toString('utf8');
        } else {
          try {
            decoded = decodeURIComponent(data);
          } catch (err) {
            // Invalid percent-encoding: scan the payload exactly as written.
            decoded = data;
          }
        }

        const scanDecoded = mimeType === 'application/pdf' ? scanPdfContent : scanForInjectionPatterns;
        findings.push(...scanDecoded(decoded));
      } catch (err) {
        // If decoding or scanning fails, still check the raw data for patterns
        findings.push(...scanRawBase64(data));
      }
    }

    // Check for oversized data URLs (possible DoS)
    if (data.length > 10 * 1024 * 1024) { // 10MB limit
      const sizeLabel = `${Math.round(data.length / 1024 / 1024)}MB`;
      findings.push({
        type: 'size_anomaly',
        subtype: 'oversized_data_url',
        severity: 'medium',
        matched: sizeLabel,
        message: `Extremely large data URL (${sizeLabel})`
      });
    }
  } catch (err) {
    findings.push({
      type: 'malformed_data_url',
      subtype: 'parse_error',
      severity: 'low',
      matched: dataUrl.substring(0, 100),
      message: 'Malformed data URL structure'
    });
  }

  return findings;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Run every INJECTION_PATTERNS and STEGANOGRAPHIC_PATTERNS expression over a
 * piece of text and report each match as a finding.
 *
 * Injection findings come first (in pattern order, then match order),
 * followed by steganographic findings. Steganographic matches are truncated
 * to their first 50 characters in `matched`.
 *
 * @param {string} content - Text to scan
 * @returns {Array} Array of findings
 */
function scanForInjectionPatterns(content) {
  const results = [];

  INJECTION_PATTERNS.forEach((regex) => {
    for (const hit of content.matchAll(regex)) {
      results.push({
        type: 'embedded_injection',
        subtype: 'prompt_injection',
        severity: 'high',
        matched: hit[0],
        position: hit.index,
        message: 'Potential prompt injection pattern detected in embedded content'
      });
    }
  });

  STEGANOGRAPHIC_PATTERNS.forEach((regex) => {
    for (const hit of content.matchAll(regex)) {
      results.push({
        type: 'steganographic_pattern',
        subtype: 'hidden_data',
        severity: 'medium',
        matched: hit[0].substring(0, 50),
        position: hit.index,
        message: 'Potential steganographic or hidden data pattern'
      });
    }
  });

  return results;
}
|
|
379
|
+
|
|
380
|
+
/**
 * Scan raw base64 text for known injection phrases without decoding it.
 *
 * For each phrase, the base64 spelling is computed at all three possible
 * byte alignments (the phrase may start 0, 1 or 2 bytes into a 3-byte
 * base64 group), keeping only the characters fully determined by the phrase
 * itself. Those fragments are then searched for literally, so the phrase is
 * found wherever it sits inside the encoded payload. Matching is
 * case-sensitive with respect to the original phrase.
 *
 * @param {string} base64Data - Base64 text (without the data URL header)
 * @returns {Array} Array of findings
 */
function scanRawBase64(base64Data) {
  if (typeof base64Data !== 'string' || base64Data.length === 0) return [];

  const phrases = [
    'ignore previous instructions',
    'ignore all instructions',
    'system: you are now',
    'forget everything above'
  ];

  // Base64 fragments of `phrase` for each alignment within a 3-byte group.
  const alignedFragments = (phrase) => {
    const phraseBytes = Buffer.from(phrase, 'utf8');
    const fragments = [];
    for (let offset = 0; offset < 3; offset += 1) {
      const padded = Buffer.concat([Buffer.alloc(offset), phraseBytes]);
      const encoded = padded.toString('base64').replace(/=+$/, '');
      // Drop leading characters that mix in the filler bytes and trailing
      // characters that depend on whatever follows the phrase.
      const start = Math.ceil((offset * 8) / 6);
      const end = Math.floor((padded.length * 8) / 6);
      fragments.push(encoded.slice(start, end));
    }
    return fragments;
  };

  const findings = [];

  for (const phrase of phrases) {
    for (const fragment of alignedFragments(phrase)) {
      let index = base64Data.indexOf(fragment);
      while (index !== -1) {
        findings.push({
          type: 'encoded_injection',
          subtype: 'base64_injection',
          severity: 'high',
          matched: fragment,
          position: index,
          message: 'Base64-encoded injection pattern detected'
        });
        index = base64Data.indexOf(fragment, index + 1);
      }
    }
  }

  return findings;
}
|
|
411
|
+
|
|
412
|
+
/**
 * Scan PDF text for risky PDF features and for general injection patterns.
 *
 * Findings from PDF_THREAT_PATTERNS are listed first, followed by whatever
 * scanForInjectionPatterns() reports for the same text.
 *
 * @param {string} pdfText - Text content of the PDF
 * @returns {Array} Array of findings (empty when `pdfText` is falsy)
 */
function scanPdfContent(pdfText) {
  if (!pdfText) {
    return [];
  }

  const pdfFindings = [];

  PDF_THREAT_PATTERNS.forEach((regex) => {
    for (const hit of pdfText.matchAll(regex)) {
      pdfFindings.push({
        type: 'pdf_threat',
        subtype: 'suspicious_pdf_feature',
        severity: 'high',
        matched: hit[0],
        position: hit.index,
        message: 'Suspicious PDF feature detected (JavaScript, launch action, or embedded file)'
      });
    }
  });

  // The same text is also checked for generic prompt-injection patterns.
  return pdfFindings.concat(scanForInjectionPatterns(pdfText));
}
|
|
442
|
+
|
|
443
|
+
/**
 * Check the declared file size for anomalies.
 *
 * Reports images smaller than 100 bytes (including zero-byte images) and
 * files larger than 100MB. A missing, null or non-numeric size produces no
 * findings. Numeric strings are accepted and converted.
 *
 * @param {MultimodalInput} input - Input whose `size` and `mimeType` are read
 * @returns {Array} Array of findings
 */
function checkSizeAnomalies(input) {
  const findings = [];

  const rawSize = input.size;
  const hasSize =
    typeof rawSize === 'number' ||
    (typeof rawSize === 'string' && rawSize.trim() !== '');
  if (!hasSize) return findings;

  const size = Number(rawSize);
  if (Number.isNaN(size)) return findings;

  // Check for suspiciously small files that claim to be images.
  // A size of 0 is a valid (and highly suspicious) value, so it must not be
  // skipped by a truthiness test.
  const claimsImage =
    typeof input.mimeType === 'string' &&
    input.mimeType.trim().toLowerCase().startsWith('image/');
  if (claimsImage && size < 100) {
    findings.push({
      type: 'size_anomaly',
      subtype: 'suspiciously_small',
      severity: 'low',
      matched: `${size} bytes`,
      message: `Suspiciously small image file (${size} bytes)`
    });
  }

  // Check for extremely large files (potential DoS)
  if (size > 100 * 1024 * 1024) { // 100MB
    const sizeLabel = `${Math.round(size / 1024 / 1024)}MB`;
    findings.push({
      type: 'size_anomaly',
      subtype: 'extremely_large',
      severity: 'medium',
      matched: sizeLabel,
      message: `Extremely large file (${sizeLabel}) - potential DoS`
    });
  }

  return findings;
}
|
|
477
|
+
|
|
478
|
+
/**
 * Main multimodal input scanner.
 *
 * Runs the filename, MIME type and size checks, then scans the content:
 * data URLs go to analyzeDataUrl(), content declared as `application/pdf`
 * goes to scanPdfContent(), and any other text goes to
 * scanForInjectionPatterns(). Binary content supplied as a Buffer or
 * Uint8Array is decoded as UTF-8 and scanned as text instead of raising a
 * TypeError. Content of any other non-string type is not scanned.
 *
 * @param {MultimodalInput} input - Content plus optional metadata
 * @returns {MultimodalScanResult} `safe` is true only when there are no
 *   findings; `maxSeverity` is the highest severity found, or null
 */
function scanMultimodalInput(input) {
  if (!input) {
    return { safe: true, findings: [], maxSeverity: null };
  }

  const findings = [];

  // Analyze filename if provided
  if (input.filename) {
    findings.push(...analyzeFilename(input.filename));
  }

  // Validate the MIME type. The filename is optional here so that a
  // dangerous MIME type can be reported even when no filename accompanies it.
  if (input.mimeType) {
    findings.push(...validateMimeType(input.filename, input.mimeType));
  }

  // Check size anomalies
  findings.push(...checkSizeAnomalies(input));

  // Normalize content to a string where possible.
  let content = input.content;
  if (content instanceof Uint8Array) {
    content = Buffer.from(content).toString('utf8');
  }

  // Analyze content based on type
  if (typeof content === 'string' && content.length > 0) {
    // Media types are case-insensitive and may carry parameters.
    const declaredMime = typeof input.mimeType === 'string'
      ? input.mimeType.split(';')[0].trim().toLowerCase()
      : '';

    if (content.startsWith('data:')) {
      // Data URL analysis
      findings.push(...analyzeDataUrl(content));
    } else if (declaredMime === 'application/pdf') {
      // PDF content analysis
      findings.push(...scanPdfContent(content));
    } else {
      // Generic text content analysis
      findings.push(...scanForInjectionPatterns(content));
    }
  }

  // Determine overall safety and max severity
  const safe = findings.length === 0;
  let maxSeverity = null;

  if (!safe) {
    const severityRank = { low: 0, medium: 1, high: 2, critical: 3 };
    // Unknown severities rank as 0, the same as 'low'.
    maxSeverity = findings.reduce(
      (max, finding) =>
        (severityRank[finding.severity] || 0) > (severityRank[max] || 0)
          ? finding.severity
          : max,
      'low'
    );
  }

  return { safe, findings, maxSeverity };
}
|
|
536
|
+
|
|
537
|
+
/**
 * Convenience function to scan image data URLs.
 *
 * The MIME type is taken from the data URL header, i.e. the text between
 * `data:` and the first `;` or `,`. Stopping at the comma matters for URLs
 * without parameters (`data:text/plain,hello`), where the payload must not
 * be mistaken for part of the MIME type. The extracted type is lower-cased;
 * it is null when the header carries no MIME type or the input is not a
 * data URL string.
 *
 * @param {string} dataUrl - Data URL to scan
 * @param {string} [filename] - Original filename if available
 * @returns {MultimodalScanResult}
 */
function scanImageDataUrl(dataUrl, filename = null) {
  const headerMatch = typeof dataUrl === 'string'
    ? /^data:([^;,]*)/i.exec(dataUrl)
    : null;
  const declaredMime = headerMatch ? headerMatch[1].trim().toLowerCase() : '';
  const mimeType = declaredMime === '' ? null : declaredMime;

  return scanMultimodalInput({
    content: dataUrl,
    filename,
    mimeType
  });
}
|
|
553
|
+
|
|
554
|
+
/**
 * Convenience wrapper that scans only a file's metadata.
 *
 * Builds an input with empty content and hands it to scanMultimodalInput(),
 * so only the filename, MIME type and size checks can produce findings.
 *
 * @param {string} filename - Filename to inspect
 * @param {string} mimeType - Declared MIME type
 * @param {number} [size] - File size in bytes, if known
 * @returns {MultimodalScanResult}
 */
function scanFileMetadata(filename, mimeType, size = null) {
  const metadataOnlyInput = { content: '', filename, mimeType, size };
  return scanMultimodalInput(metadataOnlyInput);
}
|
|
569
|
+
|
|
570
|
+
module.exports = {
|
|
571
|
+
scanMultimodalInput,
|
|
572
|
+
scanImageDataUrl,
|
|
573
|
+
scanFileMetadata,
|
|
574
|
+
analyzeFilename,
|
|
575
|
+
validateMimeType,
|
|
576
|
+
analyzeDataUrl,
|
|
577
|
+
scanForInjectionPatterns,
|
|
578
|
+
scanPdfContent
|
|
579
|
+
};
|