clawmoat 0.7.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (178) hide show
  1. package/.dockerignore +9 -0
  2. package/CHANGELOG.md +18 -0
  3. package/CONTRIBUTING.md +4 -2
  4. package/DEMO.md +87 -0
  5. package/Dockerfile +5 -18
  6. package/README.md +294 -8
  7. package/SECURITY.md +58 -10
  8. package/THREAT_MODEL.md +129 -0
  9. package/agent/README.md +131 -0
  10. package/agent/index.js +471 -0
  11. package/agent/install-service.sh +94 -0
  12. package/agent/openclaw-hook.js +453 -0
  13. package/agent/provider-setup.js +649 -0
  14. package/agent/setup.js +274 -0
  15. package/assets/BADGE-USAGE.md +20 -0
  16. package/assets/clawmoat-badge.svg +21 -0
  17. package/bin/clawmoat.js +468 -111
  18. package/docs/affiliates/dashboard.html +124 -0
  19. package/docs/affiliates/index.html +236 -0
  20. package/docs/agent-install.html +183 -0
  21. package/docs/ai-agent-security-scanner.html +10 -6
  22. package/docs/badge/index.html +149 -0
  23. package/docs/badge/scanning.svg +23 -0
  24. package/docs/blog/386-malicious-skills.html +262 -0
  25. package/docs/blog/40000-exposed-openclaw-instances.html +201 -0
  26. package/docs/blog/agent-trust-protocol.html +198 -0
  27. package/docs/blog/ai-agent-earns-commissions.html +230 -0
  28. package/docs/blog/bugmageddon-agent-firewall.html +174 -0
  29. package/docs/blog/calculator-math.html +180 -0
  30. package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +229 -0
  31. package/docs/blog/host-guardian-launch.html +18 -8
  32. package/docs/blog/ibm-experts-agent-runtime-protection.html +247 -0
  33. package/docs/blog/index.html +211 -9
  34. package/docs/blog/langchain-security-tutorial.html +18 -8
  35. package/docs/blog/mcp-30-cves-security-crisis.html +286 -0
  36. package/docs/blog/meta-researcher-rogue-agent.html +201 -0
  37. package/docs/blog/microsoft-openclaw-workstation-security.html +235 -0
  38. package/docs/blog/nist-ai-agent-standards-clawmoat.html +377 -0
  39. package/docs/blog/oasis-websocket-hijack.html +212 -0
  40. package/docs/blog/ollama-openclaw-security.html +160 -0
  41. package/docs/blog/openclaw-enterprise-readiness-claw10.html +199 -0
  42. package/docs/blog/openclaw-security-reckoning-2026.html +368 -0
  43. package/docs/blog/owasp-agentic-ai-top10.html +18 -8
  44. package/docs/blog/securing-ai-agents.html +18 -8
  45. package/docs/blog/supply-chain-agents.html +18 -8
  46. package/docs/business/index.html +525 -0
  47. package/docs/business/install.html +261 -0
  48. package/docs/checklist.html +174 -0
  49. package/docs/compare/index.html +122 -0
  50. package/docs/compare/lakera/index.html +62 -0
  51. package/docs/compare/llm-guard/index.html +49 -0
  52. package/docs/compare/snyk-agent-scan/index.html +63 -0
  53. package/docs/compare.html +10 -6
  54. package/docs/dashboard/index.html +520 -0
  55. package/docs/finance/index.html +220 -0
  56. package/docs/guides/business-deployment.html +770 -0
  57. package/docs/hall-of-fame.html +174 -0
  58. package/docs/index.html +447 -154
  59. package/docs/install.sh +557 -0
  60. package/docs/integrations/langchain.html +14 -6
  61. package/docs/integrations/openai.html +14 -6
  62. package/docs/integrations/openclaw.html +55 -7
  63. package/docs/plans/2026-03-26-threat-intel-api.md +255 -0
  64. package/docs/plans/2026-04-14-bugmageddon-marketing-pack.md +329 -0
  65. package/docs/plans/2026-04-14-clawmoat-v1-bugmageddon.md +248 -0
  66. package/docs/plans/2026-04-14-v1-release-update.md +91 -0
  67. package/docs/plans/2026-04-19-supabase-audit.md +68 -0
  68. package/docs/plans/2026-05-12-sales-push.md +303 -0
  69. package/docs/playground/index.html +893 -0
  70. package/docs/playground.html +4 -7
  71. package/docs/privacy-policy/index.html +122 -0
  72. package/docs/rfcs/defense-in-depth.md +467 -0
  73. package/docs/scan/index.html +358 -0
  74. package/docs/services/case-study.html +255 -0
  75. package/docs/services/downloads/install-openclaw.bat +45 -0
  76. package/docs/services/downloads/install-openclaw.command +38 -0
  77. package/docs/services/downloads/install-openclaw.sh +38 -0
  78. package/docs/services/get-started.html +165 -0
  79. package/docs/services/index.html +598 -0
  80. package/docs/services/multi-agent-security.html +284 -0
  81. package/docs/services/one-pager.html +99 -0
  82. package/docs/services/pitch-deck.html +229 -0
  83. package/docs/services/roi-calculator.html +258 -0
  84. package/docs/sitemap.xml +192 -2
  85. package/docs/support/index.html +135 -0
  86. package/docs/templates/customer-service/HEARTBEAT.md +61 -0
  87. package/docs/templates/customer-service/MEMORY.md +89 -0
  88. package/docs/templates/customer-service/SOUL.md +41 -0
  89. package/docs/templates/customer-service/USER.md +56 -0
  90. package/docs/templates/executive/HEARTBEAT.md +86 -0
  91. package/docs/templates/executive/MEMORY.md +92 -0
  92. package/docs/templates/executive/SOUL.md +44 -0
  93. package/docs/templates/executive/USER.md +62 -0
  94. package/docs/templates/finance/HEARTBEAT.md +58 -0
  95. package/docs/templates/finance/MEMORY.md +87 -0
  96. package/docs/templates/finance/SOUL.md +38 -0
  97. package/docs/templates/finance/USER.md +53 -0
  98. package/docs/templates/index.html +115 -0
  99. package/docs/templates/operations/HEARTBEAT.md +63 -0
  100. package/docs/templates/operations/MEMORY.md +68 -0
  101. package/docs/templates/operations/SOUL.md +38 -0
  102. package/docs/templates/operations/USER.md +49 -0
  103. package/docs/templates/sales/HEARTBEAT.md +55 -0
  104. package/docs/templates/sales/MEMORY.md +89 -0
  105. package/docs/templates/sales/SOUL.md +34 -0
  106. package/docs/templates/sales/USER.md +54 -0
  107. package/docs/terms-of-service/index.html +122 -0
  108. package/eslint.config.js +32 -0
  109. package/evals/README.md +29 -0
  110. package/evals/cases.json +390 -0
  111. package/evals/results.md +68 -0
  112. package/evals/run.js +180 -0
  113. package/examples/basic-usage.js +38 -0
  114. package/examples/demo-attack/demo.js +186 -0
  115. package/examples/python-quickstart/README.md +54 -0
  116. package/examples/python-quickstart/clawmoat_client.py +167 -0
  117. package/examples/video-demo/README.md +14 -0
  118. package/examples/video-demo/scene-a-normal.js +29 -0
  119. package/examples/video-demo/scene-b-attack-arrives.js +31 -0
  120. package/examples/video-demo/scene-c-hijack.js +44 -0
  121. package/examples/video-demo/scene-d-clawmoat.js +46 -0
  122. package/integrations/crewai/README.md +32 -0
  123. package/integrations/crewai/clawmoat_crewai/__init__.py +17 -0
  124. package/integrations/crewai/clawmoat_crewai/guard.py +103 -0
  125. package/integrations/crewai/pyproject.toml +21 -0
  126. package/integrations/langchain/README.md +91 -0
  127. package/integrations/langchain/clawmoat_langchain/__init__.py +17 -0
  128. package/integrations/langchain/clawmoat_langchain/callback.py +489 -0
  129. package/integrations/langchain/pyproject.toml +32 -0
  130. package/integrations/litellm/README.md +324 -0
  131. package/integrations/litellm/clawmoat_litellm/__init__.py +21 -0
  132. package/integrations/litellm/clawmoat_litellm/callback.py +329 -0
  133. package/integrations/litellm/clawmoat_litellm/proxy_middleware.py +224 -0
  134. package/integrations/litellm/pyproject.toml +74 -0
  135. package/integrations/openai-agents/README.md +392 -0
  136. package/integrations/openai-agents/clawmoat_openai_agents/__init__.py +20 -0
  137. package/integrations/openai-agents/clawmoat_openai_agents/guardrail.py +431 -0
  138. package/integrations/openai-agents/clawmoat_openai_agents/middleware.py +311 -0
  139. package/integrations/openai-agents/pyproject.toml +76 -0
  140. package/package.json +6 -5
  141. package/plugins/openclaw-adapter/PHASE1.md +439 -0
  142. package/plugins/openclaw-adapter/README.md +103 -0
  143. package/plugins/openclaw-adapter/SPEC.md +1644 -0
  144. package/plugins/openclaw-adapter/package.json +31 -0
  145. package/plugins/openclaw-adapter/src/index.test.ts +226 -0
  146. package/plugins/openclaw-adapter/src/index.ts +140 -0
  147. package/plugins/openclaw-adapter/tsconfig.json +14 -0
  148. package/server/data/threats.json +290 -0
  149. package/server/index.js +224 -10
  150. package/src/adapters/express.js +161 -0
  151. package/src/adapters/index.js +92 -0
  152. package/src/adapters/langchain.js +185 -0
  153. package/src/approval/index.js +456 -0
  154. package/src/ban-scanner.js +200 -0
  155. package/src/boundary-scanner.js +296 -0
  156. package/src/ci-scanner.js +279 -0
  157. package/src/code-scanner.js +245 -0
  158. package/src/enforce.js +166 -0
  159. package/src/finance/index.js +585 -0
  160. package/src/finance/mcp-firewall.js +486 -0
  161. package/src/formatters/json.js +80 -0
  162. package/src/formatters/sarif.js +388 -0
  163. package/src/guardian/alerts.js +34 -3
  164. package/src/guardian/gateway-monitor.js +590 -0
  165. package/src/guardian/index.js +41 -2
  166. package/src/index.js +105 -0
  167. package/src/integrations/agentmesh.js +501 -0
  168. package/src/language-detector.js +201 -0
  169. package/src/mcp-scanner.js +253 -0
  170. package/src/multimodal/index.js +579 -0
  171. package/src/obfuscation-scanner.js +457 -0
  172. package/src/policy-engine.js +402 -0
  173. package/src/scanners/dependency-attacks.js +128 -0
  174. package/src/scanners/prompt-injection.js +18 -0
  175. package/src/scanners/supply-chain.js +14 -0
  176. package/src/templates/default-config.yml +90 -0
  177. package/src/vuln-ops/exploitability.js +46 -0
  178. package/src/watch/live-monitor.js +720 -0
@@ -0,0 +1,579 @@
1
+ /**
2
+ * Multimodal Input Scanning
3
+ * Scan base64 image data URLs, PDF text content, and file metadata for threats
4
+ *
5
+ * Focuses on detectable patterns without external dependencies:
6
+ * - Metadata analysis (MIME type validation, filename patterns)
7
+ * - Embedded strings in base64 content (between tags, steganographic patterns)
8
+ * - Size anomalies and suspicious payloads
9
+ * - Hidden text injection patterns
10
+ *
11
+ * @module multimodal
12
+ * @example
13
+ * const { scanMultimodalInput } = require('./multimodal');
14
+ *
15
+ * const result = scanMultimodalInput({
16
+ * content: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAA...',
17
+ * filename: 'innocent.png',
18
+ * mimeType: 'image/png'
19
+ * });
20
+ *
21
+ * if (!result.safe) {
22
+ * console.log('Threats found:', result.findings);
23
+ * }
24
+ */
25
+
26
+ const { Buffer } = require('buffer');
27
+ const path = require('path');
28
+
29
+ /**
30
+ * @typedef {Object} MultimodalScanResult
31
+ * @property {boolean} safe - true if no threats detected
32
+ * @property {Array} findings - Array of detected threat patterns
33
+ * @property {string|null} maxSeverity - Highest severity among findings
34
+ */
35
+
36
+ /**
37
+ * @typedef {Object} MultimodalInput
38
+ * @property {string} content - Base64 data URL or text content
39
+ * @property {string} [filename] - Original filename if available
40
+ * @property {string} [mimeType] - MIME type if available
41
+ * @property {number} [size] - File size in bytes if available
42
+ */
43
+
44
/**
 * Lower-case file extensions (with leading dot) that the filename checks
 * treat as executable or script content.
 */
const SUSPICIOUS_EXTENSIONS = [
  '.exe',
  '.scr',
  '.bat',
  '.cmd',
  '.com',
  '.pif',
  '.vbs',
  '.js',
  '.jar',
  '.ps1',
  '.sh',
  '.py',
  '.rb',
  '.php',
  '.asp',
  '.jsp',
  '.pl'
];
51
+
52
/**
 * Lookup from lower-case file extension (with leading dot) to the list of
 * MIME types considered a valid match for that extension.
 */
const MIME_TYPE_MAP = {
  // Images
  '.png': ['image/png'],
  '.jpg': ['image/jpeg'],
  '.jpeg': ['image/jpeg'],
  '.gif': ['image/gif'],
  '.webp': ['image/webp'],
  '.svg': ['image/svg+xml'],

  // Documents and data
  '.pdf': ['application/pdf'],
  '.txt': ['text/plain'],
  '.json': ['application/json'],
  '.xml': ['application/xml', 'text/xml'],

  // Web assets
  '.html': ['text/html'],
  '.css': ['text/css'],
  '.js': ['application/javascript', 'text/javascript']
};
70
+
71
/**
 * Regular expressions whose matches are reported as possible prompt
 * injection in embedded or decoded content. All are global so that every
 * occurrence can be enumerated with `String.prototype.matchAll`.
 */
const INJECTION_PATTERNS = [
  // --- Direct attempts to override earlier instructions ---
  /ignore\s+(?:previous|all)\s+instructions?/gi,
  /system\s*:\s*you\s+are\s+now/gi,
  /forget\s+(?:everything|all)\s+(?:above|before)/gi,
  /act\s+as\s+(?:if\s+you\s+are|a)\s+(?:different|new)/gi,

  // --- Markers that wrap hidden instructions (HTML comment, [TAG], <tag>) ---
  /<!--\s*(?:system|instruction|prompt)/gi,
  /\[(?:SYSTEM|INSTRUCTION|PROMPT)\]/gi,
  /<(?:system|instruction|prompt)>/gi,

  // --- Invisible characters ---
  // Zero-width space / non-joiner / joiner and the byte-order mark
  /\u200b|\u200c|\u200d|\ufeff/g,
  // Two or more consecutive non-breaking spaces
  /\u00a0{2,}/g,

  // --- Base64 spellings of "ignore", "system", "forget", "act as" ---
  /aWdub3Jl|c3lzdGVt|Zm9yZ2V0|YWN0IGFz/g,

  // --- http(s): or data: URL followed by 20+ non-space, non-quote characters ---
  /(?:https?:\/\/|data:)[^\s\'"]{20,}/gi
];
96
+
97
/**
 * Regular expressions whose matches are reported as possible hidden or
 * steganographic data. All are global for use with `matchAll`.
 */
const STEGANOGRAPHIC_PATTERNS = [
  // A 1-4 character sequence followed by at least ten more copies of itself
  /(.{1,4})\1{10,}/g,

  // A run of 100 or more characters drawn from the base64 alphabet
  /[A-Za-z0-9+/=]{100,}/g,

  // Any control character in the range 0x00-0x08
  /\x00|\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08/g,
];
110
+
111
/**
 * Regular expressions matching PDF syntax that is reported as a threat when
 * found in PDF text. All are global and case-insensitive.
 */
const PDF_THREAT_PATTERNS = [
  // Script actions: /JavaScript and its short form /JS
  /\/JavaScript/gi,
  /\/JS/gi,

  // Launch actions, and /F (...) file specifications naming an executable
  /\/Launch/gi,
  /\/F\s*\(.*\.(?:exe|bat|cmd|scr)\)/gi,

  // Files carried inside the document
  /\/EmbeddedFile/gi,
  /\/FileAttachment/gi,

  // /URI (...) actions whose target contains "javascript" or "data:"
  /\/URI\s*\(.*(?:javascript|data:)/gi
];
130
+
131
/**
 * Inspect a filename for patterns used to disguise executables or to write
 * outside an intended directory.
 *
 * Checks, in order: executable/script extension, double extension, embedded
 * null byte, `../` or `..\` traversal, Windows drive-relative paths
 * (GHSA-qffp-2rhf-9h96), absolute/UNC paths, and names longer than 255
 * characters. Extension checks are case-insensitive.
 *
 * @param {string} filename - Name (or path) supplied with the file
 * @returns {Array} Array of findings; empty when `filename` is empty or not a string
 */
function analyzeFilename(filename) {
  if (typeof filename !== 'string' || filename.length === 0) return [];

  const findings = [];
  const lowerName = filename.toLowerCase();
  const ext = path.extname(filename).toLowerCase();

  // Check for suspicious extensions
  if (SUSPICIOUS_EXTENSIONS.includes(ext)) {
    findings.push({
      type: 'suspicious_file_extension',
      subtype: 'executable_extension',
      severity: 'high',
      matched: ext,
      // `ext` is lower-cased, so it must be located in the lower-cased name;
      // searching the raw name reports -1 for names such as "SETUP.EXE".
      position: lowerName.lastIndexOf(ext),
      message: `Potentially dangerous file extension: ${ext}`
    });
  }

  // Check for double extensions (e.g., file.txt.exe). The final extension is
  // lower-cased so "file.txt.EXE" is treated the same as "file.txt.exe".
  const doubleExtMatch = filename.match(/\.([^.]+)\.([^.]+)$/);
  if (doubleExtMatch && SUSPICIOUS_EXTENSIONS.includes(`.${doubleExtMatch[2].toLowerCase()}`)) {
    findings.push({
      type: 'filename_obfuscation',
      subtype: 'double_extension',
      severity: 'high',
      matched: doubleExtMatch[0],
      position: doubleExtMatch.index,
      message: 'Double file extension detected (possible obfuscation)'
    });
  }

  // Check for null bytes in filename
  const nullIndex = filename.indexOf('\x00');
  if (nullIndex !== -1) {
    findings.push({
      type: 'filename_injection',
      subtype: 'null_byte',
      severity: 'critical',
      matched: '\\x00',
      position: nullIndex,
      message: 'Null byte in filename (path traversal attempt)'
    });
  }

  // Check for path traversal patterns. Report the earliest token so that
  // `matched` and `position` always describe the same occurrence.
  const traversalHits = ['../', '..\\']
    .map((token) => ({ token, index: filename.indexOf(token) }))
    .filter((hit) => hit.index !== -1)
    .sort((a, b) => a.index - b.index);
  if (traversalHits.length > 0) {
    const [firstHit] = traversalHits;
    findings.push({
      type: 'filename_injection',
      subtype: 'path_traversal',
      severity: 'high',
      matched: firstHit.token,
      position: firstHit.index,
      message: 'Path traversal pattern in filename'
    });
  }

  // Check for drive-relative path traversal (Windows) — GHSA-qffp-2rhf-9h96
  // Patterns like "C:target" (no backslash) resolve to current dir of that drive,
  // bypassing ../ checks. Also catch absolute paths and UNC paths.
  const driveRelativeMatch = filename.match(/^[A-Za-z]:[^\\\/]/);
  const isAbsolutePath =
    /^[A-Za-z]:[\\\/]/.test(filename) ||
    filename.startsWith('/') ||
    filename.startsWith('\\\\');
  if (driveRelativeMatch) {
    findings.push({
      type: 'filename_injection',
      subtype: 'drive_relative_traversal',
      severity: 'high',
      matched: driveRelativeMatch[0],
      position: 0,
      message: 'Drive-relative path traversal (Windows) — can escape extraction directory'
    });
  }
  if (isAbsolutePath) {
    findings.push({
      type: 'filename_injection',
      subtype: 'absolute_path',
      severity: 'high',
      matched: filename.substring(0, 10),
      position: 0,
      message: 'Absolute path in filename — may write outside intended directory'
    });
  }

  // Check for extremely long filenames (possible buffer overflow attempt)
  if (filename.length > 255) {
    findings.push({
      type: 'filename_anomaly',
      subtype: 'excessive_length',
      severity: 'medium',
      matched: filename.substring(0, 50) + '...',
      position: 255,
      message: `Filename exceeds typical limits (${filename.length} characters)`
    });
  }

  return findings;
}
232
+
233
/**
 * Validate a declared MIME type against the filename's extension and flag
 * MIME types associated with executables.
 *
 * The MIME type is compared case-insensitively and without parameters, so
 * "IMAGE/PNG" and "text/html; charset=utf-8" are treated as "image/png" and
 * "text/html". Findings report the MIME type exactly as it was supplied.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @returns {Array} Array of findings; empty when either argument is missing
 *   or not a string
 */
function validateMimeType(filename, mimeType) {
  if (!filename || !mimeType) return [];
  if (typeof filename !== 'string' || typeof mimeType !== 'string') return [];

  const findings = [];
  const ext = path.extname(filename).toLowerCase();
  // Drop parameters (";charset=...") and normalise case before comparing.
  const essence = mimeType.split(';')[0].trim().toLowerCase();
  // Own-property lookup only, so extensions can never resolve to inherited keys.
  const expectedMimes = Object.prototype.hasOwnProperty.call(MIME_TYPE_MAP, ext)
    ? MIME_TYPE_MAP[ext]
    : undefined;

  if (expectedMimes && !expectedMimes.includes(essence)) {
    findings.push({
      type: 'mime_mismatch',
      subtype: 'extension_mismatch',
      severity: 'medium',
      matched: mimeType,
      message: `MIME type '${mimeType}' doesn't match extension '${ext}' (expected: ${expectedMimes.join(' or ')})`
    });
  }

  // Check for dangerous MIME types
  const dangerousMimes = [
    'application/x-msdownload',
    'application/x-executable',
    'application/x-msdos-program',
    'application/x-ms-shortcut'
  ];

  if (dangerousMimes.includes(essence)) {
    findings.push({
      type: 'dangerous_mime_type',
      subtype: 'executable_mime',
      severity: 'critical',
      matched: mimeType,
      message: `Dangerous MIME type detected: ${mimeType}`
    });
  }

  return findings;
}
276
+
277
/**
 * Analyze a `data:` URL for embedded threats.
 *
 * The URL is split at the first comma into a header (`mime;param;...`) and a
 * payload. The payload is always scanned: base64 payloads are decoded first
 * (the `base64` marker may follow other parameters such as `charset=`),
 * other payloads are percent-decoded. A `data:` string with no comma is
 * reported as malformed and its remainder is still scanned as text.
 *
 * @param {string} dataUrl
 * @returns {Array} Array of findings; empty when `dataUrl` is not a string
 *   starting with "data:"
 */
function analyzeDataUrl(dataUrl) {
  if (typeof dataUrl !== 'string' || !dataUrl.startsWith('data:')) return [];

  const findings = [];
  const schemeLength = 'data:'.length;
  const commaIndex = dataUrl.indexOf(',');

  if (commaIndex === -1) {
    // No header/payload separator: not a valid data URL. Still scan what
    // follows the scheme so plain text beginning with "data:" is not skipped.
    findings.push({
      type: 'malformed_data_url',
      subtype: 'parse_error',
      severity: 'low',
      matched: dataUrl.substring(0, 100),
      message: 'Malformed data URL structure'
    });
    findings.push(...scanForInjectionPatterns(dataUrl.slice(schemeLength)));
    return findings;
  }

  // Split on the FIRST comma only; the payload itself may contain commas.
  const header = dataUrl.slice(schemeLength, commaIndex);
  const data = dataUrl.slice(commaIndex + 1);
  const [rawMimeType, ...params] = header.split(';');
  const mimeType = rawMimeType.trim().toLowerCase();
  const isBase64 = params.some((param) => param.trim().toLowerCase() === 'base64');

  // Check for suspicious MIME types in data URLs
  if (mimeType === 'text/html' || mimeType === 'application/javascript') {
    findings.push({
      type: 'suspicious_data_url',
      subtype: 'executable_content',
      severity: 'high',
      matched: mimeType,
      message: `Potentially dangerous data URL MIME type: ${mimeType}`
    });
  }

  if (data) {
    if (isBase64) {
      // Decode base64 and scan for patterns
      try {
        const decoded = Buffer.from(data, 'base64').toString('utf8');
        findings.push(...scanForInjectionPatterns(decoded));
      } catch {
        // If decoding fails, still check the raw data for patterns
        findings.push(...scanRawBase64(data));
      }
    } else {
      // Non-base64 payloads are percent-encoded text; decode when possible
      // and fall back to the raw payload on invalid escape sequences.
      let text = data;
      try {
        text = decodeURIComponent(data);
      } catch {
        text = data;
      }
      findings.push(...scanForInjectionPatterns(text));
    }
  }

  // Check for oversized data URLs (possible DoS)
  if (data.length > 10 * 1024 * 1024) { // 10MB limit
    const sizeMb = Math.round(data.length / 1024 / 1024);
    findings.push({
      type: 'size_anomaly',
      subtype: 'oversized_data_url',
      severity: 'medium',
      matched: `${sizeMb}MB`,
      message: `Extremely large data URL (${sizeMb}MB)`
    });
  }

  return findings;
}
340
+
341
/**
 * Scan text content for prompt-injection and steganographic patterns.
 *
 * Every match of every pattern produces one finding. Injection findings
 * carry the full matched text; steganographic findings carry at most the
 * first 50 characters of the match.
 *
 * @param {string} content
 * @returns {Array} Array of findings; empty when `content` is empty or not a string
 */
function scanForInjectionPatterns(content) {
  // Callers may hand over null/undefined or non-text values; there is nothing
  // to match in those, and `matchAll` would throw on them.
  if (typeof content !== 'string' || content.length === 0) return [];

  const findings = [];

  for (const pattern of INJECTION_PATTERNS) {
    for (const match of content.matchAll(pattern)) {
      findings.push({
        type: 'embedded_injection',
        subtype: 'prompt_injection',
        severity: 'high',
        matched: match[0],
        position: match.index,
        message: 'Potential prompt injection pattern detected in embedded content'
      });
    }
  }

  for (const pattern of STEGANOGRAPHIC_PATTERNS) {
    for (const match of content.matchAll(pattern)) {
      findings.push({
        type: 'steganographic_pattern',
        subtype: 'hidden_data',
        severity: 'medium',
        matched: match[0].substring(0, 50),
        position: match.index,
        message: 'Potential steganographic or hidden data pattern'
      });
    }
  }

  return findings;
}
379
+
380
/**
 * Scan raw base64 text for known injection phrases without decoding it.
 *
 * The base64 spelling of a phrase depends on where the phrase starts within
 * a 3-byte group, so each phrase is encoded at all three byte alignments and
 * the characters that would be influenced by neighbouring bytes are trimmed.
 * The remaining fragment appears verbatim in any base64 text that embeds the
 * phrase at that alignment.
 *
 * Matching is exact: phrases are lower-case with single spaces, and the
 * base64 text must not contain line breaks inside a phrase.
 *
 * @param {string} base64Data
 * @returns {Array} Array of findings; empty when `base64Data` is empty or not a string
 */
function scanRawBase64(base64Data) {
  if (typeof base64Data !== 'string' || base64Data.length === 0) return [];

  const phrases = [
    'ignore previous instructions',
    'ignore all instructions',
    'system: you are now',
    'forget everything above',
    'forget everything before',
    'forget all above',
    'forget all before'
  ];

  // Leading base64 characters that mix in bits of the 0/1/2 preceding bytes.
  const leadingTrim = [0, 2, 3];
  const findings = [];

  for (const phrase of phrases) {
    const phraseBytes = Buffer.from(phrase, 'utf8');

    for (let offset = 0; offset < 3; offset += 1) {
      const bytes = Buffer.concat([Buffer.alloc(offset), phraseBytes]);
      const encoded = bytes.toString('base64').replace(/=+$/, '');
      // When the byte count is not a multiple of 3 the final character also
      // encodes padding bits, so it would differ once more data follows.
      const trailingTrim = bytes.length % 3 === 0 ? 0 : 1;
      const fragment = encoded.slice(leadingTrim[offset], encoded.length - trailingTrim);

      let position = base64Data.indexOf(fragment);
      while (position !== -1) {
        findings.push({
          type: 'encoded_injection',
          subtype: 'base64_injection',
          severity: 'high',
          matched: fragment,
          position,
          message: 'Base64-encoded injection pattern detected'
        });
        position = base64Data.indexOf(fragment, position + fragment.length);
      }
    }
  }

  return findings;
}
411
+
412
/**
 * Scan text extracted from a PDF for PDF-specific threat markers and for the
 * general injection patterns.
 *
 * @param {string} pdfText
 * @returns {Array} PDF-specific findings followed by injection findings;
 *   empty when `pdfText` is falsy
 */
function scanPdfContent(pdfText) {
  if (!pdfText) {
    return [];
  }

  // One finding per occurrence of each PDF threat pattern, in pattern order.
  const pdfFindings = PDF_THREAT_PATTERNS.flatMap((threatPattern) =>
    Array.from(pdfText.matchAll(threatPattern), (hit) => ({
      type: 'pdf_threat',
      subtype: 'suspicious_pdf_feature',
      severity: 'high',
      matched: hit[0],
      position: hit.index,
      message: 'Suspicious PDF feature detected (JavaScript, launch action, or embedded file)'
    }))
  );

  // The same text is also checked with the general injection scanner.
  return pdfFindings.concat(scanForInjectionPatterns(pdfText));
}
442
+
443
/**
 * Check the declared file size for anomalies.
 *
 * Reports images smaller than 100 bytes (including 0-byte files) and any
 * file larger than 100MB. Nothing is reported when no size is supplied.
 *
 * @param {MultimodalInput} input
 * @returns {Array} Array of findings
 */
function checkSizeAnomalies(input) {
  const findings = [];

  const declaredSize = input ? input.size : undefined;
  // Absent size means "unknown", which is different from a size of 0.
  if (declaredSize === null || declaredSize === undefined || declaredSize === '') {
    return findings;
  }

  const size = Number(declaredSize);
  if (Number.isNaN(size)) return findings;

  const mimeType = typeof input.mimeType === 'string' ? input.mimeType.trim().toLowerCase() : '';

  // Check for suspiciously small files that claim to be images
  if (mimeType.startsWith('image/') && size < 100) {
    findings.push({
      type: 'size_anomaly',
      subtype: 'suspiciously_small',
      severity: 'low',
      matched: `${size} bytes`,
      message: `Suspiciously small image file (${size} bytes)`
    });
  }

  // Check for extremely large files (potential DoS)
  if (size > 100 * 1024 * 1024) { // 100MB
    const sizeMb = Math.round(size / 1024 / 1024);
    findings.push({
      type: 'size_anomaly',
      subtype: 'extremely_large',
      severity: 'medium',
      matched: `${sizeMb}MB`,
      message: `Extremely large file (${sizeMb}MB) - potential DoS`
    });
  }

  return findings;
}
477
+
478
/**
 * Main multimodal input scanner.
 *
 * Runs the filename, MIME type and size checks on the supplied metadata,
 * then analyses the content: `data:` URLs go to the data URL analyser,
 * content declared as `application/pdf` goes to the PDF scanner, and any
 * other text goes to the generic injection scanner. Buffer content is
 * decoded as UTF-8 before analysis; other non-string content is skipped.
 *
 * @param {MultimodalInput} input
 * @returns {MultimodalScanResult} `safe` is true only when there are no
 *   findings; `maxSeverity` is null in that case
 */
function scanMultimodalInput(input) {
  if (!input) {
    return { safe: true, findings: [], maxSeverity: null };
  }

  const findings = [];

  // Analyze filename if provided
  if (input.filename) {
    findings.push(...analyzeFilename(input.filename));
  }

  // Validate MIME type against filename
  if (input.filename && input.mimeType) {
    findings.push(...validateMimeType(input.filename, input.mimeType));
  }

  // Check size anomalies
  findings.push(...checkSizeAnomalies(input));

  // Analyze content based on type. The type is established BEFORE calling
  // string methods so binary or otherwise non-text content cannot throw.
  const content = Buffer.isBuffer(input.content)
    ? input.content.toString('utf8')
    : input.content;

  if (typeof content === 'string' && content.length > 0) {
    // Compare the declared MIME type without parameters and ignoring case.
    const declaredMime = typeof input.mimeType === 'string'
      ? input.mimeType.split(';')[0].trim().toLowerCase()
      : '';

    if (content.startsWith('data:')) {
      // Data URL analysis
      findings.push(...analyzeDataUrl(content));
    } else if (declaredMime === 'application/pdf') {
      // PDF content analysis
      findings.push(...scanPdfContent(content));
    } else {
      // Generic text content analysis
      findings.push(...scanForInjectionPatterns(content));
    }
  }

  // Determine overall safety and max severity
  const safe = findings.length === 0;
  let maxSeverity = null;

  if (!safe) {
    const severityRank = { low: 0, medium: 1, high: 2, critical: 3 };
    // Unknown severities rank as 0, the same as 'low'.
    maxSeverity = 'low';
    for (const finding of findings) {
      if ((severityRank[finding.severity] || 0) > (severityRank[maxSeverity] || 0)) {
        maxSeverity = finding.severity;
      }
    }
  }

  return {
    safe,
    findings,
    maxSeverity
  };
}
536
+
537
/**
 * Convenience function to scan image data URLs.
 *
 * The MIME type is read from the start of the URL, up to the first `;` or
 * `,`, and lower-cased. It is null when `dataUrl` is not a string beginning
 * with "data:" followed by a MIME type.
 *
 * @param {string} dataUrl
 * @param {string} [filename]
 * @returns {MultimodalScanResult}
 */
function scanImageDataUrl(dataUrl, filename = null) {
  // Stop at ',' as well as ';': a URL without parameters
  // ("data:image/png,....") would otherwise yield the MIME type plus payload.
  const mimeMatch = typeof dataUrl === 'string' ? dataUrl.match(/^data:([^;,]+)/) : null;
  const mimeType = mimeMatch ? mimeMatch[1].trim().toLowerCase() : null;

  return scanMultimodalInput({
    content: dataUrl,
    filename,
    mimeType
  });
}
553
+
554
/**
 * Convenience wrapper that scans file metadata only: the input handed to the
 * main scanner carries an empty string as its content.
 *
 * @param {string} filename
 * @param {string} mimeType
 * @param {number} [size]
 * @returns {MultimodalScanResult}
 */
function scanFileMetadata(filename, mimeType, size = null) {
  const metadataOnlyInput = { content: '', filename, mimeType, size };
  return scanMultimodalInput(metadataOnlyInput);
}
569
+
570
+ module.exports = {
571
+ scanMultimodalInput,
572
+ scanImageDataUrl,
573
+ scanFileMetadata,
574
+ analyzeFilename,
575
+ validateMimeType,
576
+ analyzeDataUrl,
577
+ scanForInjectionPatterns,
578
+ scanPdfContent
579
+ };