agentshield-sdk 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +191 -0
- package/LICENSE +21 -0
- package/README.md +975 -0
- package/bin/agent-shield.js +680 -0
- package/package.json +118 -0
- package/src/adaptive.js +330 -0
- package/src/agent-protocol.js +998 -0
- package/src/alert-tuning.js +480 -0
- package/src/allowlist.js +603 -0
- package/src/audit-immutable.js +914 -0
- package/src/audit-streaming.js +469 -0
- package/src/badges.js +196 -0
- package/src/behavior-profiling.js +289 -0
- package/src/benchmark-harness.js +804 -0
- package/src/canary.js +271 -0
- package/src/certification.js +563 -0
- package/src/circuit-breaker.js +321 -0
- package/src/compliance.js +617 -0
- package/src/confidence-tuning.js +324 -0
- package/src/confused-deputy.js +624 -0
- package/src/context-scoring.js +360 -0
- package/src/conversation.js +494 -0
- package/src/cost-optimizer.js +1024 -0
- package/src/ctf.js +462 -0
- package/src/detector-core.js +1999 -0
- package/src/distributed.js +359 -0
- package/src/document-scanner.js +795 -0
- package/src/embedding.js +307 -0
- package/src/encoding.js +429 -0
- package/src/enterprise.js +405 -0
- package/src/errors.js +100 -0
- package/src/eu-ai-act.js +523 -0
- package/src/fuzzer.js +764 -0
- package/src/honeypot.js +328 -0
- package/src/i18n-patterns.js +523 -0
- package/src/index.js +430 -0
- package/src/integrations.js +528 -0
- package/src/llm-redteam.js +670 -0
- package/src/main.js +741 -0
- package/src/main.mjs +38 -0
- package/src/mcp-bridge.js +542 -0
- package/src/mcp-certification.js +846 -0
- package/src/mcp-sdk-integration.js +355 -0
- package/src/mcp-security-runtime.js +741 -0
- package/src/mcp-server.js +740 -0
- package/src/middleware.js +208 -0
- package/src/model-finetuning.js +884 -0
- package/src/model-fingerprint.js +1042 -0
- package/src/multi-agent-trust.js +453 -0
- package/src/multi-agent.js +404 -0
- package/src/multimodal.js +296 -0
- package/src/nist-mapping.js +505 -0
- package/src/observability.js +330 -0
- package/src/openclaw.js +450 -0
- package/src/otel.js +544 -0
- package/src/owasp-2025.js +483 -0
- package/src/pii.js +390 -0
- package/src/plugin-marketplace.js +628 -0
- package/src/plugin-system.js +349 -0
- package/src/policy-dsl.js +775 -0
- package/src/policy-extended.js +635 -0
- package/src/policy.js +443 -0
- package/src/presets.js +409 -0
- package/src/production.js +557 -0
- package/src/prompt-leakage.js +321 -0
- package/src/rag-vulnerability.js +579 -0
- package/src/redteam.js +475 -0
- package/src/response-handler.js +429 -0
- package/src/scanners.js +357 -0
- package/src/self-healing.js +363 -0
- package/src/semantic.js +339 -0
- package/src/shield-score.js +250 -0
- package/src/sso-saml.js +897 -0
- package/src/stream-scanner.js +806 -0
- package/src/testing.js +505 -0
- package/src/threat-encyclopedia.js +629 -0
- package/src/threat-intel-network.js +1017 -0
- package/src/token-analysis.js +467 -0
- package/src/tool-guard.js +412 -0
- package/src/tool-output-validator.js +354 -0
- package/src/utils.js +83 -0
- package/src/watermark.js +235 -0
- package/src/worker-scanner.js +601 -0
- package/types/index.d.ts +2088 -0
|
@@ -0,0 +1,795 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Agent Shield — Document Scanner
|
|
5
|
+
*
|
|
6
|
+
* Extracts text from various file formats and scans for threats,
|
|
7
|
+
* with a focus on detecting indirect prompt injection attacks hidden
|
|
8
|
+
* inside documents uploaded to AI agents.
|
|
9
|
+
*
|
|
10
|
+
* All detection runs locally — no data ever leaves your environment.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
const fs = require('fs');
|
|
14
|
+
const path = require('path');
|
|
15
|
+
const { scanText, SEVERITY_ORDER } = require('./detector-core');
|
|
16
|
+
|
|
17
|
+
// =========================================================================
|
|
18
|
+
// TEXT EXTRACTOR
|
|
19
|
+
// =========================================================================
|
|
20
|
+
|
|
21
|
+
/**
 * Named HTML entities decoded by TextExtractor.extractFromHTML, keyed by the
 * entity name without the surrounding '&' and ';'. Numeric references such as
 * &#39;, &#x27; and &#x2F; are handled generically by decodeCharacterReferences.
 */
const HTML_NAMED_ENTITIES = Object.freeze({
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'",
  nbsp: ' ',
  hellip: '...',
  mdash: '\u2014',
  ndash: '\u2013',
  copy: '(c)',
  reg: '(R)',
  trade: '(TM)'
});

/**
 * Named entities decoded by TextExtractor.extractFromXML.
 */
const XML_NAMED_ENTITIES = Object.freeze({
  amp: '&',
  lt: '<',
  gt: '>',
  quot: '"',
  apos: "'"
});

/**
 * Decode named and numeric character references in a single pass.
 *
 * A single pass matters: decoding '&amp;' in one step and the other entities
 * in a later step turns '&amp;lt;' into '<' (double decoding).
 *
 * @param {string} text - Text that may contain character references.
 * @param {Object<string, string>} namedEntities - Map of entity name to replacement.
 * @returns {string} The text with every recognised reference decoded; unknown
 *   named references are left untouched.
 */
function decodeCharacterReferences(text, namedEntities) {
  const reference = /&(#[xX][0-9a-fA-F]+|#[0-9]+|[A-Za-z][A-Za-z0-9]*);/g;
  return text.replace(reference, (whole, body) => {
    if (body.charAt(0) !== '#') {
      // Own-property check so names such as 'constructor' are never resolved
      // through Object.prototype.
      return Object.prototype.hasOwnProperty.call(namedEntities, body)
        ? namedEntities[body]
        : whole;
    }

    const marker = body.charAt(1);
    const isHex = marker === 'x' || marker === 'X';
    const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);

    // Zero, surrogates and values beyond U+10FFFF are not valid characters;
    // substitute U+FFFD instead of letting String.fromCodePoint throw.
    const isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
    if (!(codePoint >= 1 && codePoint <= 0x10FFFF) || isSurrogate) {
      return '\uFFFD';
    }
    // fromCodePoint (unlike fromCharCode) handles code points above U+FFFF.
    return String.fromCodePoint(codePoint);
  });
}

/**
 * Escape the three markup-significant characters so literal text survives a
 * later tag-stripping pass and is restored by entity decoding.
 *
 * @param {string} text - Literal text.
 * @returns {string} The text with '&', '<' and '>' escaped.
 */
function escapeMarkupCharacters(text) {
  return text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
}

/**
 * Extracts plain text from common file formats using only Node.js built-ins.
 * No external dependencies required.
 *
 * Every method is static and does not use `this`, so the methods can be
 * stored in lookup tables and called unbound.
 */
class TextExtractor {
  /**
   * Extract text from a plain text file buffer.
   * @param {Buffer} buffer - The file contents.
   * @returns {string} The buffer decoded as UTF-8.
   */
  static extractFromPlainText(buffer) {
    return buffer.toString('utf-8');
  }

  /**
   * Extract text from an HTML buffer by stripping tags and decoding entities.
   * Script blocks, style blocks and comments are dropped; image alt text is kept.
   * @param {Buffer} buffer - The HTML file contents.
   * @returns {string} The extracted text with whitespace collapsed.
   */
  static extractFromHTML(buffer) {
    let html = buffer.toString('utf-8');

    // Remove script and style blocks entirely
    html = html.replace(/<script[\s\S]*?<\/script>/gi, ' ');
    html = html.replace(/<style[\s\S]*?<\/style>/gi, ' ');

    // Remove HTML comments
    html = html.replace(/<!--[\s\S]*?-->/g, ' ');

    // Keep alt text from images (important for injection detection)
    html = html.replace(/<img[^>]*alt\s*=\s*"([^"]*)"[^>]*>/gi, ' $1 ');
    html = html.replace(/<img[^>]*alt\s*=\s*'([^']*)'[^>]*>/gi, ' $1 ');

    // Strip all remaining HTML tags
    html = html.replace(/<[^>]+>/g, ' ');

    // Decode named and numeric entities in one pass (no double decoding)
    html = decodeCharacterReferences(html, HTML_NAMED_ENTITIES);

    // Collapse whitespace
    return html.replace(/\s+/g, ' ').trim();
  }

  /**
   * Extract text from a CSV buffer by parsing rows and joining cell values.
   * Parsing is line based: double-quoted fields may contain commas and
   * doubled quotes ("") but quote state does not carry across lines.
   * @param {Buffer} buffer - The CSV file contents.
   * @returns {string} All non-empty cell values joined by single spaces.
   */
  static extractFromCSV(buffer) {
    const rows = buffer.toString('utf-8').split(/\r?\n/);
    const cells = [];

    for (const row of rows) {
      if (!row.trim()) {
        continue;
      }

      let field = '';
      let quoted = false;
      for (let pos = 0; pos < row.length; pos++) {
        const ch = row[pos];
        if (ch === '"') {
          if (quoted && row[pos + 1] === '"') {
            // A doubled quote inside a quoted field is a literal quote.
            field += '"';
            pos++;
          } else {
            quoted = !quoted;
          }
        } else if (ch === ',' && !quoted) {
          cells.push(field.trim());
          field = '';
        } else {
          field += ch;
        }
      }
      cells.push(field.trim());
    }

    return cells.filter(cell => cell.length > 0).join(' ');
  }

  /**
   * Extract all string values and object keys from a JSON buffer recursively.
   * @param {Buffer} buffer - The JSON file contents.
   * @returns {string} Keys and string values joined by spaces, in traversal
   *   order; the raw text when the buffer is not valid JSON.
   */
  static extractFromJSON(buffer) {
    const text = buffer.toString('utf-8');

    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch (e) {
      // Invalid JSON is still scanned, as raw text
      return text;
    }

    const strings = [];
    const walk = (value) => {
      if (typeof value === 'string') {
        strings.push(value);
        return;
      }
      if (Array.isArray(value)) {
        value.forEach(walk);
        return;
      }
      if (value !== null && typeof value === 'object') {
        for (const key of Object.keys(value)) {
          // Keys are collected too: they can carry text just like values
          strings.push(key);
          walk(value[key]);
        }
      }
    };
    walk(parsed);

    return strings.join(' ');
  }

  /**
   * Extract text from a Markdown buffer by stripping formatting.
   * Fenced and inline code is removed; image alt text and link text are kept.
   * @param {Buffer} buffer - The Markdown file contents.
   * @returns {string} The extracted text with whitespace collapsed.
   */
  static extractFromMarkdown(buffer) {
    // Applied strictly in this order; later rules rely on earlier ones
    // (e.g. images must be rewritten before plain links).
    const rules = [
      [/```[\s\S]*?```/g, ' '],                 // fenced code blocks
      [/`[^`]+`/g, ' '],                        // inline code
      [/!\[([^\]]*)\]\([^)]*\)/g, ' $1 '],      // images -> alt text
      [/\[([^\]]*)\]\([^)]*\)/g, ' $1 '],       // links -> link text
      [/^#{1,6}\s+/gm, ''],                     // heading markers
      [/\*\*([^*]+)\*\*/g, '$1'],               // bold (**)
      [/\*([^*]+)\*/g, '$1'],                   // italic (*)
      [/__([^_]+)__/g, '$1'],                   // bold (__)
      [/_([^_]+)_/g, '$1'],                     // italic (_)
      [/~~([^~]+)~~/g, '$1'],                   // strikethrough
      [/^>\s+/gm, ''],                          // blockquote markers
      [/^[-*_]{3,}\s*$/gm, ''],                 // horizontal rules
      [/^[\s]*[-*+]\s+/gm, ''],                 // bullet list markers
      [/^[\s]*\d+\.\s+/gm, ''],                 // numbered list markers
      [/<[^>]+>/g, ' '],                        // embedded HTML tags
      [/\s+/g, ' ']                             // collapse whitespace
    ];

    let md = buffer.toString('utf-8');
    for (const [pattern, replacement] of rules) {
      md = md.replace(pattern, replacement);
    }
    return md.trim();
  }

  /**
   * Extract text from an XML buffer by stripping tags.
   * CDATA content is kept as literal text, comments and processing
   * instructions are dropped, and named/numeric references are decoded.
   * @param {Buffer} buffer - The XML file contents.
   * @returns {string} The extracted text with whitespace collapsed.
   */
  static extractFromXML(buffer) {
    let xml = buffer.toString('utf-8');

    // Remove XML declarations and processing instructions
    xml = xml.replace(/<\?[\s\S]*?\?>/g, '');

    // Unwrap CDATA but escape its content: otherwise a literal '<' inside
    // CDATA would be treated as a tag start below and swallow following text.
    xml = xml.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, (_, content) => escapeMarkupCharacters(content));

    // Remove comments
    xml = xml.replace(/<!--[\s\S]*?-->/g, ' ');

    // Strip all XML tags
    xml = xml.replace(/<[^>]+>/g, ' ');

    // Decode entities in one pass (also restores the escaped CDATA text)
    xml = decodeCharacterReferences(xml, XML_NAMED_ENTITIES);

    // Collapse whitespace
    return xml.replace(/\s+/g, ' ').trim();
  }

  /**
   * Detect the file type from buffer content using basic heuristics.
   * Only the first 512 bytes are inspected.
   * @param {Buffer} buffer - The file contents.
   * @returns {string} The detected MIME type; 'application/octet-stream' for
   *   a missing or empty buffer and 'text/plain' when nothing else matches.
   */
  static detect(buffer) {
    if (!buffer || buffer.length === 0) {
      return 'application/octet-stream';
    }

    const head = buffer.subarray(0, 512).toString('utf-8').trimStart();

    // JSON: starts with { or [
    if (head.startsWith('{') || head.startsWith('[')) {
      return 'application/json';
    }

    // XML: starts with <?xml or <!DOCTYPE ... xml
    if (head.startsWith('<?xml') || /^<!DOCTYPE\s+[^>]*xml/i.test(head)) {
      return 'application/xml';
    }

    // HTML: starts with <!DOCTYPE html or <html
    if (/^<!DOCTYPE\s+html/i.test(head) || /^<html[\s>]/i.test(head)) {
      return 'text/html';
    }

    // Generic tag-based content: HTML if it uses html-like tags, else XML
    if (head.startsWith('<')) {
      const looksLikeHtml = /<(?:div|span|p|body|head|table|form|input|a\s)/i.test(head);
      return looksLikeHtml ? 'text/html' : 'application/xml';
    }

    // CSV: first two non-blank lines share the same non-zero comma count
    const lines = head.split(/\r?\n/).filter(line => line.trim());
    if (lines.length >= 2) {
      const countCommas = (line) => (line.match(/,/g) || []).length;
      const first = countCommas(lines[0]);
      if (first > 0 && first === countCommas(lines[1])) {
        return 'text/csv';
      }
    }

    // Markdown: check for common markers
    if (/^#{1,6}\s/.test(head) || /^\s*[-*+]\s/.test(head) || /\[.*\]\(.*\)/.test(head)) {
      return 'text/markdown';
    }

    // Default to plain text
    return 'text/plain';
  }
}
|
|
301
|
+
|
|
302
|
+
// =========================================================================
|
|
303
|
+
// MIME TYPE TO EXTRACTOR MAPPING
|
|
304
|
+
// =========================================================================
|
|
305
|
+
|
|
306
|
+
// Lookup table from MIME type to the TextExtractor method that handles it.
// Built on a null-prototype object because it is indexed with caller-supplied
// MIME strings: on a plain object literal, keys such as 'constructor' or
// 'toString' would resolve to Object.prototype functions and be invoked as
// if they were extractors. Frozen because the table is never meant to change.
const EXTRACTORS = Object.freeze(Object.assign(Object.create(null), {
  'text/plain': TextExtractor.extractFromPlainText,
  'text/html': TextExtractor.extractFromHTML,
  'text/csv': TextExtractor.extractFromCSV,
  'application/json': TextExtractor.extractFromJSON,
  'text/markdown': TextExtractor.extractFromMarkdown,
  'application/xml': TextExtractor.extractFromXML,
  'text/xml': TextExtractor.extractFromXML
}));

// MIME types that have an extractor, in table order.
const SUPPORTED_TYPES = Object.freeze(Object.keys(EXTRACTORS));
|
|
317
|
+
|
|
318
|
+
// =========================================================================
|
|
319
|
+
// INDIRECT INJECTION SCANNER
|
|
320
|
+
// =========================================================================
|
|
321
|
+
|
|
322
|
+
/**
 * Phrases that read as instructions addressed to an AI agent.
 * Each entry: the pattern, a human-readable description and a severity.
 */
const INSTRUCTION_PATTERNS = Object.freeze([
  {
    // The lookbehind stops the role keyword from matching the tail of an
    // ordinary word ("Dubai: ...", "ecosystem: ...").
    regex: /(?<![A-Za-z])(?:SYSTEM|ADMIN|ASSISTANT|AI)\s*:\s*.{10,}/gi,
    description: 'Role-prefixed instructions hidden in document',
    severity: 'high'
  },
  {
    regex: /(?:BEGIN|START)\s+(?:HIDDEN|SECRET|PRIVATE)\s+(?:INSTRUCTIONS?|COMMANDS?|SECTION)/gi,
    description: 'Hidden instruction block markers in document',
    severity: 'critical'
  },
  {
    regex: /(?:when|if)\s+(?:the\s+)?(?:AI|assistant|model|agent|you)\s+(?:reads?|processes?|sees?|parses?|receives?)\s+this/gi,
    description: 'Conditional instructions targeting AI processing',
    severity: 'high'
  },
  {
    regex: /(?:do\s+not|don'?t)\s+(?:tell|inform|reveal|mention|show)\s+(?:the\s+)?(?:user|human|person|operator)/gi,
    description: 'Instructions to hide information from the user',
    severity: 'critical'
  },
  {
    regex: /(?:you\s+are|you're)\s+(?:now|actually)\s+(?:a|an|in)\s+/gi,
    description: 'Identity reassignment attempt in document',
    severity: 'high'
  },
  {
    regex: /(?:execute|run|perform|call)\s+(?:the\s+)?(?:following|this|these)\s+(?:tool|function|command|action|code)/gi,
    description: 'Tool execution instructions hidden in document',
    severity: 'critical'
  }
]);

/**
 * Invisible / zero-width characters that are counted individually.
 */
const INVISIBLE_CHARS = Object.freeze([
  { char: '\u200B', name: 'zero-width space' },
  { char: '\u200C', name: 'zero-width non-joiner' },
  { char: '\u200D', name: 'zero-width joiner' },
  { char: '\u2060', name: 'word joiner' },
  { char: '\uFEFF', name: 'zero-width no-break space' },
  { char: '\u00AD', name: 'soft hyphen' },
  { char: '\u200E', name: 'left-to-right mark' },
  { char: '\u200F', name: 'right-to-left mark' },
  { char: '\u2061', name: 'function application' },
  { char: '\u2062', name: 'invisible times' },
  { char: '\u2063', name: 'invisible separator' },
  { char: '\u2064', name: 'invisible plus' }
]);

/**
 * Matches every character that is NOT one of the five zero-width characters
 * used for the steganography length check (a subset of INVISIBLE_CHARS).
 */
const NON_ZERO_WIDTH = /[^\u200B\u200C\u200D\u2060\uFEFF]/g;

/**
 * Markdown / HTML constructs that can smuggle content past a human reader.
 */
const MARKDOWN_ATTACK_PATTERNS = Object.freeze([
  {
    regex: /!\[([^\]]{50,})\]\(/g,
    description: 'Oversized image alt text (possible hidden instructions)',
    severity: 'medium'
  },
  {
    regex: /\[([^\]]*)\]\(javascript:/gi,
    description: 'JavaScript URI in markdown link',
    severity: 'high'
  },
  {
    regex: /\[([^\]]*)\]\(data:/gi,
    description: 'Data URI in markdown link',
    severity: 'medium'
  },
  {
    regex: /<!--[\s\S]{20,}?-->/g,
    description: 'Large HTML comment block (possible hidden content)',
    severity: 'medium'
  }
]);

/** Runs of at least 40 base64-alphabet characters, with optional padding. */
const BASE64_RUN = /(?:[A-Za-z0-9+/]{4}){10,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?/g;

/** Keywords that make decoded base64 content look like instructions. */
const ENCODED_INSTRUCTION_KEYWORDS = /(?:ignore|override|system|execute|admin|secret)/i;

/**
 * Scans text extracted from documents for indirect prompt injection attacks.
 * These are attacks where malicious instructions are hidden inside documents
 * that an AI agent will process.
 */
class IndirectInjectionScanner {
  /**
   * Create an IndirectInjectionScanner.
   * @param {Object} [options={}] - Scanner options.
   * @param {string} [options.sensitivity='medium'] - Detection sensitivity
   *   ('low', 'medium' or 'high'); see _filterBySensitivity.
   */
  constructor(options = {}) {
    this.sensitivity = options.sensitivity || 'medium';
  }

  /**
   * Scan extracted text for indirect prompt injection patterns.
   *
   * Runs four checks in order: instruction phrases, invisible characters,
   * markdown rendering tricks and base64-encoded instructions. The risk score
   * is accumulated from every finding and capped at 100; it is computed
   * before the sensitivity filter is applied to the returned threats.
   *
   * @param {string} text - The extracted text to scan.
   * @param {string} [source='document'] - The source description copied onto each threat.
   * @returns {{ threats: Array, hiddenContent: Array, riskScore: number }}
   */
  scan(text, source = 'document') {
    const threats = [];
    const hiddenContent = [];
    let riskScore = 0;

    if (!text || text.trim().length === 0) {
      return { threats, hiddenContent, riskScore };
    }

    // Records one threat per regex match and returns the score it adds.
    const recordMatches = (patterns, type, scoreFor) => {
      let added = 0;
      for (const pattern of patterns) {
        // String.prototype.match resets lastIndex on global regexes, so the
        // shared module-level patterns are safe to reuse across calls.
        const matches = text.match(pattern.regex) || [];
        for (const match of matches) {
          threats.push({
            type,
            severity: pattern.severity,
            description: pattern.description,
            source,
            match: match.substring(0, 200)
          });
          added += scoreFor(pattern.severity);
        }
      }
      return added;
    };

    // 1. Hidden instructions disguised as data
    riskScore += recordMatches(
      INSTRUCTION_PATTERNS,
      'indirect_injection',
      severity => (severity === 'critical' ? 40 : 20)
    );

    // 2. Invisible/zero-width characters (used to hide instructions)
    const foundInvisible = [];
    let invisibleCount = 0;
    for (const { char, name } of INVISIBLE_CHARS) {
      const count = text.split(char).length - 1;
      if (count > 0) {
        invisibleCount += count;
        foundInvisible.push({ name, count });
      }
    }

    // A handful of such characters is common in normal text; only flag more than 5.
    if (invisibleCount > 5) {
      hiddenContent.push({
        type: 'invisible_characters',
        description: `Found ${invisibleCount} invisible/zero-width characters`,
        details: foundInvisible
      });

      // Keep only the zero-width characters to gauge a possible hidden payload
      const invisibleOnly = text.replace(NON_ZERO_WIDTH, '');
      if (invisibleOnly.length > 10) {
        hiddenContent.push({
          type: 'steganographic_content',
          description: 'Possible steganographic content via invisible characters',
          length: invisibleOnly.length
        });
      }

      threats.push({
        type: 'hidden_content',
        severity: 'high',
        description: `Suspicious invisible characters detected (${invisibleCount} found)`,
        source,
        details: foundInvisible
      });
      riskScore += 25;
    }

    // 3. Markdown rendering attacks
    riskScore += recordMatches(
      MARKDOWN_ATTACK_PATTERNS,
      'markdown_injection',
      severity => (severity === 'high' ? 15 : 10)
    );

    // 4. Encoded or obfuscated text
    // Buffer.from(str, 'base64') does not throw on malformed input, so no
    // error handling is needed around the decode.
    for (const candidate of text.match(BASE64_RUN) || []) {
      const decoded = Buffer.from(candidate, 'base64').toString('utf-8');
      if (ENCODED_INSTRUCTION_KEYWORDS.test(decoded)) {
        threats.push({
          type: 'encoded_injection',
          severity: 'high',
          description: 'Base64-encoded content contains suspicious instructions',
          source,
          decodedPreview: decoded.substring(0, 200)
        });
        riskScore += 30;
      }
    }

    return {
      threats: this._filterBySensitivity(threats),
      hiddenContent,
      riskScore: Math.min(100, riskScore)
    };
  }

  /**
   * Filter threats based on configured sensitivity.
   * 'low' keeps critical and high; 'medium' drops only low; any other value
   * (including 'high') keeps everything.
   * @param {Array} threats - The threats to filter.
   * @returns {Array} Filtered threats.
   * @private
   */
  _filterBySensitivity(threats) {
    switch (this.sensitivity) {
      case 'low':
        return threats.filter(t => t.severity === 'critical' || t.severity === 'high');
      case 'medium':
        return threats.filter(t => t.severity !== 'low');
      default:
        // 'high' sensitivity = return everything
        return threats;
    }
  }
}
|
|
550
|
+
|
|
551
|
+
// =========================================================================
|
|
552
|
+
// DOCUMENT SCANNER
|
|
553
|
+
// =========================================================================
|
|
554
|
+
|
|
555
|
+
/**
 * File extensions recognised by DocumentScanner.scanFile, mapped to the MIME
 * type used to pick an extractor. Keys are lower-case and include the dot.
 */
const EXTENSION_MIME_TYPES = Object.freeze({
  '.txt': 'text/plain',
  '.text': 'text/plain',
  '.log': 'text/plain',
  '.html': 'text/html',
  '.htm': 'text/html',
  '.csv': 'text/csv',
  '.json': 'application/json',
  '.md': 'text/markdown',
  '.markdown': 'text/markdown',
  '.xml': 'application/xml',
  '.svg': 'application/xml',
  '.xhtml': 'text/html'
});

/**
 * Scans documents for threats by extracting text and running it through
 * the Agent Shield detection engine. Designed to catch indirect prompt
 * injection attacks hidden in uploaded documents.
 */
class DocumentScanner {
  /**
   * Create a DocumentScanner.
   * @param {Object} [options={}] - Scanner options.
   * @param {string} [options.sensitivity='medium'] - Detection sensitivity ('low', 'medium', 'high').
   * @param {boolean} [options.logging=false] - Whether to log scan results.
   * @param {boolean} [options.scanForInjection=true] - Whether to run indirect injection scanning.
   */
  constructor(options = {}) {
    this.sensitivity = options.sensitivity || 'medium';
    this.logging = options.logging || false;
    // Enabled unless explicitly set to false
    this.scanForInjection = options.scanForInjection !== false;
    this.injectionScanner = new IndirectInjectionScanner({ sensitivity: this.sensitivity });
  }

  /**
   * Scan a file from disk. Reads the file synchronously, determines its type
   * (extension first, content sniffing as fallback), extracts text and scans it.
   * A read failure is reported as a 'scan_error' threat rather than thrown.
   * @param {string} filePath - Path to the file to scan.
   * @returns {{ fileType: string, textLength: number, threats: Array, status: string }}
   */
  scanFile(filePath) {
    const resolvedPath = path.resolve(filePath);
    const ext = path.extname(resolvedPath).toLowerCase();

    let buffer;
    try {
      buffer = fs.readFileSync(resolvedPath);
    } catch (err) {
      if (this.logging) {
        console.log(`[Agent Shield] Document scanner failed to read file: ${resolvedPath}`);
      }
      return this._errorResult('unknown', 'scan_error', `Failed to read file: ${err.message}`, resolvedPath);
    }

    // Determine MIME type from extension first, fall back to content detection
    const mimeType = this._mimeFromExtension(ext) || TextExtractor.detect(buffer);

    if (this.logging) {
      console.log(`[Agent Shield] Scanning document: ${resolvedPath} (${mimeType})`);
    }

    return this.scanBuffer(buffer, mimeType, resolvedPath);
  }

  /**
   * Scan a Buffer with a known MIME type.
   *
   * The MIME type is normalised before lookup (parameters such as
   * '; charset=utf-8' are dropped and the type is lower-cased) so that
   * header-style values select the right extractor instead of being treated
   * as unsupported.
   *
   * @param {Buffer} buffer - The file contents.
   * @param {string} [mimeType] - The MIME type of the file. Auto-detected if not provided.
   * @param {string} [source='buffer'] - Source description for logging.
   * @returns {{ fileType: string, textLength: number, threats: Array, status: string }}
   */
  scanBuffer(buffer, mimeType, source = 'buffer') {
    if (!Buffer.isBuffer(buffer)) {
      return this._errorResult('unknown', 'scan_error', 'Input is not a valid Buffer', source);
    }

    // Auto-detect if no usable MIME type was provided
    const detectedType = this._normalizeMimeType(mimeType) || TextExtractor.detect(buffer);

    // Own-property lookup: never resolve inherited keys such as 'constructor'
    const extractor = Object.prototype.hasOwnProperty.call(EXTRACTORS, detectedType)
      ? EXTRACTORS[detectedType]
      : undefined;

    if (!extractor) {
      if (this.logging) {
        console.log(`[Agent Shield] Unsupported file type: ${detectedType}`);
      }
      // NOTE(review): unsupported content is not scanned at all, yet it is
      // reported as 'safe'. Kept for backward compatibility; callers that
      // need fail-closed behaviour should check getSupportedTypes() first.
      return {
        fileType: detectedType,
        textLength: 0,
        threats: [],
        status: 'safe'
      };
    }

    let extractedText;
    try {
      extractedText = extractor(buffer);
    } catch (err) {
      return this._errorResult(detectedType, 'extraction_error', `Failed to extract text: ${err.message}`, source);
    }

    return this.scanText(extractedText, { source, fileType: detectedType });
  }

  /**
   * Scan pre-extracted text with source metadata.
   *
   * Combines the core detector's threats with the indirect-injection
   * scanner's threats and hidden-content findings, removes duplicates that
   * share the same type (or category) and description, sorts by severity and
   * derives an overall status: 'danger' (any critical), 'warning' (any high),
   * 'caution' (any medium) or 'safe'.
   *
   * @param {string} text - The extracted text to scan.
   * @param {Object} [metadata={}] - Source metadata.
   * @param {string} [metadata.source='text'] - Where the text came from.
   * @param {string} [metadata.fileType='text/plain'] - The original file type.
   * @returns {{ fileType: string, textLength: number, threats: Array, status: string }}
   */
  scanText(text, metadata = {}) {
    const source = metadata.source || 'text';
    const fileType = metadata.fileType || 'text/plain';

    if (!text || text.trim().length === 0) {
      return {
        fileType,
        textLength: 0,
        threats: [],
        status: 'safe'
      };
    }

    // Run core threat detection
    const coreResult = scanText(text, {
      source: `document:${source}`,
      sensitivity: this.sensitivity
    });

    const collected = [...coreResult.threats];

    // Run indirect injection scanning
    if (this.scanForInjection) {
      const injectionResult = this.injectionScanner.scan(text, source);
      collected.push(...injectionResult.threats);

      // Surface each hidden-content finding as a medium-severity threat
      for (const hidden of injectionResult.hiddenContent) {
        collected.push({
          type: 'hidden_content',
          severity: 'medium',
          description: hidden.description,
          source,
          details: hidden
        });
      }
    }

    // Deduplicate threats by type/category + description (first one wins)
    const seen = new Set();
    const allThreats = collected.filter((threat) => {
      const key = `${threat.type || threat.category}:${threat.description}`;
      if (seen.has(key)) {
        return false;
      }
      seen.add(key);
      return true;
    });

    // Sort by severity; unknown severities rank as 3
    const rank = (threat) => {
      const order = SEVERITY_ORDER[threat.severity];
      return order !== undefined ? order : 3;
    };
    allThreats.sort((a, b) => rank(a) - rank(b));

    // Determine overall status from the most severe threat present
    const hasSeverity = (level) => allThreats.some(threat => threat.severity === level);
    let status = 'safe';
    if (hasSeverity('critical')) {
      status = 'danger';
    } else if (hasSeverity('high')) {
      status = 'warning';
    } else if (hasSeverity('medium')) {
      status = 'caution';
    }

    if (this.logging) {
      console.log(`[Agent Shield] Document scan complete: ${allThreats.length} threat(s), status=${status}`);
    }

    return {
      fileType,
      textLength: text.length,
      threats: allThreats,
      status
    };
  }

  /**
   * Returns the list of supported file MIME types.
   * @returns {string[]} A fresh array of supported MIME types.
   */
  getSupportedTypes() {
    return [...SUPPORTED_TYPES];
  }

  /**
   * Map file extension to MIME type.
   * @param {string} ext - The lower-case file extension including the dot (e.g. '.html').
   * @returns {string|null} The MIME type, or null if unknown.
   * @private
   */
  _mimeFromExtension(ext) {
    return Object.prototype.hasOwnProperty.call(EXTENSION_MIME_TYPES, ext)
      ? EXTENSION_MIME_TYPES[ext]
      : null;
  }

  /**
   * Reduce a MIME string to its bare lower-case type/subtype.
   * @param {*} mimeType - Caller-supplied MIME type, possibly with parameters.
   * @returns {string} The normalised type, or '' when none was supplied.
   * @private
   */
  _normalizeMimeType(mimeType) {
    if (typeof mimeType !== 'string') {
      return '';
    }
    return mimeType.split(';')[0].trim().toLowerCase();
  }

  /**
   * Build the result returned when a document could not be read or extracted.
   * @param {string} fileType - The file type to report.
   * @param {string} type - The threat type ('scan_error' or 'extraction_error').
   * @param {string} description - What went wrong.
   * @param {string} source - Source description copied onto the threat.
   * @returns {{ fileType: string, textLength: number, threats: Array, status: string }}
   * @private
   */
  _errorResult(fileType, type, description, source) {
    return {
      fileType,
      textLength: 0,
      threats: [{
        type,
        severity: 'medium',
        description,
        source
      }],
      status: 'caution'
    };
  }
}
|
|
790
|
+
|
|
791
|
+
// =========================================================================
|
|
792
|
+
// EXPORTS
|
|
793
|
+
// =========================================================================
|
|
794
|
+
|
|
795
|
+
// Public API of this module: the three classes defined above.
module.exports = { DocumentScanner, TextExtractor, IndirectInjectionScanner };
|