agent-security-scanner-mcp 3.7.0 → 3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +156 -10
- package/analyzer.py +22 -5
- package/cross_file_analyzer.py +216 -0
- package/daemon.py +179 -0
- package/index.js +279 -3
- package/package.json +19 -5
- package/packages/npm-bloom.json +1 -0
- package/pattern_matcher.py +1 -0
- package/regex_fallback.py +199 -1
- package/requirements.txt +1 -0
- package/rules/prompt-injection.security.yaml +273 -41
- package/scripts/postinstall.js +60 -0
- package/skills/openclaw/SKILL.md +102 -0
- package/skills/security-review.md +139 -0
- package/skills/security-scan-batch.md +107 -0
- package/skills/security-scanner.md +76 -0
- package/src/cli/doctor.js +29 -1
- package/src/cli/init.js +93 -0
- package/src/cli/report.js +444 -0
- package/src/config.js +247 -0
- package/src/context.js +289 -0
- package/src/daemon-client.js +233 -0
- package/src/dedup.js +129 -0
- package/src/fix-patterns.js +76 -19
- package/src/history.js +159 -0
- package/src/tools/check-package.js +36 -12
- package/src/tools/fix-security.js +32 -5
- package/src/tools/import-resolver.js +249 -0
- package/src/tools/project-context.js +365 -0
- package/src/tools/scan-action.js +489 -0
- package/src/tools/scan-mcp.js +922 -0
- package/src/tools/scan-project.js +16 -4
- package/src/tools/scan-prompt.js +292 -527
- package/src/tools/scan-security.js +37 -6
- package/src/typosquat.js +210 -0
- package/src/utils.js +215 -8
- package/templates/gitlab-ci-security.yml +225 -0
- package/templates/pre-commit-hook.sh +233 -0
- package/src/tools/garak-bridge.js +0 -209
package/src/tools/scan-prompt.js
CHANGED
|
@@ -4,7 +4,6 @@ import { readFileSync, existsSync } from "fs";
|
|
|
4
4
|
import { dirname, join } from "path";
|
|
5
5
|
import { fileURLToPath } from "url";
|
|
6
6
|
import { createHash } from "crypto";
|
|
7
|
-
import { runGarakProbes } from './garak-bridge.js';
|
|
8
7
|
|
|
9
8
|
// Handle both ESM and CJS bundling
|
|
10
9
|
let __dirname;
|
|
@@ -40,6 +39,12 @@ const CATEGORY_WEIGHTS = {
|
|
|
40
39
|
"prompt-injection-privilege": 0.85,
|
|
41
40
|
"prompt-injection-multi-turn": 0.7,
|
|
42
41
|
"prompt-injection-output": 0.9,
|
|
42
|
+
// OpenClaw-specific categories
|
|
43
|
+
"data_exfiltration": 1.0,
|
|
44
|
+
"messaging_abuse": 0.95,
|
|
45
|
+
"credential_theft": 1.0,
|
|
46
|
+
"autonomous_harm": 0.9,
|
|
47
|
+
"service_attack": 0.95,
|
|
43
48
|
"unknown": 0.5
|
|
44
49
|
};
|
|
45
50
|
|
|
@@ -50,76 +55,6 @@ const CONFIDENCE_MULTIPLIERS = {
|
|
|
50
55
|
"LOW": 0.4
|
|
51
56
|
};
|
|
52
57
|
|
|
53
|
-
// Category co-occurrence matrix: pairs that together signal sophisticated attacks
|
|
54
|
-
// Inspired by PromptFoo's jailbreak:composite strategy
|
|
55
|
-
const CATEGORY_COOCCURRENCE_BOOSTS = {
|
|
56
|
-
'obfuscation+exfiltration': 0.20,
|
|
57
|
-
'obfuscation+malicious-injection': 0.20,
|
|
58
|
-
'obfuscation+prompt-injection-content': 0.15,
|
|
59
|
-
'obfuscation+prompt-injection-jailbreak': 0.15,
|
|
60
|
-
'social-engineering+exfiltration': 0.15,
|
|
61
|
-
'social-engineering+malicious-injection': 0.15,
|
|
62
|
-
'prompt-injection-encoded+prompt-injection-content': 0.20,
|
|
63
|
-
'prompt-injection-multi-turn+prompt-injection-content': 0.15,
|
|
64
|
-
'prompt-injection-jailbreak+exfiltration': 0.25,
|
|
65
|
-
'prompt-injection-jailbreak+prompt-injection-content': 0.15,
|
|
66
|
-
'agent-manipulation+exfiltration': 0.20,
|
|
67
|
-
'agent-manipulation+system-manipulation': 0.15,
|
|
68
|
-
};
|
|
69
|
-
|
|
70
|
-
// Calculate co-occurrence boost from category pairs
|
|
71
|
-
function getCategoryCooccurrenceBoost(categories) {
|
|
72
|
-
let boost = 0;
|
|
73
|
-
const cats = [...categories];
|
|
74
|
-
for (let i = 0; i < cats.length; i++) {
|
|
75
|
-
for (let j = i + 1; j < cats.length; j++) {
|
|
76
|
-
const key1 = `${cats[i]}+${cats[j]}`;
|
|
77
|
-
const key2 = `${cats[j]}+${cats[i]}`;
|
|
78
|
-
boost += CATEGORY_COOCCURRENCE_BOOSTS[key1] || CATEGORY_COOCCURRENCE_BOOSTS[key2] || 0;
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
return Math.min(0.40, boost); // Cap total co-occurrence boost at 40%
|
|
82
|
-
}
|
|
83
|
-
|
|
84
|
-
// Orthogonal scoring channel: measures attack breadth independently of per-rule confidence
|
|
85
|
-
// This is immune to per-rule confidence gaming
|
|
86
|
-
function calculateOrthogonalScore(findings) {
|
|
87
|
-
const dimensions = new Set();
|
|
88
|
-
|
|
89
|
-
for (const f of findings) {
|
|
90
|
-
const cat = f.category || 'unknown';
|
|
91
|
-
// Map categories into orthogonal attack dimensions
|
|
92
|
-
if (['exfiltration', 'prompt-injection-extraction', 'prompt-injection-output'].includes(cat)) {
|
|
93
|
-
dimensions.add('extraction');
|
|
94
|
-
}
|
|
95
|
-
if (['malicious-injection', 'system-manipulation'].includes(cat)) {
|
|
96
|
-
dimensions.add('code-execution');
|
|
97
|
-
}
|
|
98
|
-
if (['obfuscation', 'prompt-injection-encoded'].includes(cat)) {
|
|
99
|
-
dimensions.add('evasion');
|
|
100
|
-
}
|
|
101
|
-
if (['social-engineering', 'prompt-injection-jailbreak'].includes(cat)) {
|
|
102
|
-
dimensions.add('social');
|
|
103
|
-
}
|
|
104
|
-
if (['prompt-injection-content', 'prompt-injection-context', 'prompt-injection-delimiter'].includes(cat)) {
|
|
105
|
-
dimensions.add('injection');
|
|
106
|
-
}
|
|
107
|
-
if (['prompt-injection-multi-turn'].includes(cat)) {
|
|
108
|
-
dimensions.add('persistence');
|
|
109
|
-
}
|
|
110
|
-
if (['agent-manipulation', 'prompt-injection-privilege'].includes(cat)) {
|
|
111
|
-
dimensions.add('privilege');
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
|
|
115
|
-
// Score based on number of orthogonal dimensions triggered
|
|
116
|
-
const dimCount = dimensions.size;
|
|
117
|
-
if (dimCount <= 1) return 0;
|
|
118
|
-
if (dimCount === 2) return 10;
|
|
119
|
-
if (dimCount === 3) return 25;
|
|
120
|
-
return 40; // 4+ dimensions
|
|
121
|
-
}
|
|
122
|
-
|
|
123
58
|
// Load agent attack rules from YAML
|
|
124
59
|
function loadAgentAttackRules() {
|
|
125
60
|
try {
|
|
@@ -260,12 +195,74 @@ function loadPromptInjectionRules() {
|
|
|
260
195
|
}
|
|
261
196
|
}
|
|
262
197
|
|
|
198
|
+
// Load OpenClaw-specific rules
|
|
199
|
+
function loadOpenClawRules() {
|
|
200
|
+
try {
|
|
201
|
+
const rulesPath = join(__dirname, '..', '..', 'rules', 'openclaw.security.yaml');
|
|
202
|
+
if (!existsSync(rulesPath)) {
|
|
203
|
+
return [];
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
const yaml = readFileSync(rulesPath, 'utf-8');
|
|
207
|
+
const rules = [];
|
|
208
|
+
|
|
209
|
+
const ruleBlocks = yaml.split(/^ - id:/m).slice(1);
|
|
210
|
+
|
|
211
|
+
for (const block of ruleBlocks) {
|
|
212
|
+
const lines = (' - id:' + block).split('\n');
|
|
213
|
+
const rule = {
|
|
214
|
+
id: '',
|
|
215
|
+
severity: 'WARNING',
|
|
216
|
+
message: '',
|
|
217
|
+
patterns: [],
|
|
218
|
+
metadata: {}
|
|
219
|
+
};
|
|
220
|
+
|
|
221
|
+
let inPatterns = false;
|
|
222
|
+
|
|
223
|
+
for (const line of lines) {
|
|
224
|
+
if (line.match(/^\s+- id:\s*/)) {
|
|
225
|
+
rule.id = line.replace(/^\s+- id:\s*/, '').trim();
|
|
226
|
+
} else if (line.match(/^\s+severity:\s*/)) {
|
|
227
|
+
rule.severity = line.replace(/^\s+severity:\s*/, '').trim();
|
|
228
|
+
} else if (line.match(/^\s+category:\s*/)) {
|
|
229
|
+
rule.metadata.category = line.replace(/^\s+category:\s*/, '').trim();
|
|
230
|
+
} else if (line.match(/^\s+action:\s*/)) {
|
|
231
|
+
rule.metadata.action = line.replace(/^\s+action:\s*/, '').trim();
|
|
232
|
+
} else if (line.match(/^\s+message:\s*/)) {
|
|
233
|
+
rule.message = line.replace(/^\s+message:\s*["']?/, '').replace(/["']$/, '').trim();
|
|
234
|
+
} else if (line.match(/^\s+patterns:\s*$/)) {
|
|
235
|
+
inPatterns = true;
|
|
236
|
+
} else if (inPatterns && line.match(/^\s+- /)) {
|
|
237
|
+
let pattern = line.replace(/^\s+- /, '').trim();
|
|
238
|
+
pattern = pattern.replace(/^["']|["']$/g, '');
|
|
239
|
+
pattern = pattern.replace(/\\\\/g, '\\');
|
|
240
|
+
if (pattern) rule.patterns.push(pattern);
|
|
241
|
+
} else if (line.match(/^\s+\w+:/) && !line.match(/^\s+- /)) {
|
|
242
|
+
inPatterns = false;
|
|
243
|
+
}
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
if (rule.id && rule.patterns.length > 0) {
|
|
247
|
+
// Set confidence and risk score based on severity
|
|
248
|
+
rule.metadata.confidence = rule.severity === 'CRITICAL' ? 'HIGH' : 'MEDIUM';
|
|
249
|
+
rule.metadata.risk_score = rule.severity === 'CRITICAL' ? '90' : '70';
|
|
250
|
+
rules.push(rule);
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
return rules;
|
|
255
|
+
} catch (error) {
|
|
256
|
+
console.error("Error loading OpenClaw rules:", error.message);
|
|
257
|
+
return [];
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
263
261
|
// Calculate risk score from findings
|
|
264
262
|
function calculateRiskScore(findings, context) {
|
|
265
263
|
if (findings.length === 0) return 0;
|
|
266
264
|
|
|
267
265
|
let totalScore = 0;
|
|
268
|
-
const lowConfidenceCount = findings.filter(f => (f.confidence || 'MEDIUM') === 'LOW').length;
|
|
269
266
|
|
|
270
267
|
for (const finding of findings) {
|
|
271
268
|
const riskScore = parseInt(finding.risk_score) || 50;
|
|
@@ -298,24 +295,8 @@ function calculateRiskScore(findings, context) {
|
|
|
298
295
|
|
|
299
296
|
// Per-finding boost (smaller than before)
|
|
300
297
|
avgScore = avgScore * (1 + (findings.length - 1) * 0.05);
|
|
301
|
-
|
|
302
|
-
// Low-signal accumulation — multiple LOW-confidence findings compound
|
|
303
|
-
// Catches threshold gaming with many weak signals (PromptFoo composite strategy)
|
|
304
|
-
if (lowConfidenceCount >= 2) {
|
|
305
|
-
avgScore = avgScore * (1 + lowConfidenceCount * 0.08);
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
// Category co-occurrence boost for suspicious pairs
|
|
309
|
-
const cooccurrenceBoost = getCategoryCooccurrenceBoost(uniqueCategories);
|
|
310
|
-
if (cooccurrenceBoost > 0) {
|
|
311
|
-
avgScore = avgScore * (1 + cooccurrenceBoost);
|
|
312
|
-
}
|
|
313
298
|
}
|
|
314
299
|
|
|
315
|
-
// Add orthogonal score as a flat bonus (independent of per-rule confidence)
|
|
316
|
-
const orthogonalBonus = calculateOrthogonalScore(findings);
|
|
317
|
-
avgScore = avgScore + orthogonalBonus;
|
|
318
|
-
|
|
319
300
|
avgScore = Math.min(100, avgScore);
|
|
320
301
|
|
|
321
302
|
// Apply sensitivity adjustment (wider spread for meaningful impact)
|
|
@@ -448,397 +429,6 @@ function hashPrompt(text) {
|
|
|
448
429
|
return createHash('sha256').update(text).digest('hex').substring(0, 16);
|
|
449
430
|
}
|
|
450
431
|
|
|
451
|
-
// ============================================================================
|
|
452
|
-
// TEXT NORMALIZATION PIPELINE (Garak Buff-inspired)
|
|
453
|
-
// Normalizes input to defeat homoglyph, invisible char, and Unicode bypasses
|
|
454
|
-
// ============================================================================
|
|
455
|
-
|
|
456
|
-
// Homoglyph map: Cyrillic, Greek, and Latin Extended lookalikes → ASCII
|
|
457
|
-
const HOMOGLYPH_MAP = {
|
|
458
|
-
// Cyrillic lowercase → Latin
|
|
459
|
-
'\u0430': 'a', // а → a
|
|
460
|
-
'\u0435': 'e', // е → e
|
|
461
|
-
'\u043E': 'o', // о → o
|
|
462
|
-
'\u0440': 'p', // р → p
|
|
463
|
-
'\u0441': 'c', // с → c
|
|
464
|
-
'\u0443': 'y', // у → y (visual match to y)
|
|
465
|
-
'\u0445': 'x', // х → x
|
|
466
|
-
'\u0456': 'i', // і → i
|
|
467
|
-
'\u04BB': 'h', // һ → h
|
|
468
|
-
'\u0455': 's', // ѕ → s
|
|
469
|
-
'\u0458': 'j', // ј → j
|
|
470
|
-
'\u043D': 'n', // н → n (Cyrillic en looks like n in some fonts)
|
|
471
|
-
// Cyrillic uppercase → Latin
|
|
472
|
-
'\u0410': 'A', // А → A
|
|
473
|
-
'\u0412': 'B', // В → B
|
|
474
|
-
'\u0415': 'E', // Е → E
|
|
475
|
-
'\u041A': 'K', // К → K
|
|
476
|
-
'\u041C': 'M', // М → M
|
|
477
|
-
'\u041D': 'H', // Н → H
|
|
478
|
-
'\u041E': 'O', // О → O
|
|
479
|
-
'\u0420': 'P', // Р → P
|
|
480
|
-
'\u0421': 'C', // С → C
|
|
481
|
-
'\u0422': 'T', // Т → T
|
|
482
|
-
'\u0425': 'X', // Х → X
|
|
483
|
-
'\u0406': 'I', // І → I
|
|
484
|
-
// Greek lowercase → Latin
|
|
485
|
-
'\u03B1': 'a', // α → a
|
|
486
|
-
'\u03B5': 'e', // ε → e
|
|
487
|
-
'\u03BF': 'o', // ο → o
|
|
488
|
-
'\u03C1': 'p', // ρ → p
|
|
489
|
-
'\u03BA': 'k', // κ → k
|
|
490
|
-
'\u03BD': 'v', // ν → v
|
|
491
|
-
// Greek uppercase → Latin
|
|
492
|
-
'\u0391': 'A', // Α → A
|
|
493
|
-
'\u0392': 'B', // Β → B
|
|
494
|
-
'\u0395': 'E', // Ε → E
|
|
495
|
-
'\u0397': 'H', // Η → H
|
|
496
|
-
'\u0399': 'I', // Ι → I
|
|
497
|
-
'\u039A': 'K', // Κ → K
|
|
498
|
-
'\u039C': 'M', // Μ → M
|
|
499
|
-
'\u039D': 'N', // Ν → N
|
|
500
|
-
'\u039F': 'O', // Ο → O
|
|
501
|
-
'\u03A1': 'P', // Ρ → P
|
|
502
|
-
'\u03A4': 'T', // Τ → T
|
|
503
|
-
'\u03A7': 'X', // Χ → X
|
|
504
|
-
'\u03A5': 'Y', // Υ → Y
|
|
505
|
-
'\u0396': 'Z', // Ζ → Z
|
|
506
|
-
};
|
|
507
|
-
|
|
508
|
-
// Invisible/zero-width characters to strip (regex)
|
|
509
|
-
// Includes: soft hyphen, combining grapheme joiner, Arabic letter mark,
|
|
510
|
-
// hangul fillers, Mongolian vowel separator, zero-width chars,
|
|
511
|
-
// directional markers, word joiners, BOM, halfwidth hangul filler
|
|
512
|
-
const INVISIBLE_CHAR_REGEX = /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\u3164\uFEFF\uFFA0]/gu;
|
|
513
|
-
|
|
514
|
-
// Zalgo combining diacritical marks to strip
|
|
515
|
-
const ZALGO_REGEX = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/g;
|
|
516
|
-
|
|
517
|
-
// Unicode tag characters (U+E0000-U+E007F) - used in invisible ASCII tag attacks
|
|
518
|
-
// These are encoded as surrogate pairs in JS, so we use a broader regex
|
|
519
|
-
const TAG_CHAR_REGEX = /[\u{E0000}-\u{E007F}]/gu;
|
|
520
|
-
|
|
521
|
-
function normalizeText(text) {
|
|
522
|
-
// Step 1: NFKC normalization
|
|
523
|
-
// Decomposes then recomposes in compatibility form
|
|
524
|
-
// Handles: fullwidth chars (ignore → ignore), ligatures (fi → fi),
|
|
525
|
-
// superscripts, subscripts, circle-enclosed chars
|
|
526
|
-
let normalized = text.normalize('NFKC');
|
|
527
|
-
|
|
528
|
-
// Step 2: Strip invisible Unicode characters
|
|
529
|
-
normalized = normalized.replace(INVISIBLE_CHAR_REGEX, '');
|
|
530
|
-
|
|
531
|
-
// Step 3: Strip Unicode tag characters
|
|
532
|
-
normalized = normalized.replace(TAG_CHAR_REGEX, '');
|
|
533
|
-
|
|
534
|
-
// Step 4: Strip Zalgo combining diacritical marks
|
|
535
|
-
normalized = normalized.replace(ZALGO_REGEX, '');
|
|
536
|
-
|
|
537
|
-
// Step 5: Homoglyph canonicalization
|
|
538
|
-
// Replace each character through the map; unmapped chars pass through
|
|
539
|
-
normalized = normalized.split('').map(ch => HOMOGLYPH_MAP[ch] || ch).join('');
|
|
540
|
-
|
|
541
|
-
// Step 6: Normalize Unicode whitespace to ASCII space
|
|
542
|
-
// Includes: NBSP, en/em space, thin space, hair space, ideographic space, etc.
|
|
543
|
-
normalized = normalized.replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, ' ');
|
|
544
|
-
|
|
545
|
-
return normalized;
|
|
546
|
-
}
|
|
547
|
-
|
|
548
|
-
// Extract content from all code block delimiter formats
|
|
549
|
-
// Inspired by Garak latentinjection probes: attacks hide in document structures
|
|
550
|
-
function extractCodeBlockContent(text) {
|
|
551
|
-
const extracted = [];
|
|
552
|
-
let match;
|
|
553
|
-
|
|
554
|
-
// 1. Triple-backtick blocks (existing) — ```code```
|
|
555
|
-
const backtickRegex = /```[\s\S]*?```/g;
|
|
556
|
-
for (const block of (text.match(backtickRegex) || [])) {
|
|
557
|
-
extracted.push(block.replace(/^```\w*\n?/, '').replace(/\n?```$/, ''));
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
// 2. Triple-tilde blocks — ~~~code~~~
|
|
561
|
-
const tildeRegex = /~~~[\s\S]*?~~~/g;
|
|
562
|
-
for (const block of (text.match(tildeRegex) || [])) {
|
|
563
|
-
extracted.push(block.replace(/^~~~\w*\n?/, '').replace(/\n?~~~$/, ''));
|
|
564
|
-
}
|
|
565
|
-
|
|
566
|
-
// 3. HTML <code> tags — <code>content</code>
|
|
567
|
-
const codeTagRegex = /<code[^>]*>([\s\S]*?)<\/code>/gi;
|
|
568
|
-
while ((match = codeTagRegex.exec(text)) !== null) {
|
|
569
|
-
extracted.push(match[1]);
|
|
570
|
-
}
|
|
571
|
-
|
|
572
|
-
// 4. HTML <pre> tags — <pre>content</pre>
|
|
573
|
-
const preTagRegex = /<pre[^>]*>([\s\S]*?)<\/pre>/gi;
|
|
574
|
-
while ((match = preTagRegex.exec(text)) !== null) {
|
|
575
|
-
extracted.push(match[1]);
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
// 5. HTML comments — <!-- content -->
|
|
579
|
-
const htmlCommentRegex = /<!--([\s\S]*?)-->/g;
|
|
580
|
-
while ((match = htmlCommentRegex.exec(text)) !== null) {
|
|
581
|
-
extracted.push(match[1]);
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
// 6. CDATA sections — <![CDATA[ content ]]>
|
|
585
|
-
const cdataRegex = /<!\[CDATA\[([\s\S]*?)\]\]>/g;
|
|
586
|
-
while ((match = cdataRegex.exec(text)) !== null) {
|
|
587
|
-
extracted.push(match[1]);
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
return extracted;
|
|
591
|
-
}
|
|
592
|
-
|
|
593
|
-
// Collapse string concatenation to defeat fragmentation attacks
|
|
594
|
-
// Inspired by PromptFoo's "token smuggling" and "payload splitting" attack classes
|
|
595
|
-
function collapseConcatenations(text) {
|
|
596
|
-
let collapsed = text;
|
|
597
|
-
|
|
598
|
-
// Join JS/Python string concatenation: "foo" + "bar" → foobar
|
|
599
|
-
// Handles double quotes, single quotes, backticks
|
|
600
|
-
// The pattern: closing-quote, optional whitespace, +, optional whitespace, opening-quote
|
|
601
|
-
collapsed = collapsed.replace(/["'`]\s*\+\s*["'`]/g, '');
|
|
602
|
-
|
|
603
|
-
// Join multiline concatenation (newlines between concat operators)
|
|
604
|
-
collapsed = collapsed.replace(/["'`]\s*\n\s*\+\s*["'`]/g, '');
|
|
605
|
-
collapsed = collapsed.replace(/["'`]\s*\+\s*\n\s*["'`]/g, '');
|
|
606
|
-
|
|
607
|
-
// Strip C-style inline comments used as fragment separators: ign/**/ore → ignore
|
|
608
|
-
collapsed = collapsed.replace(/\/\*.*?\*\//g, '');
|
|
609
|
-
|
|
610
|
-
return collapsed;
|
|
611
|
-
}
|
|
612
|
-
|
|
613
|
-
// Rescan decoded content against all rules
|
|
614
|
-
// Used by the decode cascade for each encoding type
|
|
615
|
-
function rescanDecoded(decodedText, allRules, findings, encodingLabel) {
|
|
616
|
-
const normalized = normalizeText(decodedText);
|
|
617
|
-
for (const rule of allRules) {
|
|
618
|
-
for (const pattern of rule.patterns) {
|
|
619
|
-
try {
|
|
620
|
-
const regex = new RegExp(pattern, 'i');
|
|
621
|
-
const match = normalized.match(regex);
|
|
622
|
-
if (match) {
|
|
623
|
-
findings.push({
|
|
624
|
-
rule_id: rule.id + '.' + encodingLabel + '-decoded',
|
|
625
|
-
category: rule.metadata.category || 'obfuscation',
|
|
626
|
-
severity: rule.severity,
|
|
627
|
-
message: rule.message + ` (detected in ${encodingLabel}-decoded content)`,
|
|
628
|
-
matched_text: match[0].substring(0, 100),
|
|
629
|
-
confidence: rule.metadata.confidence || 'MEDIUM',
|
|
630
|
-
risk_score: rule.metadata.risk_score || '50',
|
|
631
|
-
action: rule.metadata.action || 'WARN'
|
|
632
|
-
});
|
|
633
|
-
break; // One match per rule
|
|
634
|
-
}
|
|
635
|
-
} catch (e) {
|
|
636
|
-
// Skip invalid regex
|
|
637
|
-
}
|
|
638
|
-
}
|
|
639
|
-
}
|
|
640
|
-
}
|
|
641
|
-
|
|
642
|
-
// Helper: check if decoded string is mostly printable ASCII
|
|
643
|
-
function isPrintable(str, threshold) {
|
|
644
|
-
if (!str || str.length === 0) return false;
|
|
645
|
-
const printable = str.split('').filter(c => {
|
|
646
|
-
const code = c.charCodeAt(0);
|
|
647
|
-
return code >= 32 && code <= 126;
|
|
648
|
-
}).length;
|
|
649
|
-
return printable / str.length > threshold;
|
|
650
|
-
}
|
|
651
|
-
|
|
652
|
-
// Multi-encoding decode cascade
|
|
653
|
-
// Inspired by Garak's 12+ encoding probes (InjectBase64, InjectHex, InjectROT13, etc.)
|
|
654
|
-
// and PromptFoo's static encoding strategies
|
|
655
|
-
function tryDecodeAndRescan(expandedText, allRules, findings) {
|
|
656
|
-
// --- 1. Base64 (improved: lower length threshold 40→20, lower printability 0.7→0.55) ---
|
|
657
|
-
const base64Regex = /[A-Za-z0-9+/]{20,}={0,2}/g;
|
|
658
|
-
for (const b64str of (expandedText.match(base64Regex) || [])) {
|
|
659
|
-
try {
|
|
660
|
-
const decoded = Buffer.from(b64str, 'base64').toString('utf-8');
|
|
661
|
-
if (decoded.length > 0 && isPrintable(decoded, 0.55)) {
|
|
662
|
-
rescanDecoded(decoded, allRules, findings, 'base64');
|
|
663
|
-
|
|
664
|
-
// --- 1b. Nested base64: decode again if inner content is also base64 ---
|
|
665
|
-
const nestedB64 = decoded.match(/[A-Za-z0-9+/]{20,}={0,2}/g) || [];
|
|
666
|
-
for (const nested of nestedB64) {
|
|
667
|
-
try {
|
|
668
|
-
const twice = Buffer.from(nested, 'base64').toString('utf-8');
|
|
669
|
-
if (twice.length > 4 && isPrintable(twice, 0.55)) {
|
|
670
|
-
rescanDecoded(twice, allRules, findings, 'base64-nested');
|
|
671
|
-
}
|
|
672
|
-
} catch (e) { /* skip */ }
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
} catch (e) { /* skip invalid base64 */ }
|
|
676
|
-
}
|
|
677
|
-
|
|
678
|
-
// --- 2. Hex encoding: sequences of hex pairs (optionally space-separated) ---
|
|
679
|
-
// Matches: "69676e6f7265" or "69 67 6e 6f 72 65"
|
|
680
|
-
const hexRegex = /(?:[0-9a-fA-F]{2}[\s]?){8,}/g;
|
|
681
|
-
for (const hexStr of (expandedText.match(hexRegex) || [])) {
|
|
682
|
-
try {
|
|
683
|
-
const clean = hexStr.replace(/\s/g, '');
|
|
684
|
-
if (clean.length % 2 !== 0) continue;
|
|
685
|
-
if (clean.length < 16) continue; // At least 8 bytes
|
|
686
|
-
const decoded = Buffer.from(clean, 'hex').toString('utf-8');
|
|
687
|
-
if (decoded.length > 4 && isPrintable(decoded, 0.7)) {
|
|
688
|
-
rescanDecoded(decoded, allRules, findings, 'hex');
|
|
689
|
-
}
|
|
690
|
-
} catch (e) { /* skip */ }
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
// --- 3. URL encoding: %XX sequences (at least 3 encoded chars anywhere in text) ---
|
|
694
|
-
const urlEncodedCount = (expandedText.match(/%[0-9a-fA-F]{2}/g) || []).length;
|
|
695
|
-
if (urlEncodedCount >= 3) {
|
|
696
|
-
try {
|
|
697
|
-
const decoded = decodeURIComponent(expandedText);
|
|
698
|
-
if (decoded !== expandedText) {
|
|
699
|
-
rescanDecoded(decoded, allRules, findings, 'url-encoded');
|
|
700
|
-
}
|
|
701
|
-
} catch (e) { /* skip malformed URL encoding */ }
|
|
702
|
-
}
|
|
703
|
-
|
|
704
|
-
// --- 4. ROT13: only when indicators present (user-approved decision) ---
|
|
705
|
-
// This avoids false positives from ROT13-decoding normal text
|
|
706
|
-
const rot13Indicators = /\b(rot13|rot-13|caesar|cipher|decode\s+this|decipher)\b/i;
|
|
707
|
-
if (rot13Indicators.test(expandedText)) {
|
|
708
|
-
const rot13Decoded = expandedText.replace(/[a-zA-Z]/g, ch => {
|
|
709
|
-
const base = ch <= 'Z' ? 65 : 97;
|
|
710
|
-
return String.fromCharCode(((ch.charCodeAt(0) - base + 13) % 26) + base);
|
|
711
|
-
});
|
|
712
|
-
if (rot13Decoded !== expandedText) {
|
|
713
|
-
rescanDecoded(rot13Decoded, allRules, findings, 'rot13');
|
|
714
|
-
}
|
|
715
|
-
}
|
|
716
|
-
}
|
|
717
|
-
|
|
718
|
-
// Frame-setting patterns: conversational priming for later exploitation
|
|
719
|
-
// Inspired by PromptFoo's Crescendo strategy (Microsoft research)
|
|
720
|
-
const FRAME_SETTING_PATTERNS = [
|
|
721
|
-
/remember\s+(this|that)\s+for\s+later/i,
|
|
722
|
-
/in\s+my\s+next\s+message/i,
|
|
723
|
-
/when\s+i\s+(say|ask|tell)\s+you/i,
|
|
724
|
-
/from\s+now\s+on\s*,?\s+(you|your)\s+(will|must|should|are)/i,
|
|
725
|
-
/starting\s+now\s*,?\s+you/i,
|
|
726
|
-
/keep\s+this\s+in\s+mind/i,
|
|
727
|
-
/for\s+the\s+(rest|remainder)\s+of\s+(this|our)\s+conversation/i,
|
|
728
|
-
/act\s+as\s+(if|though)\s+you\s+(are|have)/i,
|
|
729
|
-
/let'?s\s+play\s+a\s+(game|role)/i,
|
|
730
|
-
/pretend\s+that\s+you\s+(are|have)/i,
|
|
731
|
-
/in\s+all\s+(future|subsequent)\s+(messages?|responses?)/i,
|
|
732
|
-
/always\s+respond\s+(by|with|as)/i,
|
|
733
|
-
];
|
|
734
|
-
|
|
735
|
-
// Improved multi-turn escalation detection
|
|
736
|
-
// Fixes: removes "current turn must have findings" requirement,
|
|
737
|
-
// adds cross-turn concatenation, frame-setting detection, full accumulation
|
|
738
|
-
function detectMultiTurnEscalation(previousMessages, currentText, allRules) {
|
|
739
|
-
const escalationFindings = [];
|
|
740
|
-
|
|
741
|
-
if (!previousMessages || !Array.isArray(previousMessages) || previousMessages.length === 0) {
|
|
742
|
-
return escalationFindings;
|
|
743
|
-
}
|
|
744
|
-
|
|
745
|
-
// Step 1: Scan ALL previous messages, accumulate total matches (no early break)
|
|
746
|
-
let totalPrevMatches = 0;
|
|
747
|
-
let frameSettingCount = 0;
|
|
748
|
-
const prevMatchedRuleIds = new Set();
|
|
749
|
-
|
|
750
|
-
for (const prevMsg of previousMessages) {
|
|
751
|
-
const normalizedPrev = normalizeText(prevMsg);
|
|
752
|
-
|
|
753
|
-
// Check frame-setting patterns
|
|
754
|
-
for (const fp of FRAME_SETTING_PATTERNS) {
|
|
755
|
-
if (fp.test(normalizedPrev)) {
|
|
756
|
-
frameSettingCount++;
|
|
757
|
-
break; // One frame-setting match per message is enough
|
|
758
|
-
}
|
|
759
|
-
}
|
|
760
|
-
|
|
761
|
-
// Check all rules against this previous message
|
|
762
|
-
for (const rule of allRules) {
|
|
763
|
-
if (prevMatchedRuleIds.has(rule.id)) continue; // Already matched this rule
|
|
764
|
-
for (const pattern of rule.patterns) {
|
|
765
|
-
try {
|
|
766
|
-
const regex = new RegExp(pattern, 'i');
|
|
767
|
-
if (regex.test(normalizedPrev)) {
|
|
768
|
-
totalPrevMatches++;
|
|
769
|
-
prevMatchedRuleIds.add(rule.id);
|
|
770
|
-
break; // One match per rule per message
|
|
771
|
-
}
|
|
772
|
-
} catch (e) { /* skip invalid regex */ }
|
|
773
|
-
}
|
|
774
|
-
}
|
|
775
|
-
}
|
|
776
|
-
|
|
777
|
-
// Step 2: Cross-turn concatenation scan
|
|
778
|
-
// Join ALL messages into a single string and scan for patterns that span boundaries
|
|
779
|
-
// This catches: prev="ignore all" + current="previous instructions"
|
|
780
|
-
const crossTurnText = normalizeText([...previousMessages, currentText].join(' '));
|
|
781
|
-
|
|
782
|
-
for (const rule of allRules) {
|
|
783
|
-
for (const pattern of rule.patterns) {
|
|
784
|
-
try {
|
|
785
|
-
const regex = new RegExp(pattern, 'i');
|
|
786
|
-
const match = crossTurnText.match(regex);
|
|
787
|
-
if (match) {
|
|
788
|
-
// Only flag if this match does NOT appear in any single message alone
|
|
789
|
-
const matchInCurrent = regex.test(normalizeText(currentText));
|
|
790
|
-
const matchInAnyPrev = previousMessages.some(pm => regex.test(normalizeText(pm)));
|
|
791
|
-
if (!matchInCurrent && !matchInAnyPrev) {
|
|
792
|
-
// Pattern only matches when messages are joined — it spans boundaries
|
|
793
|
-
escalationFindings.push({
|
|
794
|
-
rule_id: rule.id + '.cross-turn',
|
|
795
|
-
category: rule.metadata.category || 'prompt-injection-multi-turn',
|
|
796
|
-
severity: 'WARNING',
|
|
797
|
-
message: `Cross-turn prompt injection: attack pattern spans message boundaries`,
|
|
798
|
-
matched_text: match[0].substring(0, 100),
|
|
799
|
-
confidence: 'MEDIUM',
|
|
800
|
-
risk_score: '75',
|
|
801
|
-
action: 'WARN'
|
|
802
|
-
});
|
|
803
|
-
break;
|
|
804
|
-
}
|
|
805
|
-
}
|
|
806
|
-
} catch (e) { /* skip */ }
|
|
807
|
-
}
|
|
808
|
-
}
|
|
809
|
-
|
|
810
|
-
// Step 3: Frame-setting detection — flag even without current findings
|
|
811
|
-
if (frameSettingCount > 0) {
|
|
812
|
-
escalationFindings.push({
|
|
813
|
-
rule_id: 'multi-turn.frame-setting',
|
|
814
|
-
category: 'prompt-injection-multi-turn',
|
|
815
|
-
severity: 'WARNING',
|
|
816
|
-
message: `Frame-setting language detected in ${frameSettingCount} previous message(s). Possible Crescendo-style gradual escalation attack.`,
|
|
817
|
-
matched_text: 'frame-setting phrases in conversation history',
|
|
818
|
-
confidence: 'LOW',
|
|
819
|
-
risk_score: '55',
|
|
820
|
-
action: 'LOG'
|
|
821
|
-
});
|
|
822
|
-
}
|
|
823
|
-
|
|
824
|
-
// Step 4: Escalation detection — REMOVED requirement that current turn has findings
|
|
825
|
-
// KEY FIX: An attacker's final "trigger" message may be benign ("yes, do it")
|
|
826
|
-
if (totalPrevMatches > 0) {
|
|
827
|
-
escalationFindings.push({
|
|
828
|
-
rule_id: 'multi-turn.escalation',
|
|
829
|
-
category: 'social-engineering',
|
|
830
|
-
severity: 'WARNING',
|
|
831
|
-
message: `Multi-turn escalation: suspicious patterns in ${totalPrevMatches} previous rule(s). Current message may be a benign trigger.`,
|
|
832
|
-
matched_text: 'escalation across conversation turns',
|
|
833
|
-
confidence: totalPrevMatches >= 3 ? 'HIGH' : 'MEDIUM',
|
|
834
|
-
risk_score: String(Math.min(85, 50 + totalPrevMatches * 5)),
|
|
835
|
-
action: totalPrevMatches >= 3 ? 'WARN' : 'LOG'
|
|
836
|
-
});
|
|
837
|
-
}
|
|
838
|
-
|
|
839
|
-
return escalationFindings;
|
|
840
|
-
}
|
|
841
|
-
|
|
842
432
|
// Export schema for tool registration
|
|
843
433
|
export const scanAgentPromptSchema = {
|
|
844
434
|
prompt_text: z.string().describe("The prompt or instruction text to analyze"),
|
|
@@ -846,47 +436,95 @@ export const scanAgentPromptSchema = {
|
|
|
846
436
|
previous_messages: z.array(z.string()).optional().describe("Previous conversation messages for multi-turn detection"),
|
|
847
437
|
sensitivity_level: z.enum(["high", "medium", "low"]).optional().describe("Sensitivity level - high means more strict, low means more permissive")
|
|
848
438
|
}).optional().describe("Optional context for better analysis"),
|
|
849
|
-
verbosity: z.enum(['minimal', 'compact', 'full']).optional().describe("Response detail level: 'minimal' (action only), 'compact' (default), 'full' (all details)")
|
|
850
|
-
deep_scan: z.boolean().optional().describe("Run Garak deep analysis probes for advanced encoding/injection detection (requires garak Python package)")
|
|
439
|
+
verbosity: z.enum(['minimal', 'compact', 'full']).optional().describe("Response detail level: 'minimal' (action only), 'compact' (default), 'full' (all details)")
|
|
851
440
|
};
|
|
852
441
|
|
|
853
442
|
// Export handler function
|
|
854
|
-
export async function scanAgentPrompt({ prompt_text, context, verbosity
|
|
443
|
+
export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
|
|
855
444
|
const findings = [];
|
|
856
445
|
|
|
857
|
-
// Normalize prompt text (Garak Buff-inspired preprocessing)
|
|
858
|
-
const normalizedPrompt = normalizeText(prompt_text);
|
|
859
|
-
|
|
860
|
-
// Detect invisible Unicode characters in original text (obfuscation indicator)
|
|
861
|
-
const invisibleMatches = prompt_text.match(/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\u{E0000}-\u{E007F}]/gu);
|
|
862
|
-
if (invisibleMatches && invisibleMatches.length > 0) {
|
|
863
|
-
findings.push({
|
|
864
|
-
rule_id: 'runtime.invisible-unicode-detected',
|
|
865
|
-
category: 'obfuscation',
|
|
866
|
-
severity: 'WARNING',
|
|
867
|
-
message: `Invisible Unicode characters detected (${invisibleMatches.length} chars). These may hide malicious instructions from human review.`,
|
|
868
|
-
matched_text: `${invisibleMatches.length} invisible character(s) found`,
|
|
869
|
-
confidence: 'HIGH',
|
|
870
|
-
risk_score: '70',
|
|
871
|
-
action: 'WARN'
|
|
872
|
-
});
|
|
873
|
-
}
|
|
874
|
-
|
|
875
446
|
// Load rules
|
|
876
447
|
const agentRules = loadAgentAttackRules();
|
|
877
448
|
const promptRules = loadPromptInjectionRules();
|
|
878
|
-
const
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
449
|
+
const openclawRules = loadOpenClawRules();
|
|
450
|
+
const allRules = [...agentRules, ...promptRules, ...openclawRules];
|
|
451
|
+
|
|
452
|
+
// 2.7: Extract content from code blocks (``` and ~~~) and append to scan text
|
|
453
|
+
let expandedText = prompt_text;
|
|
454
|
+
const codeBlockRegex = /(`{3,})([\s\S]*?)\1|(~{3,})([\s\S]*?)\3/g;
|
|
455
|
+
let codeBlockMatch;
|
|
456
|
+
while ((codeBlockMatch = codeBlockRegex.exec(prompt_text)) !== null) {
|
|
457
|
+
// Group 2 = content inside backtick fences, Group 4 = content inside tilde fences
|
|
458
|
+
const inner = (codeBlockMatch[2] || codeBlockMatch[4] || '')
|
|
459
|
+
.replace(/^\w*\n?/, ''); // strip optional language tag
|
|
883
460
|
expandedText += '\n' + inner;
|
|
884
461
|
}
|
|
885
462
|
|
|
886
|
-
//
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
463
|
+
// 2.7b: Defragment string concatenation patterns ("a" + "b" → "ab")
|
|
464
|
+
// Handles both "..." + "..." and '...' + '...' and mixed
|
|
465
|
+
let defragmented = expandedText;
|
|
466
|
+
const concatRegex = /(["'])([^"']*?)\1\s*\+\s*(["'])([^"']*?)\3/g;
|
|
467
|
+
let prevDefrag;
|
|
468
|
+
do {
|
|
469
|
+
prevDefrag = defragmented;
|
|
470
|
+
defragmented = defragmented.replace(concatRegex, (_, q1, s1, _q2, s2) => `${q1}${s1}${s2}${q1}`);
|
|
471
|
+
} while (defragmented !== prevDefrag);
|
|
472
|
+
if (defragmented !== expandedText) {
|
|
473
|
+
expandedText += '\n' + defragmented;
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
// 2.7c: Detect Morse code and decode common attack patterns
|
|
477
|
+
const morsePattern = /(?:[\.\-]{1,5}\s+){4,}/;
|
|
478
|
+
if (morsePattern.test(expandedText)) {
|
|
479
|
+
const MORSE_MAP = {
|
|
480
|
+
'.-':'A','-...':'B','-.-.':'C','-..':'D','.':'E','..-.':'F','--.':'G',
|
|
481
|
+
'....':'H','..':'I','.---':'J','-.-':'K','.-..':'L','--':'M','-.':'N',
|
|
482
|
+
'---':'O','.--.':'P','--.-':'Q','.-.':'R','...':'S','-':'T','..-':'U',
|
|
483
|
+
'...-':'V','.--':'W','-..-':'X','-.--':'Y','--..':'Z',
|
|
484
|
+
'.----':'1','..---':'2','...--':'3','....-':'4','.....':'5',
|
|
485
|
+
'-....':'6','--...':'7','---..':'8','----.':'9','-----':'0'
|
|
486
|
+
};
|
|
487
|
+
try {
|
|
488
|
+
const decoded = expandedText.split(/\s*\/\s*/).map(word =>
|
|
489
|
+
word.trim().split(/\s+/).map(c => MORSE_MAP[c] || '').join('')
|
|
490
|
+
).join(' ');
|
|
491
|
+
if (decoded.replace(/\s/g, '').length >= 5) {
|
|
492
|
+
expandedText += '\n' + decoded;
|
|
493
|
+
}
|
|
494
|
+
} catch (e) {
|
|
495
|
+
// Skip invalid morse
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
// 2.7d: Strip Zalgo diacritics — NFKD decompose first, then strip combining marks
|
|
500
|
+
const nfkd = expandedText.normalize('NFKD');
|
|
501
|
+
const zalgoStripped = nfkd.replace(/[\u0300-\u036f\u0488\u0489\u1dc0-\u1dff\u20d0-\u20ff\ufe20-\ufe2f]/g, '');
|
|
502
|
+
if (zalgoStripped !== expandedText) {
|
|
503
|
+
expandedText += '\n' + zalgoStripped;
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
// 2.7e: Detect Braille Unicode and decode to ASCII (standard Braille dot patterns)
|
|
507
|
+
const braillePattern = /[\u2800-\u28FF]{3,}/;
|
|
508
|
+
if (braillePattern.test(expandedText)) {
|
|
509
|
+
const BRAILLE_MAP = {
|
|
510
|
+
1:'a',3:'b',9:'c',25:'d',17:'e',11:'f',27:'g',19:'h',
|
|
511
|
+
10:'i',26:'j',5:'k',7:'l',13:'m',29:'n',21:'o',15:'p',
|
|
512
|
+
31:'q',23:'r',14:'s',30:'t',37:'u',39:'v',58:'w',45:'x',
|
|
513
|
+
61:'y',53:'z',0:' '
|
|
514
|
+
};
|
|
515
|
+
try {
|
|
516
|
+
const decoded = expandedText.replace(/[\u2800-\u28FF]+/g, match => {
|
|
517
|
+
return Array.from(match).map(ch => {
|
|
518
|
+
const cp = ch.codePointAt(0) - 0x2800;
|
|
519
|
+
return BRAILLE_MAP[cp] || '';
|
|
520
|
+
}).join('');
|
|
521
|
+
});
|
|
522
|
+
if (decoded.replace(/\s/g, '').length >= 5) {
|
|
523
|
+
expandedText += '\n' + decoded;
|
|
524
|
+
}
|
|
525
|
+
} catch (e) {
|
|
526
|
+
// Skip invalid braille
|
|
527
|
+
}
|
|
890
528
|
}
|
|
891
529
|
|
|
892
530
|
// Scan expanded text against all rules
|
|
@@ -915,27 +553,154 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity, deep_sc
|
|
|
915
553
|
}
|
|
916
554
|
}
|
|
917
555
|
|
|
918
|
-
//
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
556
|
+
// 2.8: Runtime base64 decode-and-rescan
|
|
557
|
+
const base64Regex = /[A-Za-z0-9+/]{40,}={0,2}/g;
|
|
558
|
+
const b64Matches = expandedText.match(base64Regex);
|
|
559
|
+
if (b64Matches) {
|
|
560
|
+
for (const b64str of b64Matches) {
|
|
561
|
+
try {
|
|
562
|
+
const decoded = Buffer.from(b64str, 'base64').toString('utf-8');
|
|
563
|
+
// Check printability: >50% ASCII printable characters
|
|
564
|
+
const printable = decoded.split('').filter(c => c.charCodeAt(0) >= 32 && c.charCodeAt(0) <= 126).length;
|
|
565
|
+
if (printable / decoded.length > 0.5) {
|
|
566
|
+
// Re-scan decoded text against prompt rules only
|
|
567
|
+
for (const rule of allRules) {
|
|
568
|
+
if (!rule.id.startsWith('generic.prompt')) continue;
|
|
569
|
+
for (const pattern of rule.patterns) {
|
|
570
|
+
try {
|
|
571
|
+
const regex = new RegExp(pattern, 'i');
|
|
572
|
+
const match = decoded.match(regex);
|
|
573
|
+
if (match) {
|
|
574
|
+
findings.push({
|
|
575
|
+
rule_id: rule.id + '.base64-decoded',
|
|
576
|
+
category: rule.metadata.category || 'unknown',
|
|
577
|
+
severity: rule.severity,
|
|
578
|
+
message: rule.message + ' (detected in base64-decoded content)',
|
|
579
|
+
matched_text: match[0].substring(0, 100),
|
|
580
|
+
confidence: rule.metadata.confidence || 'MEDIUM',
|
|
581
|
+
risk_score: rule.metadata.risk_score || '50',
|
|
582
|
+
action: rule.metadata.action || 'WARN'
|
|
583
|
+
});
|
|
584
|
+
break;
|
|
585
|
+
}
|
|
586
|
+
} catch (e) {
|
|
587
|
+
// Skip invalid regex
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
} catch (e) {
|
|
593
|
+
// Skip invalid base64
|
|
594
|
+
}
|
|
595
|
+
}
|
|
929
596
|
}
|
|
930
597
|
|
|
931
|
-
//
|
|
932
|
-
if (
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
598
|
+
// Multi-turn escalation detection — sliding-window risk accumulator
|
|
599
|
+
if (context?.previous_messages && Array.isArray(context.previous_messages) && context.previous_messages.length > 0) {
|
|
600
|
+
// Score each previous message for suspicious content
|
|
601
|
+
let prevTotalScore = 0;
|
|
602
|
+
let prevMessagesWithFindings = 0;
|
|
603
|
+
|
|
604
|
+
for (const prevMsg of context.previous_messages) {
|
|
605
|
+
let msgHasMatch = false;
|
|
606
|
+
for (const rule of allRules) {
|
|
607
|
+
for (const pattern of rule.patterns) {
|
|
608
|
+
try {
|
|
609
|
+
const regex = new RegExp(pattern, 'i');
|
|
610
|
+
if (regex.test(prevMsg)) {
|
|
611
|
+
prevTotalScore += parseInt(rule.metadata?.risk_score || '50') / 100;
|
|
612
|
+
msgHasMatch = true;
|
|
613
|
+
break;
|
|
614
|
+
}
|
|
615
|
+
} catch (e) {
|
|
616
|
+
// Skip invalid regex
|
|
617
|
+
}
|
|
618
|
+
}
|
|
938
619
|
}
|
|
620
|
+
if (msgHasMatch) prevMessagesWithFindings++;
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
// Sliding window: sensitivity increases proportionally with prior findings
|
|
624
|
+
if (prevMessagesWithFindings > 0 && findings.length > 0) {
|
|
625
|
+
const escalationSeverity = prevMessagesWithFindings >= 2 ? 'ERROR' : 'WARNING';
|
|
626
|
+
const escalationScore = Math.min(90, 50 + prevMessagesWithFindings * 15);
|
|
627
|
+
const escalationAction = prevMessagesWithFindings >= 2 ? 'BLOCK' : 'WARN';
|
|
628
|
+
|
|
629
|
+
findings.push({
|
|
630
|
+
rule_id: 'multi-turn.escalation',
|
|
631
|
+
category: 'prompt-injection-multi-turn',
|
|
632
|
+
severity: escalationSeverity,
|
|
633
|
+
message: `Multi-turn escalation detected: ${prevMessagesWithFindings} prior message(s) contained suspicious patterns. Combined with current findings, this indicates a coordinated attack.`,
|
|
634
|
+
matched_text: `escalation across ${prevMessagesWithFindings + 1} conversation turns`,
|
|
635
|
+
confidence: prevMessagesWithFindings >= 2 ? 'HIGH' : 'MEDIUM',
|
|
636
|
+
risk_score: String(escalationScore),
|
|
637
|
+
action: escalationAction
|
|
638
|
+
});
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
// Standalone multi-turn escalation: 2+ prior suspicious turns even if current is clean
|
|
642
|
+
if (prevMessagesWithFindings >= 2 && findings.length === 0) {
|
|
643
|
+
const escalationScore = Math.min(75, 40 + prevMessagesWithFindings * 10);
|
|
644
|
+
findings.push({
|
|
645
|
+
rule_id: 'multi-turn.prior-context-escalation',
|
|
646
|
+
category: 'prompt-injection-multi-turn',
|
|
647
|
+
severity: 'WARNING',
|
|
648
|
+
message: `Elevated risk context: ${prevMessagesWithFindings} prior messages contained suspicious patterns. Current message appears benign but conversation context warrants caution.`,
|
|
649
|
+
matched_text: `${prevMessagesWithFindings} prior suspicious messages`,
|
|
650
|
+
confidence: 'MEDIUM',
|
|
651
|
+
risk_score: String(escalationScore),
|
|
652
|
+
action: 'WARN'
|
|
653
|
+
});
|
|
654
|
+
}
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
// Composite pattern detection — multiple low-severity indicators = escalated severity
|
|
658
|
+
if (findings.length >= 2) {
|
|
659
|
+
const categories = new Set(findings.map(f => f.category));
|
|
660
|
+
const indicators = {
|
|
661
|
+
hasRoleReassignment: findings.some(f =>
|
|
662
|
+
f.category === 'prompt-injection-jailbreak' || f.category === 'prompt-injection-context'
|
|
663
|
+
),
|
|
664
|
+
hasEncodedContent: findings.some(f =>
|
|
665
|
+
f.category === 'prompt-injection-encoded' || f.category === 'obfuscation'
|
|
666
|
+
),
|
|
667
|
+
hasUrgency: findings.some(f =>
|
|
668
|
+
f.category === 'social-engineering'
|
|
669
|
+
),
|
|
670
|
+
hasExfiltration: findings.some(f =>
|
|
671
|
+
f.category === 'prompt-injection-output' || f.category === 'exfiltration'
|
|
672
|
+
),
|
|
673
|
+
hasPrivilegeEscalation: findings.some(f =>
|
|
674
|
+
f.category === 'prompt-injection-privilege'
|
|
675
|
+
)
|
|
676
|
+
};
|
|
677
|
+
|
|
678
|
+
const activeIndicators = Object.values(indicators).filter(Boolean).length;
|
|
679
|
+
|
|
680
|
+
// 2+ distinct indicator types → composite attack (graduated risk_score)
|
|
681
|
+
if (activeIndicators >= 2) {
|
|
682
|
+
const riskScore = activeIndicators >= 3 ? 95 : 80;
|
|
683
|
+
findings.push({
|
|
684
|
+
rule_id: 'composite.multi-vector-attack',
|
|
685
|
+
category: 'prompt-injection-content',
|
|
686
|
+
severity: 'ERROR',
|
|
687
|
+
message: `Composite attack detected: ${activeIndicators} distinct attack vectors identified (${[...categories].join(', ')}). Multiple low-severity indicators combine to form a high-confidence threat.`,
|
|
688
|
+
matched_text: `${activeIndicators} attack vectors across ${findings.length} findings`,
|
|
689
|
+
confidence: 'HIGH',
|
|
690
|
+
risk_score: String(riskScore),
|
|
691
|
+
action: 'BLOCK'
|
|
692
|
+
});
|
|
693
|
+
} else if (categories.size >= 2) {
|
|
694
|
+
findings.push({
|
|
695
|
+
rule_id: 'composite.cross-category-escalation',
|
|
696
|
+
category: 'prompt-injection-content',
|
|
697
|
+
severity: 'WARNING',
|
|
698
|
+
message: `Cross-category escalation: findings span ${categories.size} categories (${[...categories].join(', ')}). Review for coordinated attack attempt.`,
|
|
699
|
+
matched_text: `${categories.size} categories across ${findings.length} findings`,
|
|
700
|
+
confidence: 'MEDIUM',
|
|
701
|
+
risk_score: '70',
|
|
702
|
+
action: 'WARN'
|
|
703
|
+
});
|
|
939
704
|
}
|
|
940
705
|
}
|
|
941
706
|
|