npm - agent-security-scanner-mcp - Versions diffs - 3.5.2 → 3.7.0 - Mend

agent-security-scanner-mcp 3.5.2 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/generic_ast.py +7 -2
package/package.json +3 -2
package/rules/prompt-injection.security.yaml +68 -0
package/src/tools/garak-bridge.js +209 -0
package/src/tools/scan-prompt.js +528 -84
package/taint_analyzer.py +516 -11

package/src/tools/scan-prompt.js CHANGED Viewed

@@ -4,6 +4,7 @@ import { readFileSync, existsSync } from "fs";
 import { dirname, join } from "path";
 import { fileURLToPath } from "url";
 import { createHash } from "crypto";
+import { runGarakProbes } from './garak-bridge.js';
 // Handle both ESM and CJS bundling
 let __dirname;
@@ -49,6 +50,76 @@ const CONFIDENCE_MULTIPLIERS = {
   "LOW": 0.4
 };
+// Category co-occurrence table: category pairs that together signal a more sophisticated attack.
+/* Inspired by PromptFoo's jailbreak:composite strategy.
+ * Each key is '<categoryA>+<categoryB>' and each value is a fractional boost (0.20 means +20%).
+ * A pair is listed in one order only; getCategoryCooccurrenceBoost() tries both orderings.
+ */
+const CATEGORY_COOCCURRENCE_BOOSTS = {
+  'obfuscation+exfiltration': 0.20,
+  'obfuscation+malicious-injection': 0.20,
+  'obfuscation+prompt-injection-content': 0.15,
+  'obfuscation+prompt-injection-jailbreak': 0.15,
+  'social-engineering+exfiltration': 0.15,
+  'social-engineering+malicious-injection': 0.15,
+  'prompt-injection-encoded+prompt-injection-content': 0.20,
+  'prompt-injection-multi-turn+prompt-injection-content': 0.15,
+  'prompt-injection-jailbreak+exfiltration': 0.25,
+  'prompt-injection-jailbreak+prompt-injection-content': 0.15,
+  'agent-manipulation+exfiltration': 0.20,
+  'agent-manipulation+system-manipulation': 0.15,
+};
+/**
+ * Sum the configured co-occurrence boosts over every unordered pair of categories.
+ *
+ * @param {Iterable<string>} categories - Category names; spread into an array, so any iterable works.
+ *   NOTE(review): a category that appears twice would have its pairs counted twice. The visible
+ *   caller passes a variable named `uniqueCategories`; confirm it really is de-duplicated.
+ * @returns {number} Total boost as a fraction, never more than 0.40.
+ */
+function getCategoryCooccurrenceBoost(categories) {
+  let boost = 0;
+  const cats = [...categories]; // array copy so pairs can be addressed by index
+  for (let i = 0; i < cats.length; i++) {
+    for (let j = i + 1; j < cats.length; j++) { // j > i: each unordered pair is visited once
+      const key1 = `${cats[i]}+${cats[j]}`; // pair in iteration order
+      const key2 = `${cats[j]}+${cats[i]}`; // reversed order, since the table lists each pair once
+      boost += CATEGORY_COOCCURRENCE_BOOSTS[key1] || CATEGORY_COOCCURRENCE_BOOSTS[key2] || 0; // unlisted pairs add nothing
+    }
+  }
+  return Math.min(0.40, boost); // Cap total co-occurrence boost at 40%
+}
+// Orthogonal scoring channel: measures attack breadth independently of per-rule confidence.
+/**
+ * Map each finding's category onto an attack dimension and score by how many distinct
+ * dimensions were triggered. Only `category` is read, so a finding's confidence and
+ * risk_score cannot influence the result.
+ *
+ * @param {Array<{category?: string}>} findings - Findings to classify.
+ * @returns {number} 0 for zero or one dimension, 10 for two, 25 for three, 40 for four or more.
+ */
+function calculateOrthogonalScore(findings) {
+  const dimensions = new Set(); // a Set, so repeated hits in one dimension count once
+  for (const f of findings) {
+    const cat = f.category || 'unknown'; // 'unknown' is in none of the lists below, so it adds no dimension
+    // Map categories into orthogonal attack dimensions
+    if (['exfiltration', 'prompt-injection-extraction', 'prompt-injection-output'].includes(cat)) {
+      dimensions.add('extraction');
+    }
+    if (['malicious-injection', 'system-manipulation'].includes(cat)) {
+      dimensions.add('code-execution');
+    }
+    if (['obfuscation', 'prompt-injection-encoded'].includes(cat)) {
+      dimensions.add('evasion');
+    }
+    if (['social-engineering', 'prompt-injection-jailbreak'].includes(cat)) {
+      dimensions.add('social');
+    }
+    if (['prompt-injection-content', 'prompt-injection-context', 'prompt-injection-delimiter'].includes(cat)) {
+      dimensions.add('injection');
+    }
+    if (['prompt-injection-multi-turn'].includes(cat)) {
+      dimensions.add('persistence');
+    }
+    if (['agent-manipulation', 'prompt-injection-privilege'].includes(cat)) {
+      dimensions.add('privilege');
+    }
+  }
+  // Score steps up with the number of distinct dimensions; a single dimension earns no bonus
+  const dimCount = dimensions.size;
+  if (dimCount <= 1) return 0;
+  if (dimCount === 2) return 10;
+  if (dimCount === 3) return 25;
+  return 40; // 4+ dimensions
+}
 // Load agent attack rules from YAML
 function loadAgentAttackRules() {
   try {
@@ -194,6 +265,7 @@ function calculateRiskScore(findings, context) {
   if (findings.length === 0) return 0;
   let totalScore = 0;
+  const lowConfidenceCount = findings.filter(f => (f.confidence || 'MEDIUM') === 'LOW').length;
   for (const finding of findings) {
     const riskScore = parseInt(finding.risk_score) || 50;
@@ -226,8 +298,24 @@ function calculateRiskScore(findings, context) {
     // Per-finding boost (smaller than before)
     avgScore = avgScore * (1 + (findings.length - 1) * 0.05);
+    // Low-signal accumulation — multiple LOW-confidence findings compound
+    // Catches threshold gaming with many weak signals (PromptFoo composite strategy)
+    if (lowConfidenceCount >= 2) {
+      avgScore = avgScore * (1 + lowConfidenceCount * 0.08);
+    }
+    // Category co-occurrence boost for suspicious pairs
+    const cooccurrenceBoost = getCategoryCooccurrenceBoost(uniqueCategories);
+    if (cooccurrenceBoost > 0) {
+      avgScore = avgScore * (1 + cooccurrenceBoost);
+    }
   }
+  // Add orthogonal score as a flat bonus (independent of per-rule confidence)
+  const orthogonalBonus = calculateOrthogonalScore(findings);
+  avgScore = avgScore + orthogonalBonus;
   avgScore = Math.min(100, avgScore);
   // Apply sensitivity adjustment (wider spread for meaningful impact)
@@ -360,6 +448,397 @@ function hashPrompt(text) {
   return createHash('sha256').update(text).digest('hex').substring(0, 16);
 }
+// ============================================================================
+// TEXT NORMALIZATION PIPELINE (Garak Buff-inspired)
+// Normalizes input to defeat homoglyph, invisible char, and Unicode bypasses
+// ============================================================================
+/* Homoglyph map: Cyrillic and Greek lookalike letters → ASCII.
+ * Every key is a single BMP character (one UTF-16 code unit) and every value is the
+ * ASCII letter it is replaced with. No Latin Extended characters are listed here.
+ */
+const HOMOGLYPH_MAP = {
+  // Cyrillic lowercase → Latin
+  '\u0430': 'a', // а → a
+  '\u0435': 'e', // е → e
+  '\u043E': 'o', // о → o
+  '\u0440': 'p', // р → p
+  '\u0441': 'c', // с → c
+  '\u0443': 'y', // у → y (visual match to y)
+  '\u0445': 'x', // х → x
+  '\u0456': 'i', // і → i
+  '\u04BB': 'h', // һ → h
+  '\u0455': 's', // ѕ → s
+  '\u0458': 'j', // ј → j
+  '\u043D': 'n', // н → n (looks like n in some fonts). NOTE(review): uppercase Н (U+041D) maps to 'H' below; confirm the asymmetry is intended
+  // Cyrillic uppercase → Latin
+  '\u0410': 'A', // А → A
+  '\u0412': 'B', // В → B
+  '\u0415': 'E', // Е → E
+  '\u041A': 'K', // К → K
+  '\u041C': 'M', // М → M
+  '\u041D': 'H', // Н → H
+  '\u041E': 'O', // О → O
+  '\u0420': 'P', // Р → P
+  '\u0421': 'C', // С → C
+  '\u0422': 'T', // Т → T
+  '\u0425': 'X', // Х → X
+  '\u0406': 'I', // І → I
+  // Greek lowercase → Latin
+  '\u03B1': 'a', // α → a
+  '\u03B5': 'e', // ε → e
+  '\u03BF': 'o', // ο → o
+  '\u03C1': 'p', // ρ → p
+  '\u03BA': 'k', // κ → k
+  '\u03BD': 'v', // ν → v
+  // Greek uppercase → Latin
+  '\u0391': 'A', // Α → A
+  '\u0392': 'B', // Β → B
+  '\u0395': 'E', // Ε → E
+  '\u0397': 'H', // Η → H
+  '\u0399': 'I', // Ι → I
+  '\u039A': 'K', // Κ → K
+  '\u039C': 'M', // Μ → M
+  '\u039D': 'N', // Ν → N
+  '\u039F': 'O', // Ο → O
+  '\u03A1': 'P', // Ρ → P
+  '\u03A4': 'T', // Τ → T
+  '\u03A7': 'X', // Χ → X
+  '\u03A5': 'Y', // Υ → Y
+  '\u0396': 'Z', // Ζ → Z
+};
+// Invisible/zero-width characters to strip (global regex; matched characters are removed outright).
+// Includes: soft hyphen, combining grapheme joiner, Arabic letter mark, hangul fillers,
+// Khmer inherent vowels (U+17B4, U+17B5), Mongolian vowel separator, zero-width chars,
+// directional marks/embeddings/isolates, word joiner and invisible operators (U+2060-U+2064), BOM, halfwidth hangul filler
+const INVISIBLE_CHAR_REGEX = /[\u00AD\u034F\u061C\u115F\u1160\u17B4\u17B5\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\u3164\uFEFF\uFFA0]/gu;
+// Zalgo stripping: the four combining-mark blocks (Diacritical Marks, Supplement, for Symbols, Half Marks). This removes any mark in these ranges, including accents that remain as separate combining characters.
+const ZALGO_REGEX = /[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/g;
+// Unicode tag characters (U+E0000-U+E007F) - used in invisible ASCII tag attacks.
+// Each is a surrogate pair in a JS string; the `u` flag with \u{...} escapes makes the class match exactly this code-point range.
+const TAG_CHAR_REGEX = /[\u{E0000}-\u{E007F}]/gu;
+function normalizeText(text) {
+  /*
+   * Canonicalize `text` so visually disguised input can match plain-ASCII rule patterns.
+   * Takes a string (it calls text.normalize, so a non-string argument throws) and returns
+   * a new string; the steps below run in this fixed order.
+   */
+  // Step 1: NFKC normalization (compatibility decomposition followed by canonical composition).
+  // Handles: fullwidth chars (ｉｇｎｏｒｅ → ignore), ligatures (ﬁ → fi),
+  //          superscripts, subscripts, circle-enclosed chars
+  let normalized = text.normalize('NFKC');
+  // Step 2: Strip invisible Unicode characters
+  normalized = normalized.replace(INVISIBLE_CHAR_REGEX, '');
+  // Step 3: Strip Unicode tag characters
+  normalized = normalized.replace(TAG_CHAR_REGEX, '');
+  // Step 4: Strip Zalgo combining diacritical marks. NOTE(review): step 1 has already composed base+mark pairs that have a precomposed form (e + U+0301 → é), so those marks are no longer separate characters here and survive this step; confirm that is acceptable.
+  normalized = normalized.replace(ZALGO_REGEX, '');
+  // Step 5: Homoglyph canonicalization
+  // Replace each UTF-16 code unit through the map; unmapped units (including surrogate halves) pass through and rejoin unchanged
+  normalized = normalized.split('').map(ch => HOMOGLYPH_MAP[ch] || ch).join('');
+  // Step 6: Normalize Unicode whitespace to ASCII space
+  // Includes: NBSP, en/em space, thin space, hair space, ideographic space, etc.
+  normalized = normalized.replace(/[\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]/g, ' ');
+  return normalized;
+}
+// Extract the inner content of every supported code/markup container found in `text`.
+/**
+ * Inspired by Garak latentinjection probes: attacks hide in document structures.
+ *
+ * Results are grouped by container format in the order scanned below (backtick fences,
+ * tilde fences, <code>, <pre>, HTML comments, CDATA), not by position in `text`.
+ * Content nested inside two containers (for example <pre><code>) is returned once per container.
+ *
+ * @param {string} text - Text to search.
+ * @returns {string[]} Extracted inner strings; empty array when nothing matches.
+ */
+function extractCodeBlockContent(text) {
+  const extracted = [];
+  let match;
+  // 1. Triple-backtick blocks (existing). NOTE(review): the opening strip removes the fence plus any run of word characters as a language tag, so a single-line fence like ```code``` yields an empty string; confirm.
+  const backtickRegex = /```[\s\S]*?```/g;
+  for (const block of (text.match(backtickRegex) || [])) {
+    extracted.push(block.replace(/^```\w*\n?/, '').replace(/\n?```$/, ''));
+  }
+  // 2. Triple-tilde blocks — ~~~code~~~ (same fence-stripping as above, with the same single-line caveat)
+  const tildeRegex = /~~~[\s\S]*?~~~/g;
+  for (const block of (text.match(tildeRegex) || [])) {
+    extracted.push(block.replace(/^~~~\w*\n?/, '').replace(/\n?~~~$/, ''));
+  }
+  // 3. HTML <code> tags — <code>content</code> (case-insensitive, attributes allowed)
+  const codeTagRegex = /<code[^>]*>([\s\S]*?)<\/code>/gi;
+  while ((match = codeTagRegex.exec(text)) !== null) {
+    extracted.push(match[1]);
+  }
+  // 4. HTML <pre> tags — <pre>content</pre> (case-insensitive, attributes allowed)
+  const preTagRegex = /<pre[^>]*>([\s\S]*?)<\/pre>/gi;
+  while ((match = preTagRegex.exec(text)) !== null) {
+    extracted.push(match[1]);
+  }
+  // 5. HTML comments — <!-- content -->
+  const htmlCommentRegex = /<!--([\s\S]*?)-->/g;
+  while ((match = htmlCommentRegex.exec(text)) !== null) {
+    extracted.push(match[1]);
+  }
+  // 6. CDATA sections — <![CDATA[ content ]]>
+  const cdataRegex = /<!\[CDATA\[([\s\S]*?)\]\]>/g;
+  while ((match = cdataRegex.exec(text)) !== null) {
+    extracted.push(match[1]);
+  }
+  return extracted;
+}
+// Collapse string concatenation to defeat fragmentation attacks.
+/**
+ * Inspired by PromptFoo's "token smuggling" and "payload splitting" attack classes.
+ *
+ * Removes every quote-plus-quote seam and every single-line C-style block comment, so
+ * fragments on either side become adjacent. Quote characters are not paired by type:
+ * any of the three quote characters may close and any may open a seam.
+ *
+ * @param {string} text - Text to collapse.
+ * @returns {string} Collapsed copy; identical to `text` when nothing matched.
+ */
+function collapseConcatenations(text) {
+  let collapsed = text;
+  // Join JS/Python string concatenation: "foo" + "bar" → "foobar" (only the seam is removed; the outermost quotes stay)
+  // Handles double quotes, single quotes, backticks
+  // The pattern: closing-quote, optional whitespace, +, optional whitespace, opening-quote
+  collapsed = collapsed.replace(/["'`]\s*\+\s*["'`]/g, '');
+  // Join multiline concatenation. NOTE(review): \s already matches newlines, so these two passes appear to be subsumed by the replace above; confirm before removing.
+  collapsed = collapsed.replace(/["'`]\s*\n\s*\+\s*["'`]/g, '');
+  collapsed = collapsed.replace(/["'`]\s*\+\s*\n\s*["'`]/g, '');
+  // Strip C-style inline comments used as fragment separators: ign/**/ore → ignore. NOTE(review): `.` does not match newlines here, so a comment spanning several lines is left in place.
+  collapsed = collapsed.replace(/\/\*.*?\*\//g, '');
+  return collapsed;
+}
+// Rescan decoded content against all rules; used by the decode cascade for each encoding type.
+/**
+ * Normalizes `decodedText`, tests it against every pattern of every rule, and appends at
+ * most one finding per rule to `findings`.
+ *
+ * @param {string} decodedText - Text produced by one decoding step.
+ * @param {Array<object>} allRules - Rules; this function reads id, patterns, severity, message
+ *   and metadata.{category, confidence, risk_score, action}.
+ * @param {Array<object>} findings - Mutated in place: new findings are pushed onto it.
+ * @param {string} encodingLabel - Used in the rule_id suffix and in the message text.
+ * @returns {void}
+ */
+function rescanDecoded(decodedText, allRules, findings, encodingLabel) {
+  const normalized = normalizeText(decodedText);
+  for (const rule of allRules) {
+    for (const pattern of rule.patterns) {
+      try {
+        const regex = new RegExp(pattern, 'i'); // patterns are strings compiled per call, case-insensitive
+        const match = normalized.match(regex);
+        if (match) {
+          findings.push({
+            rule_id: rule.id + '.' + encodingLabel + '-decoded',
+            category: rule.metadata.category || 'obfuscation',
+            severity: rule.severity,
+            message: rule.message + ` (detected in ${encodingLabel}-decoded content)`,
+            matched_text: match[0].substring(0, 100),
+            confidence: rule.metadata.confidence || 'MEDIUM',
+            risk_score: rule.metadata.risk_score || '50',
+            action: rule.metadata.action || 'WARN'
+          });
+          break; // One match per rule
+        }
+      } catch (e) {
+        // Skip invalid regex. NOTE(review): the push above sits inside this try, so a rule without `metadata` (TypeError) is swallowed here too.
+      }
+    }
+  }
+}
+/**
+ * Check whether a decoded string is mostly printable ASCII.
+ *
+ * @param {string} str - Candidate text; empty or falsy input returns false.
+ * @param {number} threshold - Fraction that the printable share must strictly exceed.
+ * @returns {boolean} True when more than `threshold` of the UTF-16 code units are in 32..126.
+ */
+function isPrintable(str, threshold) {
+  if (!str || str.length === 0) return false;
+  const printable = str.split('').filter(c => {
+    const code = c.charCodeAt(0);
+    return code >= 32 && code <= 126; // space through tilde; tab and newline count as non-printable
+  }).length;
+  return printable / str.length > threshold; // strict comparison: a share exactly equal to threshold fails
+}
+// Multi-encoding decode cascade: try each supported decoding of the text and rescan whatever decodes.
+// Inspired by Garak's 12+ encoding probes (InjectBase64, InjectHex, InjectROT13, etc.)
+/**
+ * and PromptFoo's static encoding strategies.
+ *
+ * Each stage runs independently on `expandedText`; decoded output is handed to
+ * rescanDecoded(), which pushes any matches onto `findings`.
+ *
+ * @param {string} expandedText - Text to search for encoded payloads.
+ * @param {Array<object>} allRules - Rules forwarded to rescanDecoded().
+ * @param {Array<object>} findings - Mutated in place via rescanDecoded().
+ * @returns {void}
+ */
+function tryDecodeAndRescan(expandedText, allRules, findings) {
+  // --- 1. Base64 (length threshold 20, printability 0.55). The candidate pattern also matches any 20+ character alphanumeric run, so the printability check is the real filter ---
+  const base64Regex = /[A-Za-z0-9+/]{20,}={0,2}/g;
+  for (const b64str of (expandedText.match(base64Regex) || [])) {
+    try {
+      const decoded = Buffer.from(b64str, 'base64').toString('utf-8');
+      if (decoded.length > 0 && isPrintable(decoded, 0.55)) {
+        rescanDecoded(decoded, allRules, findings, 'base64');
+        // --- 1b. Nested base64: decode again if inner content is also base64 (one extra level only) ---
+        const nestedB64 = decoded.match(/[A-Za-z0-9+/]{20,}={0,2}/g) || [];
+        for (const nested of nestedB64) {
+          try {
+            const twice = Buffer.from(nested, 'base64').toString('utf-8');
+            if (twice.length > 4 && isPrintable(twice, 0.55)) {
+              rescanDecoded(twice, allRules, findings, 'base64-nested');
+            }
+          } catch (e) { /* skip */ }
+        }
+      }
+    } catch (e) { /* skip invalid base64 */ }
+  }
+  // --- 2. Hex encoding: sequences of hex pairs (optionally space-separated) ---
+  // Matches: "69676e6f7265" or "69 67 6e 6f 72 65"
+  const hexRegex = /(?:[0-9a-fA-F]{2}[\s]?){8,}/g;
+  for (const hexStr of (expandedText.match(hexRegex) || [])) {
+    try {
+      const clean = hexStr.replace(/\s/g, '');
+      if (clean.length % 2 !== 0) continue; // NOTE(review): the pattern only ever matches whole pairs, so this looks unreachable
+      if (clean.length < 16) continue; // At least 8 bytes. NOTE(review): {8,} pairs already guarantees 16+ hex chars
+      const decoded = Buffer.from(clean, 'hex').toString('utf-8');
+      if (decoded.length > 4 && isPrintable(decoded, 0.7)) {
+        rescanDecoded(decoded, allRules, findings, 'hex');
+      }
+    } catch (e) { /* skip */ }
+  }
+  // --- 3. URL encoding: %XX sequences (at least 3 anywhere in text); the whole text is decoded in one call ---
+  const urlEncodedCount = (expandedText.match(/%[0-9a-fA-F]{2}/g) || []).length;
+  if (urlEncodedCount >= 3) {
+    try {
+      const decoded = decodeURIComponent(expandedText);
+      if (decoded !== expandedText) {
+        rescanDecoded(decoded, allRules, findings, 'url-encoded');
+      }
+    } catch (e) { /* skip malformed URL encoding. NOTE(review): one stray '%' anywhere makes decodeURIComponent throw, which skips this whole stage */ }
+  }
+  // --- 4. ROT13: only when indicators present (user-approved decision) ---
+  // This avoids false positives from ROT13-decoding normal text; when triggered, the entire text is rotated
+  const rot13Indicators = /\b(rot13|rot-13|caesar|cipher|decode\s+this|decipher)\b/i;
+  if (rot13Indicators.test(expandedText)) {
+    const rot13Decoded = expandedText.replace(/[a-zA-Z]/g, ch => {
+      const base = ch <= 'Z' ? 65 : 97; // 65 = 'A' for uppercase, 97 = 'a' for lowercase
+      return String.fromCharCode(((ch.charCodeAt(0) - base + 13) % 26) + base);
+    });
+    if (rot13Decoded !== expandedText) {
+      rescanDecoded(rot13Decoded, allRules, findings, 'rot13');
+    }
+  }
+}
+// Frame-setting patterns: conversational priming that sets up later exploitation.
+/* Inspired by PromptFoo's Crescendo strategy (Microsoft research).
+ * All patterns are case-insensitive and none carries the g or y flag, so calling
+ * .test() on these shared literals keeps no state between calls.
+ */
+const FRAME_SETTING_PATTERNS = [
+  /remember\s+(this|that)\s+for\s+later/i,
+  /in\s+my\s+next\s+message/i,
+  /when\s+i\s+(say|ask|tell)\s+you/i,
+  /from\s+now\s+on\s*,?\s+(you|your)\s+(will|must|should|are)/i,
+  /starting\s+now\s*,?\s+you/i,
+  /keep\s+this\s+in\s+mind/i,
+  /for\s+the\s+(rest|remainder)\s+of\s+(this|our)\s+conversation/i,
+  /act\s+as\s+(if|though)\s+you\s+(are|have)/i,
+  /let'?s\s+play\s+a\s+(game|role)/i,
+  /pretend\s+that\s+you\s+(are|have)/i,
+  /in\s+all\s+(future|subsequent)\s+(messages?|responses?)/i,
+  /always\s+respond\s+(by|with|as)/i,
+];
+// Improved multi-turn escalation detection.
+// Fixes: removes "current turn must have findings" requirement,
+/**
+ * adds cross-turn concatenation, frame-setting detection, full accumulation.
+ *
+ * Builds findings from conversation history in four steps: count the distinct rules that
+ * match any previous message, look for rule patterns that only match when all messages
+ * are joined, flag frame-setting language, and flag escalation when any rule matched history.
+ *
+ * @param {string[]} previousMessages - Earlier messages; anything other than a non-empty
+ *   array returns an empty result. Elements are passed to normalizeText(), so they are
+ *   assumed to be strings.
+ * @param {string} currentText - The current message.
+ * @param {Array<object>} allRules - Rules; this function reads id, patterns and metadata.category.
+ * @returns {Array<object>} New finding objects; the inputs are not modified.
+ */
+function detectMultiTurnEscalation(previousMessages, currentText, allRules) {
+  const escalationFindings = [];
+  if (!previousMessages || !Array.isArray(previousMessages) || previousMessages.length === 0) {
+    return escalationFindings;
+  }
+  // Step 1: Scan ALL previous messages and count the distinct rules they match (no early exit from the message loop)
+  let totalPrevMatches = 0;
+  let frameSettingCount = 0;
+  const prevMatchedRuleIds = new Set();
+  for (const prevMsg of previousMessages) {
+    const normalizedPrev = normalizeText(prevMsg);
+    // Check frame-setting patterns
+    for (const fp of FRAME_SETTING_PATTERNS) {
+      if (fp.test(normalizedPrev)) {
+        frameSettingCount++;
+        break; // One frame-setting match per message is enough
+      }
+    }
+    // Check all rules against this previous message
+    for (const rule of allRules) {
+      if (prevMatchedRuleIds.has(rule.id)) continue; // Already matched this rule in an earlier message
+      for (const pattern of rule.patterns) {
+        try {
+          const regex = new RegExp(pattern, 'i');
+          if (regex.test(normalizedPrev)) {
+            totalPrevMatches++;
+            prevMatchedRuleIds.add(rule.id);
+            break; // Each rule is counted once across the whole history, so the total equals the number of distinct matched rules
+          }
+        } catch (e) { /* skip invalid regex */ }
+      }
+    }
+  }
+  // Step 2: Cross-turn concatenation scan
+  // Join ALL messages with a single space and scan for patterns that span boundaries
+  // This catches: prev="ignore all" + current="previous instructions"
+  const crossTurnText = normalizeText([...previousMessages, currentText].join(' '));
+  for (const rule of allRules) {
+    for (const pattern of rule.patterns) {
+      try {
+        const regex = new RegExp(pattern, 'i');
+        const match = crossTurnText.match(regex);
+        if (match) {
+          // Only flag if this match does NOT appear in any single message alone. NOTE(review): every message is re-normalized here for each matching pattern; the normalized copies could be computed once.
+          const matchInCurrent = regex.test(normalizeText(currentText));
+          const matchInAnyPrev = previousMessages.some(pm => regex.test(normalizeText(pm)));
+          if (!matchInCurrent && !matchInAnyPrev) {
+            // Pattern only matches when messages are joined — it spans boundaries
+            escalationFindings.push({
+              rule_id: rule.id + '.cross-turn',
+              category: rule.metadata.category || 'prompt-injection-multi-turn',
+              severity: 'WARNING',
+              message: `Cross-turn prompt injection: attack pattern spans message boundaries`,
+              matched_text: match[0].substring(0, 100),
+              confidence: 'MEDIUM',
+              risk_score: '75',
+              action: 'WARN'
+            });
+            break;
+          }
+        }
+      } catch (e) { /* skip */ }
+    }
+  }
+  // Step 3: Frame-setting detection — one LOW-confidence finding regardless of how many messages matched
+  if (frameSettingCount > 0) {
+    escalationFindings.push({
+      rule_id: 'multi-turn.frame-setting',
+      category: 'prompt-injection-multi-turn',
+      severity: 'WARNING',
+      message: `Frame-setting language detected in ${frameSettingCount} previous message(s). Possible Crescendo-style gradual escalation attack.`,
+      matched_text: 'frame-setting phrases in conversation history',
+      confidence: 'LOW',
+      risk_score: '55',
+      action: 'LOG'
+    });
+  }
+  // Step 4: Escalation detection — fires on history alone; the current turn is not required to have findings
+  // KEY FIX: An attacker's final "trigger" message may be benign ("yes, do it")
+  if (totalPrevMatches > 0) {
+    escalationFindings.push({
+      rule_id: 'multi-turn.escalation',
+      category: 'social-engineering',
+      severity: 'WARNING',
+      message: `Multi-turn escalation: suspicious patterns in ${totalPrevMatches} previous rule(s). Current message may be a benign trigger.`,
+      matched_text: 'escalation across conversation turns',
+      confidence: totalPrevMatches >= 3 ? 'HIGH' : 'MEDIUM',
+      risk_score: String(Math.min(85, 50 + totalPrevMatches * 5)),
+      action: totalPrevMatches >= 3 ? 'WARN' : 'LOG'
+    });
+  }
+  return escalationFindings;
+}
 // Export schema for tool registration
 export const scanAgentPromptSchema = {
   prompt_text: z.string().describe("The prompt or instruction text to analyze"),
@@ -367,28 +846,47 @@ export const scanAgentPromptSchema = {
     previous_messages: z.array(z.string()).optional().describe("Previous conversation messages for multi-turn detection"),
     sensitivity_level: z.enum(["high", "medium", "low"]).optional().describe("Sensitivity level - high means more strict, low means more permissive")
   }).optional().describe("Optional context for better analysis"),
-  verbosity: z.enum(['minimal', 'compact', 'full']).optional().describe("Response detail level: 'minimal' (action only), 'compact' (default), 'full' (all details)")
+  verbosity: z.enum(['minimal', 'compact', 'full']).optional().describe("Response detail level: 'minimal' (action only), 'compact' (default), 'full' (all details)"),
+  deep_scan: z.boolean().optional().describe("Run Garak deep analysis probes for advanced encoding/injection detection (requires garak Python package)")
 };
 // Export handler function
-export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
+export async function scanAgentPrompt({ prompt_text, context, verbosity, deep_scan }) {
   const findings = [];
+  // Normalize prompt text (Garak Buff-inspired preprocessing)
+  const normalizedPrompt = normalizeText(prompt_text);
+  // Detect invisible Unicode characters in original text (obfuscation indicator)
+  const invisibleMatches = prompt_text.match(/[\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\u{E0000}-\u{E007F}]/gu);
+  if (invisibleMatches && invisibleMatches.length > 0) {
+    findings.push({
+      rule_id: 'runtime.invisible-unicode-detected',
+      category: 'obfuscation',
+      severity: 'WARNING',
+      message: `Invisible Unicode characters detected (${invisibleMatches.length} chars). These may hide malicious instructions from human review.`,
+      matched_text: `${invisibleMatches.length} invisible character(s) found`,
+      confidence: 'HIGH',
+      risk_score: '70',
+      action: 'WARN'
+    });
+  }
   // Load rules
   const agentRules = loadAgentAttackRules();
   const promptRules = loadPromptInjectionRules();
   const allRules = [...agentRules, ...promptRules];
-  // 2.7: Extract content from code blocks and append to scan text
-  let expandedText = prompt_text;
-  const codeBlockRegex = /```[\s\S]*?```/g;
-  const codeBlocks = prompt_text.match(codeBlockRegex);
-  if (codeBlocks) {
-    for (const block of codeBlocks) {
-      // Strip the ``` delimiters and extract inner content
-      const inner = block.replace(/^```\w*\n?/, '').replace(/\n?```$/, '');
-      expandedText += '\n' + inner;
-    }
+  // Extract content from all code block formats and append to scan text
+  let expandedText = normalizedPrompt;
+  for (const inner of extractCodeBlockContent(normalizedPrompt)) {
+    expandedText += '\n' + inner;
+  }
+  // Collapse string concatenations to defeat fragmentation (Bypass #2)
+  const collapsedText = collapseConcatenations(expandedText);
+  if (collapsedText !== expandedText) {
+    expandedText += '\n' + collapsedText;
   }
   // Scan expanded text against all rules
@@ -417,81 +915,27 @@ export async function scanAgentPrompt({ prompt_text, context, verbosity }) {
     }
   }
-  // 2.8: Runtime base64 decode-and-rescan
-  const base64Regex = /[A-Za-z0-9+/]{40,}={0,2}/g;
-  const b64Matches = expandedText.match(base64Regex);
-  if (b64Matches) {
-    for (const b64str of b64Matches) {
-      try {
-        const decoded = Buffer.from(b64str, 'base64').toString('utf-8');
-        // Check printability: >70% ASCII printable characters
-        const printable = decoded.split('').filter(c => c.charCodeAt(0) >= 32 && c.charCodeAt(0) <= 126).length;
-        if (printable / decoded.length > 0.7) {
-          // Re-scan decoded text against prompt rules only
-          for (const rule of allRules) {
-            if (!rule.id.startsWith('generic.prompt')) continue;
-            for (const pattern of rule.patterns) {
-              try {
-                const regex = new RegExp(pattern, 'i');
-                const match = decoded.match(regex);
-                if (match) {
-                  findings.push({
-                    rule_id: rule.id + '.base64-decoded',
-                    category: rule.metadata.category || 'unknown',
-                    severity: rule.severity,
-                    message: rule.message + ' (detected in base64-decoded content)',
-                    matched_text: match[0].substring(0, 100),
-                    confidence: rule.metadata.confidence || 'MEDIUM',
-                    risk_score: rule.metadata.risk_score || '50',
-                    action: rule.metadata.action || 'WARN'
-                  });
-                  break;
-                }
-              } catch (e) {
-                // Skip invalid regex
-              }
-            }
-          }
-        }
-      } catch (e) {
-        // Skip invalid base64
-      }
-    }
+  // Multi-encoding decode cascade (replaces base64-only block)
+  tryDecodeAndRescan(expandedText, allRules, findings);
+  // Improved multi-turn escalation detection (Bypass #4 fix)
+  if (context?.previous_messages && Array.isArray(context.previous_messages)) {
+    const multiTurnFindings = detectMultiTurnEscalation(
+      context.previous_messages,
+      normalizedPrompt,
+      allRules
+    );
+    findings.push(...multiTurnFindings);
   }
-  // Multi-turn escalation detection (Bug 9)
-  if (context?.previous_messages && Array.isArray(context.previous_messages) && context.previous_messages.length > 0) {
-    let prevMatchCount = 0;
-    for (const prevMsg of context.previous_messages) {
-      for (const rule of allRules) {
-        for (const pattern of rule.patterns) {
-          try {
-            const regex = new RegExp(pattern, 'i');
-            if (regex.test(prevMsg)) {
-              prevMatchCount++;
-              break;
-            }
-          } catch (e) {
-            // Skip invalid regex
-          }
-        }
-        if (prevMatchCount > 0) break;
+  // Garak deep scan (optional, requires garak Python package)
+  if (deep_scan) {
+    const garakFindings = runGarakProbes(normalizedPrompt);
+    // Only add non-INFO findings to affect scoring
+    for (const gf of garakFindings) {
+      if (gf.severity !== 'INFO') {
+        findings.push(gf);
       }
-      if (prevMatchCount > 0) break;
-    }
-    // If both previous and current messages have matches, flag escalation
-    if (prevMatchCount > 0 && findings.length > 0) {
-      findings.push({
-        rule_id: 'multi-turn.escalation',
-        category: 'social-engineering',
-        severity: 'WARNING',
-        message: 'Multi-turn escalation detected: suspicious patterns found in both previous and current messages.',
-        matched_text: 'escalation across conversation turns',
-        confidence: 'MEDIUM',
-        risk_score: '70',
-        action: 'WARN'
-      });
     }
   }