npm - @bryan-thompson/inspector-assessment-client - Versions diffs - 1.18.1 → 1.19.1 - Mend

@bryan-thompson/inspector-assessment-client 1.18.1 → 1.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/lib/services/assessment/modules/ToolAnnotationAssessor.js CHANGED Viewed

@@ -12,6 +12,190 @@
  */
 import { BaseAssessor } from "./BaseAssessor.js";
 import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
+const DESCRIPTION_POISONING_PATTERNS = [
+    /* Rules that scanDescriptionForPoisoning runs against a tool's description.
+     * Each entry carries a `name`, a `pattern` RegExp, a `severity` ("HIGH" or
+     * "MEDIUM" in this list) and a `category` label. All patterns are
+     * case-insensitive; only some carry the `g` flag, and the scanner records
+     * every occurrence for those but at most one for the rest.
+     *
+     * Hidden instruction tags: pseudo-XML tag pairs, matched lazily across lines */
+    {
+        name: "hidden_tag",
+        pattern: /<HIDDEN>[\s\S]*?<\/HIDDEN>/gi,
+        severity: "HIGH",
+        category: "hidden_instructions",
+    },
+    {
+        name: "important_tag",
+        pattern: /<IMPORTANT>[\s\S]*?<\/IMPORTANT>/gi,
+        severity: "HIGH",
+        category: "hidden_instructions",
+    },
+    {
+        name: "system_tag",
+        pattern: /<SYSTEM>[\s\S]*?<\/SYSTEM>/gi,
+        severity: "HIGH",
+        category: "hidden_instructions",
+    },
+    {
+        name: "instruction_tag",
+        pattern: /<INSTRUCTION>[\s\S]*?<\/INSTRUCTION>/gi,
+        severity: "HIGH",
+        category: "hidden_instructions",
+    },
+    // Override commands: phrases that tell the reader to drop earlier instructions or take on a new identity
+    {
+        name: "ignore_instructions",
+        pattern: /ignore\s+(all\s+)?(previous|prior|above)\s+instructions/i,
+        severity: "HIGH",
+        category: "override",
+    },
+    {
+        name: "disregard_instructions",
+        pattern: /disregard\s+(all\s+)?(previous|prior)\s+instructions/i,
+        severity: "HIGH",
+        category: "override",
+    },
+    {
+        name: "you_are_now",
+        pattern: /you\s+are\s+now\s+(a|an|the)/i,
+        severity: "MEDIUM",
+        category: "override",
+    },
+    {
+        name: "system_override",
+        pattern: /system:\s*override/i,
+        severity: "HIGH",
+        category: "override",
+    },
+    // Secrecy/concealment: wording that asks for something to be kept from the user
+    {
+        name: "do_not_mention",
+        pattern: /do\s+not\s+(mention|tell|inform|reveal)/i,
+        severity: "HIGH",
+        category: "concealment",
+    },
+    {
+        name: "secretly",
+        pattern: /\bsecretly\b/i,
+        severity: "MEDIUM",
+        category: "concealment",
+    },
+    {
+        name: "hide_from_user",
+        pattern: /hide\s+(this\s+)?from\s+(the\s+)?user/i,
+        severity: "HIGH",
+        category: "concealment",
+    },
+    // Data exfiltration: requests to return, include, reveal, or access keys, credentials, passwords, secrets, or tokens
+    {
+        name: "return_keys",
+        pattern: /return\s+(all\s+)?(api\s*)?keys/i,
+        severity: "HIGH",
+        category: "exfiltration",
+    },
+    {
+        name: "include_credentials",
+        pattern: /include.*(api.?key|credential|password|secret)/i,
+        severity: "HIGH",
+        category: "exfiltration",
+    },
+    {
+        name: "reveal_secrets",
+        pattern: /reveal\s+(all\s+)?(secrets|credentials|api\s*keys)/i,
+        severity: "HIGH",
+        category: "exfiltration",
+    },
+    {
+        name: "access_internal_secrets",
+        pattern: /access\s+(the\s+)?internal\s+(api\s*)?(key|secret|credential|password|token)/i,
+        severity: "HIGH",
+        category: "exfiltration",
+    },
+    // Delimiter injection: prompt-delimiter markers (```system fences, [INST] pairs, <|im_start|>system, <<SYS>>, [USER]...[ASSISTANT])
+    {
+        name: "system_codeblock",
+        pattern: /```system[\s\S]*?```/gi,
+        severity: "HIGH",
+        category: "delimiter",
+    },
+    {
+        name: "inst_tags",
+        pattern: /\[INST\][\s\S]*?\[\/INST\]/gi,
+        severity: "HIGH",
+        category: "delimiter",
+    },
+    {
+        name: "chatml_system",
+        pattern: /<\|im_start\|>system/gi,
+        severity: "HIGH",
+        category: "delimiter",
+    },
+    {
+        name: "llama_sys",
+        pattern: /<<SYS>>/gi,
+        severity: "HIGH",
+        category: "delimiter",
+    },
+    {
+        name: "user_assistant_block",
+        pattern: /\[USER\][\s\S]*?\[ASSISTANT\]/gi,
+        severity: "HIGH",
+        category: "delimiter",
+    },
+    // Role/persona injection (Warning #4); these entries reuse the "override" category
+    {
+        name: "act_as",
+        pattern: /act\s+(like|as)\s+(a|an|the)/i,
+        severity: "MEDIUM",
+        category: "override",
+    },
+    {
+        name: "pretend_to_be",
+        pattern: /pretend\s+(to\s+be|you\s*'?re)/i,
+        severity: "MEDIUM",
+        category: "override",
+    },
+    {
+        name: "roleplay_as",
+        pattern: /role\s*play\s+(as|like)/i,
+        severity: "MEDIUM",
+        category: "override",
+    },
+    {
+        name: "new_task",
+        pattern: /new\s+(task|instruction|objective):\s*/i,
+        severity: "HIGH",
+        category: "override",
+    },
+    // Encoding bypass detection (Warning #1): long Base64-looking runs, chained \u escapes, chained HTML entities
+    {
+        name: "base64_encoded_block",
+        pattern: /[A-Za-z0-9+/]{50,}={0,2}/g, // Large Base64 strings (50+ chars)
+        severity: "MEDIUM",
+        category: "encoding_bypass",
+    },
+    {
+        name: "unicode_escape_sequence",
+        pattern: /(?:\\u[0-9a-fA-F]{4}){3,}/gi, // 3+ consecutive Unicode escapes
+        severity: "MEDIUM",
+        category: "encoding_bypass",
+    },
+    {
+        name: "html_entity_block",
+        pattern: /(?:&#x?[0-9a-fA-F]+;){3,}/gi, // 3+ consecutive HTML entities
+        severity: "MEDIUM",
+        category: "encoding_bypass",
+    },
+    // Typoglycemia/evasion patterns (Warning #2): the ignore/disregard phrases with 0/1/3/4 accepted in place of o/i/e/a
+    {
+        name: "ignore_instructions_typo",
+        pattern: /ign[o0]r[e3]?\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r|ab[o0]v[e3])\s+[i1]nstruct[i1][o0]ns?/i,
+        severity: "HIGH",
+        category: "override",
+    },
+    {
+        name: "disregard_typo",
+        pattern: /d[i1]sr[e3]g[a4]rd\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r)\s+[i1]nstruct[i1][o0]ns?/i,
+        severity: "HIGH",
+        category: "override",
+    },
+];
 // NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
 // The patterns are now loaded from getDefaultCompiledPatterns() or custom config
 export class ToolAnnotationAssessor extends BaseAssessor {
@@ -53,6 +237,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
         let annotatedCount = 0;
         let missingAnnotationsCount = 0;
         let misalignedAnnotationsCount = 0;
+        let poisonedDescriptionsCount = 0;
         // Track annotation sources
         const annotationSourceCounts = {
             mcp: 0,
@@ -128,6 +313,20 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             else {
                 annotationSourceCounts.none++;
             }
+            // Track and emit poisoned description detection (Issue #8)
+            if (latestResult.descriptionPoisoning?.detected) {
+                poisonedDescriptionsCount++;
+                this.log(`POISONED DESCRIPTION DETECTED: ${tool.name} contains suspicious patterns`);
+                if (context.onProgress) {
+                    context.onProgress({
+                        type: "annotation_poisoned",
+                        tool: tool.name,
+                        description: tool.description,
+                        patterns: latestResult.descriptionPoisoning.patterns,
+                        riskLevel: latestResult.descriptionPoisoning.riskLevel,
+                    });
+                }
+            }
             // Emit annotation_missing event with tool details
             if (!latestResult.hasAnnotations) {
                 if (context.onProgress && latestResult.inferredBehavior) {
@@ -231,7 +430,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
         const recommendations = this.generateRecommendations(toolResults);
         // Calculate new metrics and alignment breakdown
         const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
-        this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
+        this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
         // Return enhanced assessment if Claude was used
         if (useClaudeInference) {
             const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
@@ -249,6 +448,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
                 metrics,
                 alignmentBreakdown,
                 annotationSources: annotationSourceCounts,
+                poisonedDescriptionsDetected: poisonedDescriptionsCount,
                 claudeEnhanced: true,
                 highConfidenceMisalignments,
             };
@@ -264,6 +464,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             metrics,
             alignmentBreakdown,
             annotationSources: annotationSourceCounts,
+            poisonedDescriptionsDetected: poisonedDescriptionsCount,
         };
     }
     /**
@@ -516,6 +717,12 @@ export class ToolAnnotationAssessor extends BaseAssessor {
                 alignmentStatus = "MISALIGNED";
             }
         }
+        // Scan for description poisoning (Issue #8)
+        const descriptionPoisoning = this.scanDescriptionForPoisoning(tool);
+        if (descriptionPoisoning.detected) {
+            issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
+            recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
+        }
         return {
             toolName: tool.name,
             hasAnnotations,
@@ -525,6 +732,49 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             alignmentStatus,
             issues,
             recommendations,
+            descriptionPoisoning,
+        };
+    }
+    /**
+     * Scan tool description for poisoning patterns (Issue #8)
+     * Detects hidden instructions, override commands, concealment, and exfiltration attempts
+     *
+     * Runs every entry of DESCRIPTION_POISONING_PATTERNS against `tool.description`
+     * (a missing or otherwise falsy description is scanned as ""). Patterns with
+     * the `g` flag contribute one record per occurrence; patterns without it
+     * contribute at most one.
+     *
+     * @param tool - object whose `description` property is scanned
+     * @returns `{ detected, patterns, riskLevel }`:
+     *   - `detected`: true when at least one pattern matched
+     *   - `patterns`: one record per match with `name`, `pattern` (the regex in
+     *     string form), `severity`, `category`, and `evidence` (the matched text,
+     *     cut to 100 characters with "..." appended when it was longer)
+     *   - `riskLevel`: "HIGH" if any match is HIGH, else "MEDIUM" if any match is
+     *     MEDIUM, else "LOW" if anything matched, else "NONE"
+     */
+    scanDescriptionForPoisoning(tool) {
+        const description = tool.description || "";
+        const matches = [];
+        for (const patternDef of DESCRIPTION_POISONING_PATTERNS) {
+            // Clone the shared pattern: the copy starts with lastIndex 0 and the module-level regex is never advanced
+            const regex = new RegExp(patternDef.pattern.source, patternDef.pattern.flags);
+            // With the g flag, exec() resumes after the previous match, so this loop collects every occurrence
+            let match;
+            while ((match = regex.exec(description)) !== null) {
+                matches.push({
+                    name: patternDef.name,
+                    pattern: patternDef.pattern.toString(),
+                    severity: patternDef.severity,
+                    category: patternDef.category,
+                    evidence: match[0].substring(0, 100) + (match[0].length > 100 ? "..." : ""), // keep at most 100 chars of the matched text
+                });
+                // Without the g flag, exec() would keep returning the same first match, so stop after one
+                if (!regex.global)
+                    break;
+            }
+        }
+        // Overall risk is the highest severity among the matches; "LOW" is only reachable for a match whose severity is neither HIGH nor MEDIUM
+        let riskLevel = "NONE";
+        if (matches.some((m) => m.severity === "HIGH")) {
+            riskLevel = "HIGH";
+        }
+        else if (matches.some((m) => m.severity === "MEDIUM")) {
+            riskLevel = "MEDIUM";
+        }
+        else if (matches.length > 0) {
+            riskLevel = "LOW";
+        }
+        return {
+            detected: matches.length > 0,
+            patterns: matches,
+            riskLevel,
         };
     }
     /**
@@ -700,6 +950,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
         if (totalTools === 0)
             return "PASS";
         const annotatedCount = results.filter((r) => r.hasAnnotations).length;
+        // Check for poisoned descriptions (Issue #8) - critical security issue
+        const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
+        if (poisonedCount > 0) {
+            return "FAIL";
+        }
         // Only count actual MISALIGNED, not REVIEW_RECOMMENDED
         const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
         // Count high-confidence destructive tools without proper hints

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bryan-thompson/inspector-assessment-client",
-  "version": "1.18.1",
+  "version": "1.19.1",
   "description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
   "license": "MIT",
   "author": "Bryan Thompson <bryan@triepod.ai>",