@bryan-thompson/inspector-assessment-client 1.18.1 → 1.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-DhwTOA1q.js → OAuthCallback-BDIUPkR-.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-DzopkA29.js → OAuthDebugCallback-DPLV3zir.js} +1 -1
- package/dist/assets/{index-CzoGuYPy.css → index-32-uLPhe.css} +3 -0
- package/dist/assets/{index-zBRaltBB.js → index-DKTSB7VQ.js} +460 -27
- package/dist/index.html +2 -2
- package/lib/lib/assessmentTypes.d.ts +45 -2
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +82 -2
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +5 -2
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts +41 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.d.ts.map +1 -0
- package/lib/services/assessment/LanguageAwarePayloadGenerator.js +258 -0
- package/lib/services/assessment/ToolClassifier.d.ts +1 -0
- package/lib/services/assessment/ToolClassifier.d.ts.map +1 -1
- package/lib/services/assessment/ToolClassifier.js +26 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ResourceAssessor.js +161 -4
- package/lib/services/assessment/modules/SecurityAssessor.d.ts +1 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/SecurityAssessor.js +49 -14
- package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.js +133 -15
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +256 -1
- package/package.json +1 -1
|
@@ -12,6 +12,190 @@
|
|
|
12
12
|
*/
|
|
13
13
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
14
14
|
import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
|
|
15
|
+
/**
 * Signatures used to flag tool descriptions that look like prompt injection
 * ("description poisoning", Issue #8).
 *
 * Each entry has:
 *   - name:     identifier reported for a match
 *   - pattern:  RegExp tested against the tool description
 *   - severity: "HIGH" or "MEDIUM"
 *   - category: grouping label (hidden_instructions, override, concealment,
 *               exfiltration, delimiter, encoding_bypass)
 *
 * Several patterns carry the `g` flag, which makes `exec`/`test` stateful via
 * `lastIndex`; build a fresh RegExp from `pattern.source`/`pattern.flags`
 * before scanning rather than executing these shared instances directly.
 *
 * A single match is enough for the assessor to report a poisoned description,
 * so the natural-language patterns are anchored on word boundaries to avoid
 * firing on ordinary prose such as "react as a group" or "you are now able to".
 */
const DESCRIPTION_POISONING_PATTERNS = [
    // Hidden instruction tags
    {
        name: "hidden_tag",
        pattern: /<HIDDEN>[\s\S]*?<\/HIDDEN>/gi,
        severity: "HIGH",
        category: "hidden_instructions",
    },
    {
        name: "important_tag",
        pattern: /<IMPORTANT>[\s\S]*?<\/IMPORTANT>/gi,
        severity: "HIGH",
        category: "hidden_instructions",
    },
    {
        name: "system_tag",
        pattern: /<SYSTEM>[\s\S]*?<\/SYSTEM>/gi,
        severity: "HIGH",
        category: "hidden_instructions",
    },
    {
        name: "instruction_tag",
        pattern: /<INSTRUCTION>[\s\S]*?<\/INSTRUCTION>/gi,
        severity: "HIGH",
        category: "hidden_instructions",
    },
    // Override commands
    {
        name: "ignore_instructions",
        pattern: /ignore\s+(all\s+)?(previous|prior|above)\s+instructions/i,
        severity: "HIGH",
        category: "override",
    },
    {
        name: "disregard_instructions",
        // Accepts "above" as well, mirroring ignore_instructions.
        pattern: /disregard\s+(all\s+)?(previous|prior|above)\s+instructions/i,
        severity: "HIGH",
        category: "override",
    },
    {
        name: "you_are_now",
        // Trailing \b: the article must be a whole word, so "you are now able to"
        // or "you are now authenticated" no longer match on the leading "a".
        pattern: /you\s+are\s+now\s+(a|an|the)\b/i,
        severity: "MEDIUM",
        category: "override",
    },
    {
        name: "system_override",
        pattern: /system:\s*override/i,
        severity: "HIGH",
        category: "override",
    },
    // Secrecy/concealment
    {
        name: "do_not_mention",
        pattern: /do\s+not\s+(mention|tell|inform|reveal)/i,
        severity: "HIGH",
        category: "concealment",
    },
    {
        name: "secretly",
        pattern: /\bsecretly\b/i,
        severity: "MEDIUM",
        category: "concealment",
    },
    {
        name: "hide_from_user",
        pattern: /hide\s+(this\s+)?from\s+(the\s+)?user/i,
        severity: "HIGH",
        category: "concealment",
    },
    // Data exfiltration
    {
        name: "return_keys",
        pattern: /return\s+(all\s+)?(api\s*)?keys/i,
        severity: "HIGH",
        category: "exfiltration",
    },
    {
        name: "include_credentials",
        pattern: /include.*(api.?key|credential|password|secret)/i,
        severity: "HIGH",
        category: "exfiltration",
    },
    {
        name: "reveal_secrets",
        pattern: /reveal\s+(all\s+)?(secrets|credentials|api\s*keys)/i,
        severity: "HIGH",
        category: "exfiltration",
    },
    {
        name: "access_internal_secrets",
        pattern: /access\s+(the\s+)?internal\s+(api\s*)?(key|secret|credential|password|token)/i,
        severity: "HIGH",
        category: "exfiltration",
    },
    // Delimiter injection
    {
        name: "system_codeblock",
        pattern: /```system[\s\S]*?```/gi,
        severity: "HIGH",
        category: "delimiter",
    },
    {
        name: "inst_tags",
        pattern: /\[INST\][\s\S]*?\[\/INST\]/gi,
        severity: "HIGH",
        category: "delimiter",
    },
    {
        name: "chatml_system",
        pattern: /<\|im_start\|>system/gi,
        severity: "HIGH",
        category: "delimiter",
    },
    {
        name: "llama_sys",
        pattern: /<<SYS>>/gi,
        severity: "HIGH",
        category: "delimiter",
    },
    {
        name: "user_assistant_block",
        pattern: /\[USER\][\s\S]*?\[ASSISTANT\]/gi,
        severity: "HIGH",
        category: "delimiter",
    },
    // Role/persona injection (Warning #4)
    {
        name: "act_as",
        // Leading \b keeps "react as a" / "interact as the" from matching;
        // trailing \b requires a whole-word article.
        pattern: /\bact\s+(like|as)\s+(a|an|the)\b/i,
        severity: "MEDIUM",
        category: "override",
    },
    {
        name: "pretend_to_be",
        pattern: /pretend\s+(to\s+be|you\s*'?re)/i,
        severity: "MEDIUM",
        category: "override",
    },
    {
        name: "roleplay_as",
        pattern: /role\s*play\s+(as|like)/i,
        severity: "MEDIUM",
        category: "override",
    },
    {
        name: "new_task",
        pattern: /new\s+(task|instruction|objective):\s*/i,
        severity: "HIGH",
        category: "override",
    },
    // Encoding bypass detection (Warning #1)
    {
        name: "base64_encoded_block",
        pattern: /[A-Za-z0-9+/]{50,}={0,2}/g, // Large Base64 strings (50+ chars)
        severity: "MEDIUM",
        category: "encoding_bypass",
    },
    {
        name: "unicode_escape_sequence",
        pattern: /(?:\\u[0-9a-fA-F]{4}){3,}/gi, // 3+ consecutive Unicode escapes
        severity: "MEDIUM",
        category: "encoding_bypass",
    },
    {
        name: "html_entity_block",
        pattern: /(?:&#x?[0-9a-fA-F]+;){3,}/gi, // 3+ consecutive HTML entities
        severity: "MEDIUM",
        category: "encoding_bypass",
    },
    // Typoglycemia/evasion patterns (Warning #2)
    {
        name: "ignore_instructions_typo",
        pattern: /ign[o0]r[e3]?\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r|ab[o0]v[e3])\s+[i1]nstruct[i1][o0]ns?/i,
        severity: "HIGH",
        category: "override",
    },
    {
        name: "disregard_typo",
        // Accepts the "above" variant as well, mirroring ignore_instructions_typo.
        pattern: /d[i1]sr[e3]g[a4]rd\s+(all\s+)?(pr[e3]v[i1][o0]us|pr[i1][o0]r|ab[o0]v[e3])\s+[i1]nstruct[i1][o0]ns?/i,
        severity: "HIGH",
        category: "override",
    },
];
|
|
15
199
|
// NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
|
|
16
200
|
// The patterns are now loaded from getDefaultCompiledPatterns() or custom config
|
|
17
201
|
export class ToolAnnotationAssessor extends BaseAssessor {
|
|
@@ -53,6 +237,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
53
237
|
let annotatedCount = 0;
|
|
54
238
|
let missingAnnotationsCount = 0;
|
|
55
239
|
let misalignedAnnotationsCount = 0;
|
|
240
|
+
let poisonedDescriptionsCount = 0;
|
|
56
241
|
// Track annotation sources
|
|
57
242
|
const annotationSourceCounts = {
|
|
58
243
|
mcp: 0,
|
|
@@ -128,6 +313,20 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
128
313
|
else {
|
|
129
314
|
annotationSourceCounts.none++;
|
|
130
315
|
}
|
|
316
|
+
// Track and emit poisoned description detection (Issue #8)
|
|
317
|
+
if (latestResult.descriptionPoisoning?.detected) {
|
|
318
|
+
poisonedDescriptionsCount++;
|
|
319
|
+
this.log(`POISONED DESCRIPTION DETECTED: ${tool.name} contains suspicious patterns`);
|
|
320
|
+
if (context.onProgress) {
|
|
321
|
+
context.onProgress({
|
|
322
|
+
type: "annotation_poisoned",
|
|
323
|
+
tool: tool.name,
|
|
324
|
+
description: tool.description,
|
|
325
|
+
patterns: latestResult.descriptionPoisoning.patterns,
|
|
326
|
+
riskLevel: latestResult.descriptionPoisoning.riskLevel,
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
}
|
|
131
330
|
// Emit annotation_missing event with tool details
|
|
132
331
|
if (!latestResult.hasAnnotations) {
|
|
133
332
|
if (context.onProgress && latestResult.inferredBehavior) {
|
|
@@ -231,7 +430,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
231
430
|
const recommendations = this.generateRecommendations(toolResults);
|
|
232
431
|
// Calculate new metrics and alignment breakdown
|
|
233
432
|
const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
|
|
234
|
-
this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
|
|
433
|
+
this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review, ${poisonedDescriptionsCount} poisoned`);
|
|
235
434
|
// Return enhanced assessment if Claude was used
|
|
236
435
|
if (useClaudeInference) {
|
|
237
436
|
const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
|
|
@@ -249,6 +448,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
249
448
|
metrics,
|
|
250
449
|
alignmentBreakdown,
|
|
251
450
|
annotationSources: annotationSourceCounts,
|
|
451
|
+
poisonedDescriptionsDetected: poisonedDescriptionsCount,
|
|
252
452
|
claudeEnhanced: true,
|
|
253
453
|
highConfidenceMisalignments,
|
|
254
454
|
};
|
|
@@ -264,6 +464,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
264
464
|
metrics,
|
|
265
465
|
alignmentBreakdown,
|
|
266
466
|
annotationSources: annotationSourceCounts,
|
|
467
|
+
poisonedDescriptionsDetected: poisonedDescriptionsCount,
|
|
267
468
|
};
|
|
268
469
|
}
|
|
269
470
|
/**
|
|
@@ -516,6 +717,12 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
516
717
|
alignmentStatus = "MISALIGNED";
|
|
517
718
|
}
|
|
518
719
|
}
|
|
720
|
+
// Scan for description poisoning (Issue #8)
|
|
721
|
+
const descriptionPoisoning = this.scanDescriptionForPoisoning(tool);
|
|
722
|
+
if (descriptionPoisoning.detected) {
|
|
723
|
+
issues.push(`Tool description contains suspicious patterns: ${descriptionPoisoning.patterns.map((p) => p.name).join(", ")}`);
|
|
724
|
+
recommendations.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
|
|
725
|
+
}
|
|
519
726
|
return {
|
|
520
727
|
toolName: tool.name,
|
|
521
728
|
hasAnnotations,
|
|
@@ -525,6 +732,49 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
525
732
|
alignmentStatus,
|
|
526
733
|
issues,
|
|
527
734
|
recommendations,
|
|
735
|
+
descriptionPoisoning,
|
|
736
|
+
};
|
|
737
|
+
}
|
|
738
|
+
/**
|
|
739
|
+
* Scan tool description for poisoning patterns (Issue #8)
|
|
740
|
+
* Detects hidden instructions, override commands, concealment, and exfiltration attempts
|
|
741
|
+
*/
|
|
742
|
+
scanDescriptionForPoisoning(tool) {
|
|
743
|
+
const description = tool.description || "";
|
|
744
|
+
const matches = [];
|
|
745
|
+
for (const patternDef of DESCRIPTION_POISONING_PATTERNS) {
|
|
746
|
+
// Create a fresh regex to reset lastIndex
|
|
747
|
+
const regex = new RegExp(patternDef.pattern.source, patternDef.pattern.flags);
|
|
748
|
+
// Loop to find all matches (not just first)
|
|
749
|
+
let match;
|
|
750
|
+
while ((match = regex.exec(description)) !== null) {
|
|
751
|
+
matches.push({
|
|
752
|
+
name: patternDef.name,
|
|
753
|
+
pattern: patternDef.pattern.toString(),
|
|
754
|
+
severity: patternDef.severity,
|
|
755
|
+
category: patternDef.category,
|
|
756
|
+
evidence: match[0].substring(0, 100) + (match[0].length > 100 ? "..." : ""),
|
|
757
|
+
});
|
|
758
|
+
// Prevent infinite loop for patterns without 'g' flag
|
|
759
|
+
if (!regex.global)
|
|
760
|
+
break;
|
|
761
|
+
}
|
|
762
|
+
}
|
|
763
|
+
// Determine overall risk level based on highest severity match
|
|
764
|
+
let riskLevel = "NONE";
|
|
765
|
+
if (matches.some((m) => m.severity === "HIGH")) {
|
|
766
|
+
riskLevel = "HIGH";
|
|
767
|
+
}
|
|
768
|
+
else if (matches.some((m) => m.severity === "MEDIUM")) {
|
|
769
|
+
riskLevel = "MEDIUM";
|
|
770
|
+
}
|
|
771
|
+
else if (matches.length > 0) {
|
|
772
|
+
riskLevel = "LOW";
|
|
773
|
+
}
|
|
774
|
+
return {
|
|
775
|
+
detected: matches.length > 0,
|
|
776
|
+
patterns: matches,
|
|
777
|
+
riskLevel,
|
|
528
778
|
};
|
|
529
779
|
}
|
|
530
780
|
/**
|
|
@@ -700,6 +950,11 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
700
950
|
if (totalTools === 0)
|
|
701
951
|
return "PASS";
|
|
702
952
|
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
953
|
+
// Check for poisoned descriptions (Issue #8) - critical security issue
|
|
954
|
+
const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
|
|
955
|
+
if (poisonedCount > 0) {
|
|
956
|
+
return "FAIL";
|
|
957
|
+
}
|
|
703
958
|
// Only count actual MISALIGNED, not REVIEW_RECOMMENDED
|
|
704
959
|
const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
|
|
705
960
|
// Count high-confidence destructive tools without proper hints
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-client",
|
|
3
|
-
"version": "1.18.1",
|
|
3
|
+
"version": "1.19.1",
|
|
4
4
|
"description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|