@bryan-thompson/inspector-assessment-client 1.11.1 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-DA2koy6X.js → OAuthCallback-D8KW6pFf.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-Bx60PQTT.js → OAuthDebugCallback-D15nNAOl.js} +1 -1
- package/dist/assets/{index-kJ0jPd4m.js → index-cVkEgqCc.js} +130 -5
- package/dist/index.html +1 -1
- package/lib/lib/assessmentTypes.d.ts +72 -1
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/policyMapping.d.ts +183 -0
- package/lib/lib/policyMapping.d.ts.map +1 -0
- package/lib/lib/policyMapping.js +442 -0
- package/lib/lib/reportFormatters/MarkdownReportFormatter.d.ts +91 -0
- package/lib/lib/reportFormatters/MarkdownReportFormatter.d.ts.map +1 -0
- package/lib/lib/reportFormatters/MarkdownReportFormatter.js +498 -0
- package/lib/lib/reportFormatters/index.d.ts +50 -0
- package/lib/lib/reportFormatters/index.d.ts.map +1 -0
- package/lib/lib/reportFormatters/index.js +81 -0
- package/lib/lib/securityPatterns.d.ts +3 -3
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +129 -4
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +8 -0
- package/lib/services/assessment/PolicyComplianceGenerator.d.ts +119 -0
- package/lib/services/assessment/PolicyComplianceGenerator.d.ts.map +1 -0
- package/lib/services/assessment/PolicyComplianceGenerator.js +632 -0
- package/lib/services/assessment/config/annotationPatterns.d.ts +70 -0
- package/lib/services/assessment/config/annotationPatterns.d.ts.map +1 -0
- package/lib/services/assessment/config/annotationPatterns.js +305 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +22 -2
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +289 -152
- package/package.json +1 -1
|
@@ -11,75 +11,24 @@
|
|
|
11
11
|
* Reference: Anthropic MCP Directory Policy #17
|
|
12
12
|
*/
|
|
13
13
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
const READ_ONLY_PATTERNS = [
|
|
18
|
-
/^get[_-]?/i,
|
|
19
|
-
/^list[_-]?/i,
|
|
20
|
-
/^fetch[_-]?/i,
|
|
21
|
-
/^read[_-]?/i,
|
|
22
|
-
/^query[_-]?/i,
|
|
23
|
-
/^search[_-]?/i,
|
|
24
|
-
/^find[_-]?/i,
|
|
25
|
-
/^show[_-]?/i,
|
|
26
|
-
/^view[_-]?/i,
|
|
27
|
-
/^describe[_-]?/i,
|
|
28
|
-
/^check[_-]?/i,
|
|
29
|
-
/^verify[_-]?/i,
|
|
30
|
-
/^validate[_-]?/i,
|
|
31
|
-
/^count[_-]?/i,
|
|
32
|
-
/^status[_-]?/i,
|
|
33
|
-
/^info[_-]?/i,
|
|
34
|
-
/^lookup[_-]?/i,
|
|
35
|
-
/^browse[_-]?/i,
|
|
36
|
-
/^preview[_-]?/i,
|
|
37
|
-
/^download[_-]?/i, // Downloads but doesn't modify server state
|
|
38
|
-
];
|
|
39
|
-
const DESTRUCTIVE_PATTERNS = [
|
|
40
|
-
/^delete[_-]?/i,
|
|
41
|
-
/^remove[_-]?/i,
|
|
42
|
-
/^destroy[_-]?/i,
|
|
43
|
-
/^drop[_-]?/i,
|
|
44
|
-
/^purge[_-]?/i,
|
|
45
|
-
/^clear[_-]?/i,
|
|
46
|
-
/^wipe[_-]?/i,
|
|
47
|
-
/^erase[_-]?/i,
|
|
48
|
-
/^reset[_-]?/i,
|
|
49
|
-
/^truncate[_-]?/i,
|
|
50
|
-
/^revoke[_-]?/i,
|
|
51
|
-
/^terminate[_-]?/i,
|
|
52
|
-
/^cancel[_-]?/i,
|
|
53
|
-
/^kill[_-]?/i,
|
|
54
|
-
/^force[_-]?/i,
|
|
55
|
-
];
|
|
56
|
-
const WRITE_PATTERNS = [
|
|
57
|
-
/^create[_-]?/i,
|
|
58
|
-
/^add[_-]?/i,
|
|
59
|
-
/^insert[_-]?/i,
|
|
60
|
-
/^update[_-]?/i,
|
|
61
|
-
/^modify[_-]?/i,
|
|
62
|
-
/^edit[_-]?/i,
|
|
63
|
-
/^change[_-]?/i,
|
|
64
|
-
/^set[_-]?/i,
|
|
65
|
-
/^put[_-]?/i,
|
|
66
|
-
/^patch[_-]?/i,
|
|
67
|
-
/^post[_-]?/i,
|
|
68
|
-
/^write[_-]?/i,
|
|
69
|
-
/^save[_-]?/i,
|
|
70
|
-
/^upload[_-]?/i,
|
|
71
|
-
/^send[_-]?/i,
|
|
72
|
-
/^submit[_-]?/i,
|
|
73
|
-
/^publish[_-]?/i,
|
|
74
|
-
/^enable[_-]?/i,
|
|
75
|
-
/^disable[_-]?/i,
|
|
76
|
-
/^start[_-]?/i,
|
|
77
|
-
/^stop[_-]?/i,
|
|
78
|
-
/^run[_-]?/i,
|
|
79
|
-
/^execute[_-]?/i,
|
|
80
|
-
];
|
|
14
|
+
import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
|
|
15
|
+
// NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
|
|
16
|
+
// The patterns are now loaded from getDefaultCompiledPatterns() or custom config
|
|
81
17
|
export class ToolAnnotationAssessor extends BaseAssessor {
|
|
82
18
|
claudeBridge;
|
|
19
|
+
compiledPatterns;
|
|
20
|
+
constructor(config) {
|
|
21
|
+
super(config);
|
|
22
|
+
// Initialize with default patterns (can be overridden via setPatterns)
|
|
23
|
+
this.compiledPatterns = getDefaultCompiledPatterns();
|
|
24
|
+
}
|
|
25
|
+
/**
|
|
26
|
+
* Set custom compiled patterns for behavior inference
|
|
27
|
+
*/
|
|
28
|
+
setPatterns(patterns) {
|
|
29
|
+
this.compiledPatterns = patterns;
|
|
30
|
+
this.log("Custom annotation patterns configured");
|
|
31
|
+
}
|
|
83
32
|
/**
|
|
84
33
|
* Set Claude Code Bridge for enhanced behavior inference
|
|
85
34
|
*/
|
|
@@ -104,6 +53,13 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
104
53
|
let annotatedCount = 0;
|
|
105
54
|
let missingAnnotationsCount = 0;
|
|
106
55
|
let misalignedAnnotationsCount = 0;
|
|
56
|
+
// Track annotation sources
|
|
57
|
+
const annotationSourceCounts = {
|
|
58
|
+
mcp: 0,
|
|
59
|
+
sourceCode: 0,
|
|
60
|
+
inferred: 0,
|
|
61
|
+
none: 0,
|
|
62
|
+
};
|
|
107
63
|
const useClaudeInference = this.isClaudeEnabled();
|
|
108
64
|
if (useClaudeInference) {
|
|
109
65
|
this.log("Claude Code integration enabled - using semantic behavior inference");
|
|
@@ -157,7 +113,23 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
157
113
|
}
|
|
158
114
|
else {
|
|
159
115
|
missingAnnotationsCount++;
|
|
160
|
-
|
|
116
|
+
}
|
|
117
|
+
// Track annotation source
|
|
118
|
+
const source = latestResult.annotationSource;
|
|
119
|
+
if (source === "mcp") {
|
|
120
|
+
annotationSourceCounts.mcp++;
|
|
121
|
+
}
|
|
122
|
+
else if (source === "source-code") {
|
|
123
|
+
annotationSourceCounts.sourceCode++;
|
|
124
|
+
}
|
|
125
|
+
else if (source === "inferred") {
|
|
126
|
+
annotationSourceCounts.inferred++;
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
annotationSourceCounts.none++;
|
|
130
|
+
}
|
|
131
|
+
// Emit annotation_missing event with tool details
|
|
132
|
+
if (!latestResult.hasAnnotations) {
|
|
161
133
|
if (context.onProgress && latestResult.inferredBehavior) {
|
|
162
134
|
const annotations = this.extractAnnotations(tool);
|
|
163
135
|
context.onProgress({
|
|
@@ -174,51 +146,92 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
174
146
|
});
|
|
175
147
|
}
|
|
176
148
|
}
|
|
177
|
-
// Emit
|
|
149
|
+
// Emit appropriate event based on alignment status
|
|
178
150
|
if (context.onProgress && latestResult.inferredBehavior) {
|
|
179
151
|
const annotations = latestResult.annotations;
|
|
180
152
|
const inferred = latestResult.inferredBehavior;
|
|
181
153
|
const confidence = latestResult.claudeInference?.confidence ?? 50;
|
|
182
154
|
const toolParams = this.extractToolParams(tool.inputSchema);
|
|
183
155
|
const toolAnnotations = this.extractAnnotations(tool);
|
|
184
|
-
|
|
156
|
+
const alignmentStatus = latestResult.alignmentStatus;
|
|
157
|
+
// Check readOnlyHint mismatch
|
|
185
158
|
if (annotations?.readOnlyHint !== undefined &&
|
|
186
159
|
annotations.readOnlyHint !== inferred.expectedReadOnly) {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
160
|
+
if (alignmentStatus === "REVIEW_RECOMMENDED") {
|
|
161
|
+
// Emit review_recommended for ambiguous cases
|
|
162
|
+
context.onProgress({
|
|
163
|
+
type: "annotation_review_recommended",
|
|
164
|
+
tool: tool.name,
|
|
165
|
+
title: toolAnnotations.title,
|
|
166
|
+
description: tool.description,
|
|
167
|
+
parameters: toolParams,
|
|
168
|
+
field: "readOnlyHint",
|
|
169
|
+
actual: annotations.readOnlyHint,
|
|
170
|
+
inferred: inferred.expectedReadOnly,
|
|
171
|
+
confidence: inferred.confidence,
|
|
172
|
+
isAmbiguous: inferred.isAmbiguous,
|
|
173
|
+
reason: inferred.reason,
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
else {
|
|
177
|
+
// Emit misaligned for high-confidence mismatches
|
|
178
|
+
context.onProgress({
|
|
179
|
+
type: "annotation_misaligned",
|
|
180
|
+
tool: tool.name,
|
|
181
|
+
title: toolAnnotations.title,
|
|
182
|
+
description: tool.description,
|
|
183
|
+
parameters: toolParams,
|
|
184
|
+
field: "readOnlyHint",
|
|
185
|
+
actual: annotations.readOnlyHint,
|
|
186
|
+
expected: inferred.expectedReadOnly,
|
|
187
|
+
confidence,
|
|
188
|
+
reason: `Tool has readOnlyHint=${annotations.readOnlyHint}, but ${inferred.reason}`,
|
|
189
|
+
});
|
|
190
|
+
}
|
|
199
191
|
}
|
|
200
|
-
// Check destructiveHint
|
|
192
|
+
// Check destructiveHint mismatch
|
|
201
193
|
if (annotations?.destructiveHint !== undefined &&
|
|
202
194
|
annotations.destructiveHint !== inferred.expectedDestructive) {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
195
|
+
if (alignmentStatus === "REVIEW_RECOMMENDED") {
|
|
196
|
+
// Emit review_recommended for ambiguous cases
|
|
197
|
+
context.onProgress({
|
|
198
|
+
type: "annotation_review_recommended",
|
|
199
|
+
tool: tool.name,
|
|
200
|
+
title: toolAnnotations.title,
|
|
201
|
+
description: tool.description,
|
|
202
|
+
parameters: toolParams,
|
|
203
|
+
field: "destructiveHint",
|
|
204
|
+
actual: annotations.destructiveHint,
|
|
205
|
+
inferred: inferred.expectedDestructive,
|
|
206
|
+
confidence: inferred.confidence,
|
|
207
|
+
isAmbiguous: inferred.isAmbiguous,
|
|
208
|
+
reason: inferred.reason,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
else {
|
|
212
|
+
// Emit misaligned for high-confidence mismatches
|
|
213
|
+
context.onProgress({
|
|
214
|
+
type: "annotation_misaligned",
|
|
215
|
+
tool: tool.name,
|
|
216
|
+
title: toolAnnotations.title,
|
|
217
|
+
description: tool.description,
|
|
218
|
+
parameters: toolParams,
|
|
219
|
+
field: "destructiveHint",
|
|
220
|
+
actual: annotations.destructiveHint,
|
|
221
|
+
expected: inferred.expectedDestructive,
|
|
222
|
+
confidence,
|
|
223
|
+
reason: `Tool has destructiveHint=${annotations.destructiveHint}, but ${inferred.reason}`,
|
|
224
|
+
});
|
|
225
|
+
}
|
|
215
226
|
}
|
|
216
227
|
}
|
|
217
228
|
}
|
|
218
229
|
const status = this.determineAnnotationStatus(toolResults, context.tools.length);
|
|
219
230
|
const explanation = this.generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
|
|
220
231
|
const recommendations = this.generateRecommendations(toolResults);
|
|
221
|
-
|
|
232
|
+
// Calculate new metrics and alignment breakdown
|
|
233
|
+
const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
|
|
234
|
+
this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
|
|
222
235
|
// Return enhanced assessment if Claude was used
|
|
223
236
|
if (useClaudeInference) {
|
|
224
237
|
const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
|
|
@@ -233,6 +246,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
233
246
|
status,
|
|
234
247
|
explanation: this.generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
|
|
235
248
|
recommendations: this.generateEnhancedRecommendations(toolResults),
|
|
249
|
+
metrics,
|
|
250
|
+
alignmentBreakdown,
|
|
251
|
+
annotationSources: annotationSourceCounts,
|
|
236
252
|
claudeEnhanced: true,
|
|
237
253
|
highConfidenceMisalignments,
|
|
238
254
|
};
|
|
@@ -245,6 +261,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
245
261
|
status,
|
|
246
262
|
explanation,
|
|
247
263
|
recommendations,
|
|
264
|
+
metrics,
|
|
265
|
+
alignmentBreakdown,
|
|
266
|
+
annotationSources: annotationSourceCounts,
|
|
248
267
|
};
|
|
249
268
|
}
|
|
250
269
|
/**
|
|
@@ -433,6 +452,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
433
452
|
}
|
|
434
453
|
/**
|
|
435
454
|
* Assess a single tool's annotations
|
|
455
|
+
* Now includes alignment status with confidence-aware logic
|
|
436
456
|
*/
|
|
437
457
|
assessTool(tool) {
|
|
438
458
|
const issues = [];
|
|
@@ -443,35 +463,66 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
443
463
|
annotations.destructiveHint !== undefined;
|
|
444
464
|
// Infer expected behavior from tool name
|
|
445
465
|
const inferredBehavior = this.inferBehavior(tool.name, tool.description);
|
|
466
|
+
// Determine alignment status
|
|
467
|
+
let alignmentStatus = "ALIGNED";
|
|
446
468
|
// Check for missing annotations
|
|
447
469
|
if (!hasAnnotations) {
|
|
448
470
|
issues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
|
|
449
471
|
recommendations.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
|
|
472
|
+
alignmentStatus = "UNKNOWN";
|
|
450
473
|
}
|
|
451
474
|
else {
|
|
452
|
-
// Check for misaligned annotations
|
|
453
|
-
|
|
454
|
-
annotations.readOnlyHint !== inferredBehavior.expectedReadOnly
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
475
|
+
// Check for misaligned annotations with confidence-aware logic
|
|
476
|
+
const readOnlyMismatch = annotations.readOnlyHint !== undefined &&
|
|
477
|
+
annotations.readOnlyHint !== inferredBehavior.expectedReadOnly;
|
|
478
|
+
const destructiveMismatch = annotations.destructiveHint !== undefined &&
|
|
479
|
+
annotations.destructiveHint !== inferredBehavior.expectedDestructive;
|
|
480
|
+
if (readOnlyMismatch || destructiveMismatch) {
|
|
481
|
+
if (inferredBehavior.isAmbiguous ||
|
|
482
|
+
inferredBehavior.confidence === "low") {
|
|
483
|
+
// Ambiguous case: REVIEW_RECOMMENDED, softer language
|
|
484
|
+
alignmentStatus = "REVIEW_RECOMMENDED";
|
|
485
|
+
if (readOnlyMismatch) {
|
|
486
|
+
issues.push(`Review recommended: readOnlyHint=${annotations.readOnlyHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
|
|
487
|
+
recommendations.push(`Verify readOnlyHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
|
|
488
|
+
}
|
|
489
|
+
if (destructiveMismatch) {
|
|
490
|
+
issues.push(`Review recommended: destructiveHint=${annotations.destructiveHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
|
|
491
|
+
recommendations.push(`Verify destructiveHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
else {
|
|
495
|
+
// High/medium confidence mismatch: MISALIGNED
|
|
496
|
+
alignmentStatus = "MISALIGNED";
|
|
497
|
+
if (readOnlyMismatch) {
|
|
498
|
+
issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
|
|
499
|
+
recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
|
|
500
|
+
}
|
|
501
|
+
if (destructiveMismatch) {
|
|
502
|
+
issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
|
|
503
|
+
recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
462
506
|
}
|
|
463
507
|
}
|
|
464
|
-
// Check for destructive tools without explicit hint
|
|
508
|
+
// Check for destructive tools without explicit hint (only for high-confidence patterns)
|
|
465
509
|
if (inferredBehavior.expectedDestructive &&
|
|
510
|
+
inferredBehavior.confidence !== "low" &&
|
|
466
511
|
annotations.destructiveHint !== true) {
|
|
467
512
|
issues.push("Tool appears destructive but destructiveHint is not set to true");
|
|
468
513
|
recommendations.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
|
|
514
|
+
// Only upgrade to MISALIGNED if we have high confidence
|
|
515
|
+
if (inferredBehavior.confidence === "high") {
|
|
516
|
+
alignmentStatus = "MISALIGNED";
|
|
517
|
+
}
|
|
469
518
|
}
|
|
470
519
|
return {
|
|
471
520
|
toolName: tool.name,
|
|
472
521
|
hasAnnotations,
|
|
473
522
|
annotations: hasAnnotations ? annotations : undefined,
|
|
523
|
+
annotationSource: annotations.source,
|
|
474
524
|
inferredBehavior,
|
|
525
|
+
alignmentStatus,
|
|
475
526
|
issues,
|
|
476
527
|
recommendations,
|
|
477
528
|
};
|
|
@@ -479,34 +530,65 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
479
530
|
/**
|
|
480
531
|
* Extract annotations from a tool
|
|
481
532
|
* MCP SDK may have annotations in different locations
|
|
533
|
+
*
|
|
534
|
+
* Priority order:
|
|
535
|
+
* 1. tool.annotations (MCP 2024-11 spec) - "mcp" source
|
|
536
|
+
* 2. Direct properties on tool - "mcp" source
|
|
537
|
+
* 3. tool.metadata - "mcp" source
|
|
538
|
+
* 4. No annotations found - "none" source
|
|
482
539
|
*/
|
|
483
540
|
extractAnnotations(tool) {
|
|
484
|
-
// Try to find annotations in various locations
|
|
485
541
|
const toolAny = tool;
|
|
486
|
-
// Check
|
|
487
|
-
let readOnlyHint = toolAny.readOnlyHint;
|
|
488
|
-
let destructiveHint = toolAny.destructiveHint;
|
|
489
|
-
let idempotentHint = toolAny.idempotentHint;
|
|
490
|
-
let openWorldHint = toolAny.openWorldHint;
|
|
491
|
-
// Check annotations object (MCP 2024-11 spec)
|
|
542
|
+
// Priority 1: Check annotations object (MCP 2024-11 spec) - primary source
|
|
492
543
|
if (toolAny.annotations) {
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
544
|
+
const hasAnnotations = toolAny.annotations.readOnlyHint !== undefined ||
|
|
545
|
+
toolAny.annotations.destructiveHint !== undefined;
|
|
546
|
+
if (hasAnnotations) {
|
|
547
|
+
return {
|
|
548
|
+
readOnlyHint: toolAny.annotations.readOnlyHint,
|
|
549
|
+
destructiveHint: toolAny.annotations.destructiveHint,
|
|
550
|
+
title: toolAny.annotations.title || toolAny.title,
|
|
551
|
+
description: tool.description,
|
|
552
|
+
idempotentHint: toolAny.annotations.idempotentHint,
|
|
553
|
+
openWorldHint: toolAny.annotations.openWorldHint,
|
|
554
|
+
source: "mcp",
|
|
555
|
+
};
|
|
556
|
+
}
|
|
497
557
|
}
|
|
498
|
-
// Check
|
|
558
|
+
// Priority 2: Check direct properties on tool object
|
|
559
|
+
if (toolAny.readOnlyHint !== undefined ||
|
|
560
|
+
toolAny.destructiveHint !== undefined) {
|
|
561
|
+
return {
|
|
562
|
+
readOnlyHint: toolAny.readOnlyHint,
|
|
563
|
+
destructiveHint: toolAny.destructiveHint,
|
|
564
|
+
title: toolAny.title,
|
|
565
|
+
description: tool.description,
|
|
566
|
+
idempotentHint: toolAny.idempotentHint,
|
|
567
|
+
openWorldHint: toolAny.openWorldHint,
|
|
568
|
+
source: "mcp",
|
|
569
|
+
};
|
|
570
|
+
}
|
|
571
|
+
// Priority 3: Check metadata (some servers use this)
|
|
499
572
|
if (toolAny.metadata) {
|
|
500
|
-
|
|
501
|
-
|
|
573
|
+
const hasMetadataAnnotations = toolAny.metadata.readOnlyHint !== undefined ||
|
|
574
|
+
toolAny.metadata.destructiveHint !== undefined;
|
|
575
|
+
if (hasMetadataAnnotations) {
|
|
576
|
+
return {
|
|
577
|
+
readOnlyHint: toolAny.metadata.readOnlyHint,
|
|
578
|
+
destructiveHint: toolAny.metadata.destructiveHint,
|
|
579
|
+
title: toolAny.metadata.title || toolAny.title,
|
|
580
|
+
description: tool.description,
|
|
581
|
+
idempotentHint: toolAny.metadata.idempotentHint,
|
|
582
|
+
openWorldHint: toolAny.metadata.openWorldHint,
|
|
583
|
+
source: "mcp",
|
|
584
|
+
};
|
|
585
|
+
}
|
|
502
586
|
}
|
|
587
|
+
// No annotations found from MCP protocol
|
|
503
588
|
return {
|
|
504
|
-
|
|
505
|
-
destructiveHint,
|
|
506
|
-
title: toolAny.title || toolAny.annotations?.title,
|
|
589
|
+
title: toolAny.title,
|
|
507
590
|
description: tool.description,
|
|
508
|
-
|
|
509
|
-
openWorldHint,
|
|
591
|
+
source: "none",
|
|
510
592
|
};
|
|
511
593
|
}
|
|
512
594
|
/**
|
|
@@ -534,46 +616,60 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
534
616
|
}
|
|
535
617
|
/**
|
|
536
618
|
* Infer expected behavior from tool name and description
|
|
619
|
+
* Now returns confidence level and ambiguity flag for better handling
|
|
537
620
|
*/
|
|
538
621
|
inferBehavior(toolName, description) {
|
|
539
|
-
const lowerName = toolName.toLowerCase();
|
|
540
622
|
const lowerDesc = (description || "").toLowerCase();
|
|
541
|
-
//
|
|
542
|
-
|
|
543
|
-
|
|
623
|
+
// Use the configurable pattern matching system
|
|
624
|
+
const patternMatch = matchToolPattern(toolName, this.compiledPatterns);
|
|
625
|
+
// Handle pattern match results
|
|
626
|
+
switch (patternMatch.category) {
|
|
627
|
+
case "ambiguous":
|
|
628
|
+
// Ambiguous patterns - don't make strong assertions
|
|
629
|
+
return {
|
|
630
|
+
expectedReadOnly: false,
|
|
631
|
+
expectedDestructive: false,
|
|
632
|
+
reason: `Tool name matches ambiguous pattern '${patternMatch.pattern}' - behavior varies by implementation context`,
|
|
633
|
+
confidence: "low",
|
|
634
|
+
isAmbiguous: true,
|
|
635
|
+
};
|
|
636
|
+
case "destructive":
|
|
544
637
|
return {
|
|
545
638
|
expectedReadOnly: false,
|
|
546
639
|
expectedDestructive: true,
|
|
547
|
-
reason: `Tool name matches destructive pattern: ${pattern
|
|
640
|
+
reason: `Tool name matches destructive pattern: ${patternMatch.pattern}`,
|
|
641
|
+
confidence: "high",
|
|
642
|
+
isAmbiguous: false,
|
|
548
643
|
};
|
|
549
|
-
|
|
550
|
-
}
|
|
551
|
-
// Check for read-only patterns
|
|
552
|
-
for (const pattern of READ_ONLY_PATTERNS) {
|
|
553
|
-
if (pattern.test(lowerName)) {
|
|
644
|
+
case "readOnly":
|
|
554
645
|
return {
|
|
555
646
|
expectedReadOnly: true,
|
|
556
647
|
expectedDestructive: false,
|
|
557
|
-
reason: `Tool name matches read-only pattern: ${pattern
|
|
648
|
+
reason: `Tool name matches read-only pattern: ${patternMatch.pattern}`,
|
|
649
|
+
confidence: "high",
|
|
650
|
+
isAmbiguous: false,
|
|
558
651
|
};
|
|
559
|
-
|
|
560
|
-
}
|
|
561
|
-
// Check for write patterns (not destructive but not read-only)
|
|
562
|
-
for (const pattern of WRITE_PATTERNS) {
|
|
563
|
-
if (pattern.test(lowerName)) {
|
|
652
|
+
case "write":
|
|
564
653
|
return {
|
|
565
654
|
expectedReadOnly: false,
|
|
566
655
|
expectedDestructive: false,
|
|
567
|
-
reason: `Tool name matches write pattern: ${pattern
|
|
656
|
+
reason: `Tool name matches write pattern: ${patternMatch.pattern}`,
|
|
657
|
+
confidence: "medium",
|
|
658
|
+
isAmbiguous: false,
|
|
568
659
|
};
|
|
569
|
-
|
|
660
|
+
case "unknown":
|
|
661
|
+
default:
|
|
662
|
+
// Fall through to description-based analysis
|
|
663
|
+
break;
|
|
570
664
|
}
|
|
571
|
-
// Check description for hints
|
|
665
|
+
// Check description for hints (medium confidence)
|
|
572
666
|
if (lowerDesc.includes("delete") || lowerDesc.includes("remove")) {
|
|
573
667
|
return {
|
|
574
668
|
expectedReadOnly: false,
|
|
575
669
|
expectedDestructive: true,
|
|
576
670
|
reason: "Description mentions delete/remove operations",
|
|
671
|
+
confidence: "medium",
|
|
672
|
+
isAmbiguous: false,
|
|
577
673
|
};
|
|
578
674
|
}
|
|
579
675
|
if (lowerDesc.includes("read") ||
|
|
@@ -583,30 +679,43 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
583
679
|
expectedReadOnly: true,
|
|
584
680
|
expectedDestructive: false,
|
|
585
681
|
reason: "Description suggests read-only operation",
|
|
682
|
+
confidence: "medium",
|
|
683
|
+
isAmbiguous: false,
|
|
586
684
|
};
|
|
587
685
|
}
|
|
588
|
-
// Default: assume write
|
|
686
|
+
// Default: assume write with low confidence (ambiguous)
|
|
589
687
|
return {
|
|
590
688
|
expectedReadOnly: false,
|
|
591
689
|
expectedDestructive: false,
|
|
592
690
|
reason: "Could not infer from name pattern - defaulting to write operation",
|
|
691
|
+
confidence: "low",
|
|
692
|
+
isAmbiguous: true,
|
|
593
693
|
};
|
|
594
694
|
}
|
|
595
695
|
/**
|
|
596
|
-
* Determine overall status
|
|
696
|
+
* Determine overall status using alignment status.
|
|
697
|
+
* Only MISALIGNED counts as failure; REVIEW_RECOMMENDED does not fail.
|
|
597
698
|
*/
|
|
598
699
|
determineAnnotationStatus(results, totalTools) {
|
|
599
700
|
if (totalTools === 0)
|
|
600
701
|
return "PASS";
|
|
601
702
|
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
602
|
-
|
|
603
|
-
const
|
|
604
|
-
//
|
|
703
|
+
// Only count actual MISALIGNED, not REVIEW_RECOMMENDED
|
|
704
|
+
const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
|
|
705
|
+
// Count high-confidence destructive tools without proper hints
|
|
706
|
+
const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
|
|
707
|
+
r.inferredBehavior?.confidence === "high" &&
|
|
708
|
+
r.annotations?.destructiveHint !== true).length;
|
|
709
|
+
// Destructive tools without proper hints = FAIL (critical safety issue)
|
|
605
710
|
if (destructiveWithoutHint > 0) {
|
|
606
711
|
return "FAIL";
|
|
607
712
|
}
|
|
608
|
-
//
|
|
609
|
-
if (
|
|
713
|
+
// High-confidence misalignments = FAIL
|
|
714
|
+
if (misalignedCount > 0) {
|
|
715
|
+
return "FAIL";
|
|
716
|
+
}
|
|
717
|
+
// All tools annotated = PASS
|
|
718
|
+
if (annotatedCount === totalTools) {
|
|
610
719
|
return "PASS";
|
|
611
720
|
}
|
|
612
721
|
// Some annotations missing = NEED_MORE_INFO
|
|
@@ -620,6 +729,34 @@ export class ToolAnnotationAssessor extends BaseAssessor {
|
|
|
620
729
|
}
|
|
621
730
|
return "NEED_MORE_INFO";
|
|
622
731
|
}
|
|
732
|
+
/**
|
|
733
|
+
* Calculate metrics and alignment breakdown for the assessment
|
|
734
|
+
*/
|
|
735
|
+
calculateMetrics(results, totalTools) {
|
|
736
|
+
const alignmentBreakdown = {
|
|
737
|
+
aligned: results.filter((r) => r.alignmentStatus === "ALIGNED").length,
|
|
738
|
+
misaligned: results.filter((r) => r.alignmentStatus === "MISALIGNED")
|
|
739
|
+
.length,
|
|
740
|
+
reviewRecommended: results.filter((r) => r.alignmentStatus === "REVIEW_RECOMMENDED").length,
|
|
741
|
+
unknown: results.filter((r) => r.alignmentStatus === "UNKNOWN").length,
|
|
742
|
+
};
|
|
743
|
+
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
744
|
+
const metrics = {
|
|
745
|
+
// Coverage: percentage of tools with annotations
|
|
746
|
+
coverage: totalTools > 0 ? (annotatedCount / totalTools) * 100 : 100,
|
|
747
|
+
// Consistency: percentage without contradictions (not MISALIGNED)
|
|
748
|
+
consistency: totalTools > 0
|
|
749
|
+
? ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100
|
|
750
|
+
: 100,
|
|
751
|
+
// Correctness: percentage of annotated tools that are ALIGNED
|
|
752
|
+
correctness: annotatedCount > 0
|
|
753
|
+
? (alignmentBreakdown.aligned / annotatedCount) * 100
|
|
754
|
+
: 0,
|
|
755
|
+
// Review required: count of tools needing manual review
|
|
756
|
+
reviewRequired: alignmentBreakdown.reviewRecommended,
|
|
757
|
+
};
|
|
758
|
+
return { metrics, alignmentBreakdown };
|
|
759
|
+
}
|
|
623
760
|
/**
|
|
624
761
|
* Generate explanation
|
|
625
762
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-client",
|
|
3
|
-
"version": "1.11.1",
|
|
3
|
+
"version": "1.13.0",
|
|
4
4
|
"description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|