@bryan-thompson/inspector-assessment-client 1.11.1 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/dist/assets/{OAuthCallback-DA2koy6X.js → OAuthCallback-D8KW6pFf.js} +1 -1
  2. package/dist/assets/{OAuthDebugCallback-Bx60PQTT.js → OAuthDebugCallback-D15nNAOl.js} +1 -1
  3. package/dist/assets/{index-kJ0jPd4m.js → index-cVkEgqCc.js} +130 -5
  4. package/dist/index.html +1 -1
  5. package/lib/lib/assessmentTypes.d.ts +72 -1
  6. package/lib/lib/assessmentTypes.d.ts.map +1 -1
  7. package/lib/lib/policyMapping.d.ts +183 -0
  8. package/lib/lib/policyMapping.d.ts.map +1 -0
  9. package/lib/lib/policyMapping.js +442 -0
  10. package/lib/lib/reportFormatters/MarkdownReportFormatter.d.ts +91 -0
  11. package/lib/lib/reportFormatters/MarkdownReportFormatter.d.ts.map +1 -0
  12. package/lib/lib/reportFormatters/MarkdownReportFormatter.js +498 -0
  13. package/lib/lib/reportFormatters/index.d.ts +50 -0
  14. package/lib/lib/reportFormatters/index.d.ts.map +1 -0
  15. package/lib/lib/reportFormatters/index.js +81 -0
  16. package/lib/lib/securityPatterns.d.ts +3 -3
  17. package/lib/lib/securityPatterns.d.ts.map +1 -1
  18. package/lib/lib/securityPatterns.js +129 -4
  19. package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
  20. package/lib/services/assessment/AssessmentOrchestrator.js +8 -0
  21. package/lib/services/assessment/PolicyComplianceGenerator.d.ts +119 -0
  22. package/lib/services/assessment/PolicyComplianceGenerator.d.ts.map +1 -0
  23. package/lib/services/assessment/PolicyComplianceGenerator.js +632 -0
  24. package/lib/services/assessment/config/annotationPatterns.d.ts +70 -0
  25. package/lib/services/assessment/config/annotationPatterns.d.ts.map +1 -0
  26. package/lib/services/assessment/config/annotationPatterns.js +305 -0
  27. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +22 -2
  28. package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
  29. package/lib/services/assessment/modules/ToolAnnotationAssessor.js +289 -152
  30. package/package.json +1 -1
@@ -11,75 +11,24 @@
11
11
  * Reference: Anthropic MCP Directory Policy #17
12
12
  */
13
13
  import { BaseAssessor } from "./BaseAssessor.js";
14
- /**
15
- * Patterns for inferring expected tool behavior from name
16
- */
17
- const READ_ONLY_PATTERNS = [
18
- /^get[_-]?/i,
19
- /^list[_-]?/i,
20
- /^fetch[_-]?/i,
21
- /^read[_-]?/i,
22
- /^query[_-]?/i,
23
- /^search[_-]?/i,
24
- /^find[_-]?/i,
25
- /^show[_-]?/i,
26
- /^view[_-]?/i,
27
- /^describe[_-]?/i,
28
- /^check[_-]?/i,
29
- /^verify[_-]?/i,
30
- /^validate[_-]?/i,
31
- /^count[_-]?/i,
32
- /^status[_-]?/i,
33
- /^info[_-]?/i,
34
- /^lookup[_-]?/i,
35
- /^browse[_-]?/i,
36
- /^preview[_-]?/i,
37
- /^download[_-]?/i, // Downloads but doesn't modify server state
38
- ];
39
- const DESTRUCTIVE_PATTERNS = [
40
- /^delete[_-]?/i,
41
- /^remove[_-]?/i,
42
- /^destroy[_-]?/i,
43
- /^drop[_-]?/i,
44
- /^purge[_-]?/i,
45
- /^clear[_-]?/i,
46
- /^wipe[_-]?/i,
47
- /^erase[_-]?/i,
48
- /^reset[_-]?/i,
49
- /^truncate[_-]?/i,
50
- /^revoke[_-]?/i,
51
- /^terminate[_-]?/i,
52
- /^cancel[_-]?/i,
53
- /^kill[_-]?/i,
54
- /^force[_-]?/i,
55
- ];
56
- const WRITE_PATTERNS = [
57
- /^create[_-]?/i,
58
- /^add[_-]?/i,
59
- /^insert[_-]?/i,
60
- /^update[_-]?/i,
61
- /^modify[_-]?/i,
62
- /^edit[_-]?/i,
63
- /^change[_-]?/i,
64
- /^set[_-]?/i,
65
- /^put[_-]?/i,
66
- /^patch[_-]?/i,
67
- /^post[_-]?/i,
68
- /^write[_-]?/i,
69
- /^save[_-]?/i,
70
- /^upload[_-]?/i,
71
- /^send[_-]?/i,
72
- /^submit[_-]?/i,
73
- /^publish[_-]?/i,
74
- /^enable[_-]?/i,
75
- /^disable[_-]?/i,
76
- /^start[_-]?/i,
77
- /^stop[_-]?/i,
78
- /^run[_-]?/i,
79
- /^execute[_-]?/i,
80
- ];
14
+ import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
15
+ // NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
16
+ // The patterns are now loaded from getDefaultCompiledPatterns() or custom config
81
17
  export class ToolAnnotationAssessor extends BaseAssessor {
82
18
  claudeBridge;
19
+ compiledPatterns;
20
+ constructor(config) {
21
+ super(config);
22
+ // Initialize with default patterns (can be overridden via setPatterns)
23
+ this.compiledPatterns = getDefaultCompiledPatterns();
24
+ }
25
+ /**
26
+ * Set custom compiled patterns for behavior inference
27
+ */
28
+ setPatterns(patterns) {
29
+ this.compiledPatterns = patterns;
30
+ this.log("Custom annotation patterns configured");
31
+ }
83
32
  /**
84
33
  * Set Claude Code Bridge for enhanced behavior inference
85
34
  */
@@ -104,6 +53,13 @@ export class ToolAnnotationAssessor extends BaseAssessor {
104
53
  let annotatedCount = 0;
105
54
  let missingAnnotationsCount = 0;
106
55
  let misalignedAnnotationsCount = 0;
56
+ // Track annotation sources
57
+ const annotationSourceCounts = {
58
+ mcp: 0,
59
+ sourceCode: 0,
60
+ inferred: 0,
61
+ none: 0,
62
+ };
107
63
  const useClaudeInference = this.isClaudeEnabled();
108
64
  if (useClaudeInference) {
109
65
  this.log("Claude Code integration enabled - using semantic behavior inference");
@@ -157,7 +113,23 @@ export class ToolAnnotationAssessor extends BaseAssessor {
157
113
  }
158
114
  else {
159
115
  missingAnnotationsCount++;
160
- // Emit annotation_missing event with tool details
116
+ }
117
+ // Track annotation source
118
+ const source = latestResult.annotationSource;
119
+ if (source === "mcp") {
120
+ annotationSourceCounts.mcp++;
121
+ }
122
+ else if (source === "source-code") {
123
+ annotationSourceCounts.sourceCode++;
124
+ }
125
+ else if (source === "inferred") {
126
+ annotationSourceCounts.inferred++;
127
+ }
128
+ else {
129
+ annotationSourceCounts.none++;
130
+ }
131
+ // Emit annotation_missing event with tool details
132
+ if (!latestResult.hasAnnotations) {
161
133
  if (context.onProgress && latestResult.inferredBehavior) {
162
134
  const annotations = this.extractAnnotations(tool);
163
135
  context.onProgress({
@@ -174,51 +146,92 @@ export class ToolAnnotationAssessor extends BaseAssessor {
174
146
  });
175
147
  }
176
148
  }
177
- // Emit annotation_misaligned events for each misalignment
149
+ // Emit appropriate event based on alignment status
178
150
  if (context.onProgress && latestResult.inferredBehavior) {
179
151
  const annotations = latestResult.annotations;
180
152
  const inferred = latestResult.inferredBehavior;
181
153
  const confidence = latestResult.claudeInference?.confidence ?? 50;
182
154
  const toolParams = this.extractToolParams(tool.inputSchema);
183
155
  const toolAnnotations = this.extractAnnotations(tool);
184
- // Check readOnlyHint misalignment
156
+ const alignmentStatus = latestResult.alignmentStatus;
157
+ // Check readOnlyHint mismatch
185
158
  if (annotations?.readOnlyHint !== undefined &&
186
159
  annotations.readOnlyHint !== inferred.expectedReadOnly) {
187
- context.onProgress({
188
- type: "annotation_misaligned",
189
- tool: tool.name,
190
- title: toolAnnotations.title,
191
- description: tool.description,
192
- parameters: toolParams,
193
- field: "readOnlyHint",
194
- actual: annotations.readOnlyHint,
195
- expected: inferred.expectedReadOnly,
196
- confidence,
197
- reason: `Tool has readOnlyHint=${annotations.readOnlyHint}, but ${inferred.reason}`,
198
- });
160
+ if (alignmentStatus === "REVIEW_RECOMMENDED") {
161
+ // Emit review_recommended for ambiguous cases
162
+ context.onProgress({
163
+ type: "annotation_review_recommended",
164
+ tool: tool.name,
165
+ title: toolAnnotations.title,
166
+ description: tool.description,
167
+ parameters: toolParams,
168
+ field: "readOnlyHint",
169
+ actual: annotations.readOnlyHint,
170
+ inferred: inferred.expectedReadOnly,
171
+ confidence: inferred.confidence,
172
+ isAmbiguous: inferred.isAmbiguous,
173
+ reason: inferred.reason,
174
+ });
175
+ }
176
+ else {
177
+ // Emit misaligned for high-confidence mismatches
178
+ context.onProgress({
179
+ type: "annotation_misaligned",
180
+ tool: tool.name,
181
+ title: toolAnnotations.title,
182
+ description: tool.description,
183
+ parameters: toolParams,
184
+ field: "readOnlyHint",
185
+ actual: annotations.readOnlyHint,
186
+ expected: inferred.expectedReadOnly,
187
+ confidence,
188
+ reason: `Tool has readOnlyHint=${annotations.readOnlyHint}, but ${inferred.reason}`,
189
+ });
190
+ }
199
191
  }
200
- // Check destructiveHint misalignment
192
+ // Check destructiveHint mismatch
201
193
  if (annotations?.destructiveHint !== undefined &&
202
194
  annotations.destructiveHint !== inferred.expectedDestructive) {
203
- context.onProgress({
204
- type: "annotation_misaligned",
205
- tool: tool.name,
206
- title: toolAnnotations.title,
207
- description: tool.description,
208
- parameters: toolParams,
209
- field: "destructiveHint",
210
- actual: annotations.destructiveHint,
211
- expected: inferred.expectedDestructive,
212
- confidence,
213
- reason: `Tool has destructiveHint=${annotations.destructiveHint}, but ${inferred.reason}`,
214
- });
195
+ if (alignmentStatus === "REVIEW_RECOMMENDED") {
196
+ // Emit review_recommended for ambiguous cases
197
+ context.onProgress({
198
+ type: "annotation_review_recommended",
199
+ tool: tool.name,
200
+ title: toolAnnotations.title,
201
+ description: tool.description,
202
+ parameters: toolParams,
203
+ field: "destructiveHint",
204
+ actual: annotations.destructiveHint,
205
+ inferred: inferred.expectedDestructive,
206
+ confidence: inferred.confidence,
207
+ isAmbiguous: inferred.isAmbiguous,
208
+ reason: inferred.reason,
209
+ });
210
+ }
211
+ else {
212
+ // Emit misaligned for high-confidence mismatches
213
+ context.onProgress({
214
+ type: "annotation_misaligned",
215
+ tool: tool.name,
216
+ title: toolAnnotations.title,
217
+ description: tool.description,
218
+ parameters: toolParams,
219
+ field: "destructiveHint",
220
+ actual: annotations.destructiveHint,
221
+ expected: inferred.expectedDestructive,
222
+ confidence,
223
+ reason: `Tool has destructiveHint=${annotations.destructiveHint}, but ${inferred.reason}`,
224
+ });
225
+ }
215
226
  }
216
227
  }
217
228
  }
218
229
  const status = this.determineAnnotationStatus(toolResults, context.tools.length);
219
230
  const explanation = this.generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
220
231
  const recommendations = this.generateRecommendations(toolResults);
221
- this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned`);
232
+ // Calculate new metrics and alignment breakdown
233
+ const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
234
+ this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
222
235
  // Return enhanced assessment if Claude was used
223
236
  if (useClaudeInference) {
224
237
  const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
@@ -233,6 +246,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
233
246
  status,
234
247
  explanation: this.generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
235
248
  recommendations: this.generateEnhancedRecommendations(toolResults),
249
+ metrics,
250
+ alignmentBreakdown,
251
+ annotationSources: annotationSourceCounts,
236
252
  claudeEnhanced: true,
237
253
  highConfidenceMisalignments,
238
254
  };
@@ -245,6 +261,9 @@ export class ToolAnnotationAssessor extends BaseAssessor {
245
261
  status,
246
262
  explanation,
247
263
  recommendations,
264
+ metrics,
265
+ alignmentBreakdown,
266
+ annotationSources: annotationSourceCounts,
248
267
  };
249
268
  }
250
269
  /**
@@ -433,6 +452,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
433
452
  }
434
453
  /**
435
454
  * Assess a single tool's annotations
455
+ * Now includes alignment status with confidence-aware logic
436
456
  */
437
457
  assessTool(tool) {
438
458
  const issues = [];
@@ -443,35 +463,66 @@ export class ToolAnnotationAssessor extends BaseAssessor {
443
463
  annotations.destructiveHint !== undefined;
444
464
  // Infer expected behavior from tool name
445
465
  const inferredBehavior = this.inferBehavior(tool.name, tool.description);
466
+ // Determine alignment status
467
+ let alignmentStatus = "ALIGNED";
446
468
  // Check for missing annotations
447
469
  if (!hasAnnotations) {
448
470
  issues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
449
471
  recommendations.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
472
+ alignmentStatus = "UNKNOWN";
450
473
  }
451
474
  else {
452
- // Check for misaligned annotations
453
- if (annotations.readOnlyHint !== undefined &&
454
- annotations.readOnlyHint !== inferredBehavior.expectedReadOnly) {
455
- issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
456
- recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
457
- }
458
- if (annotations.destructiveHint !== undefined &&
459
- annotations.destructiveHint !== inferredBehavior.expectedDestructive) {
460
- issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
461
- recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
475
+ // Check for misaligned annotations with confidence-aware logic
476
+ const readOnlyMismatch = annotations.readOnlyHint !== undefined &&
477
+ annotations.readOnlyHint !== inferredBehavior.expectedReadOnly;
478
+ const destructiveMismatch = annotations.destructiveHint !== undefined &&
479
+ annotations.destructiveHint !== inferredBehavior.expectedDestructive;
480
+ if (readOnlyMismatch || destructiveMismatch) {
481
+ if (inferredBehavior.isAmbiguous ||
482
+ inferredBehavior.confidence === "low") {
483
+ // Ambiguous case: REVIEW_RECOMMENDED, softer language
484
+ alignmentStatus = "REVIEW_RECOMMENDED";
485
+ if (readOnlyMismatch) {
486
+ issues.push(`Review recommended: readOnlyHint=${annotations.readOnlyHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
487
+ recommendations.push(`Verify readOnlyHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
488
+ }
489
+ if (destructiveMismatch) {
490
+ issues.push(`Review recommended: destructiveHint=${annotations.destructiveHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
491
+ recommendations.push(`Verify destructiveHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
492
+ }
493
+ }
494
+ else {
495
+ // High/medium confidence mismatch: MISALIGNED
496
+ alignmentStatus = "MISALIGNED";
497
+ if (readOnlyMismatch) {
498
+ issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
499
+ recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
500
+ }
501
+ if (destructiveMismatch) {
502
+ issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
503
+ recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
504
+ }
505
+ }
462
506
  }
463
507
  }
464
- // Check for destructive tools without explicit hint
508
+ // Check for destructive tools without explicit hint (only for high-confidence patterns)
465
509
  if (inferredBehavior.expectedDestructive &&
510
+ inferredBehavior.confidence !== "low" &&
466
511
  annotations.destructiveHint !== true) {
467
512
  issues.push("Tool appears destructive but destructiveHint is not set to true");
468
513
  recommendations.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
514
+ // Only upgrade to MISALIGNED if we have high confidence
515
+ if (inferredBehavior.confidence === "high") {
516
+ alignmentStatus = "MISALIGNED";
517
+ }
469
518
  }
470
519
  return {
471
520
  toolName: tool.name,
472
521
  hasAnnotations,
473
522
  annotations: hasAnnotations ? annotations : undefined,
523
+ annotationSource: annotations.source,
474
524
  inferredBehavior,
525
+ alignmentStatus,
475
526
  issues,
476
527
  recommendations,
477
528
  };
@@ -479,34 +530,65 @@ export class ToolAnnotationAssessor extends BaseAssessor {
479
530
  /**
480
531
  * Extract annotations from a tool
481
532
  * MCP SDK may have annotations in different locations
533
+ *
534
+ * Priority order:
535
+ * 1. tool.annotations (MCP 2024-11 spec) - "mcp" source
536
+ * 2. Direct properties on tool - "mcp" source
537
+ * 3. tool.metadata - "mcp" source
538
+ * 4. No annotations found - "none" source
482
539
  */
483
540
  extractAnnotations(tool) {
484
- // Try to find annotations in various locations
485
541
  const toolAny = tool;
486
- // Check direct properties
487
- let readOnlyHint = toolAny.readOnlyHint;
488
- let destructiveHint = toolAny.destructiveHint;
489
- let idempotentHint = toolAny.idempotentHint;
490
- let openWorldHint = toolAny.openWorldHint;
491
- // Check annotations object (MCP 2024-11 spec)
542
+ // Priority 1: Check annotations object (MCP 2024-11 spec) - primary source
492
543
  if (toolAny.annotations) {
493
- readOnlyHint = readOnlyHint ?? toolAny.annotations.readOnlyHint;
494
- destructiveHint = destructiveHint ?? toolAny.annotations.destructiveHint;
495
- idempotentHint = idempotentHint ?? toolAny.annotations.idempotentHint;
496
- openWorldHint = openWorldHint ?? toolAny.annotations.openWorldHint;
544
+ const hasAnnotations = toolAny.annotations.readOnlyHint !== undefined ||
545
+ toolAny.annotations.destructiveHint !== undefined;
546
+ if (hasAnnotations) {
547
+ return {
548
+ readOnlyHint: toolAny.annotations.readOnlyHint,
549
+ destructiveHint: toolAny.annotations.destructiveHint,
550
+ title: toolAny.annotations.title || toolAny.title,
551
+ description: tool.description,
552
+ idempotentHint: toolAny.annotations.idempotentHint,
553
+ openWorldHint: toolAny.annotations.openWorldHint,
554
+ source: "mcp",
555
+ };
556
+ }
497
557
  }
498
- // Check metadata (some servers use this)
558
+ // Priority 2: Check direct properties on tool object
559
+ if (toolAny.readOnlyHint !== undefined ||
560
+ toolAny.destructiveHint !== undefined) {
561
+ return {
562
+ readOnlyHint: toolAny.readOnlyHint,
563
+ destructiveHint: toolAny.destructiveHint,
564
+ title: toolAny.title,
565
+ description: tool.description,
566
+ idempotentHint: toolAny.idempotentHint,
567
+ openWorldHint: toolAny.openWorldHint,
568
+ source: "mcp",
569
+ };
570
+ }
571
+ // Priority 3: Check metadata (some servers use this)
499
572
  if (toolAny.metadata) {
500
- readOnlyHint = readOnlyHint ?? toolAny.metadata.readOnlyHint;
501
- destructiveHint = destructiveHint ?? toolAny.metadata.destructiveHint;
573
+ const hasMetadataAnnotations = toolAny.metadata.readOnlyHint !== undefined ||
574
+ toolAny.metadata.destructiveHint !== undefined;
575
+ if (hasMetadataAnnotations) {
576
+ return {
577
+ readOnlyHint: toolAny.metadata.readOnlyHint,
578
+ destructiveHint: toolAny.metadata.destructiveHint,
579
+ title: toolAny.metadata.title || toolAny.title,
580
+ description: tool.description,
581
+ idempotentHint: toolAny.metadata.idempotentHint,
582
+ openWorldHint: toolAny.metadata.openWorldHint,
583
+ source: "mcp",
584
+ };
585
+ }
502
586
  }
587
+ // No annotations found from MCP protocol
503
588
  return {
504
- readOnlyHint,
505
- destructiveHint,
506
- title: toolAny.title || toolAny.annotations?.title,
589
+ title: toolAny.title,
507
590
  description: tool.description,
508
- idempotentHint,
509
- openWorldHint,
591
+ source: "none",
510
592
  };
511
593
  }
512
594
  /**
@@ -534,46 +616,60 @@ export class ToolAnnotationAssessor extends BaseAssessor {
534
616
  }
535
617
  /**
536
618
  * Infer expected behavior from tool name and description
619
+ * Now returns confidence level and ambiguity flag for better handling
537
620
  */
538
621
  inferBehavior(toolName, description) {
539
- const lowerName = toolName.toLowerCase();
540
622
  const lowerDesc = (description || "").toLowerCase();
541
- // Check for destructive patterns first (higher priority)
542
- for (const pattern of DESTRUCTIVE_PATTERNS) {
543
- if (pattern.test(lowerName)) {
623
+ // Use the configurable pattern matching system
624
+ const patternMatch = matchToolPattern(toolName, this.compiledPatterns);
625
+ // Handle pattern match results
626
+ switch (patternMatch.category) {
627
+ case "ambiguous":
628
+ // Ambiguous patterns - don't make strong assertions
629
+ return {
630
+ expectedReadOnly: false,
631
+ expectedDestructive: false,
632
+ reason: `Tool name matches ambiguous pattern '${patternMatch.pattern}' - behavior varies by implementation context`,
633
+ confidence: "low",
634
+ isAmbiguous: true,
635
+ };
636
+ case "destructive":
544
637
  return {
545
638
  expectedReadOnly: false,
546
639
  expectedDestructive: true,
547
- reason: `Tool name matches destructive pattern: ${pattern.source}`,
640
+ reason: `Tool name matches destructive pattern: ${patternMatch.pattern}`,
641
+ confidence: "high",
642
+ isAmbiguous: false,
548
643
  };
549
- }
550
- }
551
- // Check for read-only patterns
552
- for (const pattern of READ_ONLY_PATTERNS) {
553
- if (pattern.test(lowerName)) {
644
+ case "readOnly":
554
645
  return {
555
646
  expectedReadOnly: true,
556
647
  expectedDestructive: false,
557
- reason: `Tool name matches read-only pattern: ${pattern.source}`,
648
+ reason: `Tool name matches read-only pattern: ${patternMatch.pattern}`,
649
+ confidence: "high",
650
+ isAmbiguous: false,
558
651
  };
559
- }
560
- }
561
- // Check for write patterns (not destructive but not read-only)
562
- for (const pattern of WRITE_PATTERNS) {
563
- if (pattern.test(lowerName)) {
652
+ case "write":
564
653
  return {
565
654
  expectedReadOnly: false,
566
655
  expectedDestructive: false,
567
- reason: `Tool name matches write pattern: ${pattern.source}`,
656
+ reason: `Tool name matches write pattern: ${patternMatch.pattern}`,
657
+ confidence: "medium",
658
+ isAmbiguous: false,
568
659
  };
569
- }
660
+ case "unknown":
661
+ default:
662
+ // Fall through to description-based analysis
663
+ break;
570
664
  }
571
- // Check description for hints
665
+ // Check description for hints (medium confidence)
572
666
  if (lowerDesc.includes("delete") || lowerDesc.includes("remove")) {
573
667
  return {
574
668
  expectedReadOnly: false,
575
669
  expectedDestructive: true,
576
670
  reason: "Description mentions delete/remove operations",
671
+ confidence: "medium",
672
+ isAmbiguous: false,
577
673
  };
578
674
  }
579
675
  if (lowerDesc.includes("read") ||
@@ -583,30 +679,43 @@ export class ToolAnnotationAssessor extends BaseAssessor {
583
679
  expectedReadOnly: true,
584
680
  expectedDestructive: false,
585
681
  reason: "Description suggests read-only operation",
682
+ confidence: "medium",
683
+ isAmbiguous: false,
586
684
  };
587
685
  }
588
- // Default: assume write (safer to warn about missing annotations)
686
+ // Default: assume write with low confidence (ambiguous)
589
687
  return {
590
688
  expectedReadOnly: false,
591
689
  expectedDestructive: false,
592
690
  reason: "Could not infer from name pattern - defaulting to write operation",
691
+ confidence: "low",
692
+ isAmbiguous: true,
593
693
  };
594
694
  }
595
695
  /**
596
- * Determine overall status
696
+ * Determine overall status using alignment status.
697
+ * Only MISALIGNED counts as failure; REVIEW_RECOMMENDED does not fail.
597
698
  */
598
699
  determineAnnotationStatus(results, totalTools) {
599
700
  if (totalTools === 0)
600
701
  return "PASS";
601
702
  const annotatedCount = results.filter((r) => r.hasAnnotations).length;
602
- const misalignedCount = results.filter((r) => r.issues.some((i) => i.includes("misaligned"))).length;
603
- const destructiveWithoutHint = results.filter((r) => r.issues.some((i) => i.includes("destructive") && i.includes("not set"))).length;
604
- // Destructive tools without proper hints = FAIL (check this FIRST)
703
+ // Only count actual MISALIGNED, not REVIEW_RECOMMENDED
704
+ const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
705
+ // Count high-confidence destructive tools without proper hints
706
+ const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
707
+ r.inferredBehavior?.confidence === "high" &&
708
+ r.annotations?.destructiveHint !== true).length;
709
+ // Destructive tools without proper hints = FAIL (critical safety issue)
605
710
  if (destructiveWithoutHint > 0) {
606
711
  return "FAIL";
607
712
  }
608
- // All tools annotated and no misalignments = PASS
609
- if (annotatedCount === totalTools && misalignedCount === 0) {
713
+ // High-confidence misalignments = FAIL
714
+ if (misalignedCount > 0) {
715
+ return "FAIL";
716
+ }
717
+ // All tools annotated = PASS
718
+ if (annotatedCount === totalTools) {
610
719
  return "PASS";
611
720
  }
612
721
  // Some annotations missing = NEED_MORE_INFO
@@ -620,6 +729,34 @@ export class ToolAnnotationAssessor extends BaseAssessor {
620
729
  }
621
730
  return "NEED_MORE_INFO";
622
731
  }
732
+ /**
733
+ * Calculate metrics and alignment breakdown for the assessment
734
+ */
735
+ calculateMetrics(results, totalTools) {
736
+ const alignmentBreakdown = {
737
+ aligned: results.filter((r) => r.alignmentStatus === "ALIGNED").length,
738
+ misaligned: results.filter((r) => r.alignmentStatus === "MISALIGNED")
739
+ .length,
740
+ reviewRecommended: results.filter((r) => r.alignmentStatus === "REVIEW_RECOMMENDED").length,
741
+ unknown: results.filter((r) => r.alignmentStatus === "UNKNOWN").length,
742
+ };
743
+ const annotatedCount = results.filter((r) => r.hasAnnotations).length;
744
+ const metrics = {
745
+ // Coverage: percentage of tools with annotations
746
+ coverage: totalTools > 0 ? (annotatedCount / totalTools) * 100 : 100,
747
+ // Consistency: percentage without contradictions (not MISALIGNED)
748
+ consistency: totalTools > 0
749
+ ? ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100
750
+ : 100,
751
+ // Correctness: percentage of annotated tools that are ALIGNED
752
+ correctness: annotatedCount > 0
753
+ ? (alignmentBreakdown.aligned / annotatedCount) * 100
754
+ : 0,
755
+ // Review required: count of tools needing manual review
756
+ reviewRequired: alignmentBreakdown.reviewRecommended,
757
+ };
758
+ return { metrics, alignmentBreakdown };
759
+ }
623
760
  /**
624
761
  * Generate explanation
625
762
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bryan-thompson/inspector-assessment-client",
3
- "version": "1.11.1",
3
+ "version": "1.13.0",
4
4
  "description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
5
5
  "license": "MIT",
6
6
  "author": "Bryan Thompson <bryan@triepod.ai>",