npm - @bryan-thompson/inspector-assessment-client - Version diff - 1.11.0 → 1.12.0 - Mend

@bryan-thompson/inspector-assessment-client 1.11.0 → 1.12.0

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (24) hide show

package/lib/services/assessment/modules/ToolAnnotationAssessor.js CHANGED Viewed

@@ -11,75 +11,24 @@
  * Reference: Anthropic MCP Directory Policy #17
  */
 import { BaseAssessor } from "./BaseAssessor.js";
-/**
- * Patterns for inferring expected tool behavior from name
- */
-const READ_ONLY_PATTERNS = [
-    /^get[_-]?/i,
-    /^list[_-]?/i,
-    /^fetch[_-]?/i,
-    /^read[_-]?/i,
-    /^query[_-]?/i,
-    /^search[_-]?/i,
-    /^find[_-]?/i,
-    /^show[_-]?/i,
-    /^view[_-]?/i,
-    /^describe[_-]?/i,
-    /^check[_-]?/i,
-    /^verify[_-]?/i,
-    /^validate[_-]?/i,
-    /^count[_-]?/i,
-    /^status[_-]?/i,
-    /^info[_-]?/i,
-    /^lookup[_-]?/i,
-    /^browse[_-]?/i,
-    /^preview[_-]?/i,
-    /^download[_-]?/i, // Downloads but doesn't modify server state
-];
-const DESTRUCTIVE_PATTERNS = [
-    /^delete[_-]?/i,
-    /^remove[_-]?/i,
-    /^destroy[_-]?/i,
-    /^drop[_-]?/i,
-    /^purge[_-]?/i,
-    /^clear[_-]?/i,
-    /^wipe[_-]?/i,
-    /^erase[_-]?/i,
-    /^reset[_-]?/i,
-    /^truncate[_-]?/i,
-    /^revoke[_-]?/i,
-    /^terminate[_-]?/i,
-    /^cancel[_-]?/i,
-    /^kill[_-]?/i,
-    /^force[_-]?/i,
-];
-const WRITE_PATTERNS = [
-    /^create[_-]?/i,
-    /^add[_-]?/i,
-    /^insert[_-]?/i,
-    /^update[_-]?/i,
-    /^modify[_-]?/i,
-    /^edit[_-]?/i,
-    /^change[_-]?/i,
-    /^set[_-]?/i,
-    /^put[_-]?/i,
-    /^patch[_-]?/i,
-    /^post[_-]?/i,
-    /^write[_-]?/i,
-    /^save[_-]?/i,
-    /^upload[_-]?/i,
-    /^send[_-]?/i,
-    /^submit[_-]?/i,
-    /^publish[_-]?/i,
-    /^enable[_-]?/i,
-    /^disable[_-]?/i,
-    /^start[_-]?/i,
-    /^stop[_-]?/i,
-    /^run[_-]?/i,
-    /^execute[_-]?/i,
-];
+import { getDefaultCompiledPatterns, matchToolPattern, } from "../config/annotationPatterns.js";
+// NOTE: Pattern arrays moved to config/annotationPatterns.ts for configurability
+// The patterns are now loaded from getDefaultCompiledPatterns() or custom config
 export class ToolAnnotationAssessor extends BaseAssessor {
     claudeBridge;
+    compiledPatterns;
+    constructor(config) {
+        super(config);
+        // Initialize with default patterns (can be overridden via setPatterns)
+        this.compiledPatterns = getDefaultCompiledPatterns();
+    }
+    /**
+     * Set custom compiled patterns for behavior inference
+     */
+    setPatterns(patterns) {
+        this.compiledPatterns = patterns;
+        this.log("Custom annotation patterns configured");
+    }
     /**
      * Set Claude Code Bridge for enhanced behavior inference
      */
@@ -151,17 +100,115 @@ export class ToolAnnotationAssessor extends BaseAssessor {
                     misalignedAnnotationsCount++;
                 }
             }
-            if (toolResults[toolResults.length - 1].hasAnnotations) {
+            const latestResult = toolResults[toolResults.length - 1];
+            if (latestResult.hasAnnotations) {
                 annotatedCount++;
             }
             else {
                 missingAnnotationsCount++;
+                // Emit annotation_missing event with tool details
+                if (context.onProgress && latestResult.inferredBehavior) {
+                    const annotations = this.extractAnnotations(tool);
+                    context.onProgress({
+                        type: "annotation_missing",
+                        tool: tool.name,
+                        title: annotations.title,
+                        description: tool.description,
+                        parameters: this.extractToolParams(tool.inputSchema),
+                        inferredBehavior: {
+                            expectedReadOnly: latestResult.inferredBehavior.expectedReadOnly,
+                            expectedDestructive: latestResult.inferredBehavior.expectedDestructive,
+                            reason: latestResult.inferredBehavior.reason,
+                        },
+                    });
+                }
+            }
+            // Emit appropriate event based on alignment status
+            if (context.onProgress && latestResult.inferredBehavior) {
+                const annotations = latestResult.annotations;
+                const inferred = latestResult.inferredBehavior;
+                const confidence = latestResult.claudeInference?.confidence ?? 50;
+                const toolParams = this.extractToolParams(tool.inputSchema);
+                const toolAnnotations = this.extractAnnotations(tool);
+                const alignmentStatus = latestResult.alignmentStatus;
+                // Check readOnlyHint mismatch
+                if (annotations?.readOnlyHint !== undefined &&
+                    annotations.readOnlyHint !== inferred.expectedReadOnly) {
+                    if (alignmentStatus === "REVIEW_RECOMMENDED") {
+                        // Emit review_recommended for ambiguous cases
+                        context.onProgress({
+                            type: "annotation_review_recommended",
+                            tool: tool.name,
+                            title: toolAnnotations.title,
+                            description: tool.description,
+                            parameters: toolParams,
+                            field: "readOnlyHint",
+                            actual: annotations.readOnlyHint,
+                            inferred: inferred.expectedReadOnly,
+                            confidence: inferred.confidence,
+                            isAmbiguous: inferred.isAmbiguous,
+                            reason: inferred.reason,
+                        });
+                    }
+                    else {
+                        // Emit misaligned for high-confidence mismatches
+                        context.onProgress({
+                            type: "annotation_misaligned",
+                            tool: tool.name,
+                            title: toolAnnotations.title,
+                            description: tool.description,
+                            parameters: toolParams,
+                            field: "readOnlyHint",
+                            actual: annotations.readOnlyHint,
+                            expected: inferred.expectedReadOnly,
+                            confidence,
+                            reason: `Tool has readOnlyHint=${annotations.readOnlyHint}, but ${inferred.reason}`,
+                        });
+                    }
+                }
+                // Check destructiveHint mismatch
+                if (annotations?.destructiveHint !== undefined &&
+                    annotations.destructiveHint !== inferred.expectedDestructive) {
+                    if (alignmentStatus === "REVIEW_RECOMMENDED") {
+                        // Emit review_recommended for ambiguous cases
+                        context.onProgress({
+                            type: "annotation_review_recommended",
+                            tool: tool.name,
+                            title: toolAnnotations.title,
+                            description: tool.description,
+                            parameters: toolParams,
+                            field: "destructiveHint",
+                            actual: annotations.destructiveHint,
+                            inferred: inferred.expectedDestructive,
+                            confidence: inferred.confidence,
+                            isAmbiguous: inferred.isAmbiguous,
+                            reason: inferred.reason,
+                        });
+                    }
+                    else {
+                        // Emit misaligned for high-confidence mismatches
+                        context.onProgress({
+                            type: "annotation_misaligned",
+                            tool: tool.name,
+                            title: toolAnnotations.title,
+                            description: tool.description,
+                            parameters: toolParams,
+                            field: "destructiveHint",
+                            actual: annotations.destructiveHint,
+                            expected: inferred.expectedDestructive,
+                            confidence,
+                            reason: `Tool has destructiveHint=${annotations.destructiveHint}, but ${inferred.reason}`,
+                        });
+                    }
+                }
             }
         }
         const status = this.determineAnnotationStatus(toolResults, context.tools.length);
         const explanation = this.generateExplanation(annotatedCount, missingAnnotationsCount, misalignedAnnotationsCount, context.tools.length);
         const recommendations = this.generateRecommendations(toolResults);
-        this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned`);
+        // Calculate new metrics and alignment breakdown
+        const { metrics, alignmentBreakdown } = this.calculateMetrics(toolResults, context.tools.length);
+        this.log(`Assessment complete: ${annotatedCount}/${context.tools.length} tools annotated, ${misalignedAnnotationsCount} misaligned, ${alignmentBreakdown.reviewRecommended} need review`);
         // Return enhanced assessment if Claude was used
         if (useClaudeInference) {
             const highConfidenceMisalignments = toolResults.filter((r) => r.claudeInference &&
@@ -176,6 +223,8 @@ export class ToolAnnotationAssessor extends BaseAssessor {
                 status,
                 explanation: this.generateEnhancedExplanation(annotatedCount, missingAnnotationsCount, highConfidenceMisalignments.length, context.tools.length),
                 recommendations: this.generateEnhancedRecommendations(toolResults),
+                metrics,
+                alignmentBreakdown,
                 claudeEnhanced: true,
                 highConfidenceMisalignments,
             };
@@ -188,6 +237,8 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             status,
             explanation,
             recommendations,
+            metrics,
+            alignmentBreakdown,
         };
     }
     /**
@@ -376,6 +427,7 @@ export class ToolAnnotationAssessor extends BaseAssessor {
     }
     /**
      * Assess a single tool's annotations
+     * Now includes alignment status with confidence-aware logic
      */
     assessTool(tool) {
         const issues = [];
@@ -386,35 +438,65 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             annotations.destructiveHint !== undefined;
         // Infer expected behavior from tool name
         const inferredBehavior = this.inferBehavior(tool.name, tool.description);
+        // Determine alignment status
+        let alignmentStatus = "ALIGNED";
         // Check for missing annotations
         if (!hasAnnotations) {
             issues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
             recommendations.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
+            alignmentStatus = "UNKNOWN";
         }
         else {
-            // Check for misaligned annotations
-            if (annotations.readOnlyHint !== undefined &&
-                annotations.readOnlyHint !== inferredBehavior.expectedReadOnly) {
-                issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
-                recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
-            }
-            if (annotations.destructiveHint !== undefined &&
-                annotations.destructiveHint !== inferredBehavior.expectedDestructive) {
-                issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
-                recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
+            // Check for misaligned annotations with confidence-aware logic
+            const readOnlyMismatch = annotations.readOnlyHint !== undefined &&
+                annotations.readOnlyHint !== inferredBehavior.expectedReadOnly;
+            const destructiveMismatch = annotations.destructiveHint !== undefined &&
+                annotations.destructiveHint !== inferredBehavior.expectedDestructive;
+            if (readOnlyMismatch || destructiveMismatch) {
+                if (inferredBehavior.isAmbiguous ||
+                    inferredBehavior.confidence === "low") {
+                    // Ambiguous case: REVIEW_RECOMMENDED, softer language
+                    alignmentStatus = "REVIEW_RECOMMENDED";
+                    if (readOnlyMismatch) {
+                        issues.push(`Review recommended: readOnlyHint=${annotations.readOnlyHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
+                        recommendations.push(`Verify readOnlyHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
+                    }
+                    if (destructiveMismatch) {
+                        issues.push(`Review recommended: destructiveHint=${annotations.destructiveHint} may or may not match '${tool.name}' behavior (confidence: ${inferredBehavior.confidence})`);
+                        recommendations.push(`Verify destructiveHint for ${tool.name}: pattern is ambiguous - manual review recommended`);
+                    }
+                }
+                else {
+                    // High/medium confidence mismatch: MISALIGNED
+                    alignmentStatus = "MISALIGNED";
+                    if (readOnlyMismatch) {
+                        issues.push(`Potentially misaligned readOnlyHint: set to ${annotations.readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
+                        recommendations.push(`Verify readOnlyHint for ${tool.name}: currently ${annotations.readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
+                    }
+                    if (destructiveMismatch) {
+                        issues.push(`Potentially misaligned destructiveHint: set to ${annotations.destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
+                        recommendations.push(`Verify destructiveHint for ${tool.name}: currently ${annotations.destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
+                    }
+                }
             }
         }
-        // Check for destructive tools without explicit hint
+        // Check for destructive tools without explicit hint (only for high-confidence patterns)
         if (inferredBehavior.expectedDestructive &&
+            inferredBehavior.confidence !== "low" &&
             annotations.destructiveHint !== true) {
             issues.push("Tool appears destructive but destructiveHint is not set to true");
             recommendations.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
+            // Only upgrade to MISALIGNED if we have high confidence
+            if (inferredBehavior.confidence === "high") {
+                alignmentStatus = "MISALIGNED";
+            }
         }
         return {
             toolName: tool.name,
             hasAnnotations,
             annotations: hasAnnotations ? annotations : undefined,
             inferredBehavior,
+            alignmentStatus,
             issues,
             recommendations,
         };
@@ -452,48 +534,85 @@ export class ToolAnnotationAssessor extends BaseAssessor {
             openWorldHint,
         };
     }
+    /**
+     * Extract parameters from tool input schema for event emission
+     */
+    extractToolParams(schema) {
+        if (!schema || typeof schema !== "object")
+            return [];
+        const s = schema;
+        if (!s.properties || typeof s.properties !== "object")
+            return [];
+        const required = new Set(Array.isArray(s.required) ? s.required : []);
+        const properties = s.properties;
+        return Object.entries(properties).map(([name, prop]) => {
+            const param = {
+                name,
+                type: prop.type || "any",
+                required: required.has(name),
+            };
+            if (prop.description) {
+                param.description = prop.description;
+            }
+            return param;
+        });
+    }
     /**
      * Infer expected behavior from tool name and description
+     * Now returns confidence level and ambiguity flag for better handling
      */
     inferBehavior(toolName, description) {
-        const lowerName = toolName.toLowerCase();
         const lowerDesc = (description || "").toLowerCase();
-        // Check for destructive patterns first (higher priority)
-        for (const pattern of DESTRUCTIVE_PATTERNS) {
-            if (pattern.test(lowerName)) {
+        // Use the configurable pattern matching system
+        const patternMatch = matchToolPattern(toolName, this.compiledPatterns);
+        // Handle pattern match results
+        switch (patternMatch.category) {
+            case "ambiguous":
+                // Ambiguous patterns - don't make strong assertions
+                return {
+                    expectedReadOnly: false,
+                    expectedDestructive: false,
+                    reason: `Tool name matches ambiguous pattern '${patternMatch.pattern}' - behavior varies by implementation context`,
+                    confidence: "low",
+                    isAmbiguous: true,
+                };
+            case "destructive":
                 return {
                     expectedReadOnly: false,
                     expectedDestructive: true,
-                    reason: `Tool name matches destructive pattern: ${pattern.source}`,
+                    reason: `Tool name matches destructive pattern: ${patternMatch.pattern}`,
+                    confidence: "high",
+                    isAmbiguous: false,
                 };
-            }
-        }
-        // Check for read-only patterns
-        for (const pattern of READ_ONLY_PATTERNS) {
-            if (pattern.test(lowerName)) {
+            case "readOnly":
                 return {
                     expectedReadOnly: true,
                     expectedDestructive: false,
-                    reason: `Tool name matches read-only pattern: ${pattern.source}`,
+                    reason: `Tool name matches read-only pattern: ${patternMatch.pattern}`,
+                    confidence: "high",
+                    isAmbiguous: false,
                 };
-            }
-        }
-        // Check for write patterns (not destructive but not read-only)
-        for (const pattern of WRITE_PATTERNS) {
-            if (pattern.test(lowerName)) {
+            case "write":
                 return {
                     expectedReadOnly: false,
                     expectedDestructive: false,
-                    reason: `Tool name matches write pattern: ${pattern.source}`,
+                    reason: `Tool name matches write pattern: ${patternMatch.pattern}`,
+                    confidence: "medium",
+                    isAmbiguous: false,
                 };
-            }
+            case "unknown":
+            default:
+                // Fall through to description-based analysis
+                break;
         }
-        // Check description for hints
+        // Check description for hints (medium confidence)
         if (lowerDesc.includes("delete") || lowerDesc.includes("remove")) {
             return {
                 expectedReadOnly: false,
                 expectedDestructive: true,
                 reason: "Description mentions delete/remove operations",
+                confidence: "medium",
+                isAmbiguous: false,
             };
         }
         if (lowerDesc.includes("read") ||
@@ -503,30 +622,43 @@ export class ToolAnnotationAssessor extends BaseAssessor {
                 expectedReadOnly: true,
                 expectedDestructive: false,
                 reason: "Description suggests read-only operation",
+                confidence: "medium",
+                isAmbiguous: false,
             };
         }
-        // Default: assume write (safer to warn about missing annotations)
+        // Default: assume write with low confidence (ambiguous)
         return {
             expectedReadOnly: false,
             expectedDestructive: false,
             reason: "Could not infer from name pattern - defaulting to write operation",
+            confidence: "low",
+            isAmbiguous: true,
         };
     }
     /**
-     * Determine overall status
+     * Determine overall status using alignment status.
+     * Only MISALIGNED counts as failure; REVIEW_RECOMMENDED does not fail.
      */
     determineAnnotationStatus(results, totalTools) {
         if (totalTools === 0)
             return "PASS";
         const annotatedCount = results.filter((r) => r.hasAnnotations).length;
-        const misalignedCount = results.filter((r) => r.issues.some((i) => i.includes("misaligned"))).length;
-        const destructiveWithoutHint = results.filter((r) => r.issues.some((i) => i.includes("destructive") && i.includes("not set"))).length;
-        // Destructive tools without proper hints = FAIL (check this FIRST)
+        // Only count actual MISALIGNED, not REVIEW_RECOMMENDED
+        const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
+        // Count high-confidence destructive tools without proper hints
+        const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
+            r.inferredBehavior?.confidence === "high" &&
+            r.annotations?.destructiveHint !== true).length;
+        // Destructive tools without proper hints = FAIL (critical safety issue)
         if (destructiveWithoutHint > 0) {
             return "FAIL";
         }
-        // All tools annotated and no misalignments = PASS
-        if (annotatedCount === totalTools && misalignedCount === 0) {
+        // High-confidence misalignments = FAIL
+        if (misalignedCount > 0) {
+            return "FAIL";
+        }
+        // All tools annotated = PASS
+        if (annotatedCount === totalTools) {
             return "PASS";
         }
         // Some annotations missing = NEED_MORE_INFO
@@ -540,6 +672,34 @@ export class ToolAnnotationAssessor extends BaseAssessor {
         }
         return "NEED_MORE_INFO";
     }
+    /**
+     * Calculate metrics and alignment breakdown for the assessment
+     */
+    calculateMetrics(results, totalTools) {
+        const alignmentBreakdown = {
+            aligned: results.filter((r) => r.alignmentStatus === "ALIGNED").length,
+            misaligned: results.filter((r) => r.alignmentStatus === "MISALIGNED")
+                .length,
+            reviewRecommended: results.filter((r) => r.alignmentStatus === "REVIEW_RECOMMENDED").length,
+            unknown: results.filter((r) => r.alignmentStatus === "UNKNOWN").length,
+        };
+        const annotatedCount = results.filter((r) => r.hasAnnotations).length;
+        const metrics = {
+            // Coverage: percentage of tools with annotations
+            coverage: totalTools > 0 ? (annotatedCount / totalTools) * 100 : 100,
+            // Consistency: percentage without contradictions (not MISALIGNED)
+            consistency: totalTools > 0
+                ? ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100
+                : 100,
+            // Correctness: percentage of annotated tools that are ALIGNED
+            correctness: annotatedCount > 0
+                ? (alignmentBreakdown.aligned / annotatedCount) * 100
+                : 0,
+            // Review required: count of tools needing manual review
+            reviewRequired: alignmentBreakdown.reviewRecommended,
+        };
+        return { metrics, alignmentBreakdown };
+    }
     /**
      * Generate explanation
      */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@bryan-thompson/inspector-assessment-client",
-  "version": "1.11.0",
+  "version": "1.12.0",
   "description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
   "license": "MIT",
   "author": "Bryan Thompson <bryan@triepod.ai>",