@bryan-thompson/inspector-assessment-client 1.27.0 → 1.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-CJWH8Ytw.js → OAuthCallback-9Gbb39Ii.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-DL5adXJw.js → OAuthDebugCallback-B76J2MBn.js} +1 -1
- package/dist/assets/{index-Cu9XzUwB.js → index-CHTOR9VI.js} +77 -39
- package/dist/index.html +1 -1
- package/lib/lib/assessment/configTypes.d.ts +1 -0
- package/lib/lib/assessment/configTypes.d.ts.map +1 -1
- package/lib/lib/assessment/configTypes.js +10 -0
- package/lib/lib/assessment/extendedTypes.d.ts +74 -0
- package/lib/lib/assessment/extendedTypes.d.ts.map +1 -1
- package/lib/lib/assessment/resultTypes.d.ts +11 -1
- package/lib/lib/assessment/resultTypes.d.ts.map +1 -1
- package/lib/lib/securityPatterns.d.ts +8 -3
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +205 -3
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +1 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +31 -1
- package/lib/services/assessment/modules/FileModularizationAssessor.d.ts +87 -0
- package/lib/services/assessment/modules/FileModularizationAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/FileModularizationAssessor.js +475 -0
- package/lib/services/assessment/modules/TemporalAssessor.d.ts +5 -129
- package/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/TemporalAssessor.js +18 -554
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +10 -70
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +32 -625
- package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts +65 -0
- package/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/AlignmentChecker.js +289 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts +22 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/ClaudeIntegration.js +139 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.d.ts +20 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/EventEmitter.js +100 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts +25 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/ExplanationGenerator.js +122 -0
- package/lib/services/assessment/modules/annotations/index.d.ts +5 -0
- package/lib/services/assessment/modules/annotations/index.d.ts.map +1 -1
- package/lib/services/assessment/modules/annotations/index.js +8 -0
- package/lib/services/assessment/modules/annotations/types.d.ts +33 -0
- package/lib/services/assessment/modules/annotations/types.d.ts.map +1 -0
- package/lib/services/assessment/modules/annotations/types.js +7 -0
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts +3 -0
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SafeResponseDetector.js +14 -1
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts +56 -0
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.js +121 -0
- package/lib/services/assessment/modules/securityTests/SecurityPayloadGenerator.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityPayloadGenerator.js +13 -0
- package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +24 -0
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts +80 -0
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.d.ts.map +1 -1
- package/lib/services/assessment/modules/securityTests/SecurityResponseAnalyzer.js +273 -3
- package/lib/services/assessment/modules/temporal/MutationDetector.d.ts +75 -0
- package/lib/services/assessment/modules/temporal/MutationDetector.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/MutationDetector.js +147 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts +112 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/VarianceClassifier.js +427 -0
- package/lib/services/assessment/modules/temporal/index.d.ts +10 -0
- package/lib/services/assessment/modules/temporal/index.d.ts.map +1 -0
- package/lib/services/assessment/modules/temporal/index.js +9 -0
- package/package.json +1 -1
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Alignment Checker Module
|
|
3
|
+
* Handles tool annotation alignment detection, extraction, and metrics calculation
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
8
|
+
import type { ToolAnnotationResult, AssessmentStatus, ToolParamProgress, AnnotationSource } from "../../../../lib/assessmentTypes.js";
|
|
9
|
+
import type { CompiledPatterns, ServerPersistenceContext } from "../../config/annotationPatterns.js";
|
|
10
|
+
/**
|
|
11
|
+
* Extracted annotation structure from a tool
|
|
12
|
+
*/
|
|
13
|
+
export interface ExtractedAnnotations {
|
|
14
|
+
readOnlyHint?: boolean; // copied unchanged from whichever source supplied the hints
|
|
15
|
+
destructiveHint?: boolean; // copied unchanged from whichever source supplied the hints
|
|
16
|
+
title?: string; // title found on the hint source, falling back to the tool-level title
|
|
17
|
+
description?: string; // extractAnnotations always fills this from tool.description
|
|
18
|
+
idempotentHint?: boolean; // taken from the same source as the read-only/destructive hints
|
|
19
|
+
openWorldHint?: boolean; // taken from the same source as the read-only/destructive hints
|
|
20
|
+
source: AnnotationSource; // extractAnnotations sets "mcp" when hints were found, "none" otherwise
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Alignment metrics result
|
|
24
|
+
*/
|
|
25
|
+
export interface AlignmentMetricsResult {
|
|
26
|
+
metrics: {
|
|
27
|
+
coverage: number; // percent (0-100) of totalTools whose result has annotations; 100 when totalTools is 0
|
|
28
|
+
consistency: number; // percent (0-100) of totalTools not counted as MISALIGNED; 100 when totalTools is 0
|
|
29
|
+
correctness: number; // percent of annotated results that are ALIGNED; 0 when nothing is annotated
|
|
30
|
+
reviewRequired: number; // a count (not a percent): same value as alignmentBreakdown.reviewRecommended
|
|
31
|
+
};
|
|
32
|
+
alignmentBreakdown: {
|
|
33
|
+
aligned: number; // results with alignmentStatus "ALIGNED"
|
|
34
|
+
misaligned: number; // results with alignmentStatus "MISALIGNED"
|
|
35
|
+
reviewRecommended: number; // results with alignmentStatus "REVIEW_RECOMMENDED"
|
|
36
|
+
unknown: number; // results with alignmentStatus "UNKNOWN"
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Extract annotations from a tool
|
|
41
|
+
* Checks multiple sources in priority order: annotations object, direct properties, metadata
|
|
42
|
+
*/
|
|
43
|
+
// Sources are tried in order: tool.annotations, properties directly on the tool, tool.metadata.
// The first one that defines readOnlyHint or destructiveHint wins and yields source "mcp";
// if none does, only title/description are returned with source "none".
export declare function extractAnnotations(tool: Tool): ExtractedAnnotations;
|
|
44
|
+
/**
|
|
45
|
+
* Extract extended metadata from tool (Issue #54)
|
|
46
|
+
* Extracts rate limits, permissions, return schemas, and bulk operation support
|
|
47
|
+
*/
|
|
48
|
+
// Returns undefined when the tool carries no rate-limit, permission, output-schema,
// or bulk-operation information at all.
export declare function extractExtendedMetadata(tool: Tool): ToolAnnotationResult["extendedMetadata"];
|
|
49
|
+
/**
|
|
50
|
+
* Extract parameters from tool input schema
|
|
51
|
+
*/
|
|
52
|
+
// Returns [] when `schema` is not an object or has no object-valued `properties`.
// Each entry's type falls back to "any"; `required` reflects membership in schema.required.
export declare function extractToolParams(schema: unknown): ToolParamProgress[];
|
|
53
|
+
/**
|
|
54
|
+
* Assess a single tool's annotations
|
|
55
|
+
*/
|
|
56
|
+
// compiledPatterns and the optional persistenceContext are passed straight through to the
// name/description behavior inference; the result bundles annotations, inferred behavior,
// alignment status, issues, recommendations, description-poisoning scan and extended metadata.
export declare function assessSingleTool(tool: Tool, compiledPatterns: CompiledPatterns, persistenceContext?: ServerPersistenceContext): ToolAnnotationResult;
|
|
57
|
+
/**
|
|
58
|
+
* Determine overall status based on tool results
|
|
59
|
+
*/
|
|
60
|
+
// "PASS" when totalTools is 0. "FAIL" if any result has a poisoned description, is MISALIGNED,
// or is inferred destructive with "high" confidence without destructiveHint === true.
// Otherwise "PASS" when every tool is annotated, "FAIL" below 50% annotated, else "NEED_MORE_INFO".
export declare function determineAnnotationStatus(results: ToolAnnotationResult[], totalTools: number): AssessmentStatus;
|
|
61
|
+
/**
|
|
62
|
+
* Calculate metrics and alignment breakdown
|
|
63
|
+
*/
|
|
64
|
+
// Counts results per alignmentStatus and derives percentage metrics from those counts;
// totalTools is the denominator for coverage and consistency.
export declare function calculateMetrics(results: ToolAnnotationResult[], totalTools: number): AlignmentMetricsResult;
|
|
65
|
+
//# sourceMappingURL=AlignmentChecker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AlignmentChecker.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/AlignmentChecker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,gBAAgB,EAEhB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACzB,MAAM,iCAAiC,CAAC;AAsEzC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,kBAAkB,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,IAAI,GAAG,oBAAoB,CA8DnE;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,IAAI,GACT,oBAAoB,CAAC,kBAAkB,CAAC,CA6D1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,iBAAiB,EAAE,CAqBtE;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,EACV,gBAAgB,EAAE,gBAAgB,EAClC,kBAAkB,CAAC,EAAE,wBAAwB,GAC5C,oBAAoB,CAkItB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,gBAAgB,CA8BlB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,sBAAsB,CA2BxB"}
|
|
@@ -0,0 +1,289 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Alignment Checker Module
|
|
3
|
+
* Handles tool annotation alignment detection, extraction, and metrics calculation
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
import { scanDescriptionForPoisoning } from "./DescriptionPoisoningDetector.js";
|
|
8
|
+
import { detectAnnotationDeception, isActionableConfidence, } from "./AnnotationDeceptionDetector.js";
|
|
9
|
+
import { inferBehavior } from "./BehaviorInference.js";
|
|
10
|
+
/**
|
|
11
|
+
* Extract annotations from a tool
|
|
12
|
+
* Checks multiple sources in priority order: annotations object, direct properties, metadata
|
|
13
|
+
*/
|
|
14
|
+
export function extractAnnotations(tool) {
    // A holder "declares hints" when it defines at least one of the two
    // behaviour hints; other fields alone do not qualify it.
    const declaresHints = (holder) => !(holder.readOnlyHint === undefined &&
        holder.destructiveHint === undefined);
    // Candidate holders, highest priority first:
    //   1. the `annotations` object (MCP 2024-11 spec)
    //   2. properties placed directly on the tool
    //   3. the `metadata` object
    const candidates = [tool.annotations, tool, tool.metadata];
    for (const holder of candidates) {
        if (!holder || !declaresHints(holder)) {
            continue;
        }
        return {
            readOnlyHint: holder.readOnlyHint,
            destructiveHint: holder.destructiveHint,
            // For the tool itself this collapses to `tool.title`.
            title: holder.title || tool.title,
            description: tool.description,
            idempotentHint: holder.idempotentHint,
            openWorldHint: holder.openWorldHint,
            source: "mcp",
        };
    }
    // No holder declared any hint: report only the descriptive fields.
    return {
        title: tool.title,
        description: tool.description,
        source: "none",
    };
}
|
|
67
|
+
/**
|
|
68
|
+
* Extract extended metadata from tool (Issue #54)
|
|
69
|
+
* Extracts rate limits, permissions, return schemas, and bulk operation support
|
|
70
|
+
*/
|
|
71
|
+
export function extractExtendedMetadata(tool) {
    // Collects optional, non-standard tool metadata (rate limits, permissions,
    // output schema, bulk-operation support) from the tool itself, its
    // `annotations` object and its `metadata` object.
    // Returns the collected object, or undefined when nothing was found.
    const extendedTool = tool;
    const metadata = {};
    // Rate limiting - first truthy value among direct props, annotations, metadata.
    const rateLimit = extendedTool.rateLimit ||
        extendedTool.annotations?.rateLimit ||
        extendedTool.metadata?.rateLimit;
    if (rateLimit && typeof rateLimit === "object") {
        metadata.rateLimit = {
            windowMs: rateLimit.windowMs,
            maxRequests: rateLimit.maxRequests,
            requestsPerMinute: rateLimit.requestsPerMinute,
            requestsPerSecond: rateLimit.requestsPerSecond,
        };
    }
    // Permissions - a single value is normalised to a one-element list and
    // non-string entries are dropped.
    const permissions = extendedTool.requiredPermission ||
        extendedTool.permissions ||
        extendedTool.annotations?.permissions ||
        extendedTool.metadata?.requiredPermission ||
        extendedTool.metadata?.permissions;
    if (permissions) {
        const required = Array.isArray(permissions) ? permissions : [permissions];
        const scopes = extendedTool.scopes ||
            extendedTool.annotations?.scopes ||
            extendedTool.metadata?.scopes;
        metadata.permissions = {
            required: required.filter((p) => typeof p === "string"),
            scopes: Array.isArray(scopes)
                ? scopes.filter((s) => typeof s === "string")
                : undefined,
        };
    }
    // Return schema - check outputSchema (MCP 2025-06-18 spec)
    if (extendedTool.outputSchema) {
        metadata.returnSchema = {
            hasSchema: true,
            schema: extendedTool.outputSchema,
        };
    }
    // Bulk operations. The flags are inspected individually rather than
    // chained with `||`: an `||` chain collapses an explicit `false` on an
    // earlier source into `undefined`, which made "declared as unsupported"
    // indistinguishable from "not declared" depending on where the flag lived.
    const bulkFlags = [
        extendedTool.supportsBulkOperations,
        extendedTool.annotations?.supportsBulkOperations,
        extendedTool.metadata?.supportsBulkOperations,
    ];
    const bulkDeclared = bulkFlags.some((flag) => flag !== undefined);
    const maxBatchSize = extendedTool.metadata?.maxBatchSize;
    if (bulkDeclared || maxBatchSize !== undefined) {
        metadata.bulkOperations = {
            // Supported when any source says so (same result as the old chain).
            supported: bulkFlags.some(Boolean),
            maxBatchSize: typeof maxBatchSize === "number" ? maxBatchSize : undefined,
        };
    }
    return Object.keys(metadata).length > 0 ? metadata : undefined;
}
|
|
124
|
+
/**
|
|
125
|
+
* Extract parameters from tool input schema
|
|
126
|
+
*/
|
|
127
|
+
export function extractToolParams(schema) {
    // Flattens a JSON-Schema-like input schema into a list of
    // { name, type, required, description? } records, one per property.
    // Returns [] when the schema or its `properties` is missing or not an object.
    if (!schema || typeof schema !== "object") {
        return [];
    }
    const { properties, required } = schema;
    if (!properties || typeof properties !== "object") {
        return [];
    }
    const requiredNames = new Set(Array.isArray(required) ? required : []);
    return Object.entries(properties).map(([name, prop]) => {
        // A property schema is not guaranteed to be an object: JSON Schema
        // allows boolean schemas and a malformed server may send null.
        // Treat anything else as "no details" instead of throwing on null.
        const definition = prop !== null && typeof prop === "object" ? prop : {};
        const param = {
            name,
            type: definition.type || "any",
            required: requiredNames.has(name),
        };
        if (definition.description) {
            param.description = definition.description;
        }
        return param;
    });
}
|
|
147
|
+
/**
|
|
148
|
+
* Assess a single tool's annotations
|
|
149
|
+
*/
|
|
150
|
+
export function assessSingleTool(tool, compiledPatterns, persistenceContext) {
    // Builds the per-tool annotation assessment: declared annotations,
    // behaviour inferred from name/description, alignment between the two,
    // a description-poisoning scan and any extended metadata.
    const foundIssues = [];
    const advice = [];
    const declared = extractAnnotations(tool);
    const { readOnlyHint, destructiveHint } = declared;
    const hasAnnotations = !(readOnlyHint === undefined && destructiveHint === undefined);
    const inferredBehavior = inferBehavior(tool.name, tool.description, compiledPatterns, persistenceContext);
    // Without annotations there is nothing to align against.
    let alignmentStatus = hasAnnotations ? "ALIGNED" : "UNKNOWN";
    // Deception detection only makes sense when something was declared.
    const deception = hasAnnotations
        ? detectAnnotationDeception(tool.name, { readOnlyHint, destructiveHint })
        : null;
    if (!hasAnnotations) {
        foundIssues.push("Missing tool annotations (readOnlyHint, destructiveHint)");
        advice.push(`Add annotations to ${tool.name}: readOnlyHint=${inferredBehavior.expectedReadOnly}, destructiveHint=${inferredBehavior.expectedDestructive}`);
    }
    else if (deception) {
        // The detector's verdict overrides the pattern-based inference.
        alignmentStatus = "MISALIGNED";
        foundIssues.push(`DECEPTIVE ANNOTATION: ${deception.reason}`);
        advice.push(`CRITICAL: Fix deceptive ${deception.field} for ${tool.name} - tool name contains '${deception.matchedKeyword}' which contradicts the annotation`);
        if (deception.field === "readOnlyHint") {
            inferredBehavior.expectedReadOnly = false;
        }
        else {
            inferredBehavior.expectedDestructive = true;
        }
        inferredBehavior.confidence = "high";
        inferredBehavior.isAmbiguous = false;
        inferredBehavior.reason = deception.reason;
    }
    else {
        // Compare each declared hint with the inferred expectation.
        const readOnlyMismatch = readOnlyHint !== undefined &&
            readOnlyHint !== inferredBehavior.expectedReadOnly;
        const destructiveMismatch = destructiveHint !== undefined &&
            destructiveHint !== inferredBehavior.expectedDestructive;
        // A mismatch is only reported when the inference is unambiguous
        // and its confidence is actionable.
        const reportable = (readOnlyMismatch || destructiveMismatch) &&
            !inferredBehavior.isAmbiguous &&
            isActionableConfidence(inferredBehavior.confidence);
        if (reportable) {
            alignmentStatus = "MISALIGNED";
            if (readOnlyMismatch) {
                foundIssues.push(`Potentially misaligned readOnlyHint: set to ${readOnlyHint}, expected ${inferredBehavior.expectedReadOnly} based on tool name pattern`);
                advice.push(`Verify readOnlyHint for ${tool.name}: currently ${readOnlyHint}, tool name suggests ${inferredBehavior.expectedReadOnly}`);
            }
            if (destructiveMismatch) {
                foundIssues.push(`Potentially misaligned destructiveHint: set to ${destructiveHint}, expected ${inferredBehavior.expectedDestructive} based on tool name pattern`);
                advice.push(`Verify destructiveHint for ${tool.name}: currently ${destructiveHint}, tool name suggests ${inferredBehavior.expectedDestructive}`);
            }
        }
    }
    // Destructive-looking tools must carry an explicit destructiveHint=true;
    // this runs for annotated and unannotated tools alike.
    const lacksDestructiveHint = inferredBehavior.expectedDestructive &&
        isActionableConfidence(inferredBehavior.confidence) &&
        destructiveHint !== true;
    if (lacksDestructiveHint) {
        foundIssues.push("Tool appears destructive but destructiveHint is not set to true");
        advice.push(`Set destructiveHint=true for ${tool.name} - this tool appears to perform destructive operations`);
        if (inferredBehavior.confidence === "high") {
            alignmentStatus = "MISALIGNED";
        }
    }
    // Scan for description poisoning
    const descriptionPoisoning = scanDescriptionForPoisoning(tool);
    if (descriptionPoisoning.detected) {
        const patternNames = descriptionPoisoning.patterns.map((p) => p.name).join(", ");
        foundIssues.push(`Tool description contains suspicious patterns: ${patternNames}`);
        advice.push(`Review ${tool.name} description for potential prompt injection or hidden instructions`);
    }
    // Extract extended metadata (Issue #54)
    const extendedMetadata = extractExtendedMetadata(tool);
    return {
        toolName: tool.name,
        hasAnnotations,
        annotations: hasAnnotations ? declared : undefined,
        annotationSource: declared.source,
        inferredBehavior,
        alignmentStatus,
        issues: foundIssues,
        recommendations: advice,
        descriptionPoisoning,
        extendedMetadata,
    };
}
|
|
239
|
+
/**
|
|
240
|
+
* Determine overall status based on tool results
|
|
241
|
+
*/
|
|
242
|
+
export function determineAnnotationStatus(results, totalTools) {
    // Rolls the per-tool results up into one status string.
    // With no tools there is nothing to fail.
    if (totalTools === 0) {
        return "PASS";
    }
    // Any single one of these findings fails the whole assessment.
    const isBlocking = (r) => r.descriptionPoisoning?.detected === true ||
        (r.inferredBehavior?.expectedDestructive === true &&
            r.inferredBehavior?.confidence === "high" &&
            r.annotations?.destructiveHint !== true) ||
        r.alignmentStatus === "MISALIGNED";
    if (results.some(isBlocking)) {
        return "FAIL";
    }
    // Otherwise the verdict depends on how many tools are annotated.
    const annotatedCount = results.reduce((count, r) => (r.hasAnnotations ? count + 1 : count), 0);
    if (annotatedCount === totalTools) {
        return "PASS";
    }
    return annotatedCount / totalTools < 0.5 ? "FAIL" : "NEED_MORE_INFO";
}
|
|
266
|
+
/**
|
|
267
|
+
* Calculate metrics and alignment breakdown
|
|
268
|
+
*/
|
|
269
|
+
export function calculateMetrics(results, totalTools) {
    // Tallies results per alignment status and derives percentage metrics.
    const bucketForStatus = new Map([
        ["ALIGNED", "aligned"],
        ["MISALIGNED", "misaligned"],
        ["REVIEW_RECOMMENDED", "reviewRecommended"],
        ["UNKNOWN", "unknown"],
    ]);
    const alignmentBreakdown = {
        aligned: 0,
        misaligned: 0,
        reviewRecommended: 0,
        unknown: 0,
    };
    let annotatedCount = 0;
    // One pass: bump the matching bucket (unrecognised statuses are ignored)
    // and count annotated tools.
    results.forEach((r) => {
        const bucket = bucketForStatus.get(r.alignmentStatus);
        if (bucket !== undefined) {
            alignmentBreakdown[bucket] += 1;
        }
        if (r.hasAnnotations) {
            annotatedCount += 1;
        }
    });
    // With no tools, coverage and consistency are reported as 100.
    let coverage = 100;
    let consistency = 100;
    if (totalTools > 0) {
        coverage = (annotatedCount / totalTools) * 100;
        consistency = ((totalTools - alignmentBreakdown.misaligned) / totalTools) * 100;
    }
    // Correctness is relative to annotated tools only; 0 when there are none.
    let correctness = 0;
    if (annotatedCount > 0) {
        correctness = (alignmentBreakdown.aligned / annotatedCount) * 100;
    }
    const metrics = {
        coverage,
        consistency,
        correctness,
        reviewRequired: alignmentBreakdown.reviewRecommended,
    };
    return { metrics, alignmentBreakdown };
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude Integration Module
|
|
3
|
+
* Handles Claude-enhanced tool behavior inference
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
8
|
+
import type { ToolAnnotationResult } from "../../../../lib/assessmentTypes.js";
|
|
9
|
+
import type { ClaudeCodeBridge } from "../../lib/claudeCodeBridge.js";
|
|
10
|
+
import type { Logger } from "../../lib/logger.js";
|
|
11
|
+
import type { EnhancedToolAnnotationResult } from "./types.js";
|
|
12
|
+
export type { EnhancedToolAnnotationResult } from "./types.js";
|
|
13
|
+
/**
|
|
14
|
+
* Enhance tool assessment with Claude inference
|
|
15
|
+
*/
|
|
16
|
+
// Resolves to baseResult plus a `claudeInference` section. When claudeBridge is undefined,
// when the bridge returns no inference, or when it throws (reported through logger.error),
// that section is built from baseResult.inferredBehavior and marked source "pattern-based".
export declare function enhanceWithClaudeInference(tool: Tool, baseResult: ToolAnnotationResult, claudeBridge: ClaudeCodeBridge | undefined, logger: Logger): Promise<EnhancedToolAnnotationResult>;
|
|
17
|
+
/**
|
|
18
|
+
* Create pattern-based Claude inference fallback
|
|
19
|
+
* Used when Claude enhancement is not enabled
|
|
20
|
+
*/
|
|
21
|
+
// Synchronous counterpart: copies baseResult and adds a `claudeInference` section derived from
// baseResult.inferredBehavior, with a fixed confidence of 50 and source "pattern-based".
export declare function createPatternBasedInference(baseResult: ToolAnnotationResult): EnhancedToolAnnotationResult;
|
|
22
|
+
//# sourceMappingURL=ClaudeIntegration.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ClaudeIntegration.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/ClaudeIntegration.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAClE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,kBAAkB,CAAC;AAC/C,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,SAAS,CAAC;AAG5D,YAAY,EAAE,4BAA4B,EAAE,MAAM,SAAS,CAAC;AAE5D;;GAEG;AACH,wBAAsB,0BAA0B,CAC9C,IAAI,EAAE,IAAI,EACV,UAAU,EAAE,oBAAoB,EAChC,YAAY,EAAE,gBAAgB,GAAG,SAAS,EAC1C,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,4BAA4B,CAAC,CAoIvC;AAED;;;GAGG;AACH,wBAAgB,2BAA2B,CACzC,UAAU,EAAE,oBAAoB,GAC/B,4BAA4B,CAwB9B"}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude Integration Module
|
|
3
|
+
* Handles Claude-enhanced tool behavior inference
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
/**
|
|
8
|
+
* Enhance tool assessment with Claude inference
|
|
9
|
+
*/
|
|
10
|
+
export async function enhanceWithClaudeInference(tool, baseResult, claudeBridge, logger) {
    // Adds a `claudeInference` section to `baseResult`. Uses the bridge when
    // one is supplied; otherwise (or on failure) falls back to the
    // pattern-based inference already present on `baseResult`.
    const patternBehavior = baseResult.inferredBehavior ?? {
        expectedReadOnly: false,
        expectedDestructive: false,
        reason: "No behavior inference available",
    };
    // Shared shape for the two "confidence 50" pattern-based fallbacks.
    const patternFallback = (reasoning) => ({
        ...baseResult,
        claudeInference: {
            expectedReadOnly: patternBehavior.expectedReadOnly,
            expectedDestructive: patternBehavior.expectedDestructive,
            confidence: 50,
            reasoning,
            suggestedAnnotations: {
                readOnlyHint: patternBehavior.expectedReadOnly,
                destructiveHint: patternBehavior.expectedDestructive,
            },
            misalignmentDetected: baseResult.issues.some((issue) => issue.includes("misaligned")),
            source: "pattern-based",
        },
    });
    if (!claudeBridge) {
        return patternFallback(patternBehavior.reason);
    }
    try {
        const declared = baseResult.annotations;
        const currentAnnotations = declared
            ? {
                readOnlyHint: declared.readOnlyHint,
                destructiveHint: declared.destructiveHint,
            }
            : undefined;
        const inference = await claudeBridge.inferToolBehavior(tool, currentAnnotations);
        if (!inference) {
            // The bridge answered but produced nothing usable.
            return {
                ...baseResult,
                claudeInference: {
                    expectedReadOnly: patternBehavior.expectedReadOnly,
                    expectedDestructive: patternBehavior.expectedDestructive,
                    confidence: 0,
                    reasoning: "Claude inference unavailable. Using pattern-based analysis.",
                    suggestedAnnotations: {},
                    misalignmentDetected: false,
                    source: "pattern-based",
                },
            };
        }
        const issues = baseResult.issues.slice();
        const recommendations = baseResult.recommendations.slice();
        // Only surface a misalignment when confidence is at least 70.
        const confidentMisalignment = inference.misalignmentDetected && inference.confidence >= 70;
        if (confidentMisalignment) {
            // Add at most one "Claude analysis" issue.
            const alreadyReported = issues.some((issue) => issue.includes("Claude analysis"));
            if (!alreadyReported) {
                issues.push(inference.misalignmentDetails
                    ? `Claude analysis (${inference.confidence}% confidence): ${inference.misalignmentDetails}`
                    : `Claude analysis detected annotation misalignment with ${inference.confidence}% confidence`);
            }
            const suggested = inference.suggestedAnnotations;
            if (suggested) {
                // Recommend a hint only when it differs from what is declared.
                if (suggested.readOnlyHint !== undefined &&
                    suggested.readOnlyHint !== baseResult.annotations?.readOnlyHint) {
                    recommendations.push(`Claude suggests: Set readOnlyHint=${suggested.readOnlyHint} for ${tool.name}`);
                }
                if (suggested.destructiveHint !== undefined &&
                    suggested.destructiveHint !== baseResult.annotations?.destructiveHint) {
                    recommendations.push(`Claude suggests: Set destructiveHint=${suggested.destructiveHint} for ${tool.name}`);
                }
                if (suggested.idempotentHint !== undefined) {
                    recommendations.push(`Claude suggests: Consider adding idempotentHint=${suggested.idempotentHint} for ${tool.name}`);
                }
            }
        }
        return {
            ...baseResult,
            issues,
            recommendations,
            claudeInference: {
                expectedReadOnly: inference.expectedReadOnly,
                expectedDestructive: inference.expectedDestructive,
                confidence: inference.confidence,
                reasoning: inference.reasoning,
                suggestedAnnotations: inference.suggestedAnnotations,
                misalignmentDetected: inference.misalignmentDetected,
                misalignmentDetails: inference.misalignmentDetails,
                source: "claude-inferred",
            },
        };
    }
    catch (error) {
        // Best effort: log and degrade to the pattern-based result.
        logger.error(`Claude inference failed for ${tool.name}`, { error });
        return patternFallback(`Claude inference failed, using pattern-based: ${patternBehavior.reason}`);
    }
}
|
|
114
|
+
/**
|
|
115
|
+
* Create pattern-based Claude inference fallback
|
|
116
|
+
* Used when Claude enhancement is not enabled
|
|
117
|
+
*/
|
|
118
|
+
export function createPatternBasedInference(baseResult) {
    // Synchronous fallback used when Claude enhancement is not enabled:
    // mirrors the pattern-based inference into a `claudeInference` section.
    const fallbackBehavior = {
        expectedReadOnly: false,
        expectedDestructive: false,
        reason: "No behavior inference available",
    };
    const { expectedReadOnly, expectedDestructive, reason } = baseResult.inferredBehavior ?? fallbackBehavior;
    // An existing issue mentioning "misaligned" counts as a detected misalignment.
    const misalignmentDetected = baseResult.issues.some((issue) => issue.includes("misaligned"));
    const claudeInference = {
        expectedReadOnly,
        expectedDestructive,
        confidence: 50,
        reasoning: reason,
        suggestedAnnotations: {
            readOnlyHint: expectedReadOnly,
            destructiveHint: expectedDestructive,
        },
        misalignmentDetected,
        source: "pattern-based",
    };
    return { ...baseResult, claudeInference };
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Annotation Event Emitter Module
|
|
3
|
+
* Handles emitting annotation-related progress events during assessment
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
8
|
+
import type { ToolAnnotationResult, AlignmentStatus, ToolParamProgress, ProgressCallback } from "../../../../lib/assessmentTypes.js";
|
|
9
|
+
import type { EnhancedToolAnnotationResult } from "./types.js";
|
|
10
|
+
/**
 * Emit annotation-related progress events for a single assessed tool.
 *
 * Does nothing when `onProgress` is undefined or when `result` carries no
 * inferred behavior. Otherwise reports the tool as "annotation_missing",
 * "annotation_aligned", or forwards each mismatching hint
 * (readOnlyHint / destructiveHint) to emitMismatchEvent.
 *
 * @param onProgress - Progress callback that receives the events; may be undefined.
 * @param tool - Tool whose annotations were assessed.
 * @param result - Assessment result for that tool.
 */
|
|
13
|
+
export declare function emitAnnotationEvents(onProgress: ProgressCallback | undefined, tool: Tool, result: EnhancedToolAnnotationResult): void;
|
|
14
|
+
/**
 * Emit a mismatch event (misaligned or review_recommended) for one hint.
 *
 * @param onProgress - Progress callback that receives the event; may be undefined.
 * @param tool - Tool the mismatch was found on.
 * @param toolParams - Parameter descriptions copied into the event.
 * @param toolAnnotations - Extracted annotations; only `title` is declared here.
 * @param field - Which hint disagrees with the inferred behavior.
 * @param actual - Value the tool declares for that hint.
 * @param expected - Value inferred for that hint.
 * @param confidence - Numeric confidence attached to misaligned events.
 * @param inferred - Inferred behavior that produced `expected`.
 * @param alignmentStatus - Alignment status of the tool; selects the event type.
 */
|
|
17
|
+
export declare function emitMismatchEvent(onProgress: ProgressCallback | undefined, tool: Tool, toolParams: ToolParamProgress[], toolAnnotations: {
|
|
18
|
+
title?: string;
|
|
19
|
+
}, field: "readOnlyHint" | "destructiveHint", actual: boolean | undefined, expected: boolean, confidence: number, inferred: NonNullable<ToolAnnotationResult["inferredBehavior"]>, alignmentStatus: AlignmentStatus): void;
|
|
20
|
+
//# sourceMappingURL=EventEmitter.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"EventEmitter.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/EventEmitter.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,eAAe,EACf,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAI/B,OAAO,KAAK,EAAE,4BAA4B,EAAE,MAAM,SAAS,CAAC;AAE5D;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,UAAU,EAAE,gBAAgB,GAAG,SAAS,EACxC,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,4BAA4B,GACnC,IAAI,CAkFN;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAC/B,UAAU,EAAE,gBAAgB,GAAG,SAAS,EACxC,IAAI,EAAE,IAAI,EACV,UAAU,EAAE,iBAAiB,EAAE,EAC/B,eAAe,EAAE;IAAE,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,EACnC,KAAK,EAAE,cAAc,GAAG,iBAAiB,EACzC,MAAM,EAAE,OAAO,GAAG,SAAS,EAC3B,QAAQ,EAAE,OAAO,EACjB,UAAU,EAAE,MAAM,EAClB,QAAQ,EAAE,WAAW,CAAC,oBAAoB,CAAC,kBAAkB,CAAC,CAAC,EAC/D,eAAe,EAAE,eAAe,GAC/B,IAAI,CAkCN"}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Annotation Event Emitter Module
|
|
3
|
+
* Handles emitting annotation-related progress events during assessment
|
|
4
|
+
*
|
|
5
|
+
* Extracted from ToolAnnotationAssessor.ts as part of Issue #105 refactoring.
|
|
6
|
+
*/
|
|
7
|
+
import { isActionableConfidence } from "./AnnotationDeceptionDetector.js";
|
|
8
|
+
import { extractAnnotations, extractToolParams } from "./AlignmentChecker.js";
|
|
9
|
+
/**
 * Emit annotation-related progress events for one assessed tool.
 *
 * Outcomes, checked in order:
 * - no callback, or no inferred behavior on the result: nothing is emitted;
 * - the result reports no annotations: one "annotation_missing" event;
 * - the alignment status is "ALIGNED": one "annotation_aligned" event;
 * - otherwise: one emitMismatchEvent call per hint (readOnlyHint, then
 *   destructiveHint) that is declared and differs from the inferred value.
 *
 * Tool parameters and annotations are extracted only on the paths that
 * forward them, so aligned tools and tools without a mismatch skip that work.
 *
 * @param {Function|undefined} onProgress - Progress callback receiving the events.
 * @param {object} tool - Tool being reported (`name`, `description`, `inputSchema`).
 * @param {object} result - Annotation assessment result for the tool.
 * @returns {void}
 */
export function emitAnnotationEvents(onProgress, tool, result) {
    if (!onProgress || !result.inferredBehavior)
        return;
    const annotations = result.annotations;
    const inferred = result.inferredBehavior;
    // Emit missing annotation event
    if (!result.hasAnnotations) {
        const toolParams = extractToolParams(tool.inputSchema);
        const toolAnnotations = extractAnnotations(tool);
        onProgress({
            type: "annotation_missing",
            tool: tool.name,
            title: toolAnnotations.title,
            description: tool.description,
            parameters: toolParams,
            inferredBehavior: {
                expectedReadOnly: inferred.expectedReadOnly,
                expectedDestructive: inferred.expectedDestructive,
                reason: inferred.reason,
            },
        });
        return;
    }
    // Emit aligned event (needs neither the extracted params nor the title)
    if (result.alignmentStatus === "ALIGNED") {
        onProgress({
            type: "annotation_aligned",
            tool: tool.name,
            confidence: inferred.confidence ?? "medium",
            annotations: {
                readOnlyHint: annotations?.readOnlyHint,
                destructiveHint: annotations?.destructiveHint,
                openWorldHint: annotations?.openWorldHint,
                idempotentHint: annotations?.idempotentHint,
            },
        });
        return;
    }
    // A hint is a mismatch only when it is declared and contradicts the inference
    const mismatches = [
        ["readOnlyHint", annotations?.readOnlyHint, inferred.expectedReadOnly],
        ["destructiveHint", annotations?.destructiveHint, inferred.expectedDestructive],
    ].filter(([, actual, expected]) => actual !== undefined && actual !== expected);
    if (mismatches.length === 0)
        return;
    const confidence = result.claudeInference?.confidence ?? 50;
    // Get alignment status with fallback to UNKNOWN if undefined
    const alignmentStatus = result.alignmentStatus ?? "UNKNOWN";
    const toolParams = extractToolParams(tool.inputSchema);
    const toolAnnotations = extractAnnotations(tool);
    for (const [field, actual, expected] of mismatches) {
        emitMismatchEvent(onProgress, tool, toolParams, toolAnnotations, field, actual, expected, confidence, inferred, alignmentStatus);
    }
}
|
|
64
|
+
/**
 * Report a single hint mismatch through the progress callback.
 *
 * A "REVIEW_RECOMMENDED" status always yields an
 * "annotation_review_recommended" event. Any other status yields an
 * "annotation_misaligned" event, but only when the inference is not
 * ambiguous and its confidence is actionable; otherwise nothing is emitted.
 *
 * @param {Function|undefined} onProgress - Progress callback; no-op when absent.
 * @param {object} tool - Tool the mismatch belongs to.
 * @param {Array} toolParams - Parameter descriptions copied into the event.
 * @param {object} toolAnnotations - Extracted annotations (`title` is used).
 * @param {string} field - "readOnlyHint" or "destructiveHint".
 * @param {boolean|undefined} actual - Declared value of the hint.
 * @param {boolean} expected - Inferred value of the hint.
 * @param {number} confidence - Numeric confidence for misaligned events.
 * @param {object} inferred - Inferred behavior (`reason`, `confidence`, `isAmbiguous`).
 * @param {string} alignmentStatus - Alignment status of the tool.
 * @returns {void}
 */
export function emitMismatchEvent(onProgress, tool, toolParams, toolAnnotations, field, actual, expected, confidence, inferred, alignmentStatus) {
    if (!onProgress) {
        return;
    }
    // Both event types share the same leading fields; only the tail differs.
    const report = (type, tail) => onProgress({
        type,
        tool: tool.name,
        title: toolAnnotations.title,
        description: tool.description,
        parameters: toolParams,
        field,
        actual,
        ...tail,
    });
    if (alignmentStatus === "REVIEW_RECOMMENDED") {
        report("annotation_review_recommended", {
            inferred: expected,
            confidence: inferred.confidence,
            isAmbiguous: inferred.isAmbiguous,
            reason: inferred.reason,
        });
        return;
    }
    // Ambiguity is checked first so the confidence helper is skipped for it.
    if (inferred.isAmbiguous || !isActionableConfidence(inferred.confidence)) {
        return;
    }
    report("annotation_misaligned", {
        expected,
        confidence,
        reason: `Tool has ${field}=${actual}, but ${inferred.reason}`,
    });
}
|