@bryan-thompson/inspector-assessment 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/build/assess-full.js +528 -0
- package/cli/build/assess-security.js +342 -0
- package/client/dist/assets/{OAuthCallback-ZcXdfhZQ.js → OAuthCallback-Xo9zS7pv.js} +1 -1
- package/client/dist/assets/{OAuthDebugCallback-xt1SlIHS.js → OAuthDebugCallback-CaIey8K_.js} +1 -1
- package/client/dist/assets/{index-B3lTiDVe.js → index-nCPw6E-c.js} +4 -4
- package/client/dist/index.html +1 -1
- package/client/lib/lib/assessmentTypes.d.ts +670 -0
- package/client/lib/lib/assessmentTypes.d.ts.map +1 -0
- package/client/lib/lib/assessmentTypes.js +220 -0
- package/client/lib/lib/aupPatterns.d.ts +63 -0
- package/client/lib/lib/aupPatterns.d.ts.map +1 -0
- package/client/lib/lib/aupPatterns.js +344 -0
- package/client/lib/lib/prohibitedLibraries.d.ts +76 -0
- package/client/lib/lib/prohibitedLibraries.d.ts.map +1 -0
- package/client/lib/lib/prohibitedLibraries.js +364 -0
- package/client/lib/lib/securityPatterns.d.ts +64 -0
- package/client/lib/lib/securityPatterns.d.ts.map +1 -0
- package/client/lib/lib/securityPatterns.js +453 -0
- package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +88 -0
- package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -0
- package/client/lib/services/assessment/AssessmentOrchestrator.js +418 -0
- package/client/lib/services/assessment/ResponseValidator.d.ts +69 -0
- package/client/lib/services/assessment/ResponseValidator.d.ts.map +1 -0
- package/client/lib/services/assessment/ResponseValidator.js +1038 -0
- package/client/lib/services/assessment/TestDataGenerator.d.ts +86 -0
- package/client/lib/services/assessment/TestDataGenerator.d.ts.map +1 -0
- package/client/lib/services/assessment/TestDataGenerator.js +669 -0
- package/client/lib/services/assessment/TestScenarioEngine.d.ts +91 -0
- package/client/lib/services/assessment/TestScenarioEngine.d.ts.map +1 -0
- package/client/lib/services/assessment/TestScenarioEngine.js +505 -0
- package/client/lib/services/assessment/ToolClassifier.d.ts +61 -0
- package/client/lib/services/assessment/ToolClassifier.d.ts.map +1 -0
- package/client/lib/services/assessment/ToolClassifier.js +349 -0
- package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts +160 -0
- package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -0
- package/client/lib/services/assessment/lib/claudeCodeBridge.js +357 -0
- package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts +100 -0
- package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/AUPComplianceAssessor.js +474 -0
- package/client/lib/services/assessment/modules/BaseAssessor.d.ts +71 -0
- package/client/lib/services/assessment/modules/BaseAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/BaseAssessor.js +171 -0
- package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts +45 -0
- package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/DocumentationAssessor.js +355 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
- package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts +20 -0
- package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/FunctionalityAssessor.js +253 -0
- package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +70 -0
- package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +508 -0
- package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts +70 -0
- package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/ManifestValidationAssessor.js +430 -0
- package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts +43 -0
- package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/PortabilityAssessor.js +347 -0
- package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts +41 -0
- package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.js +256 -0
- package/client/lib/services/assessment/modules/SecurityAssessor.d.ts +176 -0
- package/client/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/SecurityAssessor.js +1333 -0
- package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +96 -0
- package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/ToolAnnotationAssessor.js +593 -0
- package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts +21 -0
- package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/UsabilityAssessor.js +241 -0
- package/client/lib/services/assessment/modules/index.d.ts +33 -0
- package/client/lib/services/assessment/modules/index.d.ts.map +1 -0
- package/client/lib/services/assessment/modules/index.js +35 -0
- package/package.json +5 -2
|
@@ -0,0 +1,564 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Error Handling Assessor Module
|
|
3
|
+
* Tests error handling and input validation
|
|
4
|
+
*/
|
|
5
|
+
import { BaseAssessor } from "./BaseAssessor.js";
|
|
6
|
+
export class ErrorHandlingAssessor extends BaseAssessor {
|
|
7
|
+
async assess(context) {
|
|
8
|
+
this.log("Starting error handling assessment");
|
|
9
|
+
const testDetails = [];
|
|
10
|
+
let passedTests = 0;
|
|
11
|
+
// Test a sample of tools for error handling
|
|
12
|
+
const toolsToTest = this.selectToolsForTesting(context.tools);
|
|
13
|
+
for (const tool of toolsToTest) {
|
|
14
|
+
const toolTests = await this.testToolErrorHandling(tool, context.callTool);
|
|
15
|
+
testDetails.push(...toolTests);
|
|
16
|
+
passedTests += toolTests.filter((t) => t.passed).length;
|
|
17
|
+
// Add delay between tests to avoid rate limiting
|
|
18
|
+
if (this.config.delayBetweenTests && this.config.delayBetweenTests > 0) {
|
|
19
|
+
await this.sleep(this.config.delayBetweenTests);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
this.testCount = testDetails.length;
|
|
23
|
+
const metrics = this.calculateMetrics(testDetails, passedTests);
|
|
24
|
+
const status = this.determineErrorHandlingStatus(metrics, testDetails.length);
|
|
25
|
+
const explanation = this.generateExplanation(metrics, testDetails);
|
|
26
|
+
const recommendations = this.generateRecommendations(metrics, testDetails);
|
|
27
|
+
return {
|
|
28
|
+
metrics,
|
|
29
|
+
status,
|
|
30
|
+
explanation,
|
|
31
|
+
recommendations,
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
selectToolsForTesting(tools) {
|
|
35
|
+
// Prefer new selectedToolsForTesting configuration
|
|
36
|
+
// Note: undefined/null means "test all" (default), empty array [] means "test none" (explicit)
|
|
37
|
+
if (this.config.selectedToolsForTesting !== undefined) {
|
|
38
|
+
const selectedNames = new Set(this.config.selectedToolsForTesting);
|
|
39
|
+
const selectedTools = tools.filter((tool) => selectedNames.has(tool.name));
|
|
40
|
+
// Empty array means user explicitly selected 0 tools
|
|
41
|
+
if (this.config.selectedToolsForTesting.length === 0) {
|
|
42
|
+
this.log(`User selected 0 tools for error handling - skipping tests`);
|
|
43
|
+
return [];
|
|
44
|
+
}
|
|
45
|
+
// If no tools matched the names (config out of sync), log warning but respect selection
|
|
46
|
+
if (selectedTools.length === 0) {
|
|
47
|
+
this.log(`Warning: No tools matched selection (${this.config.selectedToolsForTesting.join(", ")})`);
|
|
48
|
+
return [];
|
|
49
|
+
}
|
|
50
|
+
this.log(`Testing ${selectedTools.length} selected tools out of ${tools.length} for error handling`);
|
|
51
|
+
return selectedTools;
|
|
52
|
+
}
|
|
53
|
+
// Backward compatibility: use old maxToolsToTestForErrors configuration
|
|
54
|
+
const configLimit = this.config.maxToolsToTestForErrors;
|
|
55
|
+
// If -1, test all tools
|
|
56
|
+
if (configLimit === -1) {
|
|
57
|
+
this.log(`Testing all ${tools.length} tools for error handling`);
|
|
58
|
+
return tools;
|
|
59
|
+
}
|
|
60
|
+
// Otherwise use the configured limit (default to 5 if not set)
|
|
61
|
+
const maxTools = Math.min(configLimit ?? 5, tools.length);
|
|
62
|
+
this.log(`Testing ${maxTools} out of ${tools.length} tools for error handling`);
|
|
63
|
+
return tools.slice(0, maxTools);
|
|
64
|
+
}
|
|
65
|
+
async testToolErrorHandling(tool, callTool) {
|
|
66
|
+
const tests = [];
|
|
67
|
+
// Scored tests first (affect compliance score)
|
|
68
|
+
// Test 1: Missing required parameters
|
|
69
|
+
tests.push(await this.testMissingParameters(tool, callTool));
|
|
70
|
+
// Test 2: Wrong parameter types
|
|
71
|
+
tests.push(await this.testWrongTypes(tool, callTool));
|
|
72
|
+
// Test 3: Excessive input size
|
|
73
|
+
tests.push(await this.testExcessiveInput(tool, callTool));
|
|
74
|
+
// Informational tests last (do not affect compliance score)
|
|
75
|
+
// Test 4: Invalid parameter values (edge case handling)
|
|
76
|
+
tests.push(await this.testInvalidValues(tool, callTool));
|
|
77
|
+
return tests;
|
|
78
|
+
}
|
|
79
|
+
async testMissingParameters(tool, callTool) {
|
|
80
|
+
const testInput = {}; // Empty params
|
|
81
|
+
// Check if tool has any required parameters
|
|
82
|
+
const schema = this.getToolSchema(tool);
|
|
83
|
+
const hasRequiredParams = schema?.required &&
|
|
84
|
+
Array.isArray(schema.required) &&
|
|
85
|
+
schema.required.length > 0;
|
|
86
|
+
// If no required parameters, this test should pass (empty input is valid)
|
|
87
|
+
if (!hasRequiredParams) {
|
|
88
|
+
return {
|
|
89
|
+
toolName: tool.name,
|
|
90
|
+
testType: "missing_required",
|
|
91
|
+
testInput,
|
|
92
|
+
expectedError: "Missing required parameters",
|
|
93
|
+
actualResponse: {
|
|
94
|
+
isError: false,
|
|
95
|
+
errorMessage: undefined,
|
|
96
|
+
rawResponse: "Skipped - no required parameters",
|
|
97
|
+
},
|
|
98
|
+
passed: true,
|
|
99
|
+
reason: "No required parameters (tool correctly accepts empty input)",
|
|
100
|
+
};
|
|
101
|
+
}
|
|
102
|
+
try {
|
|
103
|
+
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
104
|
+
const isError = this.isErrorResponse(response);
|
|
105
|
+
const errorInfo = this.extractErrorInfo(response);
|
|
106
|
+
// More intelligent pattern matching for missing parameter errors
|
|
107
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
108
|
+
const hasValidError = isError &&
|
|
109
|
+
(messageLower.includes("required") ||
|
|
110
|
+
messageLower.includes("missing") ||
|
|
111
|
+
messageLower.includes("must provide") ||
|
|
112
|
+
messageLower.includes("must be provided") ||
|
|
113
|
+
messageLower.includes("is required") ||
|
|
114
|
+
messageLower.includes("cannot be empty") ||
|
|
115
|
+
messageLower.includes("must specify") ||
|
|
116
|
+
// Also accept field-specific errors (even better!)
|
|
117
|
+
/\b(query|field|parameter|argument|value|input)\b/i.test(errorInfo.message ?? ""));
|
|
118
|
+
return {
|
|
119
|
+
toolName: tool.name,
|
|
120
|
+
testType: "missing_required",
|
|
121
|
+
testInput,
|
|
122
|
+
expectedError: "Missing required parameters",
|
|
123
|
+
actualResponse: {
|
|
124
|
+
isError,
|
|
125
|
+
errorCode: errorInfo.code,
|
|
126
|
+
errorMessage: errorInfo.message,
|
|
127
|
+
rawResponse: response,
|
|
128
|
+
},
|
|
129
|
+
passed: hasValidError,
|
|
130
|
+
reason: isError ? undefined : "Tool did not reject missing parameters",
|
|
131
|
+
};
|
|
132
|
+
}
|
|
133
|
+
catch (error) {
|
|
134
|
+
// Check if the error message is meaningful (not just a generic crash)
|
|
135
|
+
const errorInfo = this.extractErrorInfo(error);
|
|
136
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
137
|
+
const isMeaningfulError = messageLower.includes("required") ||
|
|
138
|
+
messageLower.includes("missing") ||
|
|
139
|
+
messageLower.includes("parameter") ||
|
|
140
|
+
messageLower.includes("must") ||
|
|
141
|
+
messageLower.includes("invalid") ||
|
|
142
|
+
messageLower.includes("validation") ||
|
|
143
|
+
(errorInfo.message?.length ?? 0) > 20; // Longer messages are likely intentional
|
|
144
|
+
return {
|
|
145
|
+
toolName: tool.name,
|
|
146
|
+
testType: "missing_required",
|
|
147
|
+
testInput,
|
|
148
|
+
expectedError: "Missing required parameters",
|
|
149
|
+
actualResponse: {
|
|
150
|
+
isError: true,
|
|
151
|
+
errorCode: errorInfo.code,
|
|
152
|
+
errorMessage: errorInfo.message,
|
|
153
|
+
rawResponse: error,
|
|
154
|
+
},
|
|
155
|
+
passed: isMeaningfulError,
|
|
156
|
+
reason: isMeaningfulError ? undefined : "Generic unhandled exception",
|
|
157
|
+
};
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
async testWrongTypes(tool, callTool) {
|
|
161
|
+
const schema = this.getToolSchema(tool);
|
|
162
|
+
const testInput = this.generateWrongTypeParams(schema);
|
|
163
|
+
try {
|
|
164
|
+
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
165
|
+
const isError = this.isErrorResponse(response);
|
|
166
|
+
const errorInfo = this.extractErrorInfo(response);
|
|
167
|
+
// More intelligent pattern matching for type errors
|
|
168
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
169
|
+
const hasValidError = isError &&
|
|
170
|
+
(messageLower.includes("type") ||
|
|
171
|
+
messageLower.includes("invalid") ||
|
|
172
|
+
messageLower.includes("expected") ||
|
|
173
|
+
messageLower.includes("must be") ||
|
|
174
|
+
messageLower.includes("should be") ||
|
|
175
|
+
messageLower.includes("cannot be") ||
|
|
176
|
+
messageLower.includes("not a") ||
|
|
177
|
+
messageLower.includes("received") ||
|
|
178
|
+
messageLower.includes("string") ||
|
|
179
|
+
messageLower.includes("number") ||
|
|
180
|
+
messageLower.includes("boolean") ||
|
|
181
|
+
messageLower.includes("array") ||
|
|
182
|
+
messageLower.includes("object") ||
|
|
183
|
+
// Also accept validation framework messages
|
|
184
|
+
/\b(validation|validate|schema|format)\b/i.test(errorInfo.message ?? ""));
|
|
185
|
+
return {
|
|
186
|
+
toolName: tool.name,
|
|
187
|
+
testType: "wrong_type",
|
|
188
|
+
testInput,
|
|
189
|
+
expectedError: "Type validation error",
|
|
190
|
+
actualResponse: {
|
|
191
|
+
isError,
|
|
192
|
+
errorCode: errorInfo.code,
|
|
193
|
+
errorMessage: errorInfo.message,
|
|
194
|
+
rawResponse: response,
|
|
195
|
+
},
|
|
196
|
+
passed: hasValidError,
|
|
197
|
+
reason: isError ? undefined : "Tool accepted wrong parameter types",
|
|
198
|
+
};
|
|
199
|
+
}
|
|
200
|
+
catch (error) {
|
|
201
|
+
// Check if the error message is meaningful (not just a generic crash)
|
|
202
|
+
const errorInfo = this.extractErrorInfo(error);
|
|
203
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
204
|
+
const isMeaningfulError = messageLower.includes("type") ||
|
|
205
|
+
messageLower.includes("invalid") ||
|
|
206
|
+
messageLower.includes("expected") ||
|
|
207
|
+
messageLower.includes("must be") ||
|
|
208
|
+
messageLower.includes("validation") ||
|
|
209
|
+
messageLower.includes("string") ||
|
|
210
|
+
messageLower.includes("number") ||
|
|
211
|
+
(errorInfo.message?.length ?? 0) > 20; // Longer messages are likely intentional
|
|
212
|
+
return {
|
|
213
|
+
toolName: tool.name,
|
|
214
|
+
testType: "wrong_type",
|
|
215
|
+
testInput,
|
|
216
|
+
expectedError: "Type validation error",
|
|
217
|
+
actualResponse: {
|
|
218
|
+
isError: true,
|
|
219
|
+
errorCode: errorInfo.code,
|
|
220
|
+
errorMessage: errorInfo.message,
|
|
221
|
+
rawResponse: error,
|
|
222
|
+
},
|
|
223
|
+
passed: isMeaningfulError,
|
|
224
|
+
reason: isMeaningfulError ? undefined : "Generic unhandled exception",
|
|
225
|
+
};
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
async testInvalidValues(tool, callTool) {
|
|
229
|
+
const schema = this.getToolSchema(tool);
|
|
230
|
+
const testInput = this.generateInvalidValueParams(schema);
|
|
231
|
+
try {
|
|
232
|
+
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
233
|
+
const isError = this.isErrorResponse(response);
|
|
234
|
+
const errorInfo = this.extractErrorInfo(response);
|
|
235
|
+
// For invalid values, any error response is good
|
|
236
|
+
// The server is validating inputs properly
|
|
237
|
+
return {
|
|
238
|
+
toolName: tool.name,
|
|
239
|
+
testType: "invalid_values",
|
|
240
|
+
testInput,
|
|
241
|
+
expectedError: "Invalid parameter values",
|
|
242
|
+
actualResponse: {
|
|
243
|
+
isError,
|
|
244
|
+
errorCode: errorInfo.code,
|
|
245
|
+
errorMessage: errorInfo.message,
|
|
246
|
+
rawResponse: response,
|
|
247
|
+
},
|
|
248
|
+
passed: isError,
|
|
249
|
+
reason: isError ? undefined : "Tool accepted invalid values",
|
|
250
|
+
};
|
|
251
|
+
}
|
|
252
|
+
catch (error) {
|
|
253
|
+
// Check if the error message is meaningful (not just a generic crash)
|
|
254
|
+
const errorInfo = this.extractErrorInfo(error);
|
|
255
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
256
|
+
const isMeaningfulError = messageLower.includes("invalid") ||
|
|
257
|
+
messageLower.includes("not allowed") ||
|
|
258
|
+
messageLower.includes("must") ||
|
|
259
|
+
messageLower.includes("cannot") ||
|
|
260
|
+
messageLower.includes("validation") ||
|
|
261
|
+
messageLower.includes("error") ||
|
|
262
|
+
(errorInfo.message?.length ?? 0) > 15; // Even shorter messages OK for invalid values
|
|
263
|
+
return {
|
|
264
|
+
toolName: tool.name,
|
|
265
|
+
testType: "invalid_values",
|
|
266
|
+
testInput,
|
|
267
|
+
expectedError: "Invalid parameter values",
|
|
268
|
+
actualResponse: {
|
|
269
|
+
isError: true,
|
|
270
|
+
errorCode: errorInfo.code,
|
|
271
|
+
errorMessage: errorInfo.message,
|
|
272
|
+
rawResponse: error,
|
|
273
|
+
},
|
|
274
|
+
passed: isMeaningfulError,
|
|
275
|
+
reason: isMeaningfulError ? undefined : "Generic unhandled exception",
|
|
276
|
+
};
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
async testExcessiveInput(tool, callTool) {
|
|
280
|
+
const largeString = "x".repeat(100000); // 100KB string
|
|
281
|
+
const testInput = this.generateParamsWithValue(tool, largeString);
|
|
282
|
+
try {
|
|
283
|
+
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
284
|
+
const isError = this.isErrorResponse(response);
|
|
285
|
+
const errorInfo = this.extractErrorInfo(response);
|
|
286
|
+
return {
|
|
287
|
+
toolName: tool.name,
|
|
288
|
+
testType: "excessive_input",
|
|
289
|
+
testInput: { ...testInput, value: "[100KB string]" }, // Don't store huge string
|
|
290
|
+
expectedError: "Input size limit exceeded",
|
|
291
|
+
actualResponse: {
|
|
292
|
+
isError,
|
|
293
|
+
errorCode: errorInfo.code,
|
|
294
|
+
errorMessage: errorInfo.message,
|
|
295
|
+
rawResponse: response ? "[response omitted]" : undefined,
|
|
296
|
+
},
|
|
297
|
+
passed: isError || response !== null, // Either error or handled gracefully
|
|
298
|
+
reason: !isError && !response ? "Tool crashed on large input" : undefined,
|
|
299
|
+
};
|
|
300
|
+
}
|
|
301
|
+
catch (error) {
|
|
302
|
+
// Check if the error message is meaningful (not just a generic crash)
|
|
303
|
+
const errorInfo = this.extractErrorInfo(error);
|
|
304
|
+
const messageLower = errorInfo.message?.toLowerCase() ?? "";
|
|
305
|
+
const isMeaningfulError = messageLower.includes("size") ||
|
|
306
|
+
messageLower.includes("large") ||
|
|
307
|
+
messageLower.includes("limit") ||
|
|
308
|
+
messageLower.includes("exceed") ||
|
|
309
|
+
messageLower.includes("too") ||
|
|
310
|
+
messageLower.includes("maximum") ||
|
|
311
|
+
(errorInfo.message?.length ?? 0) > 10; // Short messages OK for size limits
|
|
312
|
+
return {
|
|
313
|
+
toolName: tool.name,
|
|
314
|
+
testType: "excessive_input",
|
|
315
|
+
testInput: { value: "[100KB string]" },
|
|
316
|
+
expectedError: "Input size limit exceeded",
|
|
317
|
+
actualResponse: {
|
|
318
|
+
isError: true,
|
|
319
|
+
errorCode: errorInfo.code,
|
|
320
|
+
errorMessage: errorInfo.message,
|
|
321
|
+
rawResponse: "[error details omitted]",
|
|
322
|
+
},
|
|
323
|
+
passed: isMeaningfulError,
|
|
324
|
+
reason: isMeaningfulError ? undefined : "Generic unhandled exception",
|
|
325
|
+
};
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
getToolSchema(tool) {
|
|
329
|
+
if (!tool.inputSchema)
|
|
330
|
+
return {};
|
|
331
|
+
return typeof tool.inputSchema === "string"
|
|
332
|
+
? this.safeJsonParse(tool.inputSchema)
|
|
333
|
+
: tool.inputSchema;
|
|
334
|
+
}
|
|
335
|
+
generateWrongTypeParams(schema) {
|
|
336
|
+
const params = {};
|
|
337
|
+
if (!schema?.properties)
|
|
338
|
+
return { value: 123 }; // Default wrong type
|
|
339
|
+
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
340
|
+
// Intentionally use wrong types
|
|
341
|
+
switch (prop.type) {
|
|
342
|
+
case "string":
|
|
343
|
+
params[key] = 123; // Number instead of string
|
|
344
|
+
break;
|
|
345
|
+
case "number":
|
|
346
|
+
case "integer":
|
|
347
|
+
params[key] = "not a number"; // String instead of number
|
|
348
|
+
break;
|
|
349
|
+
case "boolean":
|
|
350
|
+
params[key] = "yes"; // String instead of boolean
|
|
351
|
+
break;
|
|
352
|
+
case "array":
|
|
353
|
+
params[key] = "not an array"; // String instead of array
|
|
354
|
+
break;
|
|
355
|
+
case "object":
|
|
356
|
+
params[key] = "not an object"; // String instead of object
|
|
357
|
+
break;
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
return params;
|
|
361
|
+
}
|
|
362
|
+
generateInvalidValueParams(schema) {
|
|
363
|
+
const params = {};
|
|
364
|
+
if (!schema?.properties)
|
|
365
|
+
return { value: null };
|
|
366
|
+
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
367
|
+
if (prop.type === "string") {
|
|
368
|
+
if (prop.enum) {
|
|
369
|
+
params[key] = "not_in_enum"; // Value not in enum
|
|
370
|
+
}
|
|
371
|
+
else if (prop.format === "email") {
|
|
372
|
+
params[key] = "invalid-email"; // Invalid email
|
|
373
|
+
}
|
|
374
|
+
else if (prop.format === "uri") {
|
|
375
|
+
params[key] = "not://a/valid/uri"; // Invalid URI
|
|
376
|
+
}
|
|
377
|
+
else {
|
|
378
|
+
params[key] = ""; // Empty string
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
else if (prop.type === "number" || prop.type === "integer") {
|
|
382
|
+
if (prop.minimum !== undefined) {
|
|
383
|
+
params[key] = prop.minimum - 1; // Below minimum
|
|
384
|
+
}
|
|
385
|
+
else if (prop.maximum !== undefined) {
|
|
386
|
+
params[key] = prop.maximum + 1; // Above maximum
|
|
387
|
+
}
|
|
388
|
+
else {
|
|
389
|
+
params[key] = -999999; // Extreme value
|
|
390
|
+
}
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
return params;
|
|
394
|
+
}
|
|
395
|
+
generateParamsWithValue(tool, value) {
|
|
396
|
+
const schema = this.getToolSchema(tool);
|
|
397
|
+
const params = {};
|
|
398
|
+
if (schema?.properties) {
|
|
399
|
+
// Find first string parameter
|
|
400
|
+
for (const [key, prop] of Object.entries(schema.properties)) {
|
|
401
|
+
if (prop.type === "string") {
|
|
402
|
+
params[key] = value;
|
|
403
|
+
break;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
if (Object.keys(params).length === 0) {
|
|
408
|
+
params.value = value; // Default parameter name
|
|
409
|
+
}
|
|
410
|
+
return params;
|
|
411
|
+
}
|
|
412
|
+
// isErrorResponse and extractErrorInfo moved to BaseAssessor for reuse across all assessors
|
|
413
|
+
calculateMetrics(tests, _passed) {
|
|
414
|
+
// Calculate enhanced score with bonus points for quality
|
|
415
|
+
let enhancedScore = 0;
|
|
416
|
+
let maxPossibleScore = 0;
|
|
417
|
+
tests.forEach((test) => {
|
|
418
|
+
// Phase 1: Exclude "invalid_values" tests from scoring (informational only)
|
|
419
|
+
// Reason: These tests penalize tools that handle edge cases gracefully (empty strings, etc.)
|
|
420
|
+
// Instead of rejecting them, which is often correct defensive programming.
|
|
421
|
+
// Real schema violations will be tested separately in Phase 2+.
|
|
422
|
+
if (test.testType === "invalid_values") {
|
|
423
|
+
return; // Skip scoring, but still included in testDetails
|
|
424
|
+
}
|
|
425
|
+
maxPossibleScore += 100; // Base score for each test
|
|
426
|
+
if (test.passed) {
|
|
427
|
+
enhancedScore += 100; // Base points for passing
|
|
428
|
+
// Extra points for specific field names in error
|
|
429
|
+
if (/\b(query|field|parameter|argument|prop|key)\b/i.test(test.actualResponse.errorMessage ?? "")) {
|
|
430
|
+
enhancedScore += 10;
|
|
431
|
+
maxPossibleScore += 10;
|
|
432
|
+
}
|
|
433
|
+
// Extra points for helpful context
|
|
434
|
+
if (test.actualResponse.errorMessage &&
|
|
435
|
+
test.actualResponse.errorMessage.length > 30) {
|
|
436
|
+
enhancedScore += 5;
|
|
437
|
+
maxPossibleScore += 5;
|
|
438
|
+
}
|
|
439
|
+
// Extra points for proper error codes
|
|
440
|
+
if (test.actualResponse.errorCode) {
|
|
441
|
+
enhancedScore += 5;
|
|
442
|
+
maxPossibleScore += 5;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
});
|
|
446
|
+
const score = maxPossibleScore > 0 ? (enhancedScore / maxPossibleScore) * 100 : 0;
|
|
447
|
+
// Determine quality rating based on enhanced score
|
|
448
|
+
let quality;
|
|
449
|
+
if (score >= 85)
|
|
450
|
+
quality = "excellent";
|
|
451
|
+
else if (score >= 70)
|
|
452
|
+
quality = "good";
|
|
453
|
+
else if (score >= 50)
|
|
454
|
+
quality = "fair";
|
|
455
|
+
else
|
|
456
|
+
quality = "poor";
|
|
457
|
+
// Check for proper error codes and messages (only among actual errors)
|
|
458
|
+
const actualErrors = tests.filter((t) => t.actualResponse.isError);
|
|
459
|
+
const errorsWithCodes = actualErrors.filter((t) => t.actualResponse.errorCode !== undefined).length;
|
|
460
|
+
const errorsWithMessages = actualErrors.filter((t) => t.actualResponse.errorMessage &&
|
|
461
|
+
t.actualResponse.errorMessage.length > 10).length;
|
|
462
|
+
// Handle case when no tests were run
|
|
463
|
+
// Don't claim "Yes" for error codes/messages when we didn't test anything
|
|
464
|
+
const hasProperErrorCodes = tests.length === 0
|
|
465
|
+
? false // No tests = can't assess
|
|
466
|
+
: actualErrors.length === 0
|
|
467
|
+
? true // Tests run but no errors triggered = can't assess, assume OK
|
|
468
|
+
: errorsWithCodes / actualErrors.length >= 0.5;
|
|
469
|
+
const hasDescriptiveMessages = tests.length === 0
|
|
470
|
+
? false // No tests = can't assess
|
|
471
|
+
: actualErrors.length === 0
|
|
472
|
+
? true // Tests run but no errors triggered = can't assess, assume OK
|
|
473
|
+
: errorsWithMessages / actualErrors.length >= 0.5;
|
|
474
|
+
const validatesInputs = tests
|
|
475
|
+
.filter((t) => ["missing_required", "wrong_type"].includes(t.testType))
|
|
476
|
+
.some((t) => t.passed);
|
|
477
|
+
return {
|
|
478
|
+
mcpComplianceScore: score,
|
|
479
|
+
errorResponseQuality: quality,
|
|
480
|
+
hasProperErrorCodes,
|
|
481
|
+
hasDescriptiveMessages,
|
|
482
|
+
validatesInputs,
|
|
483
|
+
testDetails: tests,
|
|
484
|
+
};
|
|
485
|
+
}
|
|
486
|
+
determineErrorHandlingStatus(metrics, testCount) {
|
|
487
|
+
// If no tests were run, we can't determine error handling status
|
|
488
|
+
if (testCount === 0)
|
|
489
|
+
return "NEED_MORE_INFO";
|
|
490
|
+
// More lenient thresholds that recognize good error handling
|
|
491
|
+
if (metrics.mcpComplianceScore >= 70)
|
|
492
|
+
return "PASS";
|
|
493
|
+
if (metrics.mcpComplianceScore >= 40)
|
|
494
|
+
return "NEED_MORE_INFO";
|
|
495
|
+
return "FAIL";
|
|
496
|
+
}
|
|
497
|
+
generateExplanation(metrics, tests) {
|
|
498
|
+
// Handle case when no tools were tested
|
|
499
|
+
if (tests.length === 0) {
|
|
500
|
+
return "No tools selected for error handling testing. Select tools to run error handling assessments.";
|
|
501
|
+
}
|
|
502
|
+
const parts = [];
|
|
503
|
+
// Filter out invalid_values for scoring context
|
|
504
|
+
const scoredTests = tests.filter((t) => t.testType !== "invalid_values");
|
|
505
|
+
const passedScoredTests = scoredTests.filter((t) => t.passed).length;
|
|
506
|
+
const totalScoredTests = scoredTests.length;
|
|
507
|
+
parts.push(`Error handling compliance score: ${metrics.mcpComplianceScore.toFixed(1)}% (${passedScoredTests}/${totalScoredTests} scored tests passed).`);
|
|
508
|
+
// Count how many types of validation are working (only scored tests)
|
|
509
|
+
const validationTypes = [];
|
|
510
|
+
if (tests.some((t) => t.testType === "missing_required" && t.passed)) {
|
|
511
|
+
validationTypes.push("missing parameter validation");
|
|
512
|
+
}
|
|
513
|
+
if (tests.some((t) => t.testType === "wrong_type" && t.passed)) {
|
|
514
|
+
validationTypes.push("type validation");
|
|
515
|
+
}
|
|
516
|
+
if (tests.some((t) => t.testType === "excessive_input" && t.passed)) {
|
|
517
|
+
validationTypes.push("input size validation");
|
|
518
|
+
}
|
|
519
|
+
// Add informational note about invalid_values tests
|
|
520
|
+
const invalidValuesTests = tests.filter((t) => t.testType === "invalid_values");
|
|
521
|
+
if (invalidValuesTests.length > 0) {
|
|
522
|
+
const passedInvalidValues = invalidValuesTests.filter((t) => t.passed).length;
|
|
523
|
+
validationTypes.push(`edge case handling (${passedInvalidValues}/${invalidValuesTests.length} - informational only)`);
|
|
524
|
+
}
|
|
525
|
+
if (validationTypes.length > 0) {
|
|
526
|
+
const scoredValidationCount = validationTypes.filter((v) => !v.includes("informational only")).length;
|
|
527
|
+
parts.push(`Implements ${scoredValidationCount}/3 validation types (scored): ${validationTypes.join(", ")}.`);
|
|
528
|
+
}
|
|
529
|
+
else {
|
|
530
|
+
parts.push("No input validation detected.");
|
|
531
|
+
}
|
|
532
|
+
parts.push(`${metrics.hasDescriptiveMessages ? "Has" : "Missing"} descriptive error messages,`, `${metrics.hasProperErrorCodes ? "uses" : "missing"} proper error codes.`);
|
|
533
|
+
// Count tools tested
|
|
534
|
+
const toolsTested = [...new Set(tests.map((t) => t.toolName))].length;
|
|
535
|
+
const totalTests = tests.length;
|
|
536
|
+
parts.push(`Tested ${toolsTested} tools with ${totalScoredTests} scored scenarios (${totalTests} total including informational).`);
|
|
537
|
+
return parts.join(" ");
|
|
538
|
+
}
|
|
539
|
+
generateRecommendations(metrics, tests) {
|
|
540
|
+
const recommendations = [];
|
|
541
|
+
if (!metrics.hasProperErrorCodes) {
|
|
542
|
+
recommendations.push("Implement consistent error codes for different error types");
|
|
543
|
+
}
|
|
544
|
+
if (!metrics.hasDescriptiveMessages) {
|
|
545
|
+
recommendations.push("Provide descriptive error messages that help users understand the issue");
|
|
546
|
+
}
|
|
547
|
+
if (!metrics.validatesInputs) {
|
|
548
|
+
recommendations.push("Implement proper input validation for all parameters");
|
|
549
|
+
}
|
|
550
|
+
const failedTypes = [
|
|
551
|
+
...new Set(tests.filter((t) => !t.passed).map((t) => t.testType)),
|
|
552
|
+
];
|
|
553
|
+
if (failedTypes.includes("missing_required")) {
|
|
554
|
+
recommendations.push("Validate and report missing required parameters");
|
|
555
|
+
}
|
|
556
|
+
if (failedTypes.includes("wrong_type")) {
|
|
557
|
+
recommendations.push("Implement type checking for all parameters");
|
|
558
|
+
}
|
|
559
|
+
if (failedTypes.includes("excessive_input")) {
|
|
560
|
+
recommendations.push("Implement input size limits and handle large inputs gracefully");
|
|
561
|
+
}
|
|
562
|
+
return recommendations;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Functionality Assessor Module
|
|
3
|
+
* Tests tool functionality and basic operations
|
|
4
|
+
*/
|
|
5
|
+
import { FunctionalityAssessment } from "../../../lib/assessmentTypes.js";
|
|
6
|
+
import { BaseAssessor } from "./BaseAssessor.js";
|
|
7
|
+
import { AssessmentContext } from "../AssessmentOrchestrator.js";
|
|
8
|
+
/**
 * Declaration of the functionality assessor; extends BaseAssessor.
 * Only signatures are visible here - behaviour lives in the implementation file.
 */
export declare class FunctionalityAssessor extends BaseAssessor {
    /**
     * Entry point of the assessor.
     * @param context Assessment context supplied by the caller.
     * @returns A promise resolving to a FunctionalityAssessment.
     */
    assess(context: AssessmentContext): Promise<FunctionalityAssessment>;
    /**
     * Public helper taking a schema and returning a value of unknown type
     * (presumably generated test input for that schema - confirm against the implementation).
     */
    generateTestInput(schema: any): unknown;
    /**
     * Select tools for testing based on configuration
     */
    private selectToolsForTesting;
    private testTool;
    private generateMinimalParams;
    private generateParamValue;
    private generateExplanation;
}
|
|
20
|
+
//# sourceMappingURL=FunctionalityAssessor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,uBAAuB,EAAkB,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAG9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YA2D5D,QAAQ;IA6EtB,OAAO,CAAC,qBAAqB;IA2B7B,OAAO,CAAC,kBAAkB;IA0FnB,iBAAiB,CAAC,MAAM,EAAE,GAAG,GAAG,OAAO;IAI9C,OAAO,CAAC,mBAAmB;CA+B5B"}
|