@bryan-thompson/inspector-assessment 1.43.2 → 1.43.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +1062 -224
  2. package/cli/build/assess-full.js +532 -106
  3. package/cli/build/assess-security.js +54 -90
  4. package/cli/build/lib/cli-parser.js +14 -1
  5. package/cli/build/lib/cli-parserSchemas.js +1 -0
  6. package/cli/build/lib/result-output.js +21 -0
  7. package/cli/build/profiles.js +20 -0
  8. package/cli/build/validate-testbed.js +0 -0
  9. package/cli/package.json +1 -1
  10. package/client/dist/assets/{OAuthCallback-BS8-A1sU.js → OAuthCallback-Chi58kRc.js} +1 -1
  11. package/client/dist/assets/{OAuthDebugCallback-025_TM2i.js → OAuthDebugCallback-BluD_Wxg.js} +1 -1
  12. package/client/dist/assets/{index-DEhlIjy-.js → index-KW2LwGdp.js} +4 -4
  13. package/client/dist/index.html +1 -1
  14. package/client/lib/lib/assessment/configSchemas.d.ts +64 -64
  15. package/client/lib/lib/assessment/jsonlEventSchemas.d.ts +286 -286
  16. package/client/lib/lib/assessment/resultTypes.d.ts +10 -0
  17. package/client/lib/lib/assessment/resultTypes.d.ts.map +1 -1
  18. package/client/lib/lib/assessmentTypes.d.ts +1 -20
  19. package/client/lib/lib/assessmentTypes.d.ts.map +1 -1
  20. package/client/lib/lib/assessmentTypes.js +1 -20
  21. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +57 -104
  22. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
  23. package/client/lib/services/assessment/AssessmentOrchestrator.js +298 -133
  24. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
  25. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
  26. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.deprecated.js +1 -1
  27. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
  28. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts +5 -0
  29. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
  30. package/client/lib/services/assessment/modules/SecurityAssessor.js +62 -0
  31. package/client/lib/services/assessment/modules/index.d.ts +1 -1
  32. package/client/lib/services/assessment/modules/index.js +1 -1
  33. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts +15 -0
  34. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
  35. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +72 -0
  36. package/client/lib/services/assessment/modules/securityTests/factory.d.ts +2 -0
  37. package/client/lib/services/assessment/modules/securityTests/factory.d.ts.map +1 -1
  38. package/client/lib/services/assessment/modules/securityTests/factory.js +1 -0
  39. package/client/lib/services/assessment/registry/AssessorDefinitions.js +1 -1
  40. package/client/lib/services/assessment/responseValidatorSchemas.d.ts +12 -12
  41. package/client/package.json +3 -3
  42. package/package.json +4 -2
  43. package/server/package.json +1 -1
  44. package/cli/build/lib/__tests__/zodErrorFormatter.test.js +0 -282
  45. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.d.ts +0 -109
  46. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.d.ts.map +0 -1
  47. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.deprecated.d.ts +0 -109
  48. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.deprecated.d.ts.map +0 -1
  49. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.deprecated.js +0 -852
  50. package/client/lib/services/assessment/modules/ProtocolComplianceAssessor.js +0 -852
@@ -0,0 +1,564 @@
1
+ /**
2
+ * Error Handling Assessor Module
3
+ * Tests error handling and input validation
4
+ */
5
+ import { BaseAssessor } from "./BaseAssessor.js";
6
+ export class ErrorHandlingAssessor extends BaseAssessor {
7
+ async assess(context) {
8
+ this.log("Starting error handling assessment");
9
+ const testDetails = [];
10
+ let passedTests = 0;
11
+ // Test a sample of tools for error handling
12
+ const toolsToTest = this.selectToolsForTesting(context.tools);
13
+ for (const tool of toolsToTest) {
14
+ const toolTests = await this.testToolErrorHandling(tool, context.callTool);
15
+ testDetails.push(...toolTests);
16
+ passedTests += toolTests.filter((t) => t.passed).length;
17
+ // Add delay between tests to avoid rate limiting
18
+ if (this.config.delayBetweenTests && this.config.delayBetweenTests > 0) {
19
+ await this.sleep(this.config.delayBetweenTests);
20
+ }
21
+ }
22
+ this.testCount = testDetails.length;
23
+ const metrics = this.calculateMetrics(testDetails, passedTests);
24
+ const status = this.determineErrorHandlingStatus(metrics, testDetails.length);
25
+ const explanation = this.generateExplanation(metrics, testDetails);
26
+ const recommendations = this.generateRecommendations(metrics, testDetails);
27
+ return {
28
+ metrics,
29
+ status,
30
+ explanation,
31
+ recommendations,
32
+ };
33
+ }
34
+ selectToolsForTesting(tools) {
35
+ // Prefer new selectedToolsForTesting configuration
36
+ // Note: undefined/null means "test all" (default), empty array [] means "test none" (explicit)
37
+ if (this.config.selectedToolsForTesting !== undefined) {
38
+ const selectedNames = new Set(this.config.selectedToolsForTesting);
39
+ const selectedTools = tools.filter((tool) => selectedNames.has(tool.name));
40
+ // Empty array means user explicitly selected 0 tools
41
+ if (this.config.selectedToolsForTesting.length === 0) {
42
+ this.log(`User selected 0 tools for error handling - skipping tests`);
43
+ return [];
44
+ }
45
+ // If no tools matched the names (config out of sync), log warning but respect selection
46
+ if (selectedTools.length === 0) {
47
+ this.log(`Warning: No tools matched selection (${this.config.selectedToolsForTesting.join(", ")})`);
48
+ return [];
49
+ }
50
+ this.log(`Testing ${selectedTools.length} selected tools out of ${tools.length} for error handling`);
51
+ return selectedTools;
52
+ }
53
+ // Backward compatibility: use old maxToolsToTestForErrors configuration
54
+ const configLimit = this.config.maxToolsToTestForErrors;
55
+ // If -1, test all tools
56
+ if (configLimit === -1) {
57
+ this.log(`Testing all ${tools.length} tools for error handling`);
58
+ return tools;
59
+ }
60
+ // Otherwise use the configured limit (default to 5 if not set)
61
+ const maxTools = Math.min(configLimit ?? 5, tools.length);
62
+ this.log(`Testing ${maxTools} out of ${tools.length} tools for error handling`);
63
+ return tools.slice(0, maxTools);
64
+ }
65
+ async testToolErrorHandling(tool, callTool) {
66
+ const tests = [];
67
+ // Scored tests first (affect compliance score)
68
+ // Test 1: Missing required parameters
69
+ tests.push(await this.testMissingParameters(tool, callTool));
70
+ // Test 2: Wrong parameter types
71
+ tests.push(await this.testWrongTypes(tool, callTool));
72
+ // Test 3: Excessive input size
73
+ tests.push(await this.testExcessiveInput(tool, callTool));
74
+ // Informational tests last (do not affect compliance score)
75
+ // Test 4: Invalid parameter values (edge case handling)
76
+ tests.push(await this.testInvalidValues(tool, callTool));
77
+ return tests;
78
+ }
79
+ async testMissingParameters(tool, callTool) {
80
+ const testInput = {}; // Empty params
81
+ // Check if tool has any required parameters
82
+ const schema = this.getToolSchema(tool);
83
+ const hasRequiredParams = schema?.required &&
84
+ Array.isArray(schema.required) &&
85
+ schema.required.length > 0;
86
+ // If no required parameters, this test should pass (empty input is valid)
87
+ if (!hasRequiredParams) {
88
+ return {
89
+ toolName: tool.name,
90
+ testType: "missing_required",
91
+ testInput,
92
+ expectedError: "Missing required parameters",
93
+ actualResponse: {
94
+ isError: false,
95
+ errorMessage: undefined,
96
+ rawResponse: "Skipped - no required parameters",
97
+ },
98
+ passed: true,
99
+ reason: "No required parameters (tool correctly accepts empty input)",
100
+ };
101
+ }
102
+ try {
103
+ const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
104
+ const isError = this.isErrorResponse(response);
105
+ const errorInfo = this.extractErrorInfo(response);
106
+ // More intelligent pattern matching for missing parameter errors
107
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
108
+ const hasValidError = isError &&
109
+ (messageLower.includes("required") ||
110
+ messageLower.includes("missing") ||
111
+ messageLower.includes("must provide") ||
112
+ messageLower.includes("must be provided") ||
113
+ messageLower.includes("is required") ||
114
+ messageLower.includes("cannot be empty") ||
115
+ messageLower.includes("must specify") ||
116
+ // Also accept field-specific errors (even better!)
117
+ /\b(query|field|parameter|argument|value|input)\b/i.test(errorInfo.message ?? ""));
118
+ return {
119
+ toolName: tool.name,
120
+ testType: "missing_required",
121
+ testInput,
122
+ expectedError: "Missing required parameters",
123
+ actualResponse: {
124
+ isError,
125
+ errorCode: errorInfo.code,
126
+ errorMessage: errorInfo.message,
127
+ rawResponse: response,
128
+ },
129
+ passed: hasValidError,
130
+ reason: isError ? undefined : "Tool did not reject missing parameters",
131
+ };
132
+ }
133
+ catch (error) {
134
+ // Check if the error message is meaningful (not just a generic crash)
135
+ const errorInfo = this.extractErrorInfo(error);
136
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
137
+ const isMeaningfulError = messageLower.includes("required") ||
138
+ messageLower.includes("missing") ||
139
+ messageLower.includes("parameter") ||
140
+ messageLower.includes("must") ||
141
+ messageLower.includes("invalid") ||
142
+ messageLower.includes("validation") ||
143
+ (errorInfo.message?.length ?? 0) > 20; // Longer messages are likely intentional
144
+ return {
145
+ toolName: tool.name,
146
+ testType: "missing_required",
147
+ testInput,
148
+ expectedError: "Missing required parameters",
149
+ actualResponse: {
150
+ isError: true,
151
+ errorCode: errorInfo.code,
152
+ errorMessage: errorInfo.message,
153
+ rawResponse: error,
154
+ },
155
+ passed: isMeaningfulError,
156
+ reason: isMeaningfulError ? undefined : "Generic unhandled exception",
157
+ };
158
+ }
159
+ }
160
+ async testWrongTypes(tool, callTool) {
161
+ const schema = this.getToolSchema(tool);
162
+ const testInput = this.generateWrongTypeParams(schema);
163
+ try {
164
+ const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
165
+ const isError = this.isErrorResponse(response);
166
+ const errorInfo = this.extractErrorInfo(response);
167
+ // More intelligent pattern matching for type errors
168
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
169
+ const hasValidError = isError &&
170
+ (messageLower.includes("type") ||
171
+ messageLower.includes("invalid") ||
172
+ messageLower.includes("expected") ||
173
+ messageLower.includes("must be") ||
174
+ messageLower.includes("should be") ||
175
+ messageLower.includes("cannot be") ||
176
+ messageLower.includes("not a") ||
177
+ messageLower.includes("received") ||
178
+ messageLower.includes("string") ||
179
+ messageLower.includes("number") ||
180
+ messageLower.includes("boolean") ||
181
+ messageLower.includes("array") ||
182
+ messageLower.includes("object") ||
183
+ // Also accept validation framework messages
184
+ /\b(validation|validate|schema|format)\b/i.test(errorInfo.message ?? ""));
185
+ return {
186
+ toolName: tool.name,
187
+ testType: "wrong_type",
188
+ testInput,
189
+ expectedError: "Type validation error",
190
+ actualResponse: {
191
+ isError,
192
+ errorCode: errorInfo.code,
193
+ errorMessage: errorInfo.message,
194
+ rawResponse: response,
195
+ },
196
+ passed: hasValidError,
197
+ reason: isError ? undefined : "Tool accepted wrong parameter types",
198
+ };
199
+ }
200
+ catch (error) {
201
+ // Check if the error message is meaningful (not just a generic crash)
202
+ const errorInfo = this.extractErrorInfo(error);
203
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
204
+ const isMeaningfulError = messageLower.includes("type") ||
205
+ messageLower.includes("invalid") ||
206
+ messageLower.includes("expected") ||
207
+ messageLower.includes("must be") ||
208
+ messageLower.includes("validation") ||
209
+ messageLower.includes("string") ||
210
+ messageLower.includes("number") ||
211
+ (errorInfo.message?.length ?? 0) > 20; // Longer messages are likely intentional
212
+ return {
213
+ toolName: tool.name,
214
+ testType: "wrong_type",
215
+ testInput,
216
+ expectedError: "Type validation error",
217
+ actualResponse: {
218
+ isError: true,
219
+ errorCode: errorInfo.code,
220
+ errorMessage: errorInfo.message,
221
+ rawResponse: error,
222
+ },
223
+ passed: isMeaningfulError,
224
+ reason: isMeaningfulError ? undefined : "Generic unhandled exception",
225
+ };
226
+ }
227
+ }
228
+ async testInvalidValues(tool, callTool) {
229
+ const schema = this.getToolSchema(tool);
230
+ const testInput = this.generateInvalidValueParams(schema);
231
+ try {
232
+ const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
233
+ const isError = this.isErrorResponse(response);
234
+ const errorInfo = this.extractErrorInfo(response);
235
+ // For invalid values, any error response is good
236
+ // The server is validating inputs properly
237
+ return {
238
+ toolName: tool.name,
239
+ testType: "invalid_values",
240
+ testInput,
241
+ expectedError: "Invalid parameter values",
242
+ actualResponse: {
243
+ isError,
244
+ errorCode: errorInfo.code,
245
+ errorMessage: errorInfo.message,
246
+ rawResponse: response,
247
+ },
248
+ passed: isError,
249
+ reason: isError ? undefined : "Tool accepted invalid values",
250
+ };
251
+ }
252
+ catch (error) {
253
+ // Check if the error message is meaningful (not just a generic crash)
254
+ const errorInfo = this.extractErrorInfo(error);
255
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
256
+ const isMeaningfulError = messageLower.includes("invalid") ||
257
+ messageLower.includes("not allowed") ||
258
+ messageLower.includes("must") ||
259
+ messageLower.includes("cannot") ||
260
+ messageLower.includes("validation") ||
261
+ messageLower.includes("error") ||
262
+ (errorInfo.message?.length ?? 0) > 15; // Even shorter messages OK for invalid values
263
+ return {
264
+ toolName: tool.name,
265
+ testType: "invalid_values",
266
+ testInput,
267
+ expectedError: "Invalid parameter values",
268
+ actualResponse: {
269
+ isError: true,
270
+ errorCode: errorInfo.code,
271
+ errorMessage: errorInfo.message,
272
+ rawResponse: error,
273
+ },
274
+ passed: isMeaningfulError,
275
+ reason: isMeaningfulError ? undefined : "Generic unhandled exception",
276
+ };
277
+ }
278
+ }
279
+ async testExcessiveInput(tool, callTool) {
280
+ const largeString = "x".repeat(100000); // 100KB string
281
+ const testInput = this.generateParamsWithValue(tool, largeString);
282
+ try {
283
+ const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
284
+ const isError = this.isErrorResponse(response);
285
+ const errorInfo = this.extractErrorInfo(response);
286
+ return {
287
+ toolName: tool.name,
288
+ testType: "excessive_input",
289
+ testInput: { ...testInput, value: "[100KB string]" }, // Don't store huge string
290
+ expectedError: "Input size limit exceeded",
291
+ actualResponse: {
292
+ isError,
293
+ errorCode: errorInfo.code,
294
+ errorMessage: errorInfo.message,
295
+ rawResponse: response ? "[response omitted]" : undefined,
296
+ },
297
+ passed: isError || response !== null, // Either error or handled gracefully
298
+ reason: !isError && !response ? "Tool crashed on large input" : undefined,
299
+ };
300
+ }
301
+ catch (error) {
302
+ // Check if the error message is meaningful (not just a generic crash)
303
+ const errorInfo = this.extractErrorInfo(error);
304
+ const messageLower = errorInfo.message?.toLowerCase() ?? "";
305
+ const isMeaningfulError = messageLower.includes("size") ||
306
+ messageLower.includes("large") ||
307
+ messageLower.includes("limit") ||
308
+ messageLower.includes("exceed") ||
309
+ messageLower.includes("too") ||
310
+ messageLower.includes("maximum") ||
311
+ (errorInfo.message?.length ?? 0) > 10; // Short messages OK for size limits
312
+ return {
313
+ toolName: tool.name,
314
+ testType: "excessive_input",
315
+ testInput: { value: "[100KB string]" },
316
+ expectedError: "Input size limit exceeded",
317
+ actualResponse: {
318
+ isError: true,
319
+ errorCode: errorInfo.code,
320
+ errorMessage: errorInfo.message,
321
+ rawResponse: "[error details omitted]",
322
+ },
323
+ passed: isMeaningfulError,
324
+ reason: isMeaningfulError ? undefined : "Generic unhandled exception",
325
+ };
326
+ }
327
+ }
328
+ getToolSchema(tool) {
329
+ if (!tool.inputSchema)
330
+ return {};
331
+ return typeof tool.inputSchema === "string"
332
+ ? this.safeJsonParse(tool.inputSchema)
333
+ : tool.inputSchema;
334
+ }
335
+ generateWrongTypeParams(schema) {
336
+ const params = {};
337
+ if (!schema?.properties)
338
+ return { value: 123 }; // Default wrong type
339
+ for (const [key, prop] of Object.entries(schema.properties)) {
340
+ // Intentionally use wrong types
341
+ switch (prop.type) {
342
+ case "string":
343
+ params[key] = 123; // Number instead of string
344
+ break;
345
+ case "number":
346
+ case "integer":
347
+ params[key] = "not a number"; // String instead of number
348
+ break;
349
+ case "boolean":
350
+ params[key] = "yes"; // String instead of boolean
351
+ break;
352
+ case "array":
353
+ params[key] = "not an array"; // String instead of array
354
+ break;
355
+ case "object":
356
+ params[key] = "not an object"; // String instead of object
357
+ break;
358
+ }
359
+ }
360
+ return params;
361
+ }
362
+ generateInvalidValueParams(schema) {
363
+ const params = {};
364
+ if (!schema?.properties)
365
+ return { value: null };
366
+ for (const [key, prop] of Object.entries(schema.properties)) {
367
+ if (prop.type === "string") {
368
+ if (prop.enum) {
369
+ params[key] = "not_in_enum"; // Value not in enum
370
+ }
371
+ else if (prop.format === "email") {
372
+ params[key] = "invalid-email"; // Invalid email
373
+ }
374
+ else if (prop.format === "uri") {
375
+ params[key] = "not://a/valid/uri"; // Invalid URI
376
+ }
377
+ else {
378
+ params[key] = ""; // Empty string
379
+ }
380
+ }
381
+ else if (prop.type === "number" || prop.type === "integer") {
382
+ if (prop.minimum !== undefined) {
383
+ params[key] = prop.minimum - 1; // Below minimum
384
+ }
385
+ else if (prop.maximum !== undefined) {
386
+ params[key] = prop.maximum + 1; // Above maximum
387
+ }
388
+ else {
389
+ params[key] = -999999; // Extreme value
390
+ }
391
+ }
392
+ }
393
+ return params;
394
+ }
395
+ generateParamsWithValue(tool, value) {
396
+ const schema = this.getToolSchema(tool);
397
+ const params = {};
398
+ if (schema?.properties) {
399
+ // Find first string parameter
400
+ for (const [key, prop] of Object.entries(schema.properties)) {
401
+ if (prop.type === "string") {
402
+ params[key] = value;
403
+ break;
404
+ }
405
+ }
406
+ }
407
+ if (Object.keys(params).length === 0) {
408
+ params.value = value; // Default parameter name
409
+ }
410
+ return params;
411
+ }
412
+ // isErrorResponse and extractErrorInfo moved to BaseAssessor for reuse across all assessors
413
+ calculateMetrics(tests, _passed) {
414
+ // Calculate enhanced score with bonus points for quality
415
+ let enhancedScore = 0;
416
+ let maxPossibleScore = 0;
417
+ tests.forEach((test) => {
418
+ // Phase 1: Exclude "invalid_values" tests from scoring (informational only)
419
+ // Reason: These tests penalize tools that handle edge cases gracefully (empty strings, etc.)
420
+ // Instead of rejecting them, which is often correct defensive programming.
421
+ // Real schema violations will be tested separately in Phase 2+.
422
+ if (test.testType === "invalid_values") {
423
+ return; // Skip scoring, but still included in testDetails
424
+ }
425
+ maxPossibleScore += 100; // Base score for each test
426
+ if (test.passed) {
427
+ enhancedScore += 100; // Base points for passing
428
+ // Extra points for specific field names in error
429
+ if (/\b(query|field|parameter|argument|prop|key)\b/i.test(test.actualResponse.errorMessage ?? "")) {
430
+ enhancedScore += 10;
431
+ maxPossibleScore += 10;
432
+ }
433
+ // Extra points for helpful context
434
+ if (test.actualResponse.errorMessage &&
435
+ test.actualResponse.errorMessage.length > 30) {
436
+ enhancedScore += 5;
437
+ maxPossibleScore += 5;
438
+ }
439
+ // Extra points for proper error codes
440
+ if (test.actualResponse.errorCode) {
441
+ enhancedScore += 5;
442
+ maxPossibleScore += 5;
443
+ }
444
+ }
445
+ });
446
+ const score = maxPossibleScore > 0 ? (enhancedScore / maxPossibleScore) * 100 : 0;
447
+ // Determine quality rating based on enhanced score
448
+ let quality;
449
+ if (score >= 85)
450
+ quality = "excellent";
451
+ else if (score >= 70)
452
+ quality = "good";
453
+ else if (score >= 50)
454
+ quality = "fair";
455
+ else
456
+ quality = "poor";
457
+ // Check for proper error codes and messages (only among actual errors)
458
+ const actualErrors = tests.filter((t) => t.actualResponse.isError);
459
+ const errorsWithCodes = actualErrors.filter((t) => t.actualResponse.errorCode !== undefined).length;
460
+ const errorsWithMessages = actualErrors.filter((t) => t.actualResponse.errorMessage &&
461
+ t.actualResponse.errorMessage.length > 10).length;
462
+ // Handle case when no tests were run
463
+ // Don't claim "Yes" for error codes/messages when we didn't test anything
464
+ const hasProperErrorCodes = tests.length === 0
465
+ ? false // No tests = can't assess
466
+ : actualErrors.length === 0
467
+ ? true // Tests run but no errors triggered = can't assess, assume OK
468
+ : errorsWithCodes / actualErrors.length >= 0.5;
469
+ const hasDescriptiveMessages = tests.length === 0
470
+ ? false // No tests = can't assess
471
+ : actualErrors.length === 0
472
+ ? true // Tests run but no errors triggered = can't assess, assume OK
473
+ : errorsWithMessages / actualErrors.length >= 0.5;
474
+ const validatesInputs = tests
475
+ .filter((t) => ["missing_required", "wrong_type"].includes(t.testType))
476
+ .some((t) => t.passed);
477
+ return {
478
+ mcpComplianceScore: score,
479
+ errorResponseQuality: quality,
480
+ hasProperErrorCodes,
481
+ hasDescriptiveMessages,
482
+ validatesInputs,
483
+ testDetails: tests,
484
+ };
485
+ }
486
+ determineErrorHandlingStatus(metrics, testCount) {
487
+ // If no tests were run, we can't determine error handling status
488
+ if (testCount === 0)
489
+ return "NEED_MORE_INFO";
490
+ // More lenient thresholds that recognize good error handling
491
+ if (metrics.mcpComplianceScore >= 70)
492
+ return "PASS";
493
+ if (metrics.mcpComplianceScore >= 40)
494
+ return "NEED_MORE_INFO";
495
+ return "FAIL";
496
+ }
497
+ generateExplanation(metrics, tests) {
498
+ // Handle case when no tools were tested
499
+ if (tests.length === 0) {
500
+ return "No tools selected for error handling testing. Select tools to run error handling assessments.";
501
+ }
502
+ const parts = [];
503
+ // Filter out invalid_values for scoring context
504
+ const scoredTests = tests.filter((t) => t.testType !== "invalid_values");
505
+ const passedScoredTests = scoredTests.filter((t) => t.passed).length;
506
+ const totalScoredTests = scoredTests.length;
507
+ parts.push(`Error handling compliance score: ${metrics.mcpComplianceScore.toFixed(1)}% (${passedScoredTests}/${totalScoredTests} scored tests passed).`);
508
+ // Count how many types of validation are working (only scored tests)
509
+ const validationTypes = [];
510
+ if (tests.some((t) => t.testType === "missing_required" && t.passed)) {
511
+ validationTypes.push("missing parameter validation");
512
+ }
513
+ if (tests.some((t) => t.testType === "wrong_type" && t.passed)) {
514
+ validationTypes.push("type validation");
515
+ }
516
+ if (tests.some((t) => t.testType === "excessive_input" && t.passed)) {
517
+ validationTypes.push("input size validation");
518
+ }
519
+ // Add informational note about invalid_values tests
520
+ const invalidValuesTests = tests.filter((t) => t.testType === "invalid_values");
521
+ if (invalidValuesTests.length > 0) {
522
+ const passedInvalidValues = invalidValuesTests.filter((t) => t.passed).length;
523
+ validationTypes.push(`edge case handling (${passedInvalidValues}/${invalidValuesTests.length} - informational only)`);
524
+ }
525
+ if (validationTypes.length > 0) {
526
+ const scoredValidationCount = validationTypes.filter((v) => !v.includes("informational only")).length;
527
+ parts.push(`Implements ${scoredValidationCount}/3 validation types (scored): ${validationTypes.join(", ")}.`);
528
+ }
529
+ else {
530
+ parts.push("No input validation detected.");
531
+ }
532
+ parts.push(`${metrics.hasDescriptiveMessages ? "Has" : "Missing"} descriptive error messages,`, `${metrics.hasProperErrorCodes ? "uses" : "missing"} proper error codes.`);
533
+ // Count tools tested
534
+ const toolsTested = [...new Set(tests.map((t) => t.toolName))].length;
535
+ const totalTests = tests.length;
536
+ parts.push(`Tested ${toolsTested} tools with ${totalScoredTests} scored scenarios (${totalTests} total including informational).`);
537
+ return parts.join(" ");
538
+ }
539
+ generateRecommendations(metrics, tests) {
540
+ const recommendations = [];
541
+ if (!metrics.hasProperErrorCodes) {
542
+ recommendations.push("Implement consistent error codes for different error types");
543
+ }
544
+ if (!metrics.hasDescriptiveMessages) {
545
+ recommendations.push("Provide descriptive error messages that help users understand the issue");
546
+ }
547
+ if (!metrics.validatesInputs) {
548
+ recommendations.push("Implement proper input validation for all parameters");
549
+ }
550
+ const failedTypes = [
551
+ ...new Set(tests.filter((t) => !t.passed).map((t) => t.testType)),
552
+ ];
553
+ if (failedTypes.includes("missing_required")) {
554
+ recommendations.push("Validate and report missing required parameters");
555
+ }
556
+ if (failedTypes.includes("wrong_type")) {
557
+ recommendations.push("Implement type checking for all parameters");
558
+ }
559
+ if (failedTypes.includes("excessive_input")) {
560
+ recommendations.push("Implement input size limits and handle large inputs gracefully");
561
+ }
562
+ return recommendations;
563
+ }
564
+ }
@@ -86,6 +86,11 @@ export declare class SecurityAssessor extends BaseAssessor {
86
86
  * - State poisoning between chain steps
87
87
  */
88
88
  private runChainExploitationTests;
89
+ /**
90
+ * Compute audit analysis for automated consumption
91
+ * Pre-computes false positive likelihood and response uniformity per tool
92
+ */
93
+ private computeAuditAnalysis;
89
94
  /**
90
95
  * Determine overall risk level
91
96
  */
@@ -1 +1 @@
1
- {"version":3,"file":"SecurityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/SecurityAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EACL,kBAAkB,EAInB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAE9D,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,iBAAiB,CAAC;AAMzB,OAAO,EACL,gBAAgB,EAGjB,MAAM,yBAAyB,CAAC;AAEjC,qBAAa,gBAAiB,SAAQ,YAAY;IAChD;;;OAGG;IACH,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,YAAY,CAAiC;IAErD;;;OAGG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,IAAI;IAStD;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAOjC;;;OAGG;YACW,0BAA0B;IAuBxC;;;;;;;;;;;;;OAaG;gBAED,MAAM,EAAE,OAAO,8BAA8B,EAAE,uBAAuB,EACtE,OAAO,CAAC,EAAE,eAAe;IAwBrB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAwQrE;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoC7B;;OAEG;YACW,+BAA+B;IAiC7C;;;OAGG;YACW,yBAAyB;IA2CvC;;;;;;;OAOG;YACW,yBAAyB;IAqFvC;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAYjC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA0B/B;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAkEnC;;;OAGG;IACH,OAAO,CAAC,0BAA0B;CAgDnC"}
1
+ {"version":3,"file":"SecurityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/SecurityAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AAEH,OAAO,EACL,kBAAkB,EAInB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAE9D,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,iBAAiB,CAAC;AAMzB,OAAO,EACL,gBAAgB,EAGjB,MAAM,yBAAyB,CAAC;AAEjC,qBAAa,gBAAiB,SAAQ,YAAY;IAChD;;;OAGG;IACH,OAAO,CAAC,OAAO,CAAkB;IACjC,OAAO,CAAC,YAAY,CAAiC;IAErD;;;OAGG;IACH,eAAe,CAAC,MAAM,EAAE,gBAAgB,GAAG,IAAI,GAAG,IAAI;IAStD;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAOjC;;;OAGG;YACW,0BAA0B;IAuBxC;;;;;;;;;;;;;OAaG;gBAED,MAAM,EAAE,OAAO,8BAA8B,EAAE,uBAAuB,EACtE,OAAO,CAAC,EAAE,eAAe;IAwBrB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;IAiRrE;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoC7B;;OAEG;YACW,+BAA+B;IAiC7C;;;OAGG;YACW,yBAAyB;IA2CvC;;;;;;;OAOG;YACW,yBAAyB;IAqFvC;;;OAGG;IACH,OAAO,CAAC,oBAAoB;IA4E5B;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAYjC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IA0B/B;;OAEG;IACH,OAAO,CAAC,2BAA2B;IAkEnC;;;OAGG;IACH,OAAO,CAAC,0BAA0B;CAgDnC"}
@@ -115,6 +115,10 @@ export class SecurityAssessor extends BaseAssessor {
115
115
  this.logger.warn("No tool annotations context provided - severity adjustment disabled");
116
116
  }
117
117
  this.testers.payloadTester.setToolAnnotationsContext(context.toolAnnotationsContext);
118
+ // Pass transport type for context-aware test skipping (FP reduction)
119
+ if (context.transportType) {
120
+ this.testers.payloadTester.setTransportType(context.transportType);
121
+ }
118
122
  // Run universal security testing via extracted payload tester
119
123
  const allTests = await this.testers.payloadTester.runUniversalSecurityTests(toolsToTest, context.callTool, context.onProgress);
120
124
  // Separate connection errors from valid tests
@@ -272,6 +276,8 @@ export class SecurityAssessor extends BaseAssessor {
272
276
  const testCoveragePercent = totalTestsAttempted > 0
273
277
  ? Math.round((validTestsCompleted / totalTestsAttempted) * 100)
274
278
  : 0;
279
+ // Compute audit analysis (pre-computed FP analysis for automated consumption)
280
+ const auditAnalysis = this.computeAuditAnalysis(validTests);
275
281
  return {
276
282
  promptInjectionTests: allTests,
277
283
  vulnerabilities,
@@ -289,6 +295,7 @@ export class SecurityAssessor extends BaseAssessor {
289
295
  connectionErrorCount: connectionErrors.length,
290
296
  testCoveragePercent,
291
297
  },
298
+ auditAnalysis,
292
299
  };
293
300
  }
294
301
  /**
@@ -427,6 +434,61 @@ export class SecurityAssessor extends BaseAssessor {
427
434
  }
428
435
  return allResults;
429
436
  }
437
+ /**
438
+ * Compute audit analysis for automated consumption
439
+ * Pre-computes false positive likelihood and response uniformity per tool
440
+ */
441
+ computeAuditAnalysis(validTests) {
442
+ const highConfVulns = [];
443
+ const needsReview = [];
444
+ const fpLikelihood = {};
445
+ const uniformity = {};
446
+ // Group vulnerable tests by tool
447
+ const vulnByTool = {};
448
+ for (const test of validTests) {
449
+ if (test.vulnerable) {
450
+ const name = test.toolName || "unknown";
451
+ if (!vulnByTool[name])
452
+ vulnByTool[name] = [];
453
+ vulnByTool[name].push(test);
454
+ }
455
+ }
456
+ for (const [toolName, tests] of Object.entries(vulnByTool)) {
457
+ // High confidence vulnerabilities
458
+ const highConf = tests.filter((t) => !t.confidence || t.confidence === "high");
459
+ if (highConf.length > 0) {
460
+ highConfVulns.push(`${toolName}: ${highConf.map((t) => t.testName).join(", ")}`);
461
+ }
462
+ // Needs review (medium/low confidence)
463
+ const reviewNeeded = tests.filter((t) => t.confidence === "medium" || t.confidence === "low");
464
+ if (reviewNeeded.length > 0) {
465
+ needsReview.push(`${toolName}: ${reviewNeeded.map((t) => t.testName).join(", ")}`);
466
+ }
467
+ // Response uniformity analysis
468
+ const responses = tests.map((t) => (t.response || "").trim().substring(0, 200));
469
+ const uniqueResponses = new Set(responses).size;
470
+ uniformity[toolName] = {
471
+ uniqueResponses,
472
+ totalTests: tests.length,
473
+ };
474
+ // FP likelihood based on response uniformity
475
+ if (uniqueResponses === 1 && tests.length >= 2) {
476
+ fpLikelihood[toolName] = "HIGH";
477
+ }
478
+ else if (uniqueResponses < tests.length / 2 && tests.length >= 4) {
479
+ fpLikelihood[toolName] = "MEDIUM";
480
+ }
481
+ else {
482
+ fpLikelihood[toolName] = "LOW";
483
+ }
484
+ }
485
+ return {
486
+ highConfidenceVulnerabilities: highConfVulns,
487
+ needsReview,
488
+ falsePositiveLikelihood: fpLikelihood,
489
+ responseUniformity: uniformity,
490
+ };
491
+ }
430
492
  /**
431
493
  * Determine overall risk level
432
494
  */