@bryan-thompson/inspector-assessment 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/cli/build/assess-full.js +528 -0
  2. package/cli/build/assess-security.js +342 -0
  3. package/cli/build/cli.js +10 -1
  4. package/client/dist/assets/{OAuthCallback-TeTvKfWE.js → OAuthCallback-Xo9zS7pv.js} +1 -1
  5. package/client/dist/assets/{OAuthDebugCallback-DwA2sKy9.js → OAuthDebugCallback-CaIey8K_.js} +1 -1
  6. package/client/dist/assets/{index-BwAoxcvr.js → index-nCPw6E-c.js} +4 -4
  7. package/client/dist/index.html +1 -1
  8. package/client/lib/lib/assessmentTypes.d.ts +670 -0
  9. package/client/lib/lib/assessmentTypes.d.ts.map +1 -0
  10. package/client/lib/lib/assessmentTypes.js +220 -0
  11. package/client/lib/lib/aupPatterns.d.ts +63 -0
  12. package/client/lib/lib/aupPatterns.d.ts.map +1 -0
  13. package/client/lib/lib/aupPatterns.js +344 -0
  14. package/client/lib/lib/prohibitedLibraries.d.ts +76 -0
  15. package/client/lib/lib/prohibitedLibraries.d.ts.map +1 -0
  16. package/client/lib/lib/prohibitedLibraries.js +364 -0
  17. package/client/lib/lib/securityPatterns.d.ts +64 -0
  18. package/client/lib/lib/securityPatterns.d.ts.map +1 -0
  19. package/client/lib/lib/securityPatterns.js +453 -0
  20. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +88 -0
  21. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -0
  22. package/client/lib/services/assessment/AssessmentOrchestrator.js +418 -0
  23. package/client/lib/services/assessment/ResponseValidator.d.ts +69 -0
  24. package/client/lib/services/assessment/ResponseValidator.d.ts.map +1 -0
  25. package/client/lib/services/assessment/ResponseValidator.js +1038 -0
  26. package/client/lib/services/assessment/TestDataGenerator.d.ts +86 -0
  27. package/client/lib/services/assessment/TestDataGenerator.d.ts.map +1 -0
  28. package/client/lib/services/assessment/TestDataGenerator.js +669 -0
  29. package/client/lib/services/assessment/TestScenarioEngine.d.ts +91 -0
  30. package/client/lib/services/assessment/TestScenarioEngine.d.ts.map +1 -0
  31. package/client/lib/services/assessment/TestScenarioEngine.js +505 -0
  32. package/client/lib/services/assessment/ToolClassifier.d.ts +61 -0
  33. package/client/lib/services/assessment/ToolClassifier.d.ts.map +1 -0
  34. package/client/lib/services/assessment/ToolClassifier.js +349 -0
  35. package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts +160 -0
  36. package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -0
  37. package/client/lib/services/assessment/lib/claudeCodeBridge.js +357 -0
  38. package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts +100 -0
  39. package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts.map +1 -0
  40. package/client/lib/services/assessment/modules/AUPComplianceAssessor.js +474 -0
  41. package/client/lib/services/assessment/modules/BaseAssessor.d.ts +71 -0
  42. package/client/lib/services/assessment/modules/BaseAssessor.d.ts.map +1 -0
  43. package/client/lib/services/assessment/modules/BaseAssessor.js +171 -0
  44. package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts +45 -0
  45. package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts.map +1 -0
  46. package/client/lib/services/assessment/modules/DocumentationAssessor.js +355 -0
  47. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
  48. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
  49. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
  50. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts +20 -0
  51. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -0
  52. package/client/lib/services/assessment/modules/FunctionalityAssessor.js +253 -0
  53. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +70 -0
  54. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -0
  55. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +508 -0
  56. package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts +70 -0
  57. package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts.map +1 -0
  58. package/client/lib/services/assessment/modules/ManifestValidationAssessor.js +430 -0
  59. package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts +43 -0
  60. package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts.map +1 -0
  61. package/client/lib/services/assessment/modules/PortabilityAssessor.js +347 -0
  62. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts +41 -0
  63. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts.map +1 -0
  64. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.js +256 -0
  65. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts +176 -0
  66. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -0
  67. package/client/lib/services/assessment/modules/SecurityAssessor.js +1333 -0
  68. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +96 -0
  69. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -0
  70. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.js +593 -0
  71. package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts +21 -0
  72. package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts.map +1 -0
  73. package/client/lib/services/assessment/modules/UsabilityAssessor.js +241 -0
  74. package/client/lib/services/assessment/modules/index.d.ts +33 -0
  75. package/client/lib/services/assessment/modules/index.d.ts.map +1 -0
  76. package/client/lib/services/assessment/modules/index.js +35 -0
  77. package/package.json +7 -2
@@ -0,0 +1,91 @@
1
+ /**
2
+ * Test Scenario Engine for Multi-Scenario MCP Tool Testing
3
+ * Orchestrates comprehensive testing with multiple scenarios per tool
4
+ */
5
+ import { Tool, CompatibilityCallToolResult } from "@modelcontextprotocol/sdk/types.js";
6
+ import { TestScenario } from "./TestDataGenerator.js";
7
+ import { ValidationResult } from "./ResponseValidator.js";
8
/**
 * Outcome of running one generated test scenario against a tool.
 */
export interface ScenarioTestResult {
    /** The scenario (name, category, params) that was run. */
    scenario: TestScenario;
    /** Whether the scenario was attempted. */
    executed: boolean;
    /** Wall-clock duration of the attempt, in milliseconds. */
    executionTime: number;
    /** Raw tool response; absent when the call threw or timed out. */
    response?: CompatibilityCallToolResult;
    /** Message of the exception raised by the call, if any. */
    error?: string;
    /** Verdict produced for this scenario (validity, confidence, issues, evidence). */
    validation: ValidationResult;
}
16
/**
 * Aggregated result of testing one tool across all generated scenarios.
 */
export interface ComprehensiveToolTestResult {
    /** Name of the tool under test. */
    toolName: string;
    /** Whether the tool was put through the scenario run. */
    tested: boolean;
    /** Number of scenarios generated for the tool. */
    totalScenarios: number;
    /** Number of scenarios that were actually attempted. */
    scenariosExecuted: number;
    /** Attempted scenarios whose validation was valid. */
    scenariosPassed: number;
    /** Attempted scenarios whose validation was not valid. */
    scenariosFailed: number;
    /** Overall verdict derived from the scenario results. */
    overallStatus:
        | "fully_working"
        | "partially_working"
        | "connectivity_only"
        | "broken"
        | "untested";
    /** Confidence score for the verdict (reported as a percentage). */
    confidence: number;
    /** Total duration of the run, in milliseconds. */
    executionTime: number;
    /** Per-scenario results, in execution order. */
    scenarioResults: ScenarioTestResult[];
    /** Per-category roll-up of the scenario results. */
    summary: {
        /** True when a happy-path scenario validated successfully. */
        happyPathSuccess: boolean;
        /** Edge-case scenarios that validated successfully. */
        edgeCasesHandled: number;
        /** Edge-case scenarios that were executed. */
        edgeCasesTotal: number;
        /** Boundary scenarios that validated successfully. */
        boundariesRespected: number;
        /** Boundary scenarios that were executed. */
        boundariesTotal: number;
        /** True when an error-case scenario validated successfully. */
        errorHandlingWorks: boolean;
    };
    /** Result of the minimal → simple diagnostic probe, when it was run. */
    progressiveComplexity?: {
        /** Tool responded acceptably to minimal required parameters. */
        minimalWorks: boolean;
        /** Tool responded acceptably to simple realistic parameters. */
        simpleWorks: boolean;
        /** Stage at which the probe failed, or "none" when both stages passed. */
        failurePoint?: "minimal" | "simple" | "none";
    };
    /** Human-readable follow-up advice derived from the results. */
    recommendations: string[];
}
42
/**
 * Runs multiple generated scenarios against an MCP tool and aggregates the
 * outcome into a {@link ComprehensiveToolTestResult}.
 */
export declare class TestScenarioEngine {
    /** Per-call timeout, in milliseconds. */
    private testTimeout;
    /** Pause between scenarios, in milliseconds. */
    private delayBetweenTests;
    /**
     * @param testTimeout - Maximum time to wait for one tool call, in milliseconds.
     * @param delayBetweenTests - Pause inserted between scenarios, in milliseconds.
     */
    constructor(testTimeout?: number, delayBetweenTests?: number);
    /** Resolves after the given number of milliseconds (used for rate limiting). */
    private sleep;
    /**
     * Probes the tool with minimal and then simple parameters to locate the
     * first level of input complexity at which it stops working.
     *
     * @param tool - Tool definition, including its input schema.
     * @param callTool - Callback that invokes the tool by name with parameters.
     * @returns The progressive-complexity section of the comprehensive result.
     */
    testProgressiveComplexity(
        tool: Tool,
        callTool: (
            name: string,
            params: Record<string, unknown>,
        ) => Promise<CompatibilityCallToolResult>,
    ): Promise<ComprehensiveToolTestResult["progressiveComplexity"]>;
    /** Builds parameters containing only the required fields, with minimal values. */
    private generateMinimalParams;
    /** Builds parameters containing the required fields, with simple realistic values. */
    private generateSimpleParams;
    /** Produces a minimal value for a single property schema. */
    private generateMinimalValue;
    /**
     * Runs the progressive probe plus every generated scenario for a tool.
     *
     * @param tool - Tool definition, including its input schema.
     * @param callTool - Callback that invokes the tool by name with parameters.
     * @returns Aggregated counters, status, confidence and recommendations.
     */
    testToolComprehensively(
        tool: Tool,
        callTool: (
            name: string,
            params: Record<string, unknown>,
        ) => Promise<CompatibilityCallToolResult>,
    ): Promise<ComprehensiveToolTestResult>;
    /** Runs one scenario and validates its response. */
    private executeScenario;
    /** Maps the scenario results onto an overall status. */
    private determineOverallStatus;
    /** Computes the confidence score from coverage and results. */
    private calculateConfidence;
    /** Derives recommendations from the results. */
    private generateRecommendations;
    /**
     * Renders a comprehensive result as a Markdown report.
     *
     * @param result - Result to render.
     * @returns The report text.
     */
    static generateDetailedReport(result: ComprehensiveToolTestResult): string;
}
91
+ //# sourceMappingURL=TestScenarioEngine.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TestScenarioEngine.d.ts","sourceRoot":"","sources":["../../../src/services/assessment/TestScenarioEngine.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,IAAI,EACJ,2BAA2B,EAC5B,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EAAqB,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACtE,OAAO,EAEL,gBAAgB,EAEjB,MAAM,qBAAqB,CAAC;AAE7B,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,YAAY,CAAC;IACvB,QAAQ,EAAE,OAAO,CAAC;IAClB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,EAAE,2BAA2B,CAAC;IACvC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,gBAAgB,CAAC;CAC9B;AAED,MAAM,WAAW,2BAA2B;IAC1C,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,OAAO,CAAC;IAChB,cAAc,EAAE,MAAM,CAAC;IACvB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,eAAe,EAAE,MAAM,CAAC;IACxB,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EACT,eAAe,GACf,mBAAmB,GACnB,mBAAmB,GACnB,QAAQ,GACR,UAAU,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;IACnB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,EAAE,kBAAkB,EAAE,CAAC;IACtC,OAAO,EAAE;QACP,gBAAgB,EAAE,OAAO,CAAC;QAC1B,gBAAgB,EAAE,MAAM,CAAC;QACzB,cAAc,EAAE,MAAM,CAAC;QACvB,mBAAmB,EAAE,MAAM,CAAC;QAC5B,eAAe,EAAE,MAAM,CAAC;QACxB,kBAAkB,EAAE,OAAO,CAAC;KAC7B,CAAC;IAGF,qBAAqB,CAAC,EAAE;QACtB,YAAY,EAAE,OAAO,CAAC;QACtB,WAAW,EAAE,OAAO,CAAC;QACrB,YAAY,CAAC,EAAE,SAAS,GAAG,QAAQ,GAAG,MAAM,CAAC;KAC9C,CAAC;IACF,eAAe,EAAE,MAAM,EAAE,CAAC;CAC3B;AAED,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,WAAW,CAAS;IAC5B,OAAO,CAAC,iBAAiB,CAAS;gBAEtB,WAAW,GAAE,MAAa,EAAE,iBAAiB,GAAE,MAAU;IAKrE;;OAEG;YACW,KAAK;IAInB;;OAEG;IACG,yBAAyB,CAC7B,IAAI,EAAE,IAAI,EACV,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC5B,OAAO,CAAC,2BAA2B,CAAC,GACxC,OAAO,CAAC,2BAA2B,CAAC,uBAAuB,CAAC,CAAC;IAwEhE;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoB7B;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAuB5B;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAkB5B;;OAEG;IACG,uBAAuB,CAC3B,IAAI,EAAE,IAAI,EACV,QAAQ,EAAE,CACR,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,KAC5B,OAAO,CAAC,2BAA2B,CAAC,GACxC,OAAO,CAAC,2BAA2B,CAAC;IAiGvC;;OAEG;YACW,eAAe;IA0E7B;;OAEG;IACH,OAAO,CAAC,sBAAsB;IAoD9B;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAuC3B;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAoK/B;;OAEG;IACH,MAAM,CAAC,sBAAsB,CAAC,
MAAM,EAAE,2BAA2B,GAAG,MAAM;CAsD3E"}
@@ -0,0 +1,505 @@
1
+ /**
2
+ * Test Scenario Engine for Multi-Scenario MCP Tool Testing
3
+ * Orchestrates comprehensive testing with multiple scenarios per tool
4
+ */
5
+ import { TestDataGenerator } from "./TestDataGenerator.js";
6
+ import { ResponseValidator, } from "./ResponseValidator.js";
7
+ export class TestScenarioEngine {
8
+ testTimeout;
9
+ delayBetweenTests;
10
+ constructor(testTimeout = 5000, delayBetweenTests = 0) {
11
+ this.testTimeout = testTimeout;
12
+ this.delayBetweenTests = delayBetweenTests;
13
+ }
14
+ /**
15
+ * Sleep for specified milliseconds (for rate limiting)
16
+ */
17
+ async sleep(ms) {
18
+ return new Promise((resolve) => setTimeout(resolve, ms));
19
+ }
20
+ /**
21
+ * Test tool with progressive complexity to identify failure points
22
+ */
23
+ async testProgressiveComplexity(tool, callTool) {
24
+ const result = {
25
+ minimalWorks: false,
26
+ simpleWorks: false,
27
+ failurePoint: undefined,
28
+ };
29
+ // Test 1: Minimal complexity - absolute minimum params
30
+ const minimalParams = this.generateMinimalParams(tool);
31
+ try {
32
+ const minimalResult = await Promise.race([
33
+ callTool(tool.name, minimalParams),
34
+ new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), this.testTimeout)),
35
+ ]);
36
+ // Tool works if it returns successfully OR if it returns a business logic error
37
+ // (business logic errors indicate the tool is validating correctly)
38
+ const isBusinessError = minimalResult.isError
39
+ ? ResponseValidator.isBusinessLogicError({
40
+ tool,
41
+ input: minimalParams,
42
+ response: minimalResult,
43
+ scenarioCategory: "happy_path",
44
+ })
45
+ : false;
46
+ result.minimalWorks = !minimalResult.isError || isBusinessError;
47
+ }
48
+ catch {
49
+ result.minimalWorks = false;
50
+ result.failurePoint = "minimal";
51
+ return result; // Stop if minimal fails
52
+ }
53
+ // Test 2: Simple complexity - one required param with simple value
54
+ const simpleParams = this.generateSimpleParams(tool);
55
+ try {
56
+ const simpleResult = await Promise.race([
57
+ callTool(tool.name, simpleParams),
58
+ new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), this.testTimeout)),
59
+ ]);
60
+ // Tool works if it returns successfully OR if it returns a business logic error
61
+ const isBusinessError = simpleResult.isError
62
+ ? ResponseValidator.isBusinessLogicError({
63
+ tool,
64
+ input: simpleParams,
65
+ response: simpleResult,
66
+ scenarioCategory: "happy_path",
67
+ })
68
+ : false;
69
+ result.simpleWorks = !simpleResult.isError || isBusinessError;
70
+ }
71
+ catch {
72
+ result.simpleWorks = false;
73
+ result.failurePoint = "simple";
74
+ return result;
75
+ }
76
+ // Test 3 & 4: REMOVED (redundant with Happy Path and Edge Case scenarios)
77
+ // - Typical test duplicates Happy Path scenario (both use generateRealisticParams("typical"))
78
+ // - Maximum test duplicates Edge Case - Maximum Values scenario
79
+ // Progressive complexity now focuses on diagnostic testing (minimal → simple)
80
+ // Full coverage provided by multi-scenario testing with validation
81
+ result.failurePoint = "none"; // Passed minimal and simple tests
82
+ return result;
83
+ }
84
+ /**
85
+ * Generate minimal parameters (only absolutely required fields)
86
+ */
87
+ generateMinimalParams(tool) {
88
+ const params = {};
89
+ if (!tool.inputSchema || tool.inputSchema.type !== "object") {
90
+ return params;
91
+ }
92
+ // Only include required fields with minimal values
93
+ if (tool.inputSchema.required && tool.inputSchema.properties) {
94
+ for (const requiredField of tool.inputSchema.required) {
95
+ const schema = tool.inputSchema.properties[requiredField];
96
+ if (schema) {
97
+ params[requiredField] = this.generateMinimalValue(schema);
98
+ }
99
+ }
100
+ }
101
+ return params;
102
+ }
103
+ /**
104
+ * Generate simple parameters (required fields with simple values)
105
+ */
106
+ generateSimpleParams(tool) {
107
+ const params = {};
108
+ if (!tool.inputSchema || tool.inputSchema.type !== "object") {
109
+ return params;
110
+ }
111
+ // Include required fields with simple realistic values
112
+ if (tool.inputSchema.required && tool.inputSchema.properties) {
113
+ for (const requiredField of tool.inputSchema.required) {
114
+ const schema = tool.inputSchema.properties[requiredField];
115
+ if (schema) {
116
+ params[requiredField] = TestDataGenerator.generateSingleValue(requiredField, schema);
117
+ }
118
+ }
119
+ }
120
+ return params;
121
+ }
122
+ /**
123
+ * Generate minimal value for a schema
124
+ */
125
+ generateMinimalValue(schema) {
126
+ switch (schema.type) {
127
+ case "string":
128
+ return schema.enum ? schema.enum[0] : "test";
129
+ case "number":
130
+ case "integer":
131
+ return schema.minimum ?? 1;
132
+ case "boolean":
133
+ return true;
134
+ case "array":
135
+ return [];
136
+ case "object":
137
+ return {};
138
+ default:
139
+ return null;
140
+ }
141
+ }
142
+ /**
143
+ * Run comprehensive testing for a tool with multiple scenarios
144
+ */
145
+ async testToolComprehensively(tool, callTool) {
146
+ const startTime = Date.now();
147
+ // First, run progressive complexity testing
148
+ const progressiveComplexity = await this.testProgressiveComplexity(tool, callTool);
149
+ // Generate test scenarios
150
+ const scenarios = TestDataGenerator.generateTestScenarios(tool);
151
+ // Initialize result
152
+ const result = {
153
+ toolName: tool.name,
154
+ tested: true,
155
+ totalScenarios: scenarios.length,
156
+ scenariosExecuted: 0,
157
+ scenariosPassed: 0,
158
+ scenariosFailed: 0,
159
+ overallStatus: "untested",
160
+ confidence: 0,
161
+ executionTime: 0,
162
+ scenarioResults: [],
163
+ summary: {
164
+ happyPathSuccess: false,
165
+ edgeCasesHandled: 0,
166
+ edgeCasesTotal: 0,
167
+ boundariesRespected: 0,
168
+ boundariesTotal: 0,
169
+ errorHandlingWorks: false,
170
+ },
171
+ progressiveComplexity, // Add progressive complexity analysis
172
+ recommendations: [],
173
+ };
174
+ // Execute each scenario
175
+ for (const scenario of scenarios) {
176
+ const scenarioResult = await this.executeScenario(tool, scenario, callTool);
177
+ result.scenarioResults.push(scenarioResult);
178
+ // Add delay between tests to avoid rate limiting
179
+ if (this.delayBetweenTests > 0) {
180
+ await this.sleep(this.delayBetweenTests);
181
+ }
182
+ if (scenarioResult.executed) {
183
+ result.scenariosExecuted++;
184
+ // Update counters based on validation
185
+ if (scenarioResult.validation.isValid) {
186
+ result.scenariosPassed++;
187
+ // Update summary based on category
188
+ switch (scenario.category) {
189
+ case "happy_path":
190
+ result.summary.happyPathSuccess = true;
191
+ break;
192
+ case "edge_case":
193
+ result.summary.edgeCasesHandled++;
194
+ break;
195
+ case "boundary":
196
+ result.summary.boundariesRespected++;
197
+ break;
198
+ case "error_case":
199
+ result.summary.errorHandlingWorks = true;
200
+ break;
201
+ }
202
+ }
203
+ else {
204
+ result.scenariosFailed++;
205
+ }
206
+ // Count totals for categories
207
+ switch (scenario.category) {
208
+ case "edge_case":
209
+ result.summary.edgeCasesTotal++;
210
+ break;
211
+ case "boundary":
212
+ result.summary.boundariesTotal++;
213
+ break;
214
+ }
215
+ }
216
+ }
217
+ // Calculate overall status and confidence
218
+ result.executionTime = Date.now() - startTime;
219
+ result.overallStatus = this.determineOverallStatus(result);
220
+ result.confidence = this.calculateConfidence(result);
221
+ result.recommendations = this.generateRecommendations(result);
222
+ return result;
223
+ }
224
+ /**
225
+ * Execute a single test scenario
226
+ */
227
+ async executeScenario(tool, scenario, callTool) {
228
+ const startTime = Date.now();
229
+ try {
230
+ // Call tool with timeout
231
+ const response = await Promise.race([
232
+ callTool(tool.name, scenario.params),
233
+ new Promise((_, reject) => setTimeout(() => reject(new Error("Timeout")), this.testTimeout)),
234
+ ]);
235
+ // Validate response
236
+ const validationContext = {
237
+ tool,
238
+ input: scenario.params,
239
+ response,
240
+ scenarioCategory: scenario.category,
241
+ };
242
+ const validation = ResponseValidator.validateResponse(validationContext);
243
+ return {
244
+ scenario,
245
+ executed: true,
246
+ executionTime: Date.now() - startTime,
247
+ response,
248
+ validation,
249
+ };
250
+ }
251
+ catch (error) {
252
+ // Handle execution errors
253
+ const errorMessage = error instanceof Error ? error.message : String(error);
254
+ // Create error validation result
255
+ const validation = {
256
+ isValid: false,
257
+ isError: true,
258
+ confidence: 0,
259
+ issues: [`Execution error: ${errorMessage}`],
260
+ evidence: [],
261
+ classification: "broken",
262
+ };
263
+ // For error scenarios, exceptions might be expected
264
+ if (scenario.category === "error_case" &&
265
+ !errorMessage.includes("Timeout")) {
266
+ validation.isValid = true;
267
+ validation.confidence = 80;
268
+ validation.classification = "partially_working";
269
+ validation.evidence.push("Tool properly rejected invalid input with exception");
270
+ }
271
+ return {
272
+ scenario,
273
+ executed: true,
274
+ executionTime: Date.now() - startTime,
275
+ error: errorMessage,
276
+ validation,
277
+ };
278
+ }
279
+ }
280
+ /**
281
+ * Determine overall status based on scenario results
282
+ */
283
+ determineOverallStatus(result) {
284
+ // If no scenarios executed, it's untested
285
+ if (result.scenariosExecuted === 0) {
286
+ return "untested";
287
+ }
288
+ // Check how many "failures" are actually business logic validation (tool working correctly)
289
+ const businessLogicSuccesses = result.scenarioResults.filter((sr) => sr.validation.classification === "fully_working" &&
290
+ sr.validation.evidence.some((e) => e.includes("business logic"))).length;
291
+ // Adjust pass rate to include business logic validation as successes
292
+ const actualPasses = result.scenariosPassed + businessLogicSuccesses;
293
+ const adjustedPassRate = Math.min(1, actualPasses / result.scenariosExecuted);
294
+ // Check critical scenarios
295
+ const happyPathResult = result.scenarioResults.find((sr) => sr.scenario.category === "happy_path");
296
+ const happyPathWorks = result.summary.happyPathSuccess ||
297
+ happyPathResult?.validation.classification === "fully_working";
298
+ const errorHandlingWorks = result.summary.errorHandlingWorks;
299
+ // Determine status based on adjusted metrics
300
+ if (adjustedPassRate >= 0.9 && errorHandlingWorks) {
301
+ return "fully_working";
302
+ }
303
+ else if (adjustedPassRate >= 0.7 && errorHandlingWorks) {
304
+ return "partially_working";
305
+ }
306
+ else if (adjustedPassRate >= 0.4 ||
307
+ (errorHandlingWorks && businessLogicSuccesses > 0)) {
308
+ return "partially_working"; // Tool validates correctly even if test data is invalid
309
+ }
310
+ else if (adjustedPassRate >= 0.2 ||
311
+ happyPathWorks ||
312
+ businessLogicSuccesses > 0) {
313
+ return "connectivity_only";
314
+ }
315
+ else {
316
+ return "broken";
317
+ }
318
+ }
319
+ /**
320
+ * Calculate confidence score based on test coverage and results
321
+ */
322
+ calculateConfidence(result) {
323
+ // Base confidence on execution rate
324
+ const executionRate = result.scenariosExecuted / result.totalScenarios;
325
+ let confidence = executionRate * 100;
326
+ // Adjust based on pass rate
327
+ const passRate = result.scenariosExecuted > 0
328
+ ? result.scenariosPassed / result.scenariosExecuted
329
+ : 0;
330
+ confidence *= passRate;
331
+ // Bonus for critical scenarios
332
+ if (result.summary.happyPathSuccess) {
333
+ confidence = Math.min(100, confidence + 10);
334
+ }
335
+ if (result.summary.errorHandlingWorks) {
336
+ confidence = Math.min(100, confidence + 5);
337
+ }
338
+ // Penalty for low test coverage
339
+ if (result.scenariosExecuted < 3) {
340
+ confidence *= 0.7;
341
+ }
342
+ // Consider validation confidence from individual scenarios
343
+ if (result.scenarioResults.length > 0) {
344
+ const avgValidationConfidence = result.scenarioResults
345
+ .map((sr) => sr.validation.confidence)
346
+ .reduce((a, b) => a + b, 0) / result.scenarioResults.length;
347
+ // Weighted average with execution confidence
348
+ confidence = confidence * 0.6 + avgValidationConfidence * 0.4;
349
+ }
350
+ return Math.round(confidence);
351
+ }
352
+ /**
353
+ * Generate recommendations based on test results
354
+ */
355
+ generateRecommendations(result) {
356
+ const recommendations = [];
357
+ // Add progressive complexity insights
358
+ if (result.progressiveComplexity) {
359
+ const pc = result.progressiveComplexity;
360
+ if (pc.failurePoint) {
361
+ switch (pc.failurePoint) {
362
+ case "minimal":
363
+ recommendations.push("⚠️ Tool fails with minimal parameters - check basic connectivity and required field handling");
364
+ break;
365
+ case "simple":
366
+ recommendations.push("Tool works with minimal params but fails with simple realistic data");
367
+ recommendations.push("Check parameter validation and type handling");
368
+ break;
369
+ case "none":
370
+ recommendations.push("✅ Progressive complexity tests passed - see scenario results for typical and edge case coverage");
371
+ break;
372
+ }
373
+ }
374
+ }
375
+ // Check if most failures are business logic errors
376
+ const businessErrorCount = result.scenarioResults.filter((sr) => sr.validation.classification === "fully_working" &&
377
+ sr.validation.evidence.some((e) => e.includes("business logic"))).length;
378
+ if (businessErrorCount > result.scenariosFailed * 0.7) {
379
+ // Most failures are actually business logic validation - tool is working!
380
+ recommendations.push("✅ Tool properly validates business logic and rejects invalid resources");
381
+ recommendations.push("Note: Test failures are due to synthetic test data, not tool malfunction");
382
+ return recommendations;
383
+ }
384
+ // Check happy path
385
+ if (!result.summary.happyPathSuccess) {
386
+ // Check if happy path failed due to business logic
387
+ const happyPathResult = result.scenarioResults.find((sr) => sr.scenario.category === "happy_path");
388
+ if (happyPathResult?.validation.classification === "fully_working") {
389
+ recommendations.push("Tool works correctly but requires valid resource IDs (test data uses synthetic IDs)");
390
+ }
391
+ else {
392
+ recommendations.push("Fix basic functionality - happy path scenario is failing");
393
+ }
394
+ }
395
+ // Check error handling
396
+ if (!result.summary.errorHandlingWorks) {
397
+ recommendations.push("Improve error handling - tool doesn't properly validate inputs");
398
+ }
399
+ // Check edge cases
400
+ if (result.summary.edgeCasesTotal > 0 &&
401
+ result.summary.edgeCasesHandled < result.summary.edgeCasesTotal) {
402
+ const failedEdgeCases = result.summary.edgeCasesTotal - result.summary.edgeCasesHandled;
403
+ // Check if edge case failures are business logic errors
404
+ const edgeCaseBusinessErrors = result.scenarioResults.filter((sr) => sr.scenario.category === "edge_case" &&
405
+ sr.validation.classification === "fully_working").length;
406
+ if (edgeCaseBusinessErrors > 0) {
407
+ recommendations.push(`Edge cases properly validate business rules (${edgeCaseBusinessErrors} validation checks working)`);
408
+ }
409
+ else {
410
+ recommendations.push(`Handle edge cases better - ${failedEdgeCases} edge case(s) failed`);
411
+ }
412
+ }
413
+ // Check boundaries
414
+ if (result.summary.boundariesTotal > 0 &&
415
+ result.summary.boundariesRespected < result.summary.boundariesTotal) {
416
+ const failedBoundaries = result.summary.boundariesTotal - result.summary.boundariesRespected;
417
+ recommendations.push(`Respect schema boundaries - ${failedBoundaries} boundary test(s) failed`);
418
+ }
419
+ // Analyze specific validation issues
420
+ const allIssues = new Set();
421
+ const allEvidence = new Set();
422
+ for (const scenarioResult of result.scenarioResults) {
423
+ scenarioResult.validation.issues.forEach((issue) => allIssues.add(issue));
424
+ scenarioResult.validation.evidence.forEach((evidence) => allEvidence.add(evidence));
425
+ }
426
+ // Add specific recommendations based on common issues
427
+ if (allIssues.has("Response appears to just echo input")) {
428
+ recommendations.push("Implement actual functionality - tool is just echoing inputs");
429
+ }
430
+ if (allIssues.has("Response content is too short to be meaningful")) {
431
+ recommendations.push("Return more substantial responses with actual data");
432
+ }
433
+ if (allIssues.has("Response doesn't demonstrate clear functionality")) {
434
+ recommendations.push("Ensure responses clearly demonstrate the tool's intended purpose");
435
+ }
436
+ // Add evidence-based assessment summary
437
+ if (result.overallStatus === "fully_working") {
438
+ recommendations.push(`✅ All test categories passed: ${result.scenariosPassed}/${result.totalScenarios} scenarios verified (happy path, edge cases, boundaries, error handling)`);
439
+ }
440
+ else if (result.overallStatus === "partially_working") {
441
+ const failedCount = result.scenariosFailed;
442
+ const categories = [];
443
+ if (!result.summary.happyPathSuccess)
444
+ categories.push("happy path");
445
+ if (result.summary.edgeCasesHandled < result.summary.edgeCasesTotal)
446
+ categories.push("edge cases");
447
+ if (result.summary.boundariesRespected < result.summary.boundariesTotal)
448
+ categories.push("boundaries");
449
+ if (!result.summary.errorHandlingWorks)
450
+ categories.push("error handling");
451
+ recommendations.push(`⚠️ Partial functionality: ${result.scenariosPassed}/${result.totalScenarios} scenarios passed, ${failedCount} failed. Issues in: ${categories.join(", ")}`);
452
+ }
453
+ return recommendations;
454
+ }
455
+ /**
456
+ * Generate a detailed report for a tool test
457
+ */
458
+ static generateDetailedReport(result) {
459
+ const lines = [
460
+ `## Tool: ${result.toolName}`,
461
+ ``,
462
+ `### Overall Assessment`,
463
+ `- **Status**: ${result.overallStatus}`,
464
+ `- **Confidence**: ${result.confidence}%`,
465
+ `- **Scenarios**: ${result.scenariosPassed}/${result.scenariosExecuted} passed (${result.totalScenarios} total)`,
466
+ `- **Execution Time**: ${result.executionTime}ms`,
467
+ ``,
468
+ `### Summary`,
469
+ `- Happy Path: ${result.summary.happyPathSuccess ? "✅ Working" : "❌ Failed"}`,
470
+ `- Edge Cases: ${result.summary.edgeCasesHandled}/${result.summary.edgeCasesTotal} handled`,
471
+ `- Boundaries: ${result.summary.boundariesRespected}/${result.summary.boundariesTotal} respected`,
472
+ `- Error Handling: ${result.summary.errorHandlingWorks ? "✅ Working" : "❌ Failed"}`,
473
+ ``,
474
+ ];
475
+ if (result.recommendations.length > 0) {
476
+ lines.push(`### Recommendations`);
477
+ result.recommendations.forEach((rec) => {
478
+ lines.push(`- ${rec}`);
479
+ });
480
+ lines.push(``);
481
+ }
482
+ // Add scenario details
483
+ lines.push(`### Scenario Details`);
484
+ for (const scenarioResult of result.scenarioResults) {
485
+ const status = scenarioResult.validation.isValid ? "✅" : "❌";
486
+ lines.push(`- **${scenarioResult.scenario.name}** ${status}`);
487
+ lines.push(` - Category: ${scenarioResult.scenario.category}`);
488
+ lines.push(` - Confidence: ${scenarioResult.validation.confidence}%`);
489
+ lines.push(` - Classification: ${scenarioResult.validation.classification}`);
490
+ if (scenarioResult.validation.issues.length > 0) {
491
+ lines.push(` - Issues:`);
492
+ scenarioResult.validation.issues.forEach((issue) => {
493
+ lines.push(` - ${issue}`);
494
+ });
495
+ }
496
+ if (scenarioResult.validation.evidence.length > 0) {
497
+ lines.push(` - Evidence:`);
498
+ scenarioResult.validation.evidence.forEach((evidence) => {
499
+ lines.push(` - ${evidence}`);
500
+ });
501
+ }
502
+ }
503
+ return lines.join("\n");
504
+ }
505
+ }
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Tool Classifier
3
+ * Categorizes MCP tools based on name/description to select appropriate security test patterns
4
+ *
5
+ * Validated against broken-mcp server with 16 tools (6 HIGH, 4 MEDIUM, 6 SAFE)
6
+ */
7
/**
 * Categories a tool can be classified into when selecting security test
 * patterns. Values are the string identifiers; note that several values are
 * shorter than their member names (e.g. URL_FETCHER is "fetcher").
 */
export declare enum ToolCategory {
    CALCULATOR = "calculator",
    SYSTEM_EXEC = "system_exec",
    DATA_ACCESS = "data_access",
    TOOL_OVERRIDE = "tool_override",
    CONFIG_MODIFIER = "config_modifier",
    URL_FETCHER = "fetcher",
    UNICODE_PROCESSOR = "unicode",
    JSON_PARSER = "parser",
    PACKAGE_INSTALLER = "installer",
    RUG_PULL = "rug_pull",
    SAFE_STORAGE = "safe_storage",
    API_WRAPPER = "api_wrapper",
    SEARCH_RETRIEVAL = "search_retrieval",
    CRUD_CREATION = "crud_creation",
    READ_ONLY_INFO = "read_only_info",
    /** Fallback category (presumably used when nothing else matches; confirm in ToolClassifier). */
    GENERIC = "generic"
}
25
/**
 * Result of classifying a single tool.
 */
export interface ToolClassification {
    /** Name of the classified tool. */
    toolName: string;
    /** Every category the tool matched; a tool may match more than one. */
    categories: ToolCategory[];
    /** Confidence in the classification (scale not visible here; confirm in the implementation). */
    confidence: number;
    /** Human-readable explanation of why the categories were chosen. */
    reasoning: string;
}
31
+ /**
32
+ * Classifies MCP tools into vulnerability categories based on naming patterns
33
+ * and descriptions. Uses patterns validated by testing against broken-mcp server.
34
+ */
35
/**
 * Classifies MCP tools into {@link ToolCategory} values from their name and
 * optional description.
 */
export declare class ToolClassifier {
    /**
     * Classify one tool. The result lists every matching category.
     *
     * @param toolName - Name of the tool.
     * @param description - Optional tool description.
     * @returns The classification for the tool.
     */
    classify(toolName: string, description?: string): ToolClassification;
    /** Tests text against a set of patterns. */
    private matchesPattern;
    /**
     * List the tool categories (intended for testing/debugging).
     *
     * @returns The categories as an array.
     */
    static getAllCategories(): ToolCategory[];
    /**
     * Look up the risk level associated with a category.
     *
     * @param category - Category to look up.
     * @returns "HIGH", "MEDIUM" or "LOW".
     */
    static getRiskLevel(category: ToolCategory): "HIGH" | "MEDIUM" | "LOW";
    /**
     * Classify several tools in one call.
     *
     * @param tools - Tools to classify, each with a name and optional description.
     * @returns One classification per tool.
     */
    classifyBatch(
        tools: Array<{
            name: string;
            description?: string;
        }>,
    ): ToolClassification[];
}
61
+ //# sourceMappingURL=ToolClassifier.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ToolClassifier.d.ts","sourceRoot":"","sources":["../../../src/services/assessment/ToolClassifier.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,oBAAY,YAAY;IACtB,UAAU,eAAe;IACzB,WAAW,gBAAgB;IAC3B,WAAW,gBAAgB;IAC3B,aAAa,kBAAkB;IAC/B,eAAe,oBAAoB;IACnC,WAAW,YAAY;IACvB,iBAAiB,YAAY;IAC7B,WAAW,WAAW;IACtB,iBAAiB,cAAc;IAC/B,QAAQ,aAAa;IACrB,YAAY,iBAAiB;IAC7B,WAAW,gBAAgB;IAC3B,gBAAgB,qBAAqB;IACrC,aAAa,kBAAkB;IAC/B,cAAc,mBAAmB;IACjC,OAAO,YAAY;CACpB;AAED,MAAM,WAAW,kBAAkB;IACjC,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,YAAY,EAAE,CAAC;IAC3B,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;GAGG;AACH,qBAAa,cAAc;IACzB;;;OAGG;IACH,QAAQ,CAAC,QAAQ,EAAE,MAAM,EAAE,WAAW,CAAC,EAAE,MAAM,GAAG,kBAAkB;IA2UpE;;OAEG;IACH,OAAO,CAAC,cAAc;IAItB;;OAEG;IACH,MAAM,CAAC,gBAAgB,IAAI,YAAY,EAAE;IAIzC;;OAEG;IACH,MAAM,CAAC,YAAY,CAAC,QAAQ,EAAE,YAAY,GAAG,MAAM,GAAG,QAAQ,GAAG,KAAK;IAgCtE;;OAEG;IACH,aAAa,CACX,KAAK,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,WAAW,CAAC,EAAE,MAAM,CAAA;KAAE,CAAC,GACnD,kBAAkB,EAAE;CAGxB"}