@bryan-thompson/inspector-assessment 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. package/cli/build/assess-full.js +528 -0
  2. package/cli/build/assess-security.js +342 -0
  3. package/cli/build/cli.js +10 -1
  4. package/client/dist/assets/{OAuthCallback-TeTvKfWE.js → OAuthCallback-Xo9zS7pv.js} +1 -1
  5. package/client/dist/assets/{OAuthDebugCallback-DwA2sKy9.js → OAuthDebugCallback-CaIey8K_.js} +1 -1
  6. package/client/dist/assets/{index-BwAoxcvr.js → index-nCPw6E-c.js} +4 -4
  7. package/client/dist/index.html +1 -1
  8. package/client/lib/lib/assessmentTypes.d.ts +670 -0
  9. package/client/lib/lib/assessmentTypes.d.ts.map +1 -0
  10. package/client/lib/lib/assessmentTypes.js +220 -0
  11. package/client/lib/lib/aupPatterns.d.ts +63 -0
  12. package/client/lib/lib/aupPatterns.d.ts.map +1 -0
  13. package/client/lib/lib/aupPatterns.js +344 -0
  14. package/client/lib/lib/prohibitedLibraries.d.ts +76 -0
  15. package/client/lib/lib/prohibitedLibraries.d.ts.map +1 -0
  16. package/client/lib/lib/prohibitedLibraries.js +364 -0
  17. package/client/lib/lib/securityPatterns.d.ts +64 -0
  18. package/client/lib/lib/securityPatterns.d.ts.map +1 -0
  19. package/client/lib/lib/securityPatterns.js +453 -0
  20. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +88 -0
  21. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -0
  22. package/client/lib/services/assessment/AssessmentOrchestrator.js +418 -0
  23. package/client/lib/services/assessment/ResponseValidator.d.ts +69 -0
  24. package/client/lib/services/assessment/ResponseValidator.d.ts.map +1 -0
  25. package/client/lib/services/assessment/ResponseValidator.js +1038 -0
  26. package/client/lib/services/assessment/TestDataGenerator.d.ts +86 -0
  27. package/client/lib/services/assessment/TestDataGenerator.d.ts.map +1 -0
  28. package/client/lib/services/assessment/TestDataGenerator.js +669 -0
  29. package/client/lib/services/assessment/TestScenarioEngine.d.ts +91 -0
  30. package/client/lib/services/assessment/TestScenarioEngine.d.ts.map +1 -0
  31. package/client/lib/services/assessment/TestScenarioEngine.js +505 -0
  32. package/client/lib/services/assessment/ToolClassifier.d.ts +61 -0
  33. package/client/lib/services/assessment/ToolClassifier.d.ts.map +1 -0
  34. package/client/lib/services/assessment/ToolClassifier.js +349 -0
  35. package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts +160 -0
  36. package/client/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -0
  37. package/client/lib/services/assessment/lib/claudeCodeBridge.js +357 -0
  38. package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts +100 -0
  39. package/client/lib/services/assessment/modules/AUPComplianceAssessor.d.ts.map +1 -0
  40. package/client/lib/services/assessment/modules/AUPComplianceAssessor.js +474 -0
  41. package/client/lib/services/assessment/modules/BaseAssessor.d.ts +71 -0
  42. package/client/lib/services/assessment/modules/BaseAssessor.d.ts.map +1 -0
  43. package/client/lib/services/assessment/modules/BaseAssessor.js +171 -0
  44. package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts +45 -0
  45. package/client/lib/services/assessment/modules/DocumentationAssessor.d.ts.map +1 -0
  46. package/client/lib/services/assessment/modules/DocumentationAssessor.js +355 -0
  47. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
  48. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
  49. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
  50. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts +20 -0
  51. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -0
  52. package/client/lib/services/assessment/modules/FunctionalityAssessor.js +253 -0
  53. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +70 -0
  54. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -0
  55. package/client/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +508 -0
  56. package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts +70 -0
  57. package/client/lib/services/assessment/modules/ManifestValidationAssessor.d.ts.map +1 -0
  58. package/client/lib/services/assessment/modules/ManifestValidationAssessor.js +430 -0
  59. package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts +43 -0
  60. package/client/lib/services/assessment/modules/PortabilityAssessor.d.ts.map +1 -0
  61. package/client/lib/services/assessment/modules/PortabilityAssessor.js +347 -0
  62. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts +41 -0
  63. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts.map +1 -0
  64. package/client/lib/services/assessment/modules/ProhibitedLibrariesAssessor.js +256 -0
  65. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts +176 -0
  66. package/client/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -0
  67. package/client/lib/services/assessment/modules/SecurityAssessor.js +1333 -0
  68. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +96 -0
  69. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -0
  70. package/client/lib/services/assessment/modules/ToolAnnotationAssessor.js +593 -0
  71. package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts +21 -0
  72. package/client/lib/services/assessment/modules/UsabilityAssessor.d.ts.map +1 -0
  73. package/client/lib/services/assessment/modules/UsabilityAssessor.js +241 -0
  74. package/client/lib/services/assessment/modules/index.d.ts +33 -0
  75. package/client/lib/services/assessment/modules/index.d.ts.map +1 -0
  76. package/client/lib/services/assessment/modules/index.js +35 -0
  77. package/package.json +7 -2
@@ -0,0 +1,1038 @@
1
+ /**
2
+ * Response Validator for MCP Tool Testing
3
+ * Validates that tool responses are actually functional, not just present
4
+ */
5
+ export class ResponseValidator {
6
+ /**
7
+ * Validate a tool response comprehensively
8
+ */
9
+ static validateResponse(context) {
10
+ const result = {
11
+ isValid: false,
12
+ isError: false,
13
+ confidence: 0,
14
+ issues: [],
15
+ evidence: [],
16
+ classification: "broken",
17
+ };
18
+ // Check if response indicates an error
19
+ if (context.response.isError) {
20
+ result.isError = true;
21
+ // Simplified: ANY error response means the tool is functional
22
+ // The tool responded (even with an error) - that's functionality!
23
+ result.isValid = true;
24
+ result.classification = "fully_working";
25
+ result.confidence = 100;
26
+ result.evidence.push("Tool responded with error (tool is functional)");
27
+ // Add context about the error for debugging
28
+ const content = context.response.content;
29
+ const errorText = content?.[0]?.text || "Unknown error";
30
+ result.evidence.push(`Error message: ${errorText.substring(0, 100)}`);
31
+ return result;
32
+ }
33
+ // Simplified functionality validation:
34
+ // If the tool responds with content, it's functional.
35
+ // We don't check response quality/structure - that's for error handling tests.
36
+ // Check 1: Response has content
37
+ if (!context.response.content) {
38
+ result.issues.push("Response has no content");
39
+ result.classification = "broken";
40
+ result.confidence = 0;
41
+ return result;
42
+ }
43
+ // Check 2: Content is a non-empty array
44
+ const content = context.response.content;
45
+ if (!Array.isArray(content) || content.length === 0) {
46
+ result.issues.push("Response content is empty or not an array");
47
+ result.classification = "broken";
48
+ result.confidence = 0;
49
+ return result;
50
+ }
51
+ // Tool responded successfully - it's functional!
52
+ result.isValid = true;
53
+ result.classification = "fully_working";
54
+ result.confidence = 100;
55
+ result.evidence.push("Tool responded successfully with content");
56
+ // Add details about response type for debugging
57
+ const hasText = content.some((item) => item.type === "text");
58
+ const hasResource = content.some((item) => item.type === "resource");
59
+ if (hasText) {
60
+ result.evidence.push("Response includes text content");
61
+ }
62
+ if (hasResource) {
63
+ result.evidence.push("Response includes resource content");
64
+ }
65
+ return result;
66
+ }
67
+ /**
68
+ * Check if error is a business logic error (not a tool failure)
69
+ * These errors indicate the tool is working correctly but rejecting invalid business data
70
+ */
71
+ static isBusinessLogicError(context) {
72
+ const content = context.response.content;
73
+ const errorText = content?.[0]?.type === "text" && content[0].text
74
+ ? content[0].text.toLowerCase()
75
+ : JSON.stringify(context.response.content).toLowerCase();
76
+ // Extract any error code from the response
77
+ const errorCodeMatch = errorText.match(/(?:code|error_code)["\s:]+([^",\s]+)/);
78
+ const errorCode = errorCodeMatch ? errorCodeMatch[1] : null;
79
+ // MCP standard error codes that indicate proper validation
80
+ const mcpValidationCodes = [
81
+ "-32602", // Invalid params - tool is validating input correctly
82
+ "-32603", // Internal error - tool handled error gracefully
83
+ "invalid_params",
84
+ "validation_error",
85
+ "bad_request",
86
+ ];
87
+ if (errorCode &&
88
+ mcpValidationCodes.some((code) => errorText.includes(code))) {
89
+ return true; // Tool is properly implementing MCP error codes
90
+ }
91
+ // Common business logic error patterns that indicate the tool is working correctly
92
+ const businessErrorPatterns = [
93
+ // Resource validation errors (tool is checking if resources exist)
94
+ "not found",
95
+ "does not exist",
96
+ "doesn't exist",
97
+ "no such",
98
+ "cannot find",
99
+ "could not find",
100
+ "unable to find",
101
+ "invalid id",
102
+ "invalid identifier",
103
+ "unknown resource",
104
+ "resource not found",
105
+ "entity not found",
106
+ "object not found",
107
+ "record not found",
108
+ "item not found",
109
+ "node not found",
110
+ "nodes not found",
111
+ "no entities",
112
+ "no results",
113
+ "not exist",
114
+ "no nodes",
115
+ "no matching",
116
+ "no matches",
117
+ "empty result",
118
+ "zero results",
119
+ "nothing found",
120
+ "no data",
121
+ "no items",
122
+ // Data validation errors (tool is validating data format/content)
123
+ "invalid format",
124
+ "invalid value",
125
+ "invalid type",
126
+ "invalid input",
127
+ "invalid parameter",
128
+ "invalid data",
129
+ "type mismatch",
130
+ "schema validation",
131
+ "constraint violation",
132
+ "out of range",
133
+ "exceeds maximum",
134
+ "below minimum",
135
+ "invalid length",
136
+ "pattern mismatch",
137
+ "regex failed",
138
+ "must have",
139
+ "must be",
140
+ // Permission and authorization (tool is checking access rights)
141
+ "unauthorized",
142
+ "permission denied",
143
+ "access denied",
144
+ "forbidden",
145
+ "not authorized",
146
+ "insufficient permissions",
147
+ "no access",
148
+ "authentication required",
149
+ "token expired",
150
+ "invalid credentials",
151
+ // Business rule validation (tool is enforcing business logic)
152
+ "already exists",
153
+ "duplicate",
154
+ "conflict",
155
+ "quota exceeded",
156
+ "limit reached",
157
+ "not allowed",
158
+ "operation not permitted",
159
+ "invalid state",
160
+ "precondition failed",
161
+ "dependency not met",
162
+ // API-specific validation
163
+ "invalid parent",
164
+ "invalid reference",
165
+ "invalid relationship",
166
+ "missing required",
167
+ "required field",
168
+ "required parameter",
169
+ "validation failed",
170
+ "invalid request",
171
+ "bad request",
172
+ "malformed",
173
+ // Rate limiting (shows API integration is working)
174
+ "rate limit",
175
+ "too many requests",
176
+ "throttled",
177
+ "quota",
178
+ "exceeded",
179
+ // API operational/billing errors (shows API integration is working)
180
+ "insufficient credits",
181
+ "credits",
182
+ "no credits",
183
+ "credit balance",
184
+ "billing",
185
+ "subscription",
186
+ "plan upgrade",
187
+ "payment required",
188
+ "account suspended",
189
+ "trial expired",
190
+ "usage limit",
191
+ // Configuration validation
192
+ "not configured",
193
+ "not enabled",
194
+ "not available",
195
+ "not supported",
196
+ "feature disabled",
197
+ "service unavailable",
198
+ ];
199
+ // Check if error matches any business logic pattern
200
+ const hasBusinessErrorPattern = businessErrorPatterns.some((pattern) => errorText.includes(pattern));
201
+ // HTTP status codes that indicate business logic validation
202
+ const businessStatusCodes = [
203
+ "400", // Bad Request - input validation
204
+ "401", // Unauthorized - auth validation
205
+ "403", // Forbidden - permission validation
206
+ "404", // Not Found - resource validation
207
+ "409", // Conflict - state validation
208
+ "422", // Unprocessable Entity - semantic validation
209
+ "429", // Too Many Requests - rate limit validation
210
+ ];
211
+ const hasBusinessStatusCode = businessStatusCodes.some((code) => errorText.includes(code) ||
212
+ errorText.includes(`status: ${code}`) ||
213
+ errorText.includes(`statuscode: ${code}`));
214
+ // Check for structured error response (indicates proper error handling)
215
+ const hasStructuredError = (errorText.includes("error") || errorText.includes("message")) &&
216
+ (errorText.includes("code") ||
217
+ errorText.includes("type") ||
218
+ errorText.includes("status")) &&
219
+ (errorText.includes("{") || errorText.includes(":")); // JSON-like structure
220
+ // Check if the tool is validating our test data
221
+ const validatesTestData =
222
+ // Rejects test IDs
223
+ ((errorText.includes("test") ||
224
+ errorText.includes("example") ||
225
+ errorText.includes("demo")) &&
226
+ (errorText.includes("invalid") ||
227
+ errorText.includes("not found") ||
228
+ errorText.includes("does not exist"))) ||
229
+ // Rejects placeholder values
230
+ errorText.includes("test_value") ||
231
+ errorText.includes("test@example.com") ||
232
+ errorText.includes("example.com") ||
233
+ // Shows it validated UUID format
234
+ /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/.test(errorText) ||
235
+ // Shows it parsed and validated numeric IDs
236
+ /\bid["\s:]+\d+/.test(errorText) ||
237
+ /\bid["\s:]+["'][^"']+["']/.test(errorText);
238
+ // Check tool operation type - resource operations are expected to validate
239
+ const toolName = context.tool.name.toLowerCase();
240
+ const isValidationExpected =
241
+ // CRUD operations
242
+ toolName.includes("create") ||
243
+ toolName.includes("update") ||
244
+ toolName.includes("delete") ||
245
+ toolName.includes("get") ||
246
+ toolName.includes("fetch") ||
247
+ toolName.includes("read") ||
248
+ toolName.includes("write") ||
249
+ // Data operations
250
+ toolName.includes("query") ||
251
+ toolName.includes("search") ||
252
+ toolName.includes("find") ||
253
+ toolName.includes("list") ||
254
+ // State operations
255
+ toolName.includes("move") ||
256
+ toolName.includes("copy") ||
257
+ toolName.includes("duplicate") ||
258
+ toolName.includes("archive") ||
259
+ // Relationship operations
260
+ toolName.includes("link") ||
261
+ toolName.includes("associate") ||
262
+ toolName.includes("connect") ||
263
+ toolName.includes("attach") ||
264
+ // API/scraping operations
265
+ toolName.includes("scrape") ||
266
+ toolName.includes("crawl") ||
267
+ toolName.includes("map") ||
268
+ toolName.includes("extract") ||
269
+ toolName.includes("parse") ||
270
+ toolName.includes("analyze") ||
271
+ toolName.includes("process");
272
+ // Calculate confidence that this is a business logic error
273
+ let confidenceFactors = 0;
274
+ let totalFactors = 0;
275
+ // High confidence indicators
276
+ if (errorCode &&
277
+ mcpValidationCodes.some((code) => errorText.includes(code))) {
278
+ confidenceFactors += 2; // MCP compliance is strong indicator
279
+ }
280
+ totalFactors += 2;
281
+ if (hasBusinessErrorPattern)
282
+ confidenceFactors += 2; // Increased weight for business error patterns
283
+ totalFactors += 2;
284
+ if (hasBusinessStatusCode)
285
+ confidenceFactors++;
286
+ totalFactors++;
287
+ if (hasStructuredError)
288
+ confidenceFactors++;
289
+ totalFactors++;
290
+ if (validatesTestData)
291
+ confidenceFactors++;
292
+ totalFactors++;
293
+ if (isValidationExpected)
294
+ confidenceFactors += 2; // Increased weight for validation-expected tools
295
+ totalFactors += 2;
296
+ // Require at least 50% confidence that this is business logic validation
297
+ const confidence = confidenceFactors / totalFactors;
298
+ // Special case: Strong operational error indicators (quota, rate limit, billing)
299
+ // These are almost always business logic errors, not tool failures
300
+ const hasStrongOperationalError = hasBusinessErrorPattern &&
301
+ (errorText.includes("quota") ||
302
+ errorText.includes("credit") ||
303
+ errorText.includes("rate limit") ||
304
+ errorText.includes("throttle") ||
305
+ errorText.includes("billing") ||
306
+ errorText.includes("payment") ||
307
+ errorText.includes("subscription") ||
308
+ errorText.includes("trial"));
309
+ // Determine confidence threshold based on error type and tool type
310
+ // - Strong operational errors: 20% (very lenient, these are obvious)
311
+ // - Validation-expected tools: 30% (lenient)
312
+ // - Other tools: 50% (standard)
313
+ const confidenceThreshold = hasStrongOperationalError
314
+ ? 0.2
315
+ : isValidationExpected
316
+ ? 0.3
317
+ : 0.5;
318
+ return confidence >= confidenceThreshold;
319
+ }
320
+ /**
321
+ * Validate error responses are proper and informative
322
+ * NOTE: Currently unused - kept for potential future use
323
+ */
324
+ // @ts-ignore - Unused method kept for potential future use
325
+ static validateErrorResponse(context, result) {
326
+ const content = context.response.content;
327
+ const errorText = content?.[0]?.type === "text" && content[0].text
328
+ ? content[0].text
329
+ : JSON.stringify(context.response.content);
330
+ // Check for proper error structure
331
+ let hasProperError = false;
332
+ // Check for MCP standard error codes
333
+ if (errorText.includes("-32602") || errorText.includes("Invalid params")) {
334
+ result.evidence.push("Proper MCP error code for invalid parameters");
335
+ hasProperError = true;
336
+ }
337
+ // Check for descriptive error messages
338
+ if (errorText.length > 20 &&
339
+ (errorText.toLowerCase().includes("invalid") ||
340
+ errorText.toLowerCase().includes("required") ||
341
+ errorText.toLowerCase().includes("type") ||
342
+ errorText.toLowerCase().includes("validation"))) {
343
+ result.evidence.push("Descriptive error message provided");
344
+ hasProperError = true;
345
+ }
346
+ if (!hasProperError) {
347
+ result.issues.push("Error response lacks proper error codes or descriptive messages");
348
+ }
349
+ return hasProperError;
350
+ }
351
+ /**
352
+ * Validate response structure matches expectations
353
+ * NOTE: Currently unused - kept for potential future use
354
+ */
355
+ // @ts-ignore - Unused method kept for potential future use
356
+ static validateResponseStructure(context, result) {
357
+ // Check if response has content
358
+ if (!context.response.content) {
359
+ result.issues.push("Response has no content");
360
+ return false;
361
+ }
362
+ // Check content structure
363
+ const content = context.response.content;
364
+ if (!Array.isArray(content) || content.length === 0) {
365
+ result.issues.push("Response content is empty or not an array");
366
+ return false;
367
+ }
368
+ // Check for expected content type
369
+ const hasTextContent = content.some((item) => item.type === "text" && item.text);
370
+ const hasResourceContent = content.some((item) => item.type === "resource");
371
+ if (!hasTextContent && !hasResourceContent) {
372
+ result.issues.push("Response lacks text or resource content");
373
+ return false;
374
+ }
375
+ result.evidence.push("Response has valid structure");
376
+ return true;
377
+ }
378
+ /**
379
+ * Validate response content is meaningful
380
+ * NOTE: Currently unused - kept for potential future use
381
+ */
382
+ // @ts-ignore - Unused method kept for potential future use
383
+ static validateResponseContent(context, result) {
384
+ const content = context.response.content;
385
+ const textContent = content.find((item) => item.type === "text")?.text || "";
386
+ // Check if response is just echoing input (bad)
387
+ const inputStr = JSON.stringify(context.input);
388
+ if (textContent === inputStr || textContent === "test_value") {
389
+ result.issues.push("Response appears to just echo input");
390
+ return false;
391
+ }
392
+ // Check for minimal content length
393
+ // But allow short responses for mutation tools (create/update/delete) that might return simple "Success"
394
+ const toolName = context.tool.name.toLowerCase();
395
+ const isMutationTool = toolName.includes("create") ||
396
+ toolName.includes("update") ||
397
+ toolName.includes("delete") ||
398
+ toolName.includes("add") ||
399
+ toolName.includes("remove") ||
400
+ toolName.includes("insert");
401
+ if (textContent.length < 10 && !isMutationTool) {
402
+ result.issues.push("Response content is too short to be meaningful");
403
+ return false;
404
+ }
405
+ // For mutation tools, accept common success indicators even if short
406
+ if (isMutationTool && textContent.length < 10) {
407
+ const successIndicators = [
408
+ "success",
409
+ "ok",
410
+ "done",
411
+ "created",
412
+ "updated",
413
+ "deleted",
414
+ "added",
415
+ "removed",
416
+ ];
417
+ const hasSuccessIndicator = successIndicators.some((indicator) => textContent.toLowerCase().includes(indicator));
418
+ if (!hasSuccessIndicator) {
419
+ result.issues.push("Short response lacks success confirmation for mutation operation");
420
+ return false;
421
+ }
422
+ // Short success message is acceptable
423
+ result.evidence.push("Mutation operation confirmed with short response");
424
+ return true;
425
+ }
426
+ // MCP 2025-06-18: Check structuredContent first (modern MCP tools)
427
+ // Modern tools provide structuredContent even without outputSchema
428
+ const response = context.response;
429
+ if (response.structuredContent) {
430
+ const structured = response.structuredContent;
431
+ // Handle structured array responses
432
+ if (Array.isArray(structured)) {
433
+ if (structured.length === 0) {
434
+ // Empty array is valid - tool processed request successfully but had no data
435
+ // Example: create_relations returns [] when referenced entities don't exist
436
+ result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
437
+ return true;
438
+ }
439
+ // For mutation tools, check for IDs
440
+ if (isMutationTool) {
441
+ const hasIds = structured.some((item) => item &&
442
+ typeof item === "object" &&
443
+ ("id" in item || "_id" in item || "ID" in item));
444
+ if (hasIds) {
445
+ result.evidence.push(`Mutation operation returned ${structured.length} item(s) with IDs in structuredContent`);
446
+ return true;
447
+ }
448
+ }
449
+ result.evidence.push(`Response has structuredContent array with ${structured.length} item(s)`);
450
+ return true;
451
+ }
452
+ // Handle structured object responses
453
+ if (typeof structured === "object" && structured !== null) {
454
+ const keys = Object.keys(structured);
455
+ if (keys.length === 0) {
456
+ result.issues.push("structuredContent object is empty");
457
+ return false;
458
+ }
459
+ const hasNonNullValues = keys.some((key) => structured[key] !== null && structured[key] !== undefined);
460
+ if (!hasNonNullValues) {
461
+ result.issues.push("structuredContent contains only null/undefined values");
462
+ return false;
463
+ }
464
+ result.evidence.push(`Response has structuredContent with ${keys.length} data fields`);
465
+ return true;
466
+ }
467
+ }
468
+ // Fallback: Check for actual data/information in content.text
469
+ try {
470
+ const parsed = JSON.parse(textContent);
471
+ // Handle JSON array responses (common for batch operations)
472
+ if (Array.isArray(parsed)) {
473
+ if (parsed.length === 0) {
474
+ // Empty array is valid - tool processed request successfully but had no data
475
+ result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
476
+ return true;
477
+ }
478
+ // For mutation tools, check if array items have IDs (indicates successful creation)
479
+ if (isMutationTool && parsed.length > 0) {
480
+ const hasIds = parsed.some((item) => typeof item === "object" &&
481
+ item !== null &&
482
+ ("id" in item || "_id" in item || "ID" in item));
483
+ if (hasIds) {
484
+ result.evidence.push(`Mutation operation returned ${parsed.length} item(s) with IDs`);
485
+ return true;
486
+ }
487
+ }
488
+ result.evidence.push(`Response is array with ${parsed.length} item(s)`);
489
+ return true;
490
+ }
491
+ // Handle JSON object responses
492
+ if (typeof parsed === "object" && parsed !== null) {
493
+ const keys = Object.keys(parsed);
494
+ if (keys.length === 0) {
495
+ result.issues.push("Response object is empty");
496
+ return false;
497
+ }
498
+ // Check for null/undefined values
499
+ const hasNonNullValues = keys.some((key) => parsed[key] !== null && parsed[key] !== undefined);
500
+ if (!hasNonNullValues) {
501
+ result.issues.push("Response contains only null/undefined values");
502
+ return false;
503
+ }
504
+ result.evidence.push(`Response contains ${keys.length} data fields`);
505
+ return true;
506
+ }
507
+ }
508
+ catch {
509
+ // Not JSON, check as plain text
510
+ if (textContent.includes("error") || textContent.includes("Error")) {
511
+ // If it contains error but isError is false, that's suspicious
512
+ if (!context.response.isError) {
513
+ result.issues.push("Response contains error text but isError flag is false");
514
+ return false;
515
+ }
516
+ }
517
+ }
518
+ result.evidence.push("Response contains meaningful content");
519
+ return true;
520
+ }
521
+ /**
522
+ * Validate semantic correctness based on input/output relationship
523
+ * NOTE: Currently unused - kept for potential future use
524
+ */
525
+ // @ts-ignore - Unused method kept for potential future use
526
+ static validateSemanticCorrectness(context, result) {
527
+ const toolName = context.tool.name.toLowerCase();
528
+ const content = context.response.content;
529
+ const textContent = content.find((item) => item.type === "text")?.text || "";
530
+ // Tool-specific semantic validation
531
+ if (toolName.includes("search") ||
532
+ toolName.includes("find") ||
533
+ toolName.includes("get")) {
534
+ // MCP 2025-06-18: Check structuredContent first
535
+ const response = context.response;
536
+ if (response.structuredContent) {
537
+ const structured = response.structuredContent;
538
+ // Check for array results
539
+ if (Array.isArray(structured)) {
540
+ result.evidence.push(`Search returned ${structured.length} result(s) in structuredContent (empty results are valid)`);
541
+ return true;
542
+ }
543
+ // Check for object with search result structure
544
+ if (typeof structured === "object" && structured !== null) {
545
+ const hasSearchStructure = "entities" in structured ||
546
+ "relations" in structured ||
547
+ "results" in structured ||
548
+ "items" in structured ||
549
+ "data" in structured ||
550
+ "matches" in structured;
551
+ if (hasSearchStructure) {
552
+ result.evidence.push("Search response has proper result structure in structuredContent");
553
+ return true;
554
+ }
555
+ // Single result object
556
+ if (Object.keys(structured).length > 0) {
557
+ result.evidence.push("Search returned single result object in structuredContent");
558
+ return true;
559
+ }
560
+ }
561
+ }
562
+ // Fallback: Search tools should return results structure (even if empty)
563
+ try {
564
+ const parsed = JSON.parse(textContent);
565
+ // Check for common search response structures
566
+ if (Array.isArray(parsed)) {
567
+ // Array of results (even empty is valid - means no matches)
568
+ result.evidence.push(`Search returned ${parsed.length} result(s) (empty results are valid)`);
569
+ return true;
570
+ }
571
+ if (typeof parsed === "object" && parsed !== null) {
572
+ // Check for common search result object structures
573
+ const hasSearchStructure = "entities" in parsed ||
574
+ "relations" in parsed ||
575
+ "results" in parsed ||
576
+ "items" in parsed ||
577
+ "data" in parsed ||
578
+ "matches" in parsed;
579
+ if (hasSearchStructure) {
580
+ result.evidence.push("Search response has proper result structure");
581
+ return true;
582
+ }
583
+ // Single result object (e.g., get by ID)
584
+ if (Object.keys(parsed).length > 0) {
585
+ result.evidence.push("Search returned single result object");
586
+ return true;
587
+ }
588
+ }
589
+ }
590
+ catch {
591
+ // Not JSON, check text patterns
592
+ }
593
+ // Fallback to text-based validation
594
+ const query = this.findQueryParameter(context.input);
595
+ if (query && typeof query === "string") {
596
+ // Very basic check - response should reference the query somehow
597
+ if (!textContent.toLowerCase().includes(query.toLowerCase()) &&
598
+ !textContent.includes("results") &&
599
+ !textContent.includes("found")) {
600
+ result.issues.push("Search response doesn't seem related to query");
601
+ return false;
602
+ }
603
+ result.evidence.push("Search response appears related to query");
604
+ return true;
605
+ }
606
+ // If no query parameter, just check for search-related keywords
607
+ if (textContent.includes("result") ||
608
+ textContent.includes("found") ||
609
+ textContent.includes("match") ||
610
+ textContent.includes("entity") ||
611
+ textContent.includes("entities")) {
612
+ result.evidence.push("Search response contains search-related data");
613
+ return true;
614
+ }
615
+ }
616
+ if (toolName.includes("create") ||
617
+ toolName.includes("add") ||
618
+ toolName.includes("insert")) {
619
+ // MCP 2025-06-18: Check structuredContent first
620
+ const response = context.response;
621
+ if (response.structuredContent) {
622
+ const structured = response.structuredContent;
623
+ // Check if response is array with IDs
624
+ if (Array.isArray(structured)) {
625
+ const hasIds = structured.some((item) => item &&
626
+ typeof item === "object" &&
627
+ ("id" in item || "_id" in item || "ID" in item));
628
+ if (hasIds) {
629
+ result.evidence.push(`Creation response includes ${structured.length} item(s) with IDs in structuredContent`);
630
+ return true;
631
+ }
632
+ // Even without IDs, array response indicates success
633
+ if (structured.length > 0) {
634
+ result.evidence.push("Creation response includes created items in structuredContent");
635
+ return true;
636
+ }
637
+ }
638
+ // Check if response is object with ID
639
+ if (typeof structured === "object" && structured !== null) {
640
+ if ("id" in structured || "_id" in structured || "ID" in structured) {
641
+ result.evidence.push("Creation response includes resource ID in structuredContent");
642
+ return true;
643
+ }
644
+ // Check for entity/relation structures
645
+ if ("entities" in structured ||
646
+ "relations" in structured ||
647
+ "observations" in structured) {
648
+ result.evidence.push("Creation response includes entity/relation data in structuredContent");
649
+ return true;
650
+ }
651
+ }
652
+ }
653
+ // Fallback: Creation tools should return created resource or ID
654
+ // Try to parse as JSON first to check for structured data with IDs
655
+ try {
656
+ const parsed = JSON.parse(textContent);
657
+ // Check if response is array with IDs (common for batch creation)
658
+ if (Array.isArray(parsed)) {
659
+ const hasIds = parsed.some((item) => typeof item === "object" &&
660
+ item !== null &&
661
+ ("id" in item || "_id" in item || "ID" in item));
662
+ if (hasIds) {
663
+ result.evidence.push(`Creation response includes ${parsed.length} item(s) with IDs`);
664
+ return true;
665
+ }
666
+ }
667
+ // Check if response is object with ID
668
+ if (typeof parsed === "object" && parsed !== null) {
669
+ if ("id" in parsed || "_id" in parsed || "ID" in parsed) {
670
+ result.evidence.push("Creation response includes resource ID");
671
+ return true;
672
+ }
673
+ }
674
+ }
675
+ catch {
676
+ // Not JSON, check text patterns
677
+ }
678
+ // Fallback to text-based validation
679
+ if (!textContent.includes("id") &&
680
+ !textContent.includes("created") &&
681
+ !textContent.includes("success")) {
682
+ result.issues.push("Creation response lacks confirmation or resource ID");
683
+ return false;
684
+ }
685
+ result.evidence.push("Creation response includes confirmation");
686
+ return true;
687
+ }
688
+ if (toolName.includes("delete") || toolName.includes("remove")) {
689
+ // Deletion tools should confirm deletion
690
+ if (!textContent.includes("deleted") &&
691
+ !textContent.includes("removed") &&
692
+ !textContent.includes("success")) {
693
+ result.issues.push("Deletion response lacks confirmation");
694
+ return false;
695
+ }
696
+ result.evidence.push("Deletion response confirms action");
697
+ return true;
698
+ }
699
+ if (toolName.includes("update") ||
700
+ toolName.includes("modify") ||
701
+ toolName.includes("edit")) {
702
+ // Update tools should confirm update
703
+ if (!textContent.includes("updated") &&
704
+ !textContent.includes("modified") &&
705
+ !textContent.includes("changed") &&
706
+ !textContent.includes("success")) {
707
+ result.issues.push("Update response lacks confirmation");
708
+ return false;
709
+ }
710
+ result.evidence.push("Update response confirms changes");
711
+ return true;
712
+ }
713
+ if (toolName.includes("list") || toolName.includes("all")) {
714
+ // List tools should return array or multiple items
715
+ try {
716
+ const parsed = JSON.parse(textContent);
717
+ if (Array.isArray(parsed) ||
718
+ (parsed &&
719
+ typeof parsed === "object" &&
720
+ ("items" in parsed || "results" in parsed))) {
721
+ result.evidence.push("List response contains array or collection");
722
+ return true;
723
+ }
724
+ }
725
+ catch {
726
+ // Check for list-like text response
727
+ if (textContent.includes(",") || textContent.includes("\n")) {
728
+ result.evidence.push("Response appears to contain multiple items");
729
+ return true;
730
+ }
731
+ }
732
+ result.issues.push("List response doesn't contain collection");
733
+ return false;
734
+ }
735
+ // Default validation - response should be different from input
736
+ const inputStr = JSON.stringify(context.input);
737
+ if (textContent !== inputStr &&
738
+ textContent.length > inputStr.length * 0.5) {
739
+ result.evidence.push("Response is substantively different from input");
740
+ return true;
741
+ }
742
+ result.issues.push("Response doesn't demonstrate clear functionality");
743
+ return false;
744
+ }
745
+ /**
746
+ * Validate tool-specific logic and patterns
747
+ * NOTE: Currently unused - kept for potential future use
748
+ */
749
+ // @ts-ignore - Unused method kept for potential future use
750
+ static validateToolSpecificLogic(context, result) {
751
+ const toolName = context.tool.name.toLowerCase();
752
+ const content = context.response.content;
753
+ const textContent = content.find((item) => item.type === "text")?.text || "";
754
+ // Creation/mutation tools (entities, relations, observations, etc.)
755
+ if (toolName.includes("create") ||
756
+ toolName.includes("add") ||
757
+ toolName.includes("insert") ||
758
+ toolName.includes("entity") ||
759
+ toolName.includes("entities") ||
760
+ toolName.includes("relation") ||
761
+ toolName.includes("observation")) {
762
+ // MCP 2025-06-18: Check structuredContent first (CRITICAL FIX)
763
+ const response = context.response;
764
+ if (response.structuredContent) {
765
+ const structured = response.structuredContent;
766
+ // Check for array responses with IDs
767
+ if (Array.isArray(structured)) {
768
+ const hasIds = structured.some((item) => item &&
769
+ typeof item === "object" &&
770
+ ("id" in item || "_id" in item || "ID" in item));
771
+ if (hasIds) {
772
+ result.evidence.push(`Creation tool returned ${structured.length} entity/entities with IDs in structuredContent`);
773
+ return true;
774
+ }
775
+ // Check for entity-like objects even without IDs
776
+ const hasEntityStructure = structured.some((item) => item &&
777
+ typeof item === "object" &&
778
+ ("name" in item ||
779
+ "entityType" in item ||
780
+ "from" in item ||
781
+ "to" in item));
782
+ if (hasEntityStructure) {
783
+ result.evidence.push("Creation tool returned entity-like objects in structuredContent");
784
+ return true;
785
+ }
786
+ }
787
+ // Check for object with ID
788
+ if (structured &&
789
+ typeof structured === "object" &&
790
+ ("id" in structured || "_id" in structured || "ID" in structured)) {
791
+ result.evidence.push("Creation tool returned entity with ID in structuredContent");
792
+ return true;
793
+ }
794
+ // Check for entity/relation structure
795
+ if (structured &&
796
+ typeof structured === "object" &&
797
+ ("name" in structured ||
798
+ "entityType" in structured ||
799
+ "from" in structured ||
800
+ "to" in structured ||
801
+ "entities" in structured ||
802
+ "relations" in structured ||
803
+ "observations" in structured)) {
804
+ result.evidence.push("Creation tool returned entity/relation structure in structuredContent");
805
+ return true;
806
+ }
807
+ }
808
+ // Fallback: Try parsing content.text as JSON
809
+ try {
810
+ const parsed = JSON.parse(textContent);
811
+ // Check for array responses with IDs (common for batch operations)
812
+ if (Array.isArray(parsed)) {
813
+ const hasIds = parsed.some((item) => item &&
814
+ typeof item === "object" &&
815
+ ("id" in item || "_id" in item || "ID" in item));
816
+ if (hasIds) {
817
+ result.evidence.push(`Creation tool returned ${parsed.length} entity/entities with IDs`);
818
+ return true;
819
+ }
820
+ // Even without IDs, if array has entity-like objects, it's valid
821
+ const hasEntityStructure = parsed.some((item) => item &&
822
+ typeof item === "object" &&
823
+ ("name" in item ||
824
+ "entityType" in item ||
825
+ "from" in item ||
826
+ "to" in item));
827
+ if (hasEntityStructure) {
828
+ result.evidence.push("Creation tool returned entity-like objects");
829
+ return true;
830
+ }
831
+ }
832
+ // Check for object with ID
833
+ if (parsed &&
834
+ typeof parsed === "object" &&
835
+ ("id" in parsed || "_id" in parsed || "ID" in parsed)) {
836
+ result.evidence.push("Creation tool returned entity with ID");
837
+ return true;
838
+ }
839
+ // Check for entity structure
840
+ if (parsed &&
841
+ typeof parsed === "object" &&
842
+ ("name" in parsed ||
843
+ "entityType" in parsed ||
844
+ "from" in parsed ||
845
+ "to" in parsed ||
846
+ "entities" in parsed ||
847
+ "relations" in parsed)) {
848
+ result.evidence.push("Creation tool returned entity/relation structure");
849
+ return true;
850
+ }
851
+ }
852
+ catch {
853
+ // Not JSON, check text patterns
854
+ }
855
+ // Fallback: check for success indicators in text
856
+ if (textContent.includes("id") ||
857
+ textContent.includes("created") ||
858
+ textContent.includes("entity") ||
859
+ textContent.includes("entities") ||
860
+ textContent.includes("relation") ||
861
+ textContent.includes("observation")) {
862
+ result.evidence.push("Creation tool response contains entity/relation indicators");
863
+ return true;
864
+ }
865
+ }
866
+ // Database/store tools
867
+ if (toolName.includes("database") ||
868
+ toolName.includes("store") ||
869
+ toolName.includes("db")) {
870
+ if (textContent.includes("connection") &&
871
+ textContent.includes("failed")) {
872
+ result.issues.push("Database connection failure");
873
+ return false;
874
+ }
875
+ // Should have some indication of data operation
876
+ if (textContent.includes("rows") ||
877
+ textContent.includes("records") ||
878
+ textContent.includes("documents") ||
879
+ textContent.includes("query")) {
880
+ result.evidence.push("Response indicates database operation");
881
+ return true;
882
+ }
883
+ }
884
+ // File system tools
885
+ if (toolName.includes("file") ||
886
+ toolName.includes("read") ||
887
+ toolName.includes("write")) {
888
+ if (textContent.includes("permission") &&
889
+ textContent.includes("denied")) {
890
+ result.issues.push("File permission error");
891
+ return false;
892
+ }
893
+ if (textContent.includes("not found") &&
894
+ context.scenarioCategory !== "error_case") {
895
+ result.issues.push("File not found error");
896
+ return false;
897
+ }
898
+ // Should have file operation indication
899
+ if (textContent.includes("bytes") ||
900
+ textContent.includes("content") ||
901
+ textContent.includes("saved") ||
902
+ textContent.includes("written")) {
903
+ result.evidence.push("Response indicates file operation");
904
+ return true;
905
+ }
906
+ }
907
+ // API/HTTP tools
908
+ if (toolName.includes("http") ||
909
+ toolName.includes("api") ||
910
+ toolName.includes("fetch")) {
911
+ // Check for HTTP status codes
912
+ if (textContent.includes("200") ||
913
+ textContent.includes("201") ||
914
+ textContent.includes("success")) {
915
+ result.evidence.push("Response indicates successful HTTP operation");
916
+ return true;
917
+ }
918
+ if (textContent.includes("404") ||
919
+ textContent.includes("500") ||
920
+ textContent.includes("error")) {
921
+ result.issues.push("HTTP error in response");
922
+ return false;
923
+ }
924
+ }
925
+ // Computation/calculation tools
926
+ if (toolName.includes("calc") ||
927
+ toolName.includes("compute") ||
928
+ toolName.includes("math")) {
929
+ // Should return numeric result
930
+ try {
931
+ const parsed = JSON.parse(textContent);
932
+ if (typeof parsed === "number" ||
933
+ (parsed && "result" in parsed && typeof parsed.result === "number")) {
934
+ result.evidence.push("Response contains numeric computation result");
935
+ return true;
936
+ }
937
+ }
938
+ catch {
939
+ // Check for number in text
940
+ if (/\d+/.test(textContent)) {
941
+ result.evidence.push("Response contains numeric value");
942
+ return true;
943
+ }
944
+ }
945
+ result.issues.push("Computation tool didn't return numeric result");
946
+ return false;
947
+ }
948
+ // Default - tool responded with non-empty content
949
+ if (textContent.length > 20) {
950
+ result.evidence.push("Tool provided substantive response");
951
+ return true;
952
+ }
953
+ result.issues.push("Response lacks tool-specific indicators");
954
+ return false;
955
+ }
956
+ /**
957
+ * Find query-like parameter in input
958
+ */
959
+ static findQueryParameter(input) {
960
+ const queryKeys = [
961
+ "query",
962
+ "q",
963
+ "search",
964
+ "term",
965
+ "keyword",
966
+ "filter",
967
+ "name",
968
+ "id",
969
+ ];
970
+ for (const key of queryKeys) {
971
+ if (key in input) {
972
+ return input[key];
973
+ }
974
+ }
975
+ // Return first string parameter as fallback
976
+ for (const value of Object.values(input)) {
977
+ if (typeof value === "string") {
978
+ return value;
979
+ }
980
+ }
981
+ return null;
982
+ }
983
+ /**
984
+ * Validate structured output against outputSchema (MCP 2025-06-18 feature)
985
+ * NOTE: Currently unused - kept for potential future use
986
+ */
987
+ // @ts-ignore - Unused method kept for potential future use
988
+ static validateStructuredOutput(context, result) {
989
+ // Check if tool has outputSchema defined
990
+ const tool = context.tool; // Cast to any to access potential outputSchema property
991
+ if (!tool.outputSchema) {
992
+ // Tool doesn't define outputSchema, this validation is not applicable
993
+ result.evidence.push("Tool does not define outputSchema (optional MCP 2025-06-18 feature)");
994
+ return true; // Not a failure if not using structured output
995
+ }
996
+ // Check if response contains structuredContent
997
+ const response = context.response;
998
+ if (response.structuredContent) {
999
+ // Tool provides structuredContent - this is the modern MCP 2025-06-18 pattern
1000
+ // outputSchema validation is optional and rarely used, so we accept any structuredContent
1001
+ result.evidence.push("Tool provides structuredContent (MCP 2025-06-18 modern response format)");
1002
+ return true;
1003
+ }
1004
+ // Check if response contains resource URIs (another MCP 2025-06-18 feature)
1005
+ const content = context.response.content;
1006
+ const hasResourceUris = content.some((item) => item.type === "resource" && item.uri);
1007
+ if (hasResourceUris) {
1008
+ result.evidence.push("Response uses resource URIs for external content (MCP 2025-06-18 feature)");
1009
+ return true;
1010
+ }
1011
+ // Tool has outputSchema but didn't provide structuredContent
1012
+ // This is okay - tools can provide both text and structured output
1013
+ result.evidence.push("Tool has outputSchema but provided text response (backward compatibility)");
1014
+ return true;
1015
+ }
1016
+ /**
1017
+ * Calculate confidence score for a set of validation results
1018
+ */
1019
+ static calculateOverallConfidence(results) {
1020
+ if (results.length === 0)
1021
+ return 0;
1022
+ const weights = {
1023
+ fully_working: 1.0,
1024
+ partially_working: 0.7,
1025
+ connectivity_only: 0.3,
1026
+ error: 0.2,
1027
+ broken: 0.0,
1028
+ };
1029
+ let totalWeight = 0;
1030
+ let weightedSum = 0;
1031
+ for (const result of results) {
1032
+ const weight = weights[result.classification];
1033
+ weightedSum += result.confidence * weight;
1034
+ totalWeight += 100; // Max confidence per result
1035
+ }
1036
+ return totalWeight > 0 ? (weightedSum / totalWeight) * 100 : 0;
1037
+ }
1038
+ }