@bryan-thompson/inspector-assessment-client 1.15.1 → 1.16.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-tZBHqkSF.js → OAuthCallback-CayYEvRg.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-D73S8G8X.js → OAuthDebugCallback-CijTatbL.js} +1 -1
- package/dist/assets/{index-BAbFakRL.js → index-B9ESGk3E.js} +745 -4350
- package/dist/index.html +1 -1
- package/lib/lib/assessmentTypes.d.ts +129 -0
- package/lib/lib/assessmentTypes.d.ts.map +1 -1
- package/lib/lib/assessmentTypes.js +20 -0
- package/lib/lib/securityPatterns.d.ts +2 -2
- package/lib/lib/securityPatterns.d.ts.map +1 -1
- package/lib/lib/securityPatterns.js +290 -15
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +67 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/lib/services/assessment/AssessmentOrchestrator.js +91 -1
- package/lib/services/assessment/ResponseValidator.d.ts +7 -34
- package/lib/services/assessment/ResponseValidator.d.ts.map +1 -1
- package/lib/services/assessment/ResponseValidator.js +100 -704
- package/lib/services/assessment/config/annotationPatterns.js +1 -1
- package/lib/services/assessment/lib/RequestHistoryAnalyzer.d.ts +67 -0
- package/lib/services/assessment/lib/RequestHistoryAnalyzer.d.ts.map +1 -0
- package/lib/services/assessment/lib/RequestHistoryAnalyzer.js +191 -0
- package/lib/services/assessment/lib/claudeCodeBridge.d.ts +1 -0
- package/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -1
- package/lib/services/assessment/lib/claudeCodeBridge.js +5 -4
- package/lib/services/assessment/modules/AuthenticationAssessor.d.ts +4 -0
- package/lib/services/assessment/modules/AuthenticationAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/AuthenticationAssessor.js +97 -1
- package/lib/services/assessment/modules/CrossCapabilitySecurityAssessor.d.ts +39 -0
- package/lib/services/assessment/modules/CrossCapabilitySecurityAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/CrossCapabilitySecurityAssessor.js +330 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/FunctionalityAssessor.js +46 -13
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +5 -0
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +81 -0
- package/lib/services/assessment/modules/ManifestValidationAssessor.js +1 -1
- package/lib/services/assessment/modules/PromptAssessor.d.ts +30 -0
- package/lib/services/assessment/modules/PromptAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/PromptAssessor.js +367 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts +28 -0
- package/lib/services/assessment/modules/ResourceAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/ResourceAssessor.js +296 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts +4 -2
- package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -1
- package/lib/services/assessment/modules/SecurityAssessor.js +10 -41
- package/lib/utils/jsonUtils.d.ts +68 -0
- package/lib/utils/jsonUtils.d.ts.map +1 -0
- package/lib/utils/jsonUtils.js +141 -0
- package/lib/utils/paramUtils.d.ts +11 -0
- package/lib/utils/paramUtils.d.ts.map +1 -0
- package/lib/utils/paramUtils.js +37 -0
- package/lib/utils/schemaUtils.d.ts +74 -0
- package/lib/utils/schemaUtils.d.ts.map +1 -0
- package/lib/utils/schemaUtils.js +268 -0
- package/package.json +1 -1
|
@@ -2,7 +2,75 @@
|
|
|
2
2
|
* Response Validator for MCP Tool Testing
|
|
3
3
|
* Validates that tool responses are actually functional, not just present
|
|
4
4
|
*/
|
|
5
|
+
import { validateToolOutput, hasOutputSchema } from "../../utils/schemaUtils.js";
|
|
5
6
|
export class ResponseValidator {
|
|
7
|
+
/**
|
|
8
|
+
* Extract response metadata: content types with per-type counts (text, image, resource), structuredContent and _meta presence, and the output schema validation result
|
|
9
|
+
*/
|
|
10
|
+
static extractResponseMetadata(context) {
|
|
11
|
+
const content = context.response.content;
|
|
12
|
+
const response = context.response;
|
|
13
|
+
// Track content types present
|
|
14
|
+
const contentTypes = [];
|
|
15
|
+
let textBlockCount = 0;
|
|
16
|
+
let imageCount = 0;
|
|
17
|
+
let resourceCount = 0;
|
|
18
|
+
if (Array.isArray(content)) {
|
|
19
|
+
for (const item of content) {
|
|
20
|
+
const type = item.type;
|
|
21
|
+
if (!contentTypes.includes(type)) {
|
|
22
|
+
contentTypes.push(type);
|
|
23
|
+
}
|
|
24
|
+
// Count by type
|
|
25
|
+
switch (type) {
|
|
26
|
+
case "text":
|
|
27
|
+
textBlockCount++;
|
|
28
|
+
break;
|
|
29
|
+
case "image":
|
|
30
|
+
imageCount++;
|
|
31
|
+
break;
|
|
32
|
+
case "resource":
|
|
33
|
+
case "resource_link":
|
|
34
|
+
resourceCount++;
|
|
35
|
+
break;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
// Check for structuredContent property (MCP 2025-06-18+)
|
|
40
|
+
const hasStructuredContent = "structuredContent" in response &&
|
|
41
|
+
response.structuredContent !== undefined;
|
|
42
|
+
// Check for _meta property
|
|
43
|
+
const hasMeta = "_meta" in response && response._meta !== undefined;
|
|
44
|
+
// Output schema validation
|
|
45
|
+
let outputSchemaValidation;
|
|
46
|
+
const toolHasOutputSchema = hasOutputSchema(context.tool.name);
|
|
47
|
+
if (toolHasOutputSchema) {
|
|
48
|
+
if (!hasStructuredContent) {
|
|
49
|
+
outputSchemaValidation = {
|
|
50
|
+
hasOutputSchema: true,
|
|
51
|
+
isValid: false,
|
|
52
|
+
error: "Tool has output schema but did not return structuredContent",
|
|
53
|
+
};
|
|
54
|
+
}
|
|
55
|
+
else {
|
|
56
|
+
const validation = validateToolOutput(context.tool.name, response.structuredContent);
|
|
57
|
+
outputSchemaValidation = {
|
|
58
|
+
hasOutputSchema: true,
|
|
59
|
+
isValid: validation.isValid,
|
|
60
|
+
error: validation.error,
|
|
61
|
+
};
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
return {
|
|
65
|
+
contentTypes,
|
|
66
|
+
hasStructuredContent,
|
|
67
|
+
hasMeta,
|
|
68
|
+
textBlockCount,
|
|
69
|
+
imageCount,
|
|
70
|
+
resourceCount,
|
|
71
|
+
outputSchemaValidation,
|
|
72
|
+
};
|
|
73
|
+
}
|
|
6
74
|
/**
|
|
7
75
|
* Validate a tool response comprehensively
|
|
8
76
|
*/
|
|
@@ -15,6 +83,9 @@ export class ResponseValidator {
|
|
|
15
83
|
evidence: [],
|
|
16
84
|
classification: "broken",
|
|
17
85
|
};
|
|
86
|
+
// Extract response metadata for content type tracking
|
|
87
|
+
const responseMetadata = this.extractResponseMetadata(context);
|
|
88
|
+
result.responseMetadata = responseMetadata;
|
|
18
89
|
// Check if response indicates an error
|
|
19
90
|
if (context.response.isError) {
|
|
20
91
|
result.isError = true;
|
|
@@ -48,19 +119,40 @@ export class ResponseValidator {
|
|
|
48
119
|
result.confidence = 0;
|
|
49
120
|
return result;
|
|
50
121
|
}
|
|
51
|
-
// Tool responded successfully -
|
|
122
|
+
// Tool responded successfully - start with fully_working
|
|
52
123
|
result.isValid = true;
|
|
53
124
|
result.classification = "fully_working";
|
|
54
125
|
result.confidence = 100;
|
|
55
126
|
result.evidence.push("Tool responded successfully with content");
|
|
56
|
-
// Add details about
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
if (hasText) {
|
|
60
|
-
result.evidence.push("Response includes text content");
|
|
127
|
+
// Add details about content types for debugging
|
|
128
|
+
if (responseMetadata.textBlockCount > 0) {
|
|
129
|
+
result.evidence.push(`Response includes ${responseMetadata.textBlockCount} text block(s)`);
|
|
61
130
|
}
|
|
62
|
-
if (
|
|
63
|
-
result.evidence.push(
|
|
131
|
+
if (responseMetadata.imageCount > 0) {
|
|
132
|
+
result.evidence.push(`Response includes ${responseMetadata.imageCount} image(s)`);
|
|
133
|
+
}
|
|
134
|
+
if (responseMetadata.resourceCount > 0) {
|
|
135
|
+
result.evidence.push(`Response includes ${responseMetadata.resourceCount} resource(s)`);
|
|
136
|
+
}
|
|
137
|
+
if (responseMetadata.hasStructuredContent) {
|
|
138
|
+
result.evidence.push("Response includes structuredContent");
|
|
139
|
+
}
|
|
140
|
+
if (responseMetadata.hasMeta) {
|
|
141
|
+
result.evidence.push("Response includes _meta field");
|
|
142
|
+
}
|
|
143
|
+
// Check output schema validation (P0 enhancement)
|
|
144
|
+
if (responseMetadata.outputSchemaValidation) {
|
|
145
|
+
const { hasOutputSchema: hasSchema, isValid, error, } = responseMetadata.outputSchemaValidation;
|
|
146
|
+
if (hasSchema && !isValid) {
|
|
147
|
+
// Downgrade classification if output schema validation fails
|
|
148
|
+
result.classification = "partially_working";
|
|
149
|
+
result.confidence = 70;
|
|
150
|
+
result.issues.push(error || "Output schema validation failed");
|
|
151
|
+
result.evidence.push("Tool has output schema but response does not conform");
|
|
152
|
+
}
|
|
153
|
+
else if (hasSchema && isValid) {
|
|
154
|
+
result.evidence.push("Output schema validation passed");
|
|
155
|
+
}
|
|
64
156
|
}
|
|
65
157
|
return result;
|
|
66
158
|
}
|
|
@@ -317,702 +409,6 @@ export class ResponseValidator {
|
|
|
317
409
|
: 0.5;
|
|
318
410
|
return confidence >= confidenceThreshold;
|
|
319
411
|
}
|
|
320
|
-
/**
|
|
321
|
-
* Validate error responses are proper and informative
|
|
322
|
-
* NOTE: Currently unused - kept for potential future use
|
|
323
|
-
*/
|
|
324
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
325
|
-
static validateErrorResponse(context, result) {
|
|
326
|
-
const content = context.response.content;
|
|
327
|
-
const errorText = content?.[0]?.type === "text" && content[0].text
|
|
328
|
-
? content[0].text
|
|
329
|
-
: JSON.stringify(context.response.content);
|
|
330
|
-
// Check for proper error structure
|
|
331
|
-
let hasProperError = false;
|
|
332
|
-
// Check for MCP standard error codes
|
|
333
|
-
if (errorText.includes("-32602") || errorText.includes("Invalid params")) {
|
|
334
|
-
result.evidence.push("Proper MCP error code for invalid parameters");
|
|
335
|
-
hasProperError = true;
|
|
336
|
-
}
|
|
337
|
-
// Check for descriptive error messages
|
|
338
|
-
if (errorText.length > 20 &&
|
|
339
|
-
(errorText.toLowerCase().includes("invalid") ||
|
|
340
|
-
errorText.toLowerCase().includes("required") ||
|
|
341
|
-
errorText.toLowerCase().includes("type") ||
|
|
342
|
-
errorText.toLowerCase().includes("validation"))) {
|
|
343
|
-
result.evidence.push("Descriptive error message provided");
|
|
344
|
-
hasProperError = true;
|
|
345
|
-
}
|
|
346
|
-
if (!hasProperError) {
|
|
347
|
-
result.issues.push("Error response lacks proper error codes or descriptive messages");
|
|
348
|
-
}
|
|
349
|
-
return hasProperError;
|
|
350
|
-
}
|
|
351
|
-
/**
|
|
352
|
-
* Validate response structure matches expectations
|
|
353
|
-
* NOTE: Currently unused - kept for potential future use
|
|
354
|
-
*/
|
|
355
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
356
|
-
static validateResponseStructure(context, result) {
|
|
357
|
-
// Check if response has content
|
|
358
|
-
if (!context.response.content) {
|
|
359
|
-
result.issues.push("Response has no content");
|
|
360
|
-
return false;
|
|
361
|
-
}
|
|
362
|
-
// Check content structure
|
|
363
|
-
const content = context.response.content;
|
|
364
|
-
if (!Array.isArray(content) || content.length === 0) {
|
|
365
|
-
result.issues.push("Response content is empty or not an array");
|
|
366
|
-
return false;
|
|
367
|
-
}
|
|
368
|
-
// Check for expected content type
|
|
369
|
-
const hasTextContent = content.some((item) => item.type === "text" && item.text);
|
|
370
|
-
const hasResourceContent = content.some((item) => item.type === "resource");
|
|
371
|
-
if (!hasTextContent && !hasResourceContent) {
|
|
372
|
-
result.issues.push("Response lacks text or resource content");
|
|
373
|
-
return false;
|
|
374
|
-
}
|
|
375
|
-
result.evidence.push("Response has valid structure");
|
|
376
|
-
return true;
|
|
377
|
-
}
|
|
378
|
-
/**
|
|
379
|
-
* Validate response content is meaningful
|
|
380
|
-
* NOTE: Currently unused - kept for potential future use
|
|
381
|
-
*/
|
|
382
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
383
|
-
static validateResponseContent(context, result) {
|
|
384
|
-
const content = context.response.content;
|
|
385
|
-
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
386
|
-
// Check if response is just echoing input (bad)
|
|
387
|
-
const inputStr = JSON.stringify(context.input);
|
|
388
|
-
if (textContent === inputStr || textContent === "test_value") {
|
|
389
|
-
result.issues.push("Response appears to just echo input");
|
|
390
|
-
return false;
|
|
391
|
-
}
|
|
392
|
-
// Check for minimal content length
|
|
393
|
-
// But allow short responses for mutation tools (create/update/delete) that might return simple "Success"
|
|
394
|
-
const toolName = context.tool.name.toLowerCase();
|
|
395
|
-
const isMutationTool = toolName.includes("create") ||
|
|
396
|
-
toolName.includes("update") ||
|
|
397
|
-
toolName.includes("delete") ||
|
|
398
|
-
toolName.includes("add") ||
|
|
399
|
-
toolName.includes("remove") ||
|
|
400
|
-
toolName.includes("insert");
|
|
401
|
-
if (textContent.length < 10 && !isMutationTool) {
|
|
402
|
-
result.issues.push("Response content is too short to be meaningful");
|
|
403
|
-
return false;
|
|
404
|
-
}
|
|
405
|
-
// For mutation tools, accept common success indicators even if short
|
|
406
|
-
if (isMutationTool && textContent.length < 10) {
|
|
407
|
-
const successIndicators = [
|
|
408
|
-
"success",
|
|
409
|
-
"ok",
|
|
410
|
-
"done",
|
|
411
|
-
"created",
|
|
412
|
-
"updated",
|
|
413
|
-
"deleted",
|
|
414
|
-
"added",
|
|
415
|
-
"removed",
|
|
416
|
-
];
|
|
417
|
-
const hasSuccessIndicator = successIndicators.some((indicator) => textContent.toLowerCase().includes(indicator));
|
|
418
|
-
if (!hasSuccessIndicator) {
|
|
419
|
-
result.issues.push("Short response lacks success confirmation for mutation operation");
|
|
420
|
-
return false;
|
|
421
|
-
}
|
|
422
|
-
// Short success message is acceptable
|
|
423
|
-
result.evidence.push("Mutation operation confirmed with short response");
|
|
424
|
-
return true;
|
|
425
|
-
}
|
|
426
|
-
// MCP 2025-06-18: Check structuredContent first (modern MCP tools)
|
|
427
|
-
// Modern tools provide structuredContent even without outputSchema
|
|
428
|
-
const response = context.response;
|
|
429
|
-
if (response.structuredContent) {
|
|
430
|
-
const structured = response.structuredContent;
|
|
431
|
-
// Handle structured array responses
|
|
432
|
-
if (Array.isArray(structured)) {
|
|
433
|
-
if (structured.length === 0) {
|
|
434
|
-
// Empty array is valid - tool processed request successfully but had no data
|
|
435
|
-
// Example: create_relations returns [] when referenced entities don't exist
|
|
436
|
-
result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
|
|
437
|
-
return true;
|
|
438
|
-
}
|
|
439
|
-
// For mutation tools, check for IDs
|
|
440
|
-
if (isMutationTool) {
|
|
441
|
-
const hasIds = structured.some((item) => item &&
|
|
442
|
-
typeof item === "object" &&
|
|
443
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
444
|
-
if (hasIds) {
|
|
445
|
-
result.evidence.push(`Mutation operation returned ${structured.length} item(s) with IDs in structuredContent`);
|
|
446
|
-
return true;
|
|
447
|
-
}
|
|
448
|
-
}
|
|
449
|
-
result.evidence.push(`Response has structuredContent array with ${structured.length} item(s)`);
|
|
450
|
-
return true;
|
|
451
|
-
}
|
|
452
|
-
// Handle structured object responses
|
|
453
|
-
if (typeof structured === "object" && structured !== null) {
|
|
454
|
-
const keys = Object.keys(structured);
|
|
455
|
-
if (keys.length === 0) {
|
|
456
|
-
result.issues.push("structuredContent object is empty");
|
|
457
|
-
return false;
|
|
458
|
-
}
|
|
459
|
-
const hasNonNullValues = keys.some((key) => structured[key] !== null && structured[key] !== undefined);
|
|
460
|
-
if (!hasNonNullValues) {
|
|
461
|
-
result.issues.push("structuredContent contains only null/undefined values");
|
|
462
|
-
return false;
|
|
463
|
-
}
|
|
464
|
-
result.evidence.push(`Response has structuredContent with ${keys.length} data fields`);
|
|
465
|
-
return true;
|
|
466
|
-
}
|
|
467
|
-
}
|
|
468
|
-
// Fallback: Check for actual data/information in content.text
|
|
469
|
-
try {
|
|
470
|
-
const parsed = JSON.parse(textContent);
|
|
471
|
-
// Handle JSON array responses (common for batch operations)
|
|
472
|
-
if (Array.isArray(parsed)) {
|
|
473
|
-
if (parsed.length === 0) {
|
|
474
|
-
// Empty array is valid - tool processed request successfully but had no data
|
|
475
|
-
result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
|
|
476
|
-
return true;
|
|
477
|
-
}
|
|
478
|
-
// For mutation tools, check if array items have IDs (indicates successful creation)
|
|
479
|
-
if (isMutationTool && parsed.length > 0) {
|
|
480
|
-
const hasIds = parsed.some((item) => typeof item === "object" &&
|
|
481
|
-
item !== null &&
|
|
482
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
483
|
-
if (hasIds) {
|
|
484
|
-
result.evidence.push(`Mutation operation returned ${parsed.length} item(s) with IDs`);
|
|
485
|
-
return true;
|
|
486
|
-
}
|
|
487
|
-
}
|
|
488
|
-
result.evidence.push(`Response is array with ${parsed.length} item(s)`);
|
|
489
|
-
return true;
|
|
490
|
-
}
|
|
491
|
-
// Handle JSON object responses
|
|
492
|
-
if (typeof parsed === "object" && parsed !== null) {
|
|
493
|
-
const keys = Object.keys(parsed);
|
|
494
|
-
if (keys.length === 0) {
|
|
495
|
-
result.issues.push("Response object is empty");
|
|
496
|
-
return false;
|
|
497
|
-
}
|
|
498
|
-
// Check for null/undefined values
|
|
499
|
-
const hasNonNullValues = keys.some((key) => parsed[key] !== null && parsed[key] !== undefined);
|
|
500
|
-
if (!hasNonNullValues) {
|
|
501
|
-
result.issues.push("Response contains only null/undefined values");
|
|
502
|
-
return false;
|
|
503
|
-
}
|
|
504
|
-
result.evidence.push(`Response contains ${keys.length} data fields`);
|
|
505
|
-
return true;
|
|
506
|
-
}
|
|
507
|
-
}
|
|
508
|
-
catch {
|
|
509
|
-
// Not JSON, check as plain text
|
|
510
|
-
if (textContent.includes("error") || textContent.includes("Error")) {
|
|
511
|
-
// If it contains error but isError is false, that's suspicious
|
|
512
|
-
if (!context.response.isError) {
|
|
513
|
-
result.issues.push("Response contains error text but isError flag is false");
|
|
514
|
-
return false;
|
|
515
|
-
}
|
|
516
|
-
}
|
|
517
|
-
}
|
|
518
|
-
result.evidence.push("Response contains meaningful content");
|
|
519
|
-
return true;
|
|
520
|
-
}
|
|
521
|
-
/**
|
|
522
|
-
* Validate semantic correctness based on input/output relationship
|
|
523
|
-
* NOTE: Currently unused - kept for potential future use
|
|
524
|
-
*/
|
|
525
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
526
|
-
static validateSemanticCorrectness(context, result) {
|
|
527
|
-
const toolName = context.tool.name.toLowerCase();
|
|
528
|
-
const content = context.response.content;
|
|
529
|
-
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
530
|
-
// Tool-specific semantic validation
|
|
531
|
-
if (toolName.includes("search") ||
|
|
532
|
-
toolName.includes("find") ||
|
|
533
|
-
toolName.includes("get")) {
|
|
534
|
-
// MCP 2025-06-18: Check structuredContent first
|
|
535
|
-
const response = context.response;
|
|
536
|
-
if (response.structuredContent) {
|
|
537
|
-
const structured = response.structuredContent;
|
|
538
|
-
// Check for array results
|
|
539
|
-
if (Array.isArray(structured)) {
|
|
540
|
-
result.evidence.push(`Search returned ${structured.length} result(s) in structuredContent (empty results are valid)`);
|
|
541
|
-
return true;
|
|
542
|
-
}
|
|
543
|
-
// Check for object with search result structure
|
|
544
|
-
if (typeof structured === "object" && structured !== null) {
|
|
545
|
-
const hasSearchStructure = "entities" in structured ||
|
|
546
|
-
"relations" in structured ||
|
|
547
|
-
"results" in structured ||
|
|
548
|
-
"items" in structured ||
|
|
549
|
-
"data" in structured ||
|
|
550
|
-
"matches" in structured;
|
|
551
|
-
if (hasSearchStructure) {
|
|
552
|
-
result.evidence.push("Search response has proper result structure in structuredContent");
|
|
553
|
-
return true;
|
|
554
|
-
}
|
|
555
|
-
// Single result object
|
|
556
|
-
if (Object.keys(structured).length > 0) {
|
|
557
|
-
result.evidence.push("Search returned single result object in structuredContent");
|
|
558
|
-
return true;
|
|
559
|
-
}
|
|
560
|
-
}
|
|
561
|
-
}
|
|
562
|
-
// Fallback: Search tools should return results structure (even if empty)
|
|
563
|
-
try {
|
|
564
|
-
const parsed = JSON.parse(textContent);
|
|
565
|
-
// Check for common search response structures
|
|
566
|
-
if (Array.isArray(parsed)) {
|
|
567
|
-
// Array of results (even empty is valid - means no matches)
|
|
568
|
-
result.evidence.push(`Search returned ${parsed.length} result(s) (empty results are valid)`);
|
|
569
|
-
return true;
|
|
570
|
-
}
|
|
571
|
-
if (typeof parsed === "object" && parsed !== null) {
|
|
572
|
-
// Check for common search result object structures
|
|
573
|
-
const hasSearchStructure = "entities" in parsed ||
|
|
574
|
-
"relations" in parsed ||
|
|
575
|
-
"results" in parsed ||
|
|
576
|
-
"items" in parsed ||
|
|
577
|
-
"data" in parsed ||
|
|
578
|
-
"matches" in parsed;
|
|
579
|
-
if (hasSearchStructure) {
|
|
580
|
-
result.evidence.push("Search response has proper result structure");
|
|
581
|
-
return true;
|
|
582
|
-
}
|
|
583
|
-
// Single result object (e.g., get by ID)
|
|
584
|
-
if (Object.keys(parsed).length > 0) {
|
|
585
|
-
result.evidence.push("Search returned single result object");
|
|
586
|
-
return true;
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
}
|
|
590
|
-
catch {
|
|
591
|
-
// Not JSON, check text patterns
|
|
592
|
-
}
|
|
593
|
-
// Fallback to text-based validation
|
|
594
|
-
const query = this.findQueryParameter(context.input);
|
|
595
|
-
if (query && typeof query === "string") {
|
|
596
|
-
// Very basic check - response should reference the query somehow
|
|
597
|
-
if (!textContent.toLowerCase().includes(query.toLowerCase()) &&
|
|
598
|
-
!textContent.includes("results") &&
|
|
599
|
-
!textContent.includes("found")) {
|
|
600
|
-
result.issues.push("Search response doesn't seem related to query");
|
|
601
|
-
return false;
|
|
602
|
-
}
|
|
603
|
-
result.evidence.push("Search response appears related to query");
|
|
604
|
-
return true;
|
|
605
|
-
}
|
|
606
|
-
// If no query parameter, just check for search-related keywords
|
|
607
|
-
if (textContent.includes("result") ||
|
|
608
|
-
textContent.includes("found") ||
|
|
609
|
-
textContent.includes("match") ||
|
|
610
|
-
textContent.includes("entity") ||
|
|
611
|
-
textContent.includes("entities")) {
|
|
612
|
-
result.evidence.push("Search response contains search-related data");
|
|
613
|
-
return true;
|
|
614
|
-
}
|
|
615
|
-
}
|
|
616
|
-
if (toolName.includes("create") ||
|
|
617
|
-
toolName.includes("add") ||
|
|
618
|
-
toolName.includes("insert")) {
|
|
619
|
-
// MCP 2025-06-18: Check structuredContent first
|
|
620
|
-
const response = context.response;
|
|
621
|
-
if (response.structuredContent) {
|
|
622
|
-
const structured = response.structuredContent;
|
|
623
|
-
// Check if response is array with IDs
|
|
624
|
-
if (Array.isArray(structured)) {
|
|
625
|
-
const hasIds = structured.some((item) => item &&
|
|
626
|
-
typeof item === "object" &&
|
|
627
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
628
|
-
if (hasIds) {
|
|
629
|
-
result.evidence.push(`Creation response includes ${structured.length} item(s) with IDs in structuredContent`);
|
|
630
|
-
return true;
|
|
631
|
-
}
|
|
632
|
-
// Even without IDs, array response indicates success
|
|
633
|
-
if (structured.length > 0) {
|
|
634
|
-
result.evidence.push("Creation response includes created items in structuredContent");
|
|
635
|
-
return true;
|
|
636
|
-
}
|
|
637
|
-
}
|
|
638
|
-
// Check if response is object with ID
|
|
639
|
-
if (typeof structured === "object" && structured !== null) {
|
|
640
|
-
if ("id" in structured || "_id" in structured || "ID" in structured) {
|
|
641
|
-
result.evidence.push("Creation response includes resource ID in structuredContent");
|
|
642
|
-
return true;
|
|
643
|
-
}
|
|
644
|
-
// Check for entity/relation structures
|
|
645
|
-
if ("entities" in structured ||
|
|
646
|
-
"relations" in structured ||
|
|
647
|
-
"observations" in structured) {
|
|
648
|
-
result.evidence.push("Creation response includes entity/relation data in structuredContent");
|
|
649
|
-
return true;
|
|
650
|
-
}
|
|
651
|
-
}
|
|
652
|
-
}
|
|
653
|
-
// Fallback: Creation tools should return created resource or ID
|
|
654
|
-
// Try to parse as JSON first to check for structured data with IDs
|
|
655
|
-
try {
|
|
656
|
-
const parsed = JSON.parse(textContent);
|
|
657
|
-
// Check if response is array with IDs (common for batch creation)
|
|
658
|
-
if (Array.isArray(parsed)) {
|
|
659
|
-
const hasIds = parsed.some((item) => typeof item === "object" &&
|
|
660
|
-
item !== null &&
|
|
661
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
662
|
-
if (hasIds) {
|
|
663
|
-
result.evidence.push(`Creation response includes ${parsed.length} item(s) with IDs`);
|
|
664
|
-
return true;
|
|
665
|
-
}
|
|
666
|
-
}
|
|
667
|
-
// Check if response is object with ID
|
|
668
|
-
if (typeof parsed === "object" && parsed !== null) {
|
|
669
|
-
if ("id" in parsed || "_id" in parsed || "ID" in parsed) {
|
|
670
|
-
result.evidence.push("Creation response includes resource ID");
|
|
671
|
-
return true;
|
|
672
|
-
}
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
catch {
|
|
676
|
-
// Not JSON, check text patterns
|
|
677
|
-
}
|
|
678
|
-
// Fallback to text-based validation
|
|
679
|
-
if (!textContent.includes("id") &&
|
|
680
|
-
!textContent.includes("created") &&
|
|
681
|
-
!textContent.includes("success")) {
|
|
682
|
-
result.issues.push("Creation response lacks confirmation or resource ID");
|
|
683
|
-
return false;
|
|
684
|
-
}
|
|
685
|
-
result.evidence.push("Creation response includes confirmation");
|
|
686
|
-
return true;
|
|
687
|
-
}
|
|
688
|
-
if (toolName.includes("delete") || toolName.includes("remove")) {
|
|
689
|
-
// Deletion tools should confirm deletion
|
|
690
|
-
if (!textContent.includes("deleted") &&
|
|
691
|
-
!textContent.includes("removed") &&
|
|
692
|
-
!textContent.includes("success")) {
|
|
693
|
-
result.issues.push("Deletion response lacks confirmation");
|
|
694
|
-
return false;
|
|
695
|
-
}
|
|
696
|
-
result.evidence.push("Deletion response confirms action");
|
|
697
|
-
return true;
|
|
698
|
-
}
|
|
699
|
-
if (toolName.includes("update") ||
|
|
700
|
-
toolName.includes("modify") ||
|
|
701
|
-
toolName.includes("edit")) {
|
|
702
|
-
// Update tools should confirm update
|
|
703
|
-
if (!textContent.includes("updated") &&
|
|
704
|
-
!textContent.includes("modified") &&
|
|
705
|
-
!textContent.includes("changed") &&
|
|
706
|
-
!textContent.includes("success")) {
|
|
707
|
-
result.issues.push("Update response lacks confirmation");
|
|
708
|
-
return false;
|
|
709
|
-
}
|
|
710
|
-
result.evidence.push("Update response confirms changes");
|
|
711
|
-
return true;
|
|
712
|
-
}
|
|
713
|
-
if (toolName.includes("list") || toolName.includes("all")) {
|
|
714
|
-
// List tools should return array or multiple items
|
|
715
|
-
try {
|
|
716
|
-
const parsed = JSON.parse(textContent);
|
|
717
|
-
if (Array.isArray(parsed) ||
|
|
718
|
-
(parsed &&
|
|
719
|
-
typeof parsed === "object" &&
|
|
720
|
-
("items" in parsed || "results" in parsed))) {
|
|
721
|
-
result.evidence.push("List response contains array or collection");
|
|
722
|
-
return true;
|
|
723
|
-
}
|
|
724
|
-
}
|
|
725
|
-
catch {
|
|
726
|
-
// Check for list-like text response
|
|
727
|
-
if (textContent.includes(",") || textContent.includes("\n")) {
|
|
728
|
-
result.evidence.push("Response appears to contain multiple items");
|
|
729
|
-
return true;
|
|
730
|
-
}
|
|
731
|
-
}
|
|
732
|
-
result.issues.push("List response doesn't contain collection");
|
|
733
|
-
return false;
|
|
734
|
-
}
|
|
735
|
-
// Default validation - response should be different from input
|
|
736
|
-
const inputStr = JSON.stringify(context.input);
|
|
737
|
-
if (textContent !== inputStr &&
|
|
738
|
-
textContent.length > inputStr.length * 0.5) {
|
|
739
|
-
result.evidence.push("Response is substantively different from input");
|
|
740
|
-
return true;
|
|
741
|
-
}
|
|
742
|
-
result.issues.push("Response doesn't demonstrate clear functionality");
|
|
743
|
-
return false;
|
|
744
|
-
}
|
|
745
|
-
/**
|
|
746
|
-
* Validate tool-specific logic and patterns
|
|
747
|
-
* NOTE: Currently unused - kept for potential future use
|
|
748
|
-
*/
|
|
749
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
750
|
-
static validateToolSpecificLogic(context, result) {
|
|
751
|
-
const toolName = context.tool.name.toLowerCase();
|
|
752
|
-
const content = context.response.content;
|
|
753
|
-
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
754
|
-
// Creation/mutation tools (entities, relations, observations, etc.)
|
|
755
|
-
if (toolName.includes("create") ||
|
|
756
|
-
toolName.includes("add") ||
|
|
757
|
-
toolName.includes("insert") ||
|
|
758
|
-
toolName.includes("entity") ||
|
|
759
|
-
toolName.includes("entities") ||
|
|
760
|
-
toolName.includes("relation") ||
|
|
761
|
-
toolName.includes("observation")) {
|
|
762
|
-
// MCP 2025-06-18: Check structuredContent first (CRITICAL FIX)
|
|
763
|
-
const response = context.response;
|
|
764
|
-
if (response.structuredContent) {
|
|
765
|
-
const structured = response.structuredContent;
|
|
766
|
-
// Check for array responses with IDs
|
|
767
|
-
if (Array.isArray(structured)) {
|
|
768
|
-
const hasIds = structured.some((item) => item &&
|
|
769
|
-
typeof item === "object" &&
|
|
770
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
771
|
-
if (hasIds) {
|
|
772
|
-
result.evidence.push(`Creation tool returned ${structured.length} entity/entities with IDs in structuredContent`);
|
|
773
|
-
return true;
|
|
774
|
-
}
|
|
775
|
-
// Check for entity-like objects even without IDs
|
|
776
|
-
const hasEntityStructure = structured.some((item) => item &&
|
|
777
|
-
typeof item === "object" &&
|
|
778
|
-
("name" in item ||
|
|
779
|
-
"entityType" in item ||
|
|
780
|
-
"from" in item ||
|
|
781
|
-
"to" in item));
|
|
782
|
-
if (hasEntityStructure) {
|
|
783
|
-
result.evidence.push("Creation tool returned entity-like objects in structuredContent");
|
|
784
|
-
return true;
|
|
785
|
-
}
|
|
786
|
-
}
|
|
787
|
-
// Check for object with ID
|
|
788
|
-
if (structured &&
|
|
789
|
-
typeof structured === "object" &&
|
|
790
|
-
("id" in structured || "_id" in structured || "ID" in structured)) {
|
|
791
|
-
result.evidence.push("Creation tool returned entity with ID in structuredContent");
|
|
792
|
-
return true;
|
|
793
|
-
}
|
|
794
|
-
// Check for entity/relation structure
|
|
795
|
-
if (structured &&
|
|
796
|
-
typeof structured === "object" &&
|
|
797
|
-
("name" in structured ||
|
|
798
|
-
"entityType" in structured ||
|
|
799
|
-
"from" in structured ||
|
|
800
|
-
"to" in structured ||
|
|
801
|
-
"entities" in structured ||
|
|
802
|
-
"relations" in structured ||
|
|
803
|
-
"observations" in structured)) {
|
|
804
|
-
result.evidence.push("Creation tool returned entity/relation structure in structuredContent");
|
|
805
|
-
return true;
|
|
806
|
-
}
|
|
807
|
-
}
|
|
808
|
-
// Fallback: Try parsing content.text as JSON
|
|
809
|
-
try {
|
|
810
|
-
const parsed = JSON.parse(textContent);
|
|
811
|
-
// Check for array responses with IDs (common for batch operations)
|
|
812
|
-
if (Array.isArray(parsed)) {
|
|
813
|
-
const hasIds = parsed.some((item) => item &&
|
|
814
|
-
typeof item === "object" &&
|
|
815
|
-
("id" in item || "_id" in item || "ID" in item));
|
|
816
|
-
if (hasIds) {
|
|
817
|
-
result.evidence.push(`Creation tool returned ${parsed.length} entity/entities with IDs`);
|
|
818
|
-
return true;
|
|
819
|
-
}
|
|
820
|
-
// Even without IDs, if array has entity-like objects, it's valid
|
|
821
|
-
const hasEntityStructure = parsed.some((item) => item &&
|
|
822
|
-
typeof item === "object" &&
|
|
823
|
-
("name" in item ||
|
|
824
|
-
"entityType" in item ||
|
|
825
|
-
"from" in item ||
|
|
826
|
-
"to" in item));
|
|
827
|
-
if (hasEntityStructure) {
|
|
828
|
-
result.evidence.push("Creation tool returned entity-like objects");
|
|
829
|
-
return true;
|
|
830
|
-
}
|
|
831
|
-
}
|
|
832
|
-
// Check for object with ID
|
|
833
|
-
if (parsed &&
|
|
834
|
-
typeof parsed === "object" &&
|
|
835
|
-
("id" in parsed || "_id" in parsed || "ID" in parsed)) {
|
|
836
|
-
result.evidence.push("Creation tool returned entity with ID");
|
|
837
|
-
return true;
|
|
838
|
-
}
|
|
839
|
-
// Check for entity structure
|
|
840
|
-
if (parsed &&
|
|
841
|
-
typeof parsed === "object" &&
|
|
842
|
-
("name" in parsed ||
|
|
843
|
-
"entityType" in parsed ||
|
|
844
|
-
"from" in parsed ||
|
|
845
|
-
"to" in parsed ||
|
|
846
|
-
"entities" in parsed ||
|
|
847
|
-
"relations" in parsed)) {
|
|
848
|
-
result.evidence.push("Creation tool returned entity/relation structure");
|
|
849
|
-
return true;
|
|
850
|
-
}
|
|
851
|
-
}
|
|
852
|
-
catch {
|
|
853
|
-
// Not JSON, check text patterns
|
|
854
|
-
}
|
|
855
|
-
// Fallback: check for success indicators in text
|
|
856
|
-
if (textContent.includes("id") ||
|
|
857
|
-
textContent.includes("created") ||
|
|
858
|
-
textContent.includes("entity") ||
|
|
859
|
-
textContent.includes("entities") ||
|
|
860
|
-
textContent.includes("relation") ||
|
|
861
|
-
textContent.includes("observation")) {
|
|
862
|
-
result.evidence.push("Creation tool response contains entity/relation indicators");
|
|
863
|
-
return true;
|
|
864
|
-
}
|
|
865
|
-
}
|
|
866
|
-
// Database/store tools
|
|
867
|
-
if (toolName.includes("database") ||
|
|
868
|
-
toolName.includes("store") ||
|
|
869
|
-
toolName.includes("db")) {
|
|
870
|
-
if (textContent.includes("connection") &&
|
|
871
|
-
textContent.includes("failed")) {
|
|
872
|
-
result.issues.push("Database connection failure");
|
|
873
|
-
return false;
|
|
874
|
-
}
|
|
875
|
-
// Should have some indication of data operation
|
|
876
|
-
if (textContent.includes("rows") ||
|
|
877
|
-
textContent.includes("records") ||
|
|
878
|
-
textContent.includes("documents") ||
|
|
879
|
-
textContent.includes("query")) {
|
|
880
|
-
result.evidence.push("Response indicates database operation");
|
|
881
|
-
return true;
|
|
882
|
-
}
|
|
883
|
-
}
|
|
884
|
-
// File system tools
|
|
885
|
-
if (toolName.includes("file") ||
|
|
886
|
-
toolName.includes("read") ||
|
|
887
|
-
toolName.includes("write")) {
|
|
888
|
-
if (textContent.includes("permission") &&
|
|
889
|
-
textContent.includes("denied")) {
|
|
890
|
-
result.issues.push("File permission error");
|
|
891
|
-
return false;
|
|
892
|
-
}
|
|
893
|
-
if (textContent.includes("not found") &&
|
|
894
|
-
context.scenarioCategory !== "error_case") {
|
|
895
|
-
result.issues.push("File not found error");
|
|
896
|
-
return false;
|
|
897
|
-
}
|
|
898
|
-
// Should have file operation indication
|
|
899
|
-
if (textContent.includes("bytes") ||
|
|
900
|
-
textContent.includes("content") ||
|
|
901
|
-
textContent.includes("saved") ||
|
|
902
|
-
textContent.includes("written")) {
|
|
903
|
-
result.evidence.push("Response indicates file operation");
|
|
904
|
-
return true;
|
|
905
|
-
}
|
|
906
|
-
}
|
|
907
|
-
// API/HTTP tools
|
|
908
|
-
if (toolName.includes("http") ||
|
|
909
|
-
toolName.includes("api") ||
|
|
910
|
-
toolName.includes("fetch")) {
|
|
911
|
-
// Check for HTTP status codes
|
|
912
|
-
if (textContent.includes("200") ||
|
|
913
|
-
textContent.includes("201") ||
|
|
914
|
-
textContent.includes("success")) {
|
|
915
|
-
result.evidence.push("Response indicates successful HTTP operation");
|
|
916
|
-
return true;
|
|
917
|
-
}
|
|
918
|
-
if (textContent.includes("404") ||
|
|
919
|
-
textContent.includes("500") ||
|
|
920
|
-
textContent.includes("error")) {
|
|
921
|
-
result.issues.push("HTTP error in response");
|
|
922
|
-
return false;
|
|
923
|
-
}
|
|
924
|
-
}
|
|
925
|
-
// Computation/calculation tools
|
|
926
|
-
if (toolName.includes("calc") ||
|
|
927
|
-
toolName.includes("compute") ||
|
|
928
|
-
toolName.includes("math")) {
|
|
929
|
-
// Should return numeric result
|
|
930
|
-
try {
|
|
931
|
-
const parsed = JSON.parse(textContent);
|
|
932
|
-
if (typeof parsed === "number" ||
|
|
933
|
-
(parsed && "result" in parsed && typeof parsed.result === "number")) {
|
|
934
|
-
result.evidence.push("Response contains numeric computation result");
|
|
935
|
-
return true;
|
|
936
|
-
}
|
|
937
|
-
}
|
|
938
|
-
catch {
|
|
939
|
-
// Check for number in text
|
|
940
|
-
if (/\d+/.test(textContent)) {
|
|
941
|
-
result.evidence.push("Response contains numeric value");
|
|
942
|
-
return true;
|
|
943
|
-
}
|
|
944
|
-
}
|
|
945
|
-
result.issues.push("Computation tool didn't return numeric result");
|
|
946
|
-
return false;
|
|
947
|
-
}
|
|
948
|
-
// Default - tool responded with non-empty content
|
|
949
|
-
if (textContent.length > 20) {
|
|
950
|
-
result.evidence.push("Tool provided substantive response");
|
|
951
|
-
return true;
|
|
952
|
-
}
|
|
953
|
-
result.issues.push("Response lacks tool-specific indicators");
|
|
954
|
-
return false;
|
|
955
|
-
}
|
|
956
|
-
/**
|
|
957
|
-
* Find query-like parameter in input
|
|
958
|
-
*/
|
|
959
|
-
static findQueryParameter(input) {
|
|
960
|
-
const queryKeys = [
|
|
961
|
-
"query",
|
|
962
|
-
"q",
|
|
963
|
-
"search",
|
|
964
|
-
"term",
|
|
965
|
-
"keyword",
|
|
966
|
-
"filter",
|
|
967
|
-
"name",
|
|
968
|
-
"id",
|
|
969
|
-
];
|
|
970
|
-
for (const key of queryKeys) {
|
|
971
|
-
if (key in input) {
|
|
972
|
-
return input[key];
|
|
973
|
-
}
|
|
974
|
-
}
|
|
975
|
-
// Return first string parameter as fallback
|
|
976
|
-
for (const value of Object.values(input)) {
|
|
977
|
-
if (typeof value === "string") {
|
|
978
|
-
return value;
|
|
979
|
-
}
|
|
980
|
-
}
|
|
981
|
-
return null;
|
|
982
|
-
}
|
|
983
|
-
/**
|
|
984
|
-
* Validate structured output against outputSchema (MCP 2025-06-18 feature)
|
|
985
|
-
* NOTE: Currently unused - kept for potential future use
|
|
986
|
-
*/
|
|
987
|
-
// @ts-ignore - Unused method kept for potential future use
|
|
988
|
-
static validateStructuredOutput(context, result) {
|
|
989
|
-
// Check if tool has outputSchema defined
|
|
990
|
-
const tool = context.tool; // Cast to any to access potential outputSchema property
|
|
991
|
-
if (!tool.outputSchema) {
|
|
992
|
-
// Tool doesn't define outputSchema, this validation is not applicable
|
|
993
|
-
result.evidence.push("Tool does not define outputSchema (optional MCP 2025-06-18 feature)");
|
|
994
|
-
return true; // Not a failure if not using structured output
|
|
995
|
-
}
|
|
996
|
-
// Check if response contains structuredContent
|
|
997
|
-
const response = context.response;
|
|
998
|
-
if (response.structuredContent) {
|
|
999
|
-
// Tool provides structuredContent - this is the modern MCP 2025-06-18 pattern
|
|
1000
|
-
// outputSchema validation is optional and rarely used, so we accept any structuredContent
|
|
1001
|
-
result.evidence.push("Tool provides structuredContent (MCP 2025-06-18 modern response format)");
|
|
1002
|
-
return true;
|
|
1003
|
-
}
|
|
1004
|
-
// Check if response contains resource URIs (another MCP 2025-06-18 feature)
|
|
1005
|
-
const content = context.response.content;
|
|
1006
|
-
const hasResourceUris = content.some((item) => item.type === "resource" && item.uri);
|
|
1007
|
-
if (hasResourceUris) {
|
|
1008
|
-
result.evidence.push("Response uses resource URIs for external content (MCP 2025-06-18 feature)");
|
|
1009
|
-
return true;
|
|
1010
|
-
}
|
|
1011
|
-
// Tool has outputSchema but didn't provide structuredContent
|
|
1012
|
-
// This is okay - tools can provide both text and structured output
|
|
1013
|
-
result.evidence.push("Tool has outputSchema but provided text response (backward compatibility)");
|
|
1014
|
-
return true;
|
|
1015
|
-
}
|
|
1016
412
|
/**
|
|
1017
413
|
* Calculate confidence score for a set of validation results
|
|
1018
414
|
*/
|