@bryan-thompson/inspector-assessment-client 1.5.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assets/{OAuthCallback-DGVqLct6.js → OAuthCallback-Xo9zS7pv.js} +1 -1
- package/dist/assets/{OAuthDebugCallback-DHflRQgp.js → OAuthDebugCallback-CaIey8K_.js} +1 -1
- package/dist/assets/{index-Btl7vuTl.js → index-nCPw6E-c.js} +4 -4
- package/dist/index.html +1 -1
- package/lib/lib/assessmentTypes.d.ts +670 -0
- package/lib/lib/assessmentTypes.d.ts.map +1 -0
- package/lib/lib/assessmentTypes.js +220 -0
- package/lib/lib/aupPatterns.d.ts +63 -0
- package/lib/lib/aupPatterns.d.ts.map +1 -0
- package/lib/lib/aupPatterns.js +344 -0
- package/lib/lib/prohibitedLibraries.d.ts +76 -0
- package/lib/lib/prohibitedLibraries.d.ts.map +1 -0
- package/lib/lib/prohibitedLibraries.js +364 -0
- package/lib/lib/securityPatterns.d.ts +64 -0
- package/lib/lib/securityPatterns.d.ts.map +1 -0
- package/lib/lib/securityPatterns.js +453 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts +88 -0
- package/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -0
- package/lib/services/assessment/AssessmentOrchestrator.js +418 -0
- package/lib/services/assessment/ResponseValidator.d.ts +69 -0
- package/lib/services/assessment/ResponseValidator.d.ts.map +1 -0
- package/lib/services/assessment/ResponseValidator.js +1038 -0
- package/lib/services/assessment/TestDataGenerator.d.ts +86 -0
- package/lib/services/assessment/TestDataGenerator.d.ts.map +1 -0
- package/lib/services/assessment/TestDataGenerator.js +669 -0
- package/lib/services/assessment/TestScenarioEngine.d.ts +91 -0
- package/lib/services/assessment/TestScenarioEngine.d.ts.map +1 -0
- package/lib/services/assessment/TestScenarioEngine.js +505 -0
- package/lib/services/assessment/ToolClassifier.d.ts +61 -0
- package/lib/services/assessment/ToolClassifier.d.ts.map +1 -0
- package/lib/services/assessment/ToolClassifier.js +349 -0
- package/lib/services/assessment/lib/claudeCodeBridge.d.ts +160 -0
- package/lib/services/assessment/lib/claudeCodeBridge.d.ts.map +1 -0
- package/lib/services/assessment/lib/claudeCodeBridge.js +357 -0
- package/lib/services/assessment/modules/AUPComplianceAssessor.d.ts +100 -0
- package/lib/services/assessment/modules/AUPComplianceAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/AUPComplianceAssessor.js +474 -0
- package/lib/services/assessment/modules/BaseAssessor.d.ts +71 -0
- package/lib/services/assessment/modules/BaseAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/BaseAssessor.js +171 -0
- package/lib/services/assessment/modules/DocumentationAssessor.d.ts +45 -0
- package/lib/services/assessment/modules/DocumentationAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/DocumentationAssessor.js +355 -0
- package/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
- package/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/ErrorHandlingAssessor.js +564 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.d.ts +20 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/FunctionalityAssessor.js +253 -0
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts +70 -0
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/MCPSpecComplianceAssessor.js +508 -0
- package/lib/services/assessment/modules/ManifestValidationAssessor.d.ts +70 -0
- package/lib/services/assessment/modules/ManifestValidationAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/ManifestValidationAssessor.js +430 -0
- package/lib/services/assessment/modules/PortabilityAssessor.d.ts +43 -0
- package/lib/services/assessment/modules/PortabilityAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/PortabilityAssessor.js +347 -0
- package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts +41 -0
- package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/ProhibitedLibrariesAssessor.js +256 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts +176 -0
- package/lib/services/assessment/modules/SecurityAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/SecurityAssessor.js +1333 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts +96 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/ToolAnnotationAssessor.js +593 -0
- package/lib/services/assessment/modules/UsabilityAssessor.d.ts +21 -0
- package/lib/services/assessment/modules/UsabilityAssessor.d.ts.map +1 -0
- package/lib/services/assessment/modules/UsabilityAssessor.js +241 -0
- package/lib/services/assessment/modules/index.d.ts +33 -0
- package/lib/services/assessment/modules/index.d.ts.map +1 -0
- package/lib/services/assessment/modules/index.js +35 -0
- package/package.json +15 -3
|
@@ -0,0 +1,1038 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Response Validator for MCP Tool Testing
|
|
3
|
+
* Validates that tool responses are actually functional, not just present
|
|
4
|
+
*/
|
|
5
|
+
export class ResponseValidator {
|
|
6
|
+
/**
|
|
7
|
+
* Validate a tool response comprehensively
|
|
8
|
+
*/
|
|
9
|
+
static validateResponse(context) {
|
|
10
|
+
const result = {
|
|
11
|
+
isValid: false,
|
|
12
|
+
isError: false,
|
|
13
|
+
confidence: 0,
|
|
14
|
+
issues: [],
|
|
15
|
+
evidence: [],
|
|
16
|
+
classification: "broken",
|
|
17
|
+
};
|
|
18
|
+
// Check if response indicates an error
|
|
19
|
+
if (context.response.isError) {
|
|
20
|
+
result.isError = true;
|
|
21
|
+
// Simplified: ANY error response means the tool is functional
|
|
22
|
+
// The tool responded (even with an error) - that's functionality!
|
|
23
|
+
result.isValid = true;
|
|
24
|
+
result.classification = "fully_working";
|
|
25
|
+
result.confidence = 100;
|
|
26
|
+
result.evidence.push("Tool responded with error (tool is functional)");
|
|
27
|
+
// Add context about the error for debugging
|
|
28
|
+
const content = context.response.content;
|
|
29
|
+
const errorText = content?.[0]?.text || "Unknown error";
|
|
30
|
+
result.evidence.push(`Error message: ${errorText.substring(0, 100)}`);
|
|
31
|
+
return result;
|
|
32
|
+
}
|
|
33
|
+
// Simplified functionality validation:
|
|
34
|
+
// If the tool responds with content, it's functional.
|
|
35
|
+
// We don't check response quality/structure - that's for error handling tests.
|
|
36
|
+
// Check 1: Response has content
|
|
37
|
+
if (!context.response.content) {
|
|
38
|
+
result.issues.push("Response has no content");
|
|
39
|
+
result.classification = "broken";
|
|
40
|
+
result.confidence = 0;
|
|
41
|
+
return result;
|
|
42
|
+
}
|
|
43
|
+
// Check 2: Content is a non-empty array
|
|
44
|
+
const content = context.response.content;
|
|
45
|
+
if (!Array.isArray(content) || content.length === 0) {
|
|
46
|
+
result.issues.push("Response content is empty or not an array");
|
|
47
|
+
result.classification = "broken";
|
|
48
|
+
result.confidence = 0;
|
|
49
|
+
return result;
|
|
50
|
+
}
|
|
51
|
+
// Tool responded successfully - it's functional!
|
|
52
|
+
result.isValid = true;
|
|
53
|
+
result.classification = "fully_working";
|
|
54
|
+
result.confidence = 100;
|
|
55
|
+
result.evidence.push("Tool responded successfully with content");
|
|
56
|
+
// Add details about response type for debugging
|
|
57
|
+
const hasText = content.some((item) => item.type === "text");
|
|
58
|
+
const hasResource = content.some((item) => item.type === "resource");
|
|
59
|
+
if (hasText) {
|
|
60
|
+
result.evidence.push("Response includes text content");
|
|
61
|
+
}
|
|
62
|
+
if (hasResource) {
|
|
63
|
+
result.evidence.push("Response includes resource content");
|
|
64
|
+
}
|
|
65
|
+
return result;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Check if error is a business logic error (not a tool failure)
|
|
69
|
+
* These errors indicate the tool is working correctly but rejecting invalid business data
|
|
70
|
+
*/
|
|
71
|
+
static isBusinessLogicError(context) {
|
|
72
|
+
const content = context.response.content;
|
|
73
|
+
const errorText = content?.[0]?.type === "text" && content[0].text
|
|
74
|
+
? content[0].text.toLowerCase()
|
|
75
|
+
: JSON.stringify(context.response.content).toLowerCase();
|
|
76
|
+
// Extract any error code from the response
|
|
77
|
+
const errorCodeMatch = errorText.match(/(?:code|error_code)["\s:]+([^",\s]+)/);
|
|
78
|
+
const errorCode = errorCodeMatch ? errorCodeMatch[1] : null;
|
|
79
|
+
// MCP standard error codes that indicate proper validation
|
|
80
|
+
const mcpValidationCodes = [
|
|
81
|
+
"-32602", // Invalid params - tool is validating input correctly
|
|
82
|
+
"-32603", // Internal error - tool handled error gracefully
|
|
83
|
+
"invalid_params",
|
|
84
|
+
"validation_error",
|
|
85
|
+
"bad_request",
|
|
86
|
+
];
|
|
87
|
+
if (errorCode &&
|
|
88
|
+
mcpValidationCodes.some((code) => errorText.includes(code))) {
|
|
89
|
+
return true; // Tool is properly implementing MCP error codes
|
|
90
|
+
}
|
|
91
|
+
// Common business logic error patterns that indicate the tool is working correctly
|
|
92
|
+
const businessErrorPatterns = [
|
|
93
|
+
// Resource validation errors (tool is checking if resources exist)
|
|
94
|
+
"not found",
|
|
95
|
+
"does not exist",
|
|
96
|
+
"doesn't exist",
|
|
97
|
+
"no such",
|
|
98
|
+
"cannot find",
|
|
99
|
+
"could not find",
|
|
100
|
+
"unable to find",
|
|
101
|
+
"invalid id",
|
|
102
|
+
"invalid identifier",
|
|
103
|
+
"unknown resource",
|
|
104
|
+
"resource not found",
|
|
105
|
+
"entity not found",
|
|
106
|
+
"object not found",
|
|
107
|
+
"record not found",
|
|
108
|
+
"item not found",
|
|
109
|
+
"node not found",
|
|
110
|
+
"nodes not found",
|
|
111
|
+
"no entities",
|
|
112
|
+
"no results",
|
|
113
|
+
"not exist",
|
|
114
|
+
"no nodes",
|
|
115
|
+
"no matching",
|
|
116
|
+
"no matches",
|
|
117
|
+
"empty result",
|
|
118
|
+
"zero results",
|
|
119
|
+
"nothing found",
|
|
120
|
+
"no data",
|
|
121
|
+
"no items",
|
|
122
|
+
// Data validation errors (tool is validating data format/content)
|
|
123
|
+
"invalid format",
|
|
124
|
+
"invalid value",
|
|
125
|
+
"invalid type",
|
|
126
|
+
"invalid input",
|
|
127
|
+
"invalid parameter",
|
|
128
|
+
"invalid data",
|
|
129
|
+
"type mismatch",
|
|
130
|
+
"schema validation",
|
|
131
|
+
"constraint violation",
|
|
132
|
+
"out of range",
|
|
133
|
+
"exceeds maximum",
|
|
134
|
+
"below minimum",
|
|
135
|
+
"invalid length",
|
|
136
|
+
"pattern mismatch",
|
|
137
|
+
"regex failed",
|
|
138
|
+
"must have",
|
|
139
|
+
"must be",
|
|
140
|
+
// Permission and authorization (tool is checking access rights)
|
|
141
|
+
"unauthorized",
|
|
142
|
+
"permission denied",
|
|
143
|
+
"access denied",
|
|
144
|
+
"forbidden",
|
|
145
|
+
"not authorized",
|
|
146
|
+
"insufficient permissions",
|
|
147
|
+
"no access",
|
|
148
|
+
"authentication required",
|
|
149
|
+
"token expired",
|
|
150
|
+
"invalid credentials",
|
|
151
|
+
// Business rule validation (tool is enforcing business logic)
|
|
152
|
+
"already exists",
|
|
153
|
+
"duplicate",
|
|
154
|
+
"conflict",
|
|
155
|
+
"quota exceeded",
|
|
156
|
+
"limit reached",
|
|
157
|
+
"not allowed",
|
|
158
|
+
"operation not permitted",
|
|
159
|
+
"invalid state",
|
|
160
|
+
"precondition failed",
|
|
161
|
+
"dependency not met",
|
|
162
|
+
// API-specific validation
|
|
163
|
+
"invalid parent",
|
|
164
|
+
"invalid reference",
|
|
165
|
+
"invalid relationship",
|
|
166
|
+
"missing required",
|
|
167
|
+
"required field",
|
|
168
|
+
"required parameter",
|
|
169
|
+
"validation failed",
|
|
170
|
+
"invalid request",
|
|
171
|
+
"bad request",
|
|
172
|
+
"malformed",
|
|
173
|
+
// Rate limiting (shows API integration is working)
|
|
174
|
+
"rate limit",
|
|
175
|
+
"too many requests",
|
|
176
|
+
"throttled",
|
|
177
|
+
"quota",
|
|
178
|
+
"exceeded",
|
|
179
|
+
// API operational/billing errors (shows API integration is working)
|
|
180
|
+
"insufficient credits",
|
|
181
|
+
"credits",
|
|
182
|
+
"no credits",
|
|
183
|
+
"credit balance",
|
|
184
|
+
"billing",
|
|
185
|
+
"subscription",
|
|
186
|
+
"plan upgrade",
|
|
187
|
+
"payment required",
|
|
188
|
+
"account suspended",
|
|
189
|
+
"trial expired",
|
|
190
|
+
"usage limit",
|
|
191
|
+
// Configuration validation
|
|
192
|
+
"not configured",
|
|
193
|
+
"not enabled",
|
|
194
|
+
"not available",
|
|
195
|
+
"not supported",
|
|
196
|
+
"feature disabled",
|
|
197
|
+
"service unavailable",
|
|
198
|
+
];
|
|
199
|
+
// Check if error matches any business logic pattern
|
|
200
|
+
const hasBusinessErrorPattern = businessErrorPatterns.some((pattern) => errorText.includes(pattern));
|
|
201
|
+
// HTTP status codes that indicate business logic validation
|
|
202
|
+
const businessStatusCodes = [
|
|
203
|
+
"400", // Bad Request - input validation
|
|
204
|
+
"401", // Unauthorized - auth validation
|
|
205
|
+
"403", // Forbidden - permission validation
|
|
206
|
+
"404", // Not Found - resource validation
|
|
207
|
+
"409", // Conflict - state validation
|
|
208
|
+
"422", // Unprocessable Entity - semantic validation
|
|
209
|
+
"429", // Too Many Requests - rate limit validation
|
|
210
|
+
];
|
|
211
|
+
const hasBusinessStatusCode = businessStatusCodes.some((code) => errorText.includes(code) ||
|
|
212
|
+
errorText.includes(`status: ${code}`) ||
|
|
213
|
+
errorText.includes(`statuscode: ${code}`));
|
|
214
|
+
// Check for structured error response (indicates proper error handling)
|
|
215
|
+
const hasStructuredError = (errorText.includes("error") || errorText.includes("message")) &&
|
|
216
|
+
(errorText.includes("code") ||
|
|
217
|
+
errorText.includes("type") ||
|
|
218
|
+
errorText.includes("status")) &&
|
|
219
|
+
(errorText.includes("{") || errorText.includes(":")); // JSON-like structure
|
|
220
|
+
// Check if the tool is validating our test data
|
|
221
|
+
const validatesTestData =
|
|
222
|
+
// Rejects test IDs
|
|
223
|
+
((errorText.includes("test") ||
|
|
224
|
+
errorText.includes("example") ||
|
|
225
|
+
errorText.includes("demo")) &&
|
|
226
|
+
(errorText.includes("invalid") ||
|
|
227
|
+
errorText.includes("not found") ||
|
|
228
|
+
errorText.includes("does not exist"))) ||
|
|
229
|
+
// Rejects placeholder values
|
|
230
|
+
errorText.includes("test_value") ||
|
|
231
|
+
errorText.includes("test@example.com") ||
|
|
232
|
+
errorText.includes("example.com") ||
|
|
233
|
+
// Shows it validated UUID format
|
|
234
|
+
/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/.test(errorText) ||
|
|
235
|
+
// Shows it parsed and validated numeric IDs
|
|
236
|
+
/\bid["\s:]+\d+/.test(errorText) ||
|
|
237
|
+
/\bid["\s:]+["'][^"']+["']/.test(errorText);
|
|
238
|
+
// Check tool operation type - resource operations are expected to validate
|
|
239
|
+
const toolName = context.tool.name.toLowerCase();
|
|
240
|
+
const isValidationExpected =
|
|
241
|
+
// CRUD operations
|
|
242
|
+
toolName.includes("create") ||
|
|
243
|
+
toolName.includes("update") ||
|
|
244
|
+
toolName.includes("delete") ||
|
|
245
|
+
toolName.includes("get") ||
|
|
246
|
+
toolName.includes("fetch") ||
|
|
247
|
+
toolName.includes("read") ||
|
|
248
|
+
toolName.includes("write") ||
|
|
249
|
+
// Data operations
|
|
250
|
+
toolName.includes("query") ||
|
|
251
|
+
toolName.includes("search") ||
|
|
252
|
+
toolName.includes("find") ||
|
|
253
|
+
toolName.includes("list") ||
|
|
254
|
+
// State operations
|
|
255
|
+
toolName.includes("move") ||
|
|
256
|
+
toolName.includes("copy") ||
|
|
257
|
+
toolName.includes("duplicate") ||
|
|
258
|
+
toolName.includes("archive") ||
|
|
259
|
+
// Relationship operations
|
|
260
|
+
toolName.includes("link") ||
|
|
261
|
+
toolName.includes("associate") ||
|
|
262
|
+
toolName.includes("connect") ||
|
|
263
|
+
toolName.includes("attach") ||
|
|
264
|
+
// API/scraping operations
|
|
265
|
+
toolName.includes("scrape") ||
|
|
266
|
+
toolName.includes("crawl") ||
|
|
267
|
+
toolName.includes("map") ||
|
|
268
|
+
toolName.includes("extract") ||
|
|
269
|
+
toolName.includes("parse") ||
|
|
270
|
+
toolName.includes("analyze") ||
|
|
271
|
+
toolName.includes("process");
|
|
272
|
+
// Calculate confidence that this is a business logic error
|
|
273
|
+
let confidenceFactors = 0;
|
|
274
|
+
let totalFactors = 0;
|
|
275
|
+
// High confidence indicators
|
|
276
|
+
if (errorCode &&
|
|
277
|
+
mcpValidationCodes.some((code) => errorText.includes(code))) {
|
|
278
|
+
confidenceFactors += 2; // MCP compliance is strong indicator
|
|
279
|
+
}
|
|
280
|
+
totalFactors += 2;
|
|
281
|
+
if (hasBusinessErrorPattern)
|
|
282
|
+
confidenceFactors += 2; // Increased weight for business error patterns
|
|
283
|
+
totalFactors += 2;
|
|
284
|
+
if (hasBusinessStatusCode)
|
|
285
|
+
confidenceFactors++;
|
|
286
|
+
totalFactors++;
|
|
287
|
+
if (hasStructuredError)
|
|
288
|
+
confidenceFactors++;
|
|
289
|
+
totalFactors++;
|
|
290
|
+
if (validatesTestData)
|
|
291
|
+
confidenceFactors++;
|
|
292
|
+
totalFactors++;
|
|
293
|
+
if (isValidationExpected)
|
|
294
|
+
confidenceFactors += 2; // Increased weight for validation-expected tools
|
|
295
|
+
totalFactors += 2;
|
|
296
|
+
// Require at least 50% confidence that this is business logic validation
|
|
297
|
+
const confidence = confidenceFactors / totalFactors;
|
|
298
|
+
// Special case: Strong operational error indicators (quota, rate limit, billing)
|
|
299
|
+
// These are almost always business logic errors, not tool failures
|
|
300
|
+
const hasStrongOperationalError = hasBusinessErrorPattern &&
|
|
301
|
+
(errorText.includes("quota") ||
|
|
302
|
+
errorText.includes("credit") ||
|
|
303
|
+
errorText.includes("rate limit") ||
|
|
304
|
+
errorText.includes("throttle") ||
|
|
305
|
+
errorText.includes("billing") ||
|
|
306
|
+
errorText.includes("payment") ||
|
|
307
|
+
errorText.includes("subscription") ||
|
|
308
|
+
errorText.includes("trial"));
|
|
309
|
+
// Determine confidence threshold based on error type and tool type
|
|
310
|
+
// - Strong operational errors: 20% (very lenient, these are obvious)
|
|
311
|
+
// - Validation-expected tools: 30% (lenient)
|
|
312
|
+
// - Other tools: 50% (standard)
|
|
313
|
+
const confidenceThreshold = hasStrongOperationalError
|
|
314
|
+
? 0.2
|
|
315
|
+
: isValidationExpected
|
|
316
|
+
? 0.3
|
|
317
|
+
: 0.5;
|
|
318
|
+
return confidence >= confidenceThreshold;
|
|
319
|
+
}
|
|
320
|
+
/**
|
|
321
|
+
* Validate error responses are proper and informative
|
|
322
|
+
* NOTE: Currently unused - kept for potential future use
|
|
323
|
+
*/
|
|
324
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
325
|
+
static validateErrorResponse(context, result) {
|
|
326
|
+
const content = context.response.content;
|
|
327
|
+
const errorText = content?.[0]?.type === "text" && content[0].text
|
|
328
|
+
? content[0].text
|
|
329
|
+
: JSON.stringify(context.response.content);
|
|
330
|
+
// Check for proper error structure
|
|
331
|
+
let hasProperError = false;
|
|
332
|
+
// Check for MCP standard error codes
|
|
333
|
+
if (errorText.includes("-32602") || errorText.includes("Invalid params")) {
|
|
334
|
+
result.evidence.push("Proper MCP error code for invalid parameters");
|
|
335
|
+
hasProperError = true;
|
|
336
|
+
}
|
|
337
|
+
// Check for descriptive error messages
|
|
338
|
+
if (errorText.length > 20 &&
|
|
339
|
+
(errorText.toLowerCase().includes("invalid") ||
|
|
340
|
+
errorText.toLowerCase().includes("required") ||
|
|
341
|
+
errorText.toLowerCase().includes("type") ||
|
|
342
|
+
errorText.toLowerCase().includes("validation"))) {
|
|
343
|
+
result.evidence.push("Descriptive error message provided");
|
|
344
|
+
hasProperError = true;
|
|
345
|
+
}
|
|
346
|
+
if (!hasProperError) {
|
|
347
|
+
result.issues.push("Error response lacks proper error codes or descriptive messages");
|
|
348
|
+
}
|
|
349
|
+
return hasProperError;
|
|
350
|
+
}
|
|
351
|
+
/**
|
|
352
|
+
* Validate response structure matches expectations
|
|
353
|
+
* NOTE: Currently unused - kept for potential future use
|
|
354
|
+
*/
|
|
355
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
356
|
+
static validateResponseStructure(context, result) {
|
|
357
|
+
// Check if response has content
|
|
358
|
+
if (!context.response.content) {
|
|
359
|
+
result.issues.push("Response has no content");
|
|
360
|
+
return false;
|
|
361
|
+
}
|
|
362
|
+
// Check content structure
|
|
363
|
+
const content = context.response.content;
|
|
364
|
+
if (!Array.isArray(content) || content.length === 0) {
|
|
365
|
+
result.issues.push("Response content is empty or not an array");
|
|
366
|
+
return false;
|
|
367
|
+
}
|
|
368
|
+
// Check for expected content type
|
|
369
|
+
const hasTextContent = content.some((item) => item.type === "text" && item.text);
|
|
370
|
+
const hasResourceContent = content.some((item) => item.type === "resource");
|
|
371
|
+
if (!hasTextContent && !hasResourceContent) {
|
|
372
|
+
result.issues.push("Response lacks text or resource content");
|
|
373
|
+
return false;
|
|
374
|
+
}
|
|
375
|
+
result.evidence.push("Response has valid structure");
|
|
376
|
+
return true;
|
|
377
|
+
}
|
|
378
|
+
/**
|
|
379
|
+
* Validate response content is meaningful
|
|
380
|
+
* NOTE: Currently unused - kept for potential future use
|
|
381
|
+
*/
|
|
382
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
383
|
+
static validateResponseContent(context, result) {
|
|
384
|
+
const content = context.response.content;
|
|
385
|
+
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
386
|
+
// Check if response is just echoing input (bad)
|
|
387
|
+
const inputStr = JSON.stringify(context.input);
|
|
388
|
+
if (textContent === inputStr || textContent === "test_value") {
|
|
389
|
+
result.issues.push("Response appears to just echo input");
|
|
390
|
+
return false;
|
|
391
|
+
}
|
|
392
|
+
// Check for minimal content length
|
|
393
|
+
// But allow short responses for mutation tools (create/update/delete) that might return simple "Success"
|
|
394
|
+
const toolName = context.tool.name.toLowerCase();
|
|
395
|
+
const isMutationTool = toolName.includes("create") ||
|
|
396
|
+
toolName.includes("update") ||
|
|
397
|
+
toolName.includes("delete") ||
|
|
398
|
+
toolName.includes("add") ||
|
|
399
|
+
toolName.includes("remove") ||
|
|
400
|
+
toolName.includes("insert");
|
|
401
|
+
if (textContent.length < 10 && !isMutationTool) {
|
|
402
|
+
result.issues.push("Response content is too short to be meaningful");
|
|
403
|
+
return false;
|
|
404
|
+
}
|
|
405
|
+
// For mutation tools, accept common success indicators even if short
|
|
406
|
+
if (isMutationTool && textContent.length < 10) {
|
|
407
|
+
const successIndicators = [
|
|
408
|
+
"success",
|
|
409
|
+
"ok",
|
|
410
|
+
"done",
|
|
411
|
+
"created",
|
|
412
|
+
"updated",
|
|
413
|
+
"deleted",
|
|
414
|
+
"added",
|
|
415
|
+
"removed",
|
|
416
|
+
];
|
|
417
|
+
const hasSuccessIndicator = successIndicators.some((indicator) => textContent.toLowerCase().includes(indicator));
|
|
418
|
+
if (!hasSuccessIndicator) {
|
|
419
|
+
result.issues.push("Short response lacks success confirmation for mutation operation");
|
|
420
|
+
return false;
|
|
421
|
+
}
|
|
422
|
+
// Short success message is acceptable
|
|
423
|
+
result.evidence.push("Mutation operation confirmed with short response");
|
|
424
|
+
return true;
|
|
425
|
+
}
|
|
426
|
+
// MCP 2025-06-18: Check structuredContent first (modern MCP tools)
|
|
427
|
+
// Modern tools provide structuredContent even without outputSchema
|
|
428
|
+
const response = context.response;
|
|
429
|
+
if (response.structuredContent) {
|
|
430
|
+
const structured = response.structuredContent;
|
|
431
|
+
// Handle structured array responses
|
|
432
|
+
if (Array.isArray(structured)) {
|
|
433
|
+
if (structured.length === 0) {
|
|
434
|
+
// Empty array is valid - tool processed request successfully but had no data
|
|
435
|
+
// Example: create_relations returns [] when referenced entities don't exist
|
|
436
|
+
result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
|
|
437
|
+
return true;
|
|
438
|
+
}
|
|
439
|
+
// For mutation tools, check for IDs
|
|
440
|
+
if (isMutationTool) {
|
|
441
|
+
const hasIds = structured.some((item) => item &&
|
|
442
|
+
typeof item === "object" &&
|
|
443
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
444
|
+
if (hasIds) {
|
|
445
|
+
result.evidence.push(`Mutation operation returned ${structured.length} item(s) with IDs in structuredContent`);
|
|
446
|
+
return true;
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
result.evidence.push(`Response has structuredContent array with ${structured.length} item(s)`);
|
|
450
|
+
return true;
|
|
451
|
+
}
|
|
452
|
+
// Handle structured object responses
|
|
453
|
+
if (typeof structured === "object" && structured !== null) {
|
|
454
|
+
const keys = Object.keys(structured);
|
|
455
|
+
if (keys.length === 0) {
|
|
456
|
+
result.issues.push("structuredContent object is empty");
|
|
457
|
+
return false;
|
|
458
|
+
}
|
|
459
|
+
const hasNonNullValues = keys.some((key) => structured[key] !== null && structured[key] !== undefined);
|
|
460
|
+
if (!hasNonNullValues) {
|
|
461
|
+
result.issues.push("structuredContent contains only null/undefined values");
|
|
462
|
+
return false;
|
|
463
|
+
}
|
|
464
|
+
result.evidence.push(`Response has structuredContent with ${keys.length} data fields`);
|
|
465
|
+
return true;
|
|
466
|
+
}
|
|
467
|
+
}
|
|
468
|
+
// Fallback: Check for actual data/information in content.text
|
|
469
|
+
try {
|
|
470
|
+
const parsed = JSON.parse(textContent);
|
|
471
|
+
// Handle JSON array responses (common for batch operations)
|
|
472
|
+
if (Array.isArray(parsed)) {
|
|
473
|
+
if (parsed.length === 0) {
|
|
474
|
+
// Empty array is valid - tool processed request successfully but had no data
|
|
475
|
+
result.evidence.push("Tool returned empty array (processed successfully, no matching data)");
|
|
476
|
+
return true;
|
|
477
|
+
}
|
|
478
|
+
// For mutation tools, check if array items have IDs (indicates successful creation)
|
|
479
|
+
if (isMutationTool && parsed.length > 0) {
|
|
480
|
+
const hasIds = parsed.some((item) => typeof item === "object" &&
|
|
481
|
+
item !== null &&
|
|
482
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
483
|
+
if (hasIds) {
|
|
484
|
+
result.evidence.push(`Mutation operation returned ${parsed.length} item(s) with IDs`);
|
|
485
|
+
return true;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
result.evidence.push(`Response is array with ${parsed.length} item(s)`);
|
|
489
|
+
return true;
|
|
490
|
+
}
|
|
491
|
+
// Handle JSON object responses
|
|
492
|
+
if (typeof parsed === "object" && parsed !== null) {
|
|
493
|
+
const keys = Object.keys(parsed);
|
|
494
|
+
if (keys.length === 0) {
|
|
495
|
+
result.issues.push("Response object is empty");
|
|
496
|
+
return false;
|
|
497
|
+
}
|
|
498
|
+
// Check for null/undefined values
|
|
499
|
+
const hasNonNullValues = keys.some((key) => parsed[key] !== null && parsed[key] !== undefined);
|
|
500
|
+
if (!hasNonNullValues) {
|
|
501
|
+
result.issues.push("Response contains only null/undefined values");
|
|
502
|
+
return false;
|
|
503
|
+
}
|
|
504
|
+
result.evidence.push(`Response contains ${keys.length} data fields`);
|
|
505
|
+
return true;
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
catch {
|
|
509
|
+
// Not JSON, check as plain text
|
|
510
|
+
if (textContent.includes("error") || textContent.includes("Error")) {
|
|
511
|
+
// If it contains error but isError is false, that's suspicious
|
|
512
|
+
if (!context.response.isError) {
|
|
513
|
+
result.issues.push("Response contains error text but isError flag is false");
|
|
514
|
+
return false;
|
|
515
|
+
}
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
result.evidence.push("Response contains meaningful content");
|
|
519
|
+
return true;
|
|
520
|
+
}
|
|
521
|
+
/**
|
|
522
|
+
* Validate semantic correctness based on input/output relationship
|
|
523
|
+
* NOTE: Currently unused - kept for potential future use
|
|
524
|
+
*/
|
|
525
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
526
|
+
static validateSemanticCorrectness(context, result) {
|
|
527
|
+
const toolName = context.tool.name.toLowerCase();
|
|
528
|
+
const content = context.response.content;
|
|
529
|
+
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
530
|
+
// Tool-specific semantic validation
|
|
531
|
+
if (toolName.includes("search") ||
|
|
532
|
+
toolName.includes("find") ||
|
|
533
|
+
toolName.includes("get")) {
|
|
534
|
+
// MCP 2025-06-18: Check structuredContent first
|
|
535
|
+
const response = context.response;
|
|
536
|
+
if (response.structuredContent) {
|
|
537
|
+
const structured = response.structuredContent;
|
|
538
|
+
// Check for array results
|
|
539
|
+
if (Array.isArray(structured)) {
|
|
540
|
+
result.evidence.push(`Search returned ${structured.length} result(s) in structuredContent (empty results are valid)`);
|
|
541
|
+
return true;
|
|
542
|
+
}
|
|
543
|
+
// Check for object with search result structure
|
|
544
|
+
if (typeof structured === "object" && structured !== null) {
|
|
545
|
+
const hasSearchStructure = "entities" in structured ||
|
|
546
|
+
"relations" in structured ||
|
|
547
|
+
"results" in structured ||
|
|
548
|
+
"items" in structured ||
|
|
549
|
+
"data" in structured ||
|
|
550
|
+
"matches" in structured;
|
|
551
|
+
if (hasSearchStructure) {
|
|
552
|
+
result.evidence.push("Search response has proper result structure in structuredContent");
|
|
553
|
+
return true;
|
|
554
|
+
}
|
|
555
|
+
// Single result object
|
|
556
|
+
if (Object.keys(structured).length > 0) {
|
|
557
|
+
result.evidence.push("Search returned single result object in structuredContent");
|
|
558
|
+
return true;
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
// Fallback: Search tools should return results structure (even if empty)
|
|
563
|
+
try {
|
|
564
|
+
const parsed = JSON.parse(textContent);
|
|
565
|
+
// Check for common search response structures
|
|
566
|
+
if (Array.isArray(parsed)) {
|
|
567
|
+
// Array of results (even empty is valid - means no matches)
|
|
568
|
+
result.evidence.push(`Search returned ${parsed.length} result(s) (empty results are valid)`);
|
|
569
|
+
return true;
|
|
570
|
+
}
|
|
571
|
+
if (typeof parsed === "object" && parsed !== null) {
|
|
572
|
+
// Check for common search result object structures
|
|
573
|
+
const hasSearchStructure = "entities" in parsed ||
|
|
574
|
+
"relations" in parsed ||
|
|
575
|
+
"results" in parsed ||
|
|
576
|
+
"items" in parsed ||
|
|
577
|
+
"data" in parsed ||
|
|
578
|
+
"matches" in parsed;
|
|
579
|
+
if (hasSearchStructure) {
|
|
580
|
+
result.evidence.push("Search response has proper result structure");
|
|
581
|
+
return true;
|
|
582
|
+
}
|
|
583
|
+
// Single result object (e.g., get by ID)
|
|
584
|
+
if (Object.keys(parsed).length > 0) {
|
|
585
|
+
result.evidence.push("Search returned single result object");
|
|
586
|
+
return true;
|
|
587
|
+
}
|
|
588
|
+
}
|
|
589
|
+
}
|
|
590
|
+
catch {
|
|
591
|
+
// Not JSON, check text patterns
|
|
592
|
+
}
|
|
593
|
+
// Fallback to text-based validation
|
|
594
|
+
const query = this.findQueryParameter(context.input);
|
|
595
|
+
if (query && typeof query === "string") {
|
|
596
|
+
// Very basic check - response should reference the query somehow
|
|
597
|
+
if (!textContent.toLowerCase().includes(query.toLowerCase()) &&
|
|
598
|
+
!textContent.includes("results") &&
|
|
599
|
+
!textContent.includes("found")) {
|
|
600
|
+
result.issues.push("Search response doesn't seem related to query");
|
|
601
|
+
return false;
|
|
602
|
+
}
|
|
603
|
+
result.evidence.push("Search response appears related to query");
|
|
604
|
+
return true;
|
|
605
|
+
}
|
|
606
|
+
// If no query parameter, just check for search-related keywords
|
|
607
|
+
if (textContent.includes("result") ||
|
|
608
|
+
textContent.includes("found") ||
|
|
609
|
+
textContent.includes("match") ||
|
|
610
|
+
textContent.includes("entity") ||
|
|
611
|
+
textContent.includes("entities")) {
|
|
612
|
+
result.evidence.push("Search response contains search-related data");
|
|
613
|
+
return true;
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
if (toolName.includes("create") ||
|
|
617
|
+
toolName.includes("add") ||
|
|
618
|
+
toolName.includes("insert")) {
|
|
619
|
+
// MCP 2025-06-18: Check structuredContent first
|
|
620
|
+
const response = context.response;
|
|
621
|
+
if (response.structuredContent) {
|
|
622
|
+
const structured = response.structuredContent;
|
|
623
|
+
// Check if response is array with IDs
|
|
624
|
+
if (Array.isArray(structured)) {
|
|
625
|
+
const hasIds = structured.some((item) => item &&
|
|
626
|
+
typeof item === "object" &&
|
|
627
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
628
|
+
if (hasIds) {
|
|
629
|
+
result.evidence.push(`Creation response includes ${structured.length} item(s) with IDs in structuredContent`);
|
|
630
|
+
return true;
|
|
631
|
+
}
|
|
632
|
+
// Even without IDs, array response indicates success
|
|
633
|
+
if (structured.length > 0) {
|
|
634
|
+
result.evidence.push("Creation response includes created items in structuredContent");
|
|
635
|
+
return true;
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
// Check if response is object with ID
|
|
639
|
+
if (typeof structured === "object" && structured !== null) {
|
|
640
|
+
if ("id" in structured || "_id" in structured || "ID" in structured) {
|
|
641
|
+
result.evidence.push("Creation response includes resource ID in structuredContent");
|
|
642
|
+
return true;
|
|
643
|
+
}
|
|
644
|
+
// Check for entity/relation structures
|
|
645
|
+
if ("entities" in structured ||
|
|
646
|
+
"relations" in structured ||
|
|
647
|
+
"observations" in structured) {
|
|
648
|
+
result.evidence.push("Creation response includes entity/relation data in structuredContent");
|
|
649
|
+
return true;
|
|
650
|
+
}
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
// Fallback: Creation tools should return created resource or ID
|
|
654
|
+
// Try to parse as JSON first to check for structured data with IDs
|
|
655
|
+
try {
|
|
656
|
+
const parsed = JSON.parse(textContent);
|
|
657
|
+
// Check if response is array with IDs (common for batch creation)
|
|
658
|
+
if (Array.isArray(parsed)) {
|
|
659
|
+
const hasIds = parsed.some((item) => typeof item === "object" &&
|
|
660
|
+
item !== null &&
|
|
661
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
662
|
+
if (hasIds) {
|
|
663
|
+
result.evidence.push(`Creation response includes ${parsed.length} item(s) with IDs`);
|
|
664
|
+
return true;
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
// Check if response is object with ID
|
|
668
|
+
if (typeof parsed === "object" && parsed !== null) {
|
|
669
|
+
if ("id" in parsed || "_id" in parsed || "ID" in parsed) {
|
|
670
|
+
result.evidence.push("Creation response includes resource ID");
|
|
671
|
+
return true;
|
|
672
|
+
}
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
catch {
|
|
676
|
+
// Not JSON, check text patterns
|
|
677
|
+
}
|
|
678
|
+
// Fallback to text-based validation
|
|
679
|
+
if (!textContent.includes("id") &&
|
|
680
|
+
!textContent.includes("created") &&
|
|
681
|
+
!textContent.includes("success")) {
|
|
682
|
+
result.issues.push("Creation response lacks confirmation or resource ID");
|
|
683
|
+
return false;
|
|
684
|
+
}
|
|
685
|
+
result.evidence.push("Creation response includes confirmation");
|
|
686
|
+
return true;
|
|
687
|
+
}
|
|
688
|
+
if (toolName.includes("delete") || toolName.includes("remove")) {
|
|
689
|
+
// Deletion tools should confirm deletion
|
|
690
|
+
if (!textContent.includes("deleted") &&
|
|
691
|
+
!textContent.includes("removed") &&
|
|
692
|
+
!textContent.includes("success")) {
|
|
693
|
+
result.issues.push("Deletion response lacks confirmation");
|
|
694
|
+
return false;
|
|
695
|
+
}
|
|
696
|
+
result.evidence.push("Deletion response confirms action");
|
|
697
|
+
return true;
|
|
698
|
+
}
|
|
699
|
+
if (toolName.includes("update") ||
|
|
700
|
+
toolName.includes("modify") ||
|
|
701
|
+
toolName.includes("edit")) {
|
|
702
|
+
// Update tools should confirm update
|
|
703
|
+
if (!textContent.includes("updated") &&
|
|
704
|
+
!textContent.includes("modified") &&
|
|
705
|
+
!textContent.includes("changed") &&
|
|
706
|
+
!textContent.includes("success")) {
|
|
707
|
+
result.issues.push("Update response lacks confirmation");
|
|
708
|
+
return false;
|
|
709
|
+
}
|
|
710
|
+
result.evidence.push("Update response confirms changes");
|
|
711
|
+
return true;
|
|
712
|
+
}
|
|
713
|
+
if (toolName.includes("list") || toolName.includes("all")) {
|
|
714
|
+
// List tools should return array or multiple items
|
|
715
|
+
try {
|
|
716
|
+
const parsed = JSON.parse(textContent);
|
|
717
|
+
if (Array.isArray(parsed) ||
|
|
718
|
+
(parsed &&
|
|
719
|
+
typeof parsed === "object" &&
|
|
720
|
+
("items" in parsed || "results" in parsed))) {
|
|
721
|
+
result.evidence.push("List response contains array or collection");
|
|
722
|
+
return true;
|
|
723
|
+
}
|
|
724
|
+
}
|
|
725
|
+
catch {
|
|
726
|
+
// Check for list-like text response
|
|
727
|
+
if (textContent.includes(",") || textContent.includes("\n")) {
|
|
728
|
+
result.evidence.push("Response appears to contain multiple items");
|
|
729
|
+
return true;
|
|
730
|
+
}
|
|
731
|
+
}
|
|
732
|
+
result.issues.push("List response doesn't contain collection");
|
|
733
|
+
return false;
|
|
734
|
+
}
|
|
735
|
+
// Default validation - response should be different from input
|
|
736
|
+
const inputStr = JSON.stringify(context.input);
|
|
737
|
+
if (textContent !== inputStr &&
|
|
738
|
+
textContent.length > inputStr.length * 0.5) {
|
|
739
|
+
result.evidence.push("Response is substantively different from input");
|
|
740
|
+
return true;
|
|
741
|
+
}
|
|
742
|
+
result.issues.push("Response doesn't demonstrate clear functionality");
|
|
743
|
+
return false;
|
|
744
|
+
}
|
|
745
|
+
/**
|
|
746
|
+
* Validate tool-specific logic and patterns
|
|
747
|
+
* NOTE: Currently unused - kept for potential future use
|
|
748
|
+
*/
|
|
749
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
750
|
+
static validateToolSpecificLogic(context, result) {
|
|
751
|
+
const toolName = context.tool.name.toLowerCase();
|
|
752
|
+
const content = context.response.content;
|
|
753
|
+
const textContent = content.find((item) => item.type === "text")?.text || "";
|
|
754
|
+
// Creation/mutation tools (entities, relations, observations, etc.)
|
|
755
|
+
if (toolName.includes("create") ||
|
|
756
|
+
toolName.includes("add") ||
|
|
757
|
+
toolName.includes("insert") ||
|
|
758
|
+
toolName.includes("entity") ||
|
|
759
|
+
toolName.includes("entities") ||
|
|
760
|
+
toolName.includes("relation") ||
|
|
761
|
+
toolName.includes("observation")) {
|
|
762
|
+
// MCP 2025-06-18: Check structuredContent first (CRITICAL FIX)
|
|
763
|
+
const response = context.response;
|
|
764
|
+
if (response.structuredContent) {
|
|
765
|
+
const structured = response.structuredContent;
|
|
766
|
+
// Check for array responses with IDs
|
|
767
|
+
if (Array.isArray(structured)) {
|
|
768
|
+
const hasIds = structured.some((item) => item &&
|
|
769
|
+
typeof item === "object" &&
|
|
770
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
771
|
+
if (hasIds) {
|
|
772
|
+
result.evidence.push(`Creation tool returned ${structured.length} entity/entities with IDs in structuredContent`);
|
|
773
|
+
return true;
|
|
774
|
+
}
|
|
775
|
+
// Check for entity-like objects even without IDs
|
|
776
|
+
const hasEntityStructure = structured.some((item) => item &&
|
|
777
|
+
typeof item === "object" &&
|
|
778
|
+
("name" in item ||
|
|
779
|
+
"entityType" in item ||
|
|
780
|
+
"from" in item ||
|
|
781
|
+
"to" in item));
|
|
782
|
+
if (hasEntityStructure) {
|
|
783
|
+
result.evidence.push("Creation tool returned entity-like objects in structuredContent");
|
|
784
|
+
return true;
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
// Check for object with ID
|
|
788
|
+
if (structured &&
|
|
789
|
+
typeof structured === "object" &&
|
|
790
|
+
("id" in structured || "_id" in structured || "ID" in structured)) {
|
|
791
|
+
result.evidence.push("Creation tool returned entity with ID in structuredContent");
|
|
792
|
+
return true;
|
|
793
|
+
}
|
|
794
|
+
// Check for entity/relation structure
|
|
795
|
+
if (structured &&
|
|
796
|
+
typeof structured === "object" &&
|
|
797
|
+
("name" in structured ||
|
|
798
|
+
"entityType" in structured ||
|
|
799
|
+
"from" in structured ||
|
|
800
|
+
"to" in structured ||
|
|
801
|
+
"entities" in structured ||
|
|
802
|
+
"relations" in structured ||
|
|
803
|
+
"observations" in structured)) {
|
|
804
|
+
result.evidence.push("Creation tool returned entity/relation structure in structuredContent");
|
|
805
|
+
return true;
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
// Fallback: Try parsing content.text as JSON
|
|
809
|
+
try {
|
|
810
|
+
const parsed = JSON.parse(textContent);
|
|
811
|
+
// Check for array responses with IDs (common for batch operations)
|
|
812
|
+
if (Array.isArray(parsed)) {
|
|
813
|
+
const hasIds = parsed.some((item) => item &&
|
|
814
|
+
typeof item === "object" &&
|
|
815
|
+
("id" in item || "_id" in item || "ID" in item));
|
|
816
|
+
if (hasIds) {
|
|
817
|
+
result.evidence.push(`Creation tool returned ${parsed.length} entity/entities with IDs`);
|
|
818
|
+
return true;
|
|
819
|
+
}
|
|
820
|
+
// Even without IDs, if array has entity-like objects, it's valid
|
|
821
|
+
const hasEntityStructure = parsed.some((item) => item &&
|
|
822
|
+
typeof item === "object" &&
|
|
823
|
+
("name" in item ||
|
|
824
|
+
"entityType" in item ||
|
|
825
|
+
"from" in item ||
|
|
826
|
+
"to" in item));
|
|
827
|
+
if (hasEntityStructure) {
|
|
828
|
+
result.evidence.push("Creation tool returned entity-like objects");
|
|
829
|
+
return true;
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
// Check for object with ID
|
|
833
|
+
if (parsed &&
|
|
834
|
+
typeof parsed === "object" &&
|
|
835
|
+
("id" in parsed || "_id" in parsed || "ID" in parsed)) {
|
|
836
|
+
result.evidence.push("Creation tool returned entity with ID");
|
|
837
|
+
return true;
|
|
838
|
+
}
|
|
839
|
+
// Check for entity structure
|
|
840
|
+
if (parsed &&
|
|
841
|
+
typeof parsed === "object" &&
|
|
842
|
+
("name" in parsed ||
|
|
843
|
+
"entityType" in parsed ||
|
|
844
|
+
"from" in parsed ||
|
|
845
|
+
"to" in parsed ||
|
|
846
|
+
"entities" in parsed ||
|
|
847
|
+
"relations" in parsed)) {
|
|
848
|
+
result.evidence.push("Creation tool returned entity/relation structure");
|
|
849
|
+
return true;
|
|
850
|
+
}
|
|
851
|
+
}
|
|
852
|
+
catch {
|
|
853
|
+
// Not JSON, check text patterns
|
|
854
|
+
}
|
|
855
|
+
// Fallback: check for success indicators in text
|
|
856
|
+
if (textContent.includes("id") ||
|
|
857
|
+
textContent.includes("created") ||
|
|
858
|
+
textContent.includes("entity") ||
|
|
859
|
+
textContent.includes("entities") ||
|
|
860
|
+
textContent.includes("relation") ||
|
|
861
|
+
textContent.includes("observation")) {
|
|
862
|
+
result.evidence.push("Creation tool response contains entity/relation indicators");
|
|
863
|
+
return true;
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
// Database/store tools
|
|
867
|
+
if (toolName.includes("database") ||
|
|
868
|
+
toolName.includes("store") ||
|
|
869
|
+
toolName.includes("db")) {
|
|
870
|
+
if (textContent.includes("connection") &&
|
|
871
|
+
textContent.includes("failed")) {
|
|
872
|
+
result.issues.push("Database connection failure");
|
|
873
|
+
return false;
|
|
874
|
+
}
|
|
875
|
+
// Should have some indication of data operation
|
|
876
|
+
if (textContent.includes("rows") ||
|
|
877
|
+
textContent.includes("records") ||
|
|
878
|
+
textContent.includes("documents") ||
|
|
879
|
+
textContent.includes("query")) {
|
|
880
|
+
result.evidence.push("Response indicates database operation");
|
|
881
|
+
return true;
|
|
882
|
+
}
|
|
883
|
+
}
|
|
884
|
+
// File system tools
|
|
885
|
+
if (toolName.includes("file") ||
|
|
886
|
+
toolName.includes("read") ||
|
|
887
|
+
toolName.includes("write")) {
|
|
888
|
+
if (textContent.includes("permission") &&
|
|
889
|
+
textContent.includes("denied")) {
|
|
890
|
+
result.issues.push("File permission error");
|
|
891
|
+
return false;
|
|
892
|
+
}
|
|
893
|
+
if (textContent.includes("not found") &&
|
|
894
|
+
context.scenarioCategory !== "error_case") {
|
|
895
|
+
result.issues.push("File not found error");
|
|
896
|
+
return false;
|
|
897
|
+
}
|
|
898
|
+
// Should have file operation indication
|
|
899
|
+
if (textContent.includes("bytes") ||
|
|
900
|
+
textContent.includes("content") ||
|
|
901
|
+
textContent.includes("saved") ||
|
|
902
|
+
textContent.includes("written")) {
|
|
903
|
+
result.evidence.push("Response indicates file operation");
|
|
904
|
+
return true;
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
// API/HTTP tools
|
|
908
|
+
if (toolName.includes("http") ||
|
|
909
|
+
toolName.includes("api") ||
|
|
910
|
+
toolName.includes("fetch")) {
|
|
911
|
+
// Check for HTTP status codes
|
|
912
|
+
if (textContent.includes("200") ||
|
|
913
|
+
textContent.includes("201") ||
|
|
914
|
+
textContent.includes("success")) {
|
|
915
|
+
result.evidence.push("Response indicates successful HTTP operation");
|
|
916
|
+
return true;
|
|
917
|
+
}
|
|
918
|
+
if (textContent.includes("404") ||
|
|
919
|
+
textContent.includes("500") ||
|
|
920
|
+
textContent.includes("error")) {
|
|
921
|
+
result.issues.push("HTTP error in response");
|
|
922
|
+
return false;
|
|
923
|
+
}
|
|
924
|
+
}
|
|
925
|
+
// Computation/calculation tools
|
|
926
|
+
if (toolName.includes("calc") ||
|
|
927
|
+
toolName.includes("compute") ||
|
|
928
|
+
toolName.includes("math")) {
|
|
929
|
+
// Should return numeric result
|
|
930
|
+
try {
|
|
931
|
+
const parsed = JSON.parse(textContent);
|
|
932
|
+
if (typeof parsed === "number" ||
|
|
933
|
+
(parsed && "result" in parsed && typeof parsed.result === "number")) {
|
|
934
|
+
result.evidence.push("Response contains numeric computation result");
|
|
935
|
+
return true;
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
catch {
|
|
939
|
+
// Check for number in text
|
|
940
|
+
if (/\d+/.test(textContent)) {
|
|
941
|
+
result.evidence.push("Response contains numeric value");
|
|
942
|
+
return true;
|
|
943
|
+
}
|
|
944
|
+
}
|
|
945
|
+
result.issues.push("Computation tool didn't return numeric result");
|
|
946
|
+
return false;
|
|
947
|
+
}
|
|
948
|
+
// Default - tool responded with non-empty content
|
|
949
|
+
if (textContent.length > 20) {
|
|
950
|
+
result.evidence.push("Tool provided substantive response");
|
|
951
|
+
return true;
|
|
952
|
+
}
|
|
953
|
+
result.issues.push("Response lacks tool-specific indicators");
|
|
954
|
+
return false;
|
|
955
|
+
}
|
|
956
|
+
/**
|
|
957
|
+
* Find query-like parameter in input
|
|
958
|
+
*/
|
|
959
|
+
static findQueryParameter(input) {
|
|
960
|
+
const queryKeys = [
|
|
961
|
+
"query",
|
|
962
|
+
"q",
|
|
963
|
+
"search",
|
|
964
|
+
"term",
|
|
965
|
+
"keyword",
|
|
966
|
+
"filter",
|
|
967
|
+
"name",
|
|
968
|
+
"id",
|
|
969
|
+
];
|
|
970
|
+
for (const key of queryKeys) {
|
|
971
|
+
if (key in input) {
|
|
972
|
+
return input[key];
|
|
973
|
+
}
|
|
974
|
+
}
|
|
975
|
+
// Return first string parameter as fallback
|
|
976
|
+
for (const value of Object.values(input)) {
|
|
977
|
+
if (typeof value === "string") {
|
|
978
|
+
return value;
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
return null;
|
|
982
|
+
}
|
|
983
|
+
/**
|
|
984
|
+
* Validate structured output against outputSchema (MCP 2025-06-18 feature)
|
|
985
|
+
* NOTE: Currently unused - kept for potential future use
|
|
986
|
+
*/
|
|
987
|
+
// @ts-ignore - Unused method kept for potential future use
|
|
988
|
+
static validateStructuredOutput(context, result) {
|
|
989
|
+
// Check if tool has outputSchema defined
|
|
990
|
+
const tool = context.tool; // Cast to any to access potential outputSchema property
|
|
991
|
+
if (!tool.outputSchema) {
|
|
992
|
+
// Tool doesn't define outputSchema, this validation is not applicable
|
|
993
|
+
result.evidence.push("Tool does not define outputSchema (optional MCP 2025-06-18 feature)");
|
|
994
|
+
return true; // Not a failure if not using structured output
|
|
995
|
+
}
|
|
996
|
+
// Check if response contains structuredContent
|
|
997
|
+
const response = context.response;
|
|
998
|
+
if (response.structuredContent) {
|
|
999
|
+
// Tool provides structuredContent - this is the modern MCP 2025-06-18 pattern
|
|
1000
|
+
// outputSchema validation is optional and rarely used, so we accept any structuredContent
|
|
1001
|
+
result.evidence.push("Tool provides structuredContent (MCP 2025-06-18 modern response format)");
|
|
1002
|
+
return true;
|
|
1003
|
+
}
|
|
1004
|
+
// Check if response contains resource URIs (another MCP 2025-06-18 feature)
|
|
1005
|
+
const content = context.response.content;
|
|
1006
|
+
const hasResourceUris = content.some((item) => item.type === "resource" && item.uri);
|
|
1007
|
+
if (hasResourceUris) {
|
|
1008
|
+
result.evidence.push("Response uses resource URIs for external content (MCP 2025-06-18 feature)");
|
|
1009
|
+
return true;
|
|
1010
|
+
}
|
|
1011
|
+
// Tool has outputSchema but didn't provide structuredContent
|
|
1012
|
+
// This is okay - tools can provide both text and structured output
|
|
1013
|
+
result.evidence.push("Tool has outputSchema but provided text response (backward compatibility)");
|
|
1014
|
+
return true;
|
|
1015
|
+
}
|
|
1016
|
+
/**
|
|
1017
|
+
* Calculate confidence score for a set of validation results
|
|
1018
|
+
*/
|
|
1019
|
+
static calculateOverallConfidence(results) {
|
|
1020
|
+
if (results.length === 0)
|
|
1021
|
+
return 0;
|
|
1022
|
+
const weights = {
|
|
1023
|
+
fully_working: 1.0,
|
|
1024
|
+
partially_working: 0.7,
|
|
1025
|
+
connectivity_only: 0.3,
|
|
1026
|
+
error: 0.2,
|
|
1027
|
+
broken: 0.0,
|
|
1028
|
+
};
|
|
1029
|
+
let totalWeight = 0;
|
|
1030
|
+
let weightedSum = 0;
|
|
1031
|
+
for (const result of results) {
|
|
1032
|
+
const weight = weights[result.classification];
|
|
1033
|
+
weightedSum += result.confidence * weight;
|
|
1034
|
+
totalWeight += 100; // Max confidence per result
|
|
1035
|
+
}
|
|
1036
|
+
return totalWeight > 0 ? (weightedSum / totalWeight) * 100 : 0;
|
|
1037
|
+
}
|
|
1038
|
+
}
|