@bryan-thompson/inspector-assessment 1.36.5 → 1.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/build/lib/assessment-runner/assessment-executor.js +12 -0
- package/cli/package.json +1 -1
- package/client/dist/assets/{OAuthCallback-DJ1av7om.js → OAuthCallback-6-wM7Zc1.js} +1 -1
- package/client/dist/assets/{OAuthDebugCallback-lRXgX7wV.js → OAuthDebugCallback-Bw9-AzzP.js} +1 -1
- package/client/dist/assets/{index-DEdS99fp.js → index-DyCdQP10.js} +4 -4
- package/client/dist/index.html +1 -1
- package/client/lib/lib/assessment/resultTypes.d.ts +4 -0
- package/client/lib/lib/assessment/resultTypes.d.ts.map +1 -1
- package/client/lib/lib/assessment/sharedSchemas.d.ts +10 -0
- package/client/lib/lib/assessment/sharedSchemas.d.ts.map +1 -1
- package/client/lib/lib/assessment/sharedSchemas.js +4 -0
- package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +2 -0
- package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
- package/client/lib/services/assessment/config/performanceConfig.d.ts +18 -0
- package/client/lib/services/assessment/config/performanceConfig.d.ts.map +1 -1
- package/client/lib/services/assessment/config/performanceConfig.js +6 -0
- package/client/lib/services/assessment/config/performanceConfigSchemas.d.ts +18 -0
- package/client/lib/services/assessment/config/performanceConfigSchemas.d.ts.map +1 -1
- package/client/lib/services/assessment/config/performanceConfigSchemas.js +20 -0
- package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.d.ts +74 -0
- package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.d.ts.map +1 -0
- package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.js +131 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +6 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +93 -10
- package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts +10 -0
- package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/FunctionalityAssessor.js +65 -3
- package/client/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/TemporalAssessor.js +16 -3
- package/client/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/annotations/AlignmentChecker.js +6 -2
- package/client/lib/services/assessment/modules/annotations/DescriptionPoisoningDetector.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/annotations/DescriptionPoisoningDetector.js +16 -7
- package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.d.ts +14 -0
- package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.js +24 -1
- package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts +23 -1
- package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.js +50 -1
- package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts +28 -0
- package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +67 -2
- package/client/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts +16 -1
- package/client/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/temporal/VarianceClassifier.js +43 -1
- package/client/package.json +1 -1
- package/package.json +1 -1
- package/server/package.json +1 -1
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* External API Dependency Detector
|
|
3
|
+
*
|
|
4
|
+
* Identifies tools that depend on external APIs based on name and description patterns.
|
|
5
|
+
* This information enables downstream assessors to adjust their behavior:
|
|
6
|
+
* - TemporalAssessor: Relaxed variance thresholds for external API tools
|
|
7
|
+
* - FunctionalityAssessor: Accept API errors as valid responses
|
|
8
|
+
* - ErrorHandlingAssessor: Account for external service failures
|
|
9
|
+
*
|
|
10
|
+
* Issue #168: New module for external API dependency detection
|
|
11
|
+
*
|
|
12
|
+
* @module helpers/ExternalAPIDependencyDetector
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Detects external API dependencies in MCP tools based on name and description patterns.
 * Designed to run during context preparation before assessors execute.
 *
 * @public
 */
export class ExternalAPIDependencyDetector {
    /**
     * Tool name patterns that suggest external API dependency.
     * Matched as whole "words" of a snake_case / kebab-case tool name
     * (see isExternalAPITool), optionally in plural form.
     *
     * Extracted from VarianceClassifier (Issue #166) for reuse across modules.
     */
    EXTERNAL_API_PATTERNS = [
        // API-related prefixes
        "api",
        "external",
        "remote",
        "live",
        // Data type patterns (typically from external sources)
        "weather",
        "stock",
        "price",
        "market",
        "currency",
        "exchange",
        "rate",
        "forex",
        // Service-specific prefixes
        "wb", // World Bank
        "worldbank",
        // Action patterns suggesting external fetch
        "fetch_from",
        "poll",
        "realtime",
        "current",
    ];
    /**
     * Description patterns that suggest external API dependency.
     * Regex patterns for more flexible matching.
     */
    EXTERNAL_API_DESCRIPTION_PATTERNS = [
        /external\s*(api|service)/i,
        /fetche?s?\s*(from|data\s+from)/i,
        /calls?\s*(external|remote)/i,
        /live\s*(data|feed|stream)/i,
        /real[- ]?time/i,
        /world\s*bank/i,
        /third[- ]?party\s*(api|service)/i,
    ];
    /**
     * Detect external API dependencies from a list of tools.
     *
     * @param tools - List of MCP tools to analyze (null/undefined is treated as empty)
     * @returns Object with:
     *   - toolsWithExternalAPIDependency: Set of matching tool names
     *   - detectedCount: number of matching tools
     *   - confidence: "low" (0 matches), "medium" (1-2) or "high" (3+)
     *   - detectedTools: the same names as an array
     */
    detect(tools) {
        const toolsWithExternalAPI = new Set();
        for (const tool of tools ?? []) {
            // A tool without a usable name cannot be looked up by downstream assessors.
            if (typeof tool?.name !== "string") {
                continue;
            }
            if (this.isExternalAPITool(tool)) {
                toolsWithExternalAPI.add(tool.name);
            }
        }
        const detectedCount = toolsWithExternalAPI.size;
        // Determine confidence based on detection count
        // More detections = higher confidence in pattern accuracy
        let confidence;
        if (detectedCount === 0) {
            confidence = "low";
        }
        else if (detectedCount >= 3) {
            confidence = "high";
        }
        else {
            confidence = "medium";
        }
        return {
            toolsWithExternalAPIDependency: toolsWithExternalAPI,
            detectedCount,
            confidence,
            detectedTools: Array.from(toolsWithExternalAPI),
        };
    }
    /**
     * Check if a single tool depends on external APIs.
     * A tool matches when EITHER its name contains one of the name patterns as a
     * whole word OR its description matches one of the description regexes.
     *
     * @param tool - MCP tool to check (reads `name` and optional `description`)
     * @returns true if tool appears to depend on external APIs
     */
    isExternalAPITool(tool) {
        const toolName = String(tool?.name ?? "").toLowerCase();
        const description = String(tool?.description ?? "").toLowerCase();
        // Check name patterns with word-boundary matching:
        // "weather_api" matches "api" but "capital_gains" doesn't.
        // The optional plural "s" must itself be followed by a boundary, so
        // "get_rates" matches "rate" while "livestock_count" does not match "live".
        const alternatives = this.EXTERNAL_API_PATTERNS
            .map((pattern) => pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"))
            .join("|");
        // One combined regex per call instead of one per pattern; built from the
        // current array contents so external changes to the list are honoured.
        const nameMatch = alternatives.length > 0 &&
            new RegExp(`(?:^|[_-])(?:${alternatives})s?(?:$|[_-])`).test(toolName);
        // Check description for external API indicators
        const descriptionMatch = this.EXTERNAL_API_DESCRIPTION_PATTERNS.some((regex) => regex.test(description));
        return nameMatch || descriptionMatch;
    }
    /**
     * Get the list of name patterns used for detection.
     * Useful for debugging and documentation.
     *
     * @returns The live EXTERNAL_API_PATTERNS array (not a copy)
     */
    getNamePatterns() {
        return this.EXTERNAL_API_PATTERNS;
    }
    /**
     * Get the list of description patterns used for detection.
     * Useful for debugging and documentation.
     *
     * @returns The live EXTERNAL_API_DESCRIPTION_PATTERNS array (not a copy)
     */
    getDescriptionPatterns() {
        return this.EXTERNAL_API_DESCRIPTION_PATTERNS;
    }
}
|
|
@@ -46,6 +46,12 @@ export declare class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
46
46
|
private calculateMetrics;
|
|
47
47
|
private determineErrorHandlingStatus;
|
|
48
48
|
private generateExplanation;
|
|
49
|
+
/**
|
|
50
|
+
* Check if an error indicates an external service failure
|
|
51
|
+
* Issue #168: External API tools may fail due to service unavailability,
|
|
52
|
+
* which should not count as validation failure
|
|
53
|
+
*/
|
|
54
|
+
private isExternalServiceError;
|
|
49
55
|
private generateRecommendations;
|
|
50
56
|
}
|
|
51
57
|
//# sourceMappingURL=ErrorHandlingAssessor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAKxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,iBAAiB,CAA4B;IACrD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,eAAe,CAAkB;gBAE7B,MAAM,EAAE,uBAAuB;IAOrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAKxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,iBAAiB,CAA4B;IACrD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,eAAe,CAAkB;gBAE7B,MAAM,EAAE,uBAAuB;IAOrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IA6G1E,OAAO,CAAC,qBAAqB;YAuDf,qBAAqB;YAiCrB,qBAAqB;YA+IrB,cAAc;YA8Hd,iBAAiB;YAyGjB,kBAAkB;IAwGhC,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,0BAA0B;IAkClC,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,4BAA4B;IAgEpC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAc/B;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAetC,OAAO,CAAC,gBAAgB;IA8GxB,OAAO,CAAC,4BAA4B;IAapC,OAAO,CAAC,mBAAmB;IAuE3B;;;;OAIG;IACH,OAAO,CAAC,sBAAsB;IAgB9B,OAAO,CAAC,uBAAuB;CA4ChC"}
|
|
@@ -28,7 +28,7 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
28
28
|
const limit = createConcurrencyLimit(concurrency, this.logger);
|
|
29
29
|
this.logger.info(`Testing ${toolsToTest.length} tools for error handling with concurrency limit of ${concurrency}`);
|
|
30
30
|
const allToolTests = await Promise.all(toolsToTest.map((tool) => limit(async () => {
|
|
31
|
-
const toolTests = await this.testToolErrorHandling(tool, context.callTool);
|
|
31
|
+
const toolTests = await this.testToolErrorHandling(tool, context.callTool, context);
|
|
32
32
|
// Emit per-tool validation summary for auditor UI (Phase 7)
|
|
33
33
|
if (context.onProgress) {
|
|
34
34
|
// Count failures by test type (failed = tool didn't reject invalid input)
|
|
@@ -125,21 +125,23 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
125
125
|
this.logger.info(`Testing ${maxTools} out of ${tools.length} tools for error handling`);
|
|
126
126
|
return tools.slice(0, maxTools);
|
|
127
127
|
}
|
|
128
|
-
async testToolErrorHandling(tool, callTool) {
|
|
128
|
+
async testToolErrorHandling(tool, callTool, context) {
|
|
129
129
|
const tests = [];
|
|
130
|
+
// Issue #168: Check if tool depends on external API
|
|
131
|
+
const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name) ?? false;
|
|
130
132
|
// Scored tests first (affect compliance score)
|
|
131
133
|
// Test 1: Missing required parameters
|
|
132
|
-
tests.push(await this.testMissingParameters(tool, callTool));
|
|
134
|
+
tests.push(await this.testMissingParameters(tool, callTool, isExternalAPI));
|
|
133
135
|
// Test 2: Wrong parameter types
|
|
134
|
-
tests.push(await this.testWrongTypes(tool, callTool));
|
|
136
|
+
tests.push(await this.testWrongTypes(tool, callTool, isExternalAPI));
|
|
135
137
|
// Test 3: Excessive input size
|
|
136
|
-
tests.push(await this.testExcessiveInput(tool, callTool));
|
|
138
|
+
tests.push(await this.testExcessiveInput(tool, callTool, isExternalAPI));
|
|
137
139
|
// Informational tests last (do not affect compliance score)
|
|
138
140
|
// Test 4: Invalid parameter values (edge case handling)
|
|
139
|
-
tests.push(await this.testInvalidValues(tool, callTool));
|
|
141
|
+
tests.push(await this.testInvalidValues(tool, callTool, isExternalAPI));
|
|
140
142
|
return tests;
|
|
141
143
|
}
|
|
142
|
-
async testMissingParameters(tool, callTool) {
|
|
144
|
+
async testMissingParameters(tool, callTool, isExternalAPI = false) {
|
|
143
145
|
const testInput = {}; // Empty params
|
|
144
146
|
// Check if tool has any required parameters
|
|
145
147
|
const schema = this.getToolSchema(tool);
|
|
@@ -178,6 +180,24 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
178
180
|
messageLower.includes("must specify") ||
|
|
179
181
|
// Also accept field-specific errors (even better!)
|
|
180
182
|
/\b(query|field|parameter|argument|value|input)\b/i.test(errorInfo.message ?? ""));
|
|
183
|
+
// Issue #168: For external API tools, check if error is an external service error
|
|
184
|
+
// External service errors should be treated as passed (validation can't be tested)
|
|
185
|
+
if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
|
|
186
|
+
return {
|
|
187
|
+
toolName: tool.name,
|
|
188
|
+
testType: "missing_required",
|
|
189
|
+
testInput,
|
|
190
|
+
expectedError: "Missing required parameters",
|
|
191
|
+
actualResponse: {
|
|
192
|
+
isError,
|
|
193
|
+
errorCode: errorInfo.code,
|
|
194
|
+
errorMessage: errorInfo.message,
|
|
195
|
+
rawResponse: response,
|
|
196
|
+
},
|
|
197
|
+
passed: true,
|
|
198
|
+
reason: "External API service error (validation cannot be tested when service unavailable)",
|
|
199
|
+
};
|
|
200
|
+
}
|
|
181
201
|
return {
|
|
182
202
|
toolName: tool.name,
|
|
183
203
|
testType: "missing_required",
|
|
@@ -239,7 +259,7 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
239
259
|
};
|
|
240
260
|
}
|
|
241
261
|
}
|
|
242
|
-
async testWrongTypes(tool, callTool) {
|
|
262
|
+
async testWrongTypes(tool, callTool, isExternalAPI = false) {
|
|
243
263
|
const schema = this.getToolSchema(tool);
|
|
244
264
|
const testInput = this.generateWrongTypeParams(schema);
|
|
245
265
|
try {
|
|
@@ -264,6 +284,23 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
264
284
|
messageLower.includes("object") ||
|
|
265
285
|
// Also accept validation framework messages
|
|
266
286
|
/\b(validation|validate|schema|format)\b/i.test(errorInfo.message ?? ""));
|
|
287
|
+
// Issue #168: For external API tools, check if error is an external service error
|
|
288
|
+
if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
|
|
289
|
+
return {
|
|
290
|
+
toolName: tool.name,
|
|
291
|
+
testType: "wrong_type",
|
|
292
|
+
testInput,
|
|
293
|
+
expectedError: "Type validation error",
|
|
294
|
+
actualResponse: {
|
|
295
|
+
isError,
|
|
296
|
+
errorCode: errorInfo.code,
|
|
297
|
+
errorMessage: errorInfo.message,
|
|
298
|
+
rawResponse: response,
|
|
299
|
+
},
|
|
300
|
+
passed: true,
|
|
301
|
+
reason: "External API service error (validation cannot be tested when service unavailable)",
|
|
302
|
+
};
|
|
303
|
+
}
|
|
267
304
|
return {
|
|
268
305
|
toolName: tool.name,
|
|
269
306
|
testType: "wrong_type",
|
|
@@ -326,13 +363,30 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
326
363
|
};
|
|
327
364
|
}
|
|
328
365
|
}
|
|
329
|
-
async testInvalidValues(tool, callTool) {
|
|
366
|
+
async testInvalidValues(tool, callTool, isExternalAPI = false) {
|
|
330
367
|
const schema = this.getToolSchema(tool);
|
|
331
368
|
const testInput = this.generateInvalidValueParams(schema);
|
|
332
369
|
try {
|
|
333
370
|
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
334
371
|
const isError = this.isErrorResponse(response);
|
|
335
372
|
const errorInfo = this.extractErrorInfo(response);
|
|
373
|
+
// Issue #168: For external API tools, check if error is an external service error
|
|
374
|
+
if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
|
|
375
|
+
return {
|
|
376
|
+
toolName: tool.name,
|
|
377
|
+
testType: "invalid_values",
|
|
378
|
+
testInput,
|
|
379
|
+
expectedError: "Invalid parameter values",
|
|
380
|
+
actualResponse: {
|
|
381
|
+
isError,
|
|
382
|
+
errorCode: errorInfo.code,
|
|
383
|
+
errorMessage: errorInfo.message,
|
|
384
|
+
rawResponse: response,
|
|
385
|
+
},
|
|
386
|
+
passed: true,
|
|
387
|
+
reason: "External API service error (validation cannot be tested when service unavailable)",
|
|
388
|
+
};
|
|
389
|
+
}
|
|
336
390
|
// For invalid values, any error response is good
|
|
337
391
|
// The server is validating inputs properly
|
|
338
392
|
return {
|
|
@@ -396,13 +450,30 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
396
450
|
};
|
|
397
451
|
}
|
|
398
452
|
}
|
|
399
|
-
async testExcessiveInput(tool, callTool) {
|
|
453
|
+
async testExcessiveInput(tool, callTool, isExternalAPI = false) {
|
|
400
454
|
const largeString = "x".repeat(100000); // 100KB string
|
|
401
455
|
const testInput = this.generateParamsWithValue(tool, largeString);
|
|
402
456
|
try {
|
|
403
457
|
const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
|
|
404
458
|
const isError = this.isErrorResponse(response);
|
|
405
459
|
const errorInfo = this.extractErrorInfo(response);
|
|
460
|
+
// Issue #168: For external API tools, check if error is an external service error
|
|
461
|
+
if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
|
|
462
|
+
return {
|
|
463
|
+
toolName: tool.name,
|
|
464
|
+
testType: "excessive_input",
|
|
465
|
+
testInput: { ...testInput, value: "[100KB string]" },
|
|
466
|
+
expectedError: "Input size limit exceeded",
|
|
467
|
+
actualResponse: {
|
|
468
|
+
isError,
|
|
469
|
+
errorCode: errorInfo.code,
|
|
470
|
+
errorMessage: errorInfo.message,
|
|
471
|
+
rawResponse: response ? "[response omitted]" : undefined,
|
|
472
|
+
},
|
|
473
|
+
passed: true,
|
|
474
|
+
reason: "External API service error (validation cannot be tested when service unavailable)",
|
|
475
|
+
};
|
|
476
|
+
}
|
|
406
477
|
return {
|
|
407
478
|
toolName: tool.name,
|
|
408
479
|
testType: "excessive_input",
|
|
@@ -780,6 +851,18 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
780
851
|
parts.push(`Tested ${toolsTested} tools with ${totalScoredTests} scored scenarios (${totalTests} total including informational).`);
|
|
781
852
|
return parts.join(" ");
|
|
782
853
|
}
|
|
854
|
+
/**
|
|
855
|
+
* Check if an error indicates an external service failure
|
|
856
|
+
* Issue #168: External API tools may fail due to service unavailability,
|
|
857
|
+
* which should not count as validation failure
|
|
858
|
+
*/
|
|
859
|
+
isExternalServiceError(errorInfo) {
|
|
860
|
+
const message = errorInfo.message?.toLowerCase() ?? "";
|
|
861
|
+
const code = String(errorInfo.code ?? "").toLowerCase();
|
|
862
|
+
// Common external service error patterns
|
|
863
|
+
const externalErrorPatterns = /rate\s*limit|429|503|502|504|service\s*unavailable|temporarily|timeout|connection\s*refused|network\s*error|api\s*error|external\s*service|upstream|gateway|unreachable|econnrefused|enotfound|etimedout|socket\s*hang\s*up/i;
|
|
864
|
+
return (externalErrorPatterns.test(message) || externalErrorPatterns.test(code));
|
|
865
|
+
}
|
|
783
866
|
generateRecommendations(metrics, tests) {
|
|
784
867
|
const recommendations = [];
|
|
785
868
|
if (!metrics.hasProperErrorCodes) {
|
|
@@ -31,5 +31,15 @@ export declare class FunctionalityAssessor extends BaseAssessor {
|
|
|
31
31
|
private determineStrategy;
|
|
32
32
|
generateTestInput(schema: JSONSchema7): unknown;
|
|
33
33
|
private generateExplanation;
|
|
34
|
+
/**
|
|
35
|
+
* Issue #168: Check if an error response indicates an expected external API error.
|
|
36
|
+
* External APIs may return rate limit (429), service unavailable (503), timeout,
|
|
37
|
+
* or similar errors that are expected behavior, not broken functionality.
|
|
38
|
+
*/
|
|
39
|
+
private isExpectedAPIError;
|
|
40
|
+
/**
|
|
41
|
+
* Extract text content from a response for pattern matching.
|
|
42
|
+
*/
|
|
43
|
+
private extractResponseText;
|
|
34
44
|
}
|
|
35
45
|
//# sourceMappingURL=FunctionalityAssessor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAGvB,WAAW,EACZ,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAc9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,cAAc,CAAwB;IAE9C;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YAoI5D,QAAQ;
|
|
1
|
+
{"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAGvB,WAAW,EACZ,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAc9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,cAAc,CAAwB;IAE9C;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YAoI5D,QAAQ;IA6HtB,OAAO,CAAC,qBAAqB;IAoE7B,OAAO,CAAC,kBAAkB;IAoH1B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAe7C;IAEF;;;OAGG;IACH,OAAO,CAAC,mCAAmC;IAsF3C;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWlB,iBAAiB,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO;IAItD,OAAO,CAAC,mBAAmB;IAgC3B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAW1B;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAyB5B"}
|
|
@@ -76,7 +76,7 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
76
76
|
this.testCount++;
|
|
77
77
|
completedTests++;
|
|
78
78
|
batchCount++;
|
|
79
|
-
const result = await this.testTool(tool, context.callTool);
|
|
79
|
+
const result = await this.testTool(tool, context.callTool, context);
|
|
80
80
|
// Emit progress batch if threshold reached
|
|
81
81
|
const timeSinceLastBatch = Date.now() - lastBatchTime;
|
|
82
82
|
if (batchCount >= BATCH_SIZE ||
|
|
@@ -131,7 +131,7 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
131
131
|
tools,
|
|
132
132
|
};
|
|
133
133
|
}
|
|
134
|
-
async testTool(tool, callTool) {
|
|
134
|
+
async testTool(tool, callTool, context) {
|
|
135
135
|
const startTime = Date.now();
|
|
136
136
|
// Generate minimal valid parameters with metadata
|
|
137
137
|
const { params: testParams, metadata } = this.generateMinimalParams(tool);
|
|
@@ -173,7 +173,25 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
173
173
|
responseMetadata,
|
|
174
174
|
};
|
|
175
175
|
}
|
|
176
|
-
//
|
|
176
|
+
// Issue #168: Check for expected external API errors
|
|
177
|
+
// External API tools may fail due to rate limits, service unavailability, etc.
|
|
178
|
+
// These are expected behaviors, not broken functionality
|
|
179
|
+
const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name);
|
|
180
|
+
if (isExternalAPI && this.isExpectedAPIError(response)) {
|
|
181
|
+
this.logger.info(`${tool.name}: External API error (expected behavior for external API tool)`);
|
|
182
|
+
return {
|
|
183
|
+
toolName: tool.name,
|
|
184
|
+
tested: true,
|
|
185
|
+
status: "working",
|
|
186
|
+
executionTime,
|
|
187
|
+
testParameters: cleanedParams,
|
|
188
|
+
response,
|
|
189
|
+
testInputMetadata: metadata,
|
|
190
|
+
responseMetadata,
|
|
191
|
+
note: "External API returned error (expected behavior)",
|
|
192
|
+
};
|
|
193
|
+
}
|
|
194
|
+
// Real tool failure (not just validation or expected API error)
|
|
177
195
|
return {
|
|
178
196
|
toolName: tool.name,
|
|
179
197
|
tested: true,
|
|
@@ -472,4 +490,48 @@ export class FunctionalityAssessor extends BaseAssessor {
|
|
|
472
490
|
}
|
|
473
491
|
return parts.join(" ");
|
|
474
492
|
}
|
|
493
|
+
/**
|
|
494
|
+
* Issue #168: Check if an error response indicates an expected external API error.
|
|
495
|
+
* External APIs may return rate limit (429), service unavailable (503), timeout,
|
|
496
|
+
* or similar errors that are expected behavior, not broken functionality.
|
|
497
|
+
*/
|
|
498
|
+
isExpectedAPIError(response) {
|
|
499
|
+
const content = this.extractResponseText(response);
|
|
500
|
+
if (!content)
|
|
501
|
+
return false;
|
|
502
|
+
// Match common external API error patterns
|
|
503
|
+
const expectedErrorPatterns = /rate\s*limit|429|503|service\s*unavailable|temporarily|timeout|connection\s*refused|network\s*error|api\s*error|external\s*service|upstream/i;
|
|
504
|
+
return expectedErrorPatterns.test(content);
|
|
505
|
+
}
|
|
506
|
+
/**
|
|
507
|
+
* Extract text content from a response for pattern matching.
|
|
508
|
+
*/
|
|
509
|
+
extractResponseText(response) {
|
|
510
|
+
if (typeof response === "string")
|
|
511
|
+
return response;
|
|
512
|
+
if (!response || typeof response !== "object")
|
|
513
|
+
return "";
|
|
514
|
+
const obj = response;
|
|
515
|
+
// Check common response content locations
|
|
516
|
+
if (typeof obj.content === "string")
|
|
517
|
+
return obj.content;
|
|
518
|
+
if (typeof obj.message === "string")
|
|
519
|
+
return obj.message;
|
|
520
|
+
if (typeof obj.error === "string")
|
|
521
|
+
return obj.error;
|
|
522
|
+
// Handle MCP response format with content array
|
|
523
|
+
if (Array.isArray(obj.content)) {
|
|
524
|
+
return obj.content
|
|
525
|
+
.map((item) => {
|
|
526
|
+
if (typeof item === "string")
|
|
527
|
+
return item;
|
|
528
|
+
if (typeof item?.text === "string")
|
|
529
|
+
return item.text;
|
|
530
|
+
return "";
|
|
531
|
+
})
|
|
532
|
+
.join(" ");
|
|
533
|
+
}
|
|
534
|
+
// Fallback to JSON stringify for deep search
|
|
535
|
+
return JSON.stringify(response);
|
|
536
|
+
}
|
|
475
537
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAGnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAiB9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IACnC,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,kBAAkB,CAAqB;IAG/C,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAGjD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAK;gBAE5B,MAAM,EAAE,uBAAuB;IAQrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;
|
|
1
|
+
{"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAGnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAiB9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IACnC,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,kBAAkB,CAAqB;IAG/C,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAGjD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAK;gBAE5B,MAAM,EAAE,uBAAuB;IAQrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAwHxB,OAAO,CAAC,gBAAgB;IA2LxB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,uBAAuB;IAa/B;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
|
|
@@ -144,7 +144,8 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
144
144
|
}
|
|
145
145
|
}
|
|
146
146
|
// Analyze responses for temporal behavior changes
|
|
147
|
-
|
|
147
|
+
// Issue #168: Pass context for external API dependency awareness
|
|
148
|
+
const result = this.analyzeResponses(tool, responses, context);
|
|
148
149
|
// Analyze definitions for mutation (rug pull via description change)
|
|
149
150
|
const definitionMutation = this.mutationDetector.detectDefinitionMutation(definitionSnapshots);
|
|
150
151
|
return {
|
|
@@ -167,7 +168,7 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
167
168
|
severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
|
|
168
169
|
};
|
|
169
170
|
}
|
|
170
|
-
analyzeResponses(tool, responses) {
|
|
171
|
+
analyzeResponses(tool, responses, context) {
|
|
171
172
|
if (responses.length === 0) {
|
|
172
173
|
return {
|
|
173
174
|
tool: tool.name,
|
|
@@ -205,6 +206,17 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
205
206
|
else if (isStateful) {
|
|
206
207
|
// Original stateful tool logic: schema comparison + behavioral content check
|
|
207
208
|
// Content variance is allowed as long as schema is consistent
|
|
209
|
+
// Issue #166: Check for isError variance first (external API behavior)
|
|
210
|
+
// For stateful tools, error vs success responses are expected from external APIs
|
|
211
|
+
const baselineIsError = responses[0].response?.isError === true;
|
|
212
|
+
const currentIsError = responses[i].response?.isError === true;
|
|
213
|
+
// Issue #168: Check context-based detection first, fall back to VarianceClassifier
|
|
214
|
+
const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name) ?? this.varianceClassifier.isExternalAPITool(tool);
|
|
215
|
+
if (baselineIsError !== currentIsError && isExternalAPI) {
|
|
216
|
+
// External API tool with error vs success variance - LEGITIMATE, not a deviation
|
|
217
|
+
this.logger.info(`${tool.name}: API error vs success variance at invocation ${i + 1} (expected for external API)`);
|
|
218
|
+
continue; // Skip to next invocation, don't count as deviation
|
|
219
|
+
}
|
|
208
220
|
let isDifferent = !this.varianceClassifier.compareSchemas(responses[0].response, responses[i].response);
|
|
209
221
|
// Secondary detection: Check for content semantic changes (rug pull patterns)
|
|
210
222
|
// This catches cases where schema is same but content shifts from helpful to harmful
|
|
@@ -222,7 +234,8 @@ export class TemporalAssessor extends BaseAssessor {
|
|
|
222
234
|
else if (isResourceCreating) {
|
|
223
235
|
// Issue #69: Use variance classification for resource-creating tools
|
|
224
236
|
// These need intelligent classification to distinguish ID variance from rug pulls
|
|
225
|
-
|
|
237
|
+
// Issue #166: Pass tool for external API error variance handling
|
|
238
|
+
const classification = this.varianceClassifier.classifyVariance(responses[0].response, responses[i].response, tool);
|
|
226
239
|
varianceDetails.push({
|
|
227
240
|
invocation: i + 1,
|
|
228
241
|
classification,
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"AlignmentChecker.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/AlignmentChecker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,gBAAgB,EAEhB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACzB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,gCAAgC,CAAC;AAuFxC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,kBAAkB,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AA0CD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAE7D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,OAAO,CAElD;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,IAAI,GAAG,oBAAoB,CAiNnE;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,IAAI,GACT,oBAAoB,CAAC,kBAAkB,CAAC,CA6D1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,iBAAiB,EAAE,CAqBtE;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,CAmD3E;AAqCD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,EACV,gBAAgB,EAAE,gBAAgB,EAClC,kBAAkB,CAAC,EAAE,wBAAwB,GAC5C,oBAAoB,CA0JtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,gBAAgB,
|
|
1
|
+
{"version":3,"file":"AlignmentChecker.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/AlignmentChecker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,gBAAgB,EAEhB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACzB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,gCAAgC,CAAC;AAuFxC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,kBAAkB,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AA0CD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAE7D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,OAAO,CAElD;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,IAAI,GAAG,oBAAoB,CAiNnE;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,IAAI,GACT,oBAAoB,CAAC,kBAAkB,CAAC,CA6D1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,iBAAiB,EAAE,CAqBtE;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,CAmD3E;AAqCD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,EACV,gBAAgB,EAAE,gBAAgB,EAClC,kBAAkB,CAAC,EAAE,wBAAwB,GAC5C,oBAAoB,CA0JtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,gBAAgB,CAoClB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,sBAAsB,CA2BxB"}
|
|
@@ -433,8 +433,12 @@ export function determineAnnotationStatus(results, totalTools) {
|
|
|
433
433
|
if (totalTools === 0)
|
|
434
434
|
return "PASS";
|
|
435
435
|
const annotatedCount = results.filter((r) => r.hasAnnotations).length;
|
|
436
|
-
|
|
437
|
-
|
|
436
|
+
// Issue #167: Only fail for actionable poisoning (MEDIUM or HIGH risk)
|
|
437
|
+
// LOW risk (e.g., length-only) is informational and should not cause FAIL
|
|
438
|
+
const actionablePoisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true &&
|
|
439
|
+
(r.descriptionPoisoning.riskLevel === "MEDIUM" ||
|
|
440
|
+
r.descriptionPoisoning.riskLevel === "HIGH")).length;
|
|
441
|
+
if (actionablePoisonedCount > 0)
|
|
438
442
|
return "FAIL";
|
|
439
443
|
const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
|
|
440
444
|
const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&
|
package/client/lib/services/assessment/modules/annotations/DescriptionPoisoningDetector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"DescriptionPoisoningDetector.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/DescriptionPoisoningDetector.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpC,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,OAAO,CAAC;IAClB,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QACpC,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,SAAS,EAAE,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C,oFAAoF;IACpF,aAAa,CAAC,EAAE;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,OAAO,CAAC;KACtB,CAAC;CACH;AAED;;;;GAIG;AACH,eAAO,MAAM,8BAA8B,EAAE,gBAAgB,EAwT5D,CAAC;AASF,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,
|
|
1
|
+
{"version":3,"file":"DescriptionPoisoningDetector.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/DescriptionPoisoningDetector.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAE/D;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IACpC,QAAQ,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,MAAM,WAAW,mBAAmB;IAClC,QAAQ,EAAE,OAAO,CAAC;IAClB,QAAQ,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QACpC,QAAQ,EAAE,MAAM,CAAC;QACjB,QAAQ,EAAE,MAAM,CAAC;KAClB,CAAC,CAAC;IACH,SAAS,EAAE,MAAM,GAAG,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAC9C,oFAAoF;IACpF,aAAa,CAAC,EAAE;QACd,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,WAAW,EAAE,OAAO,CAAC;KACtB,CAAC;CACH;AAED;;;;GAIG;AACH,eAAO,MAAM,8BAA8B,EAAE,gBAAgB,EAwT5D,CAAC;AASF,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,CA8E3E"}
|
|
@@ -318,6 +318,7 @@ export function scanDescriptionForPoisoning(tool) {
|
|
|
318
318
|
const matches = [];
|
|
319
319
|
// Length-based heuristic (Issue #119, Challenge #15)
|
|
320
320
|
// Excessively long descriptions may be used to hide malicious content
|
|
321
|
+
// Issue #167: Length is still measured here, but its match is pushed AFTER the pattern scan - severity depends on other patterns
|
|
321
322
|
let lengthWarning;
|
|
322
323
|
if (description.length > DESCRIPTION_LENGTH_WARNING_THRESHOLD) {
|
|
323
324
|
lengthWarning = {
|
|
@@ -325,13 +326,7 @@ export function scanDescriptionForPoisoning(tool) {
|
|
|
325
326
|
threshold: DESCRIPTION_LENGTH_WARNING_THRESHOLD,
|
|
326
327
|
isExcessive: true,
|
|
327
328
|
};
|
|
328
|
-
matches.push(
|
|
329
|
-
name: "excessive_description_length",
|
|
330
|
-
pattern: `length > ${DESCRIPTION_LENGTH_WARNING_THRESHOLD}`,
|
|
331
|
-
severity: "MEDIUM",
|
|
332
|
-
category: "suspicious_length",
|
|
333
|
-
evidence: `Description is ${description.length} characters (threshold: ${DESCRIPTION_LENGTH_WARNING_THRESHOLD})`,
|
|
334
|
-
});
|
|
329
|
+
// NOTE: matches.push moved to after pattern loop (Issue #167)
|
|
335
330
|
}
|
|
336
331
|
for (const patternDef of DESCRIPTION_POISONING_PATTERNS) {
|
|
337
332
|
// Create a fresh regex to reset lastIndex
|
|
@@ -351,6 +346,20 @@ export function scanDescriptionForPoisoning(tool) {
|
|
|
351
346
|
break;
|
|
352
347
|
}
|
|
353
348
|
}
|
|
349
|
+
// Issue #167: Add length warning AFTER pattern scan with conditional severity
|
|
350
|
+
// Long descriptions alone are LOW (informational), but length + other patterns = MEDIUM
|
|
351
|
+
if (lengthWarning) {
|
|
352
|
+
const hasOtherPatterns = matches.length > 0;
|
|
353
|
+
matches.push({
|
|
354
|
+
name: "excessive_description_length",
|
|
355
|
+
pattern: `length > ${DESCRIPTION_LENGTH_WARNING_THRESHOLD}`,
|
|
356
|
+
severity: hasOtherPatterns ? "MEDIUM" : "LOW",
|
|
357
|
+
category: "suspicious_length",
|
|
358
|
+
evidence: hasOtherPatterns
|
|
359
|
+
? `Description is ${description.length} characters AND contains ${matches.length} suspicious pattern(s)`
|
|
360
|
+
: `Description is ${description.length} characters (informational - no suspicious patterns detected)`,
|
|
361
|
+
});
|
|
362
|
+
}
|
|
354
363
|
// Determine overall risk level based on highest severity match
|
|
355
364
|
let riskLevel = "NONE";
|
|
356
365
|
if (matches.some((m) => m.severity === "HIGH")) {
|
|
@@ -29,6 +29,20 @@ export declare class ErrorClassifier {
|
|
|
29
29
|
* Check if caught exception indicates connection/server failure
|
|
30
30
|
*/
|
|
31
31
|
isConnectionErrorFromException(error: unknown): boolean;
|
|
32
|
+
/**
|
|
33
|
+
* Check if response indicates transient error worth retrying.
|
|
34
|
+
* Transient errors (ECONNREFUSED, ETIMEDOUT, etc.) may resolve on retry.
|
|
35
|
+
* Permanent errors (unknown tool, unauthorized) will not.
|
|
36
|
+
*
|
|
37
|
+
* @see https://github.com/triepod-ai/inspector-assessment/issues/157
|
|
38
|
+
*/
|
|
39
|
+
isTransientError(response: CompatibilityCallToolResult): boolean;
|
|
40
|
+
/**
|
|
41
|
+
* Check if caught exception indicates transient error worth retrying.
|
|
42
|
+
*
|
|
43
|
+
* @see https://github.com/triepod-ai/inspector-assessment/issues/157
|
|
44
|
+
*/
|
|
45
|
+
isTransientErrorFromException(error: unknown): boolean;
|
|
32
46
|
/**
|
|
33
47
|
* Internal: Check if text indicates connection/server failure
|
|
34
48
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ErrorClassifier.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/securityTests/ErrorClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oCAAoC,CAAC;
|
|
1
|
+
{"version":3,"file":"ErrorClassifier.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/securityTests/ErrorClassifier.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,2BAA2B,EAAE,MAAM,oCAAoC,CAAC;AASjF;;GAEG;AACH,MAAM,MAAM,mBAAmB,GAAG,YAAY,GAAG,QAAQ,GAAG,UAAU,CAAC;AAEvE;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,IAAI,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC;IACvB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,eAAe;IAC1B;;OAEG;IACH,iBAAiB,CAAC,QAAQ,EAAE,2BAA2B,GAAG,OAAO;IAKjE;;OAEG;IACH,8BAA8B,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAQvD;;;;;;OAMG;IACH,gBAAgB,CAAC,QAAQ,EAAE,2BAA2B,GAAG,OAAO;IAKhE;;;;OAIG;IACH,6BAA6B,CAAC,KAAK,EAAE,OAAO,GAAG,OAAO;IAQtD;;OAEG;IACH,OAAO,CAAC,yBAAyB;IAgBjC;;OAEG;IACH,aAAa,CAAC,QAAQ,EAAE,2BAA2B,GAAG,mBAAmB;IAKzE;;OAEG;IACH,0BAA0B,CAAC,KAAK,EAAE,OAAO,GAAG,mBAAmB;IAQ/D;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAgB7B;;OAEG;IACH,gBAAgB,CAAC,QAAQ,EAAE,2BAA2B,GAAG,SAAS;IAsBlE;;OAEG;IACH,sBAAsB,CAAC,QAAQ,EAAE,2BAA2B,GAAG,MAAM;CAUtE"}
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Extracted from SecurityResponseAnalyzer.ts (Issue #53)
|
|
6
6
|
* Handles: connection error detection, error classification, error info extraction
|
|
7
7
|
*/
|
|
8
|
-
import { CONNECTION_ERROR_PATTERNS, ERROR_CLASSIFICATION_PATTERNS, matchesAny, hasMcpErrorPrefix, } from "./SecurityPatternLibrary.js";
|
|
8
|
+
import { CONNECTION_ERROR_PATTERNS, ERROR_CLASSIFICATION_PATTERNS, matchesAny, hasMcpErrorPrefix, isTransientErrorPattern, } from "./SecurityPatternLibrary.js";
|
|
9
9
|
/**
|
|
10
10
|
* Classifies errors from tool responses and exceptions
|
|
11
11
|
*/
|
|
@@ -27,6 +27,29 @@ export class ErrorClassifier {
|
|
|
27
27
|
}
|
|
28
28
|
return false;
|
|
29
29
|
}
|
|
30
|
+
/**
|
|
31
|
+
* Check if response indicates transient error worth retrying.
|
|
32
|
+
* Transient errors (ECONNREFUSED, ETIMEDOUT, etc.) may resolve on retry.
|
|
33
|
+
* Permanent errors (unknown tool, unauthorized) will not.
|
|
34
|
+
*
|
|
35
|
+
* @see https://github.com/triepod-ai/inspector-assessment/issues/157
|
|
36
|
+
*/
|
|
37
|
+
isTransientError(response) {
|
|
38
|
+
const text = this.extractResponseContent(response).toLowerCase();
|
|
39
|
+
return isTransientErrorPattern(text);
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Check if caught exception indicates transient error worth retrying.
|
|
43
|
+
*
|
|
44
|
+
* @see https://github.com/triepod-ai/inspector-assessment/issues/157
|
|
45
|
+
*/
|
|
46
|
+
isTransientErrorFromException(error) {
|
|
47
|
+
if (error instanceof Error) {
|
|
48
|
+
const message = error.message.toLowerCase();
|
|
49
|
+
return isTransientErrorPattern(message);
|
|
50
|
+
}
|
|
51
|
+
return false;
|
|
52
|
+
}
|
|
30
53
|
/**
|
|
31
54
|
* Internal: Check if text indicates connection/server failure
|
|
32
55
|
*/
|