@bryan-thompson/inspector-assessment 1.36.4 → 1.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/cli/build/__tests__/assessment-runner/tools-with-hints.test.js +91 -1
  2. package/cli/build/lib/assessment-runner/assessment-executor.js +12 -0
  3. package/cli/build/lib/assessment-runner/tools-with-hints.js +45 -5
  4. package/cli/package.json +1 -1
  5. package/client/dist/assets/{OAuthCallback-pydLxj3d.js → OAuthCallback-6-wM7Zc1.js} +1 -1
  6. package/client/dist/assets/{OAuthDebugCallback-BLEebYQf.js → OAuthDebugCallback-Bw9-AzzP.js} +1 -1
  7. package/client/dist/assets/{index-CVyqQ7s8.js → index-DyCdQP10.js} +4 -4
  8. package/client/dist/index.html +1 -1
  9. package/client/lib/lib/assessment/resultTypes.d.ts +4 -0
  10. package/client/lib/lib/assessment/resultTypes.d.ts.map +1 -1
  11. package/client/lib/lib/assessment/sharedSchemas.d.ts +10 -0
  12. package/client/lib/lib/assessment/sharedSchemas.d.ts.map +1 -1
  13. package/client/lib/lib/assessment/sharedSchemas.js +4 -0
  14. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts +2 -0
  15. package/client/lib/services/assessment/AssessmentOrchestrator.d.ts.map +1 -1
  16. package/client/lib/services/assessment/config/performanceConfig.d.ts +18 -0
  17. package/client/lib/services/assessment/config/performanceConfig.d.ts.map +1 -1
  18. package/client/lib/services/assessment/config/performanceConfig.js +6 -0
  19. package/client/lib/services/assessment/config/performanceConfigSchemas.d.ts +18 -0
  20. package/client/lib/services/assessment/config/performanceConfigSchemas.d.ts.map +1 -1
  21. package/client/lib/services/assessment/config/performanceConfigSchemas.js +20 -0
  22. package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.d.ts +74 -0
  23. package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.d.ts.map +1 -0
  24. package/client/lib/services/assessment/helpers/ExternalAPIDependencyDetector.js +131 -0
  25. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +6 -0
  26. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -1
  27. package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +93 -10
  28. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts +10 -0
  29. package/client/lib/services/assessment/modules/FunctionalityAssessor.d.ts.map +1 -1
  30. package/client/lib/services/assessment/modules/FunctionalityAssessor.js +65 -3
  31. package/client/lib/services/assessment/modules/TemporalAssessor.d.ts.map +1 -1
  32. package/client/lib/services/assessment/modules/TemporalAssessor.js +16 -3
  33. package/client/lib/services/assessment/modules/annotations/AlignmentChecker.d.ts.map +1 -1
  34. package/client/lib/services/assessment/modules/annotations/AlignmentChecker.js +6 -2
  35. package/client/lib/services/assessment/modules/annotations/AnnotationDeceptionDetector.d.ts +22 -0
  36. package/client/lib/services/assessment/modules/annotations/AnnotationDeceptionDetector.d.ts.map +1 -1
  37. package/client/lib/services/assessment/modules/annotations/AnnotationDeceptionDetector.js +53 -0
  38. package/client/lib/services/assessment/modules/annotations/DescriptionPoisoningDetector.d.ts.map +1 -1
  39. package/client/lib/services/assessment/modules/annotations/DescriptionPoisoningDetector.js +16 -7
  40. package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.d.ts +14 -0
  41. package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.d.ts.map +1 -1
  42. package/client/lib/services/assessment/modules/securityTests/ErrorClassifier.js +24 -1
  43. package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts +23 -1
  44. package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.d.ts.map +1 -1
  45. package/client/lib/services/assessment/modules/securityTests/SecurityPatternLibrary.js +50 -1
  46. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts +28 -0
  47. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.d.ts.map +1 -1
  48. package/client/lib/services/assessment/modules/securityTests/SecurityPayloadTester.js +67 -2
  49. package/client/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts +16 -1
  50. package/client/lib/services/assessment/modules/temporal/VarianceClassifier.d.ts.map +1 -1
  51. package/client/lib/services/assessment/modules/temporal/VarianceClassifier.js +43 -1
  52. package/client/package.json +1 -1
  53. package/package.json +1 -1
  54. package/server/package.json +1 -1
@@ -83,6 +83,26 @@ export const PerformanceConfigSchema = z.object({
83
83
  .min(PERF_CONFIG_RANGES.eventEmitterMaxListeners.min, `eventEmitterMaxListeners must be >= ${PERF_CONFIG_RANGES.eventEmitterMaxListeners.min}`)
84
84
  .max(PERF_CONFIG_RANGES.eventEmitterMaxListeners.max, `eventEmitterMaxListeners must be <= ${PERF_CONFIG_RANGES.eventEmitterMaxListeners.max}`)
85
85
  .optional(),
86
+ /**
87
+ * Maximum retry attempts for transient errors in security tests.
88
+ * Issue #157: Connection retry logic for reliability
89
+ */
90
+ securityRetryMaxAttempts: z
91
+ .number()
92
+ .int("securityRetryMaxAttempts must be an integer")
93
+ .min(PERF_CONFIG_RANGES.securityRetryMaxAttempts.min, `securityRetryMaxAttempts must be >= ${PERF_CONFIG_RANGES.securityRetryMaxAttempts.min}`)
94
+ .max(PERF_CONFIG_RANGES.securityRetryMaxAttempts.max, `securityRetryMaxAttempts must be <= ${PERF_CONFIG_RANGES.securityRetryMaxAttempts.max}`)
95
+ .optional(),
96
+ /**
97
+ * Initial backoff delay in milliseconds for security test retries.
98
+ * Issue #157: Connection retry logic for reliability
99
+ */
100
+ securityRetryBackoffMs: z
101
+ .number()
102
+ .int("securityRetryBackoffMs must be an integer")
103
+ .min(PERF_CONFIG_RANGES.securityRetryBackoffMs.min, `securityRetryBackoffMs must be >= ${PERF_CONFIG_RANGES.securityRetryBackoffMs.min}`)
104
+ .max(PERF_CONFIG_RANGES.securityRetryBackoffMs.max, `securityRetryBackoffMs must be <= ${PERF_CONFIG_RANGES.securityRetryBackoffMs.max}`)
105
+ .optional(),
86
106
  });
87
107
  /**
88
108
  * Validate a partial performance config using Zod.
@@ -0,0 +1,74 @@
1
+ /**
2
+ * External API Dependency Detector
3
+ *
4
+ * Identifies tools that depend on external APIs based on name and description patterns.
5
+ * This information enables downstream assessors to adjust their behavior:
6
+ * - TemporalAssessor: Relaxed variance thresholds for external API tools
7
+ * - FunctionalityAssessor: Accept API errors as valid responses
8
+ * - ErrorHandlingAssessor: Account for external service failures
9
+ *
10
+ * Issue #168: New module for external API dependency detection
11
+ *
12
+ * @module helpers/ExternalAPIDependencyDetector
13
+ */
14
+ import { Tool } from "@modelcontextprotocol/sdk/types.js";
15
+ /**
16
+ * External API dependency detection results
17
+ * @public
18
+ */
19
+ export interface ExternalAPIDependencyInfo {
20
+ /** Set of tool names that depend on external APIs */
21
+ toolsWithExternalAPIDependency: Set<string>;
22
+ /** Number of tools detected with external API dependencies */
23
+ detectedCount: number;
24
+ /** Detection confidence derived from the number of detected tools (0 = low, 1-2 = medium, 3+ = high) */
25
+ confidence: "high" | "medium" | "low";
26
+ /** List of detected tool names (for serialization) */
27
+ detectedTools: string[];
28
+ }
29
+ /**
30
+ * Detects external API dependencies in MCP tools based on name and description patterns.
31
+ * Designed to run during context preparation before assessors execute.
32
+ *
33
+ * @public
34
+ */
35
+ export declare class ExternalAPIDependencyDetector {
36
+ /**
37
+ * Tool name patterns that suggest external API dependency.
38
+ * Uses word-boundary matching to prevent false positives.
39
+ *
40
+ * Extracted from VarianceClassifier (Issue #166) for reuse across modules.
41
+ */
42
+ private readonly EXTERNAL_API_PATTERNS;
43
+ /**
44
+ * Description patterns that suggest external API dependency.
45
+ * Regex patterns for more flexible matching.
46
+ */
47
+ private readonly EXTERNAL_API_DESCRIPTION_PATTERNS;
48
+ /**
49
+ * Detect external API dependencies from a list of tools.
50
+ *
51
+ * @param tools - List of MCP tools to analyze
52
+ * @returns Detection results with tool names and confidence
53
+ */
54
+ detect(tools: Tool[]): ExternalAPIDependencyInfo;
55
+ /**
56
+ * Check if a single tool depends on external APIs.
57
+ * Checks both the tool name patterns and the description patterns; a match on either one is sufficient.
58
+ *
59
+ * @param tool - MCP tool to check
60
+ * @returns true if tool appears to depend on external APIs
61
+ */
62
+ isExternalAPITool(tool: Tool): boolean;
63
+ /**
64
+ * Get the list of name patterns used for detection.
65
+ * Useful for debugging and documentation.
66
+ */
67
+ getNamePatterns(): readonly string[];
68
+ /**
69
+ * Get the list of description patterns used for detection.
70
+ * Useful for debugging and documentation.
71
+ */
72
+ getDescriptionPatterns(): readonly RegExp[];
73
+ }
74
+ //# sourceMappingURL=ExternalAPIDependencyDetector.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ExternalAPIDependencyDetector.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/helpers/ExternalAPIDependencyDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAE1D;;;GAGG;AACH,MAAM,WAAW,yBAAyB;IACxC,qDAAqD;IACrD,8BAA8B,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC5C,8DAA8D;IAC9D,aAAa,EAAE,MAAM,CAAC;IACtB,qDAAqD;IACrD,UAAU,EAAE,MAAM,GAAG,QAAQ,GAAG,KAAK,CAAC;IACtC,sDAAsD;IACtD,aAAa,EAAE,MAAM,EAAE,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,6BAA6B;IACxC;;;;;OAKG;IACH,OAAO,CAAC,QAAQ,CAAC,qBAAqB,CAuBpC;IAEF;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,iCAAiC,CAQhD;IAEF;;;;;OAKG;IACH,MAAM,CAAC,KAAK,EAAE,IAAI,EAAE,GAAG,yBAAyB;IA8BhD;;;;;;OAMG;IACH,iBAAiB,CAAC,IAAI,EAAE,IAAI,GAAG,OAAO;IAmBtC;;;OAGG;IACH,eAAe,IAAI,SAAS,MAAM,EAAE;IAIpC;;;OAGG;IACH,sBAAsB,IAAI,SAAS,MAAM,EAAE;CAG5C"}
@@ -0,0 +1,131 @@
1
+ /**
2
+ * External API Dependency Detector
3
+ *
4
+ * Identifies tools that depend on external APIs based on name and description patterns.
5
+ * This information enables downstream assessors to adjust their behavior:
6
+ * - TemporalAssessor: Relaxed variance thresholds for external API tools
7
+ * - FunctionalityAssessor: Accept API errors as valid responses
8
+ * - ErrorHandlingAssessor: Account for external service failures
9
+ *
10
+ * Issue #168: New module for external API dependency detection
11
+ *
12
+ * @module helpers/ExternalAPIDependencyDetector
13
+ */
14
/**
 * Detects external API dependencies in MCP tools based on name and description patterns.
 * Designed to run during context preparation before assessors execute.
 *
 * @public
 */
export class ExternalAPIDependencyDetector {
    /**
     * Tool name keywords that suggest external API dependency.
     * Each keyword is matched as a whole `_`/`-` delimited segment of the
     * lower-cased tool name (an optional plural "s" is tolerated).
     *
     * Extracted from VarianceClassifier (Issue #166) for reuse across modules.
     */
    EXTERNAL_API_PATTERNS = [
        // API-related prefixes
        "api",
        "external",
        "remote",
        "live",
        // Data type patterns (typically from external sources)
        "weather",
        "stock",
        "price",
        "market",
        "currency",
        "exchange",
        "rate",
        "forex",
        // Service-specific prefixes
        "wb", // World Bank
        "worldbank",
        // Action patterns suggesting external fetch
        "fetch_from",
        "poll",
        "realtime",
        "current",
    ];
    /**
     * Description patterns that suggest external API dependency.
     * Regex patterns for more flexible matching.
     */
    EXTERNAL_API_DESCRIPTION_PATTERNS = [
        /external\s*(api|service)/i,
        /fetche?s?\s*(from|data\s+from)/i,
        /calls?\s*(external|remote)/i,
        /live\s*(data|feed|stream)/i,
        /real[- ]?time/i,
        /world\s*bank/i,
        /third[- ]?party\s*(api|service)/i,
    ];
    /**
     * Detect external API dependencies from a list of tools.
     *
     * Confidence is derived purely from how many tools were flagged:
     * 0 -> "low", 1-2 -> "medium", 3 or more -> "high".
     *
     * @param tools - List of MCP tools to analyze
     * @returns Detection results: the set of flagged tool names, its size,
     *          the confidence label, and the same names as an array
     *          (for serialization)
     */
    detect(tools) {
        const toolsWithExternalAPI = new Set();
        for (const tool of tools) {
            if (this.isExternalAPITool(tool)) {
                toolsWithExternalAPI.add(tool.name);
            }
        }
        const detectedCount = toolsWithExternalAPI.size;
        let confidence;
        if (detectedCount === 0) {
            confidence = "low";
        }
        else if (detectedCount >= 3) {
            confidence = "high";
        }
        else {
            confidence = "medium";
        }
        return {
            toolsWithExternalAPIDependency: toolsWithExternalAPI,
            detectedCount,
            confidence,
            detectedTools: Array.from(toolsWithExternalAPI),
        };
    }
    /**
     * Check if a single tool depends on external APIs.
     * Both the name keywords and the description patterns are checked;
     * a match on either one is sufficient.
     *
     * @param tool - MCP tool to check
     * @returns true if tool appears to depend on external APIs
     */
    isExternalAPITool(tool) {
        const toolName = tool.name.toLowerCase();
        const description = (tool.description || "").toLowerCase();
        // Segment-boundary matching on the name:
        //   "weather_api" / "get_stocks" match, "capital_gains" does not.
        // The optional plural "s" must itself be followed by a boundary;
        // otherwise names such as "pollster" or "stockshelf" would be
        // flagged merely because an "s" follows the keyword.
        const nameMatch = this.EXTERNAL_API_PATTERNS.some((pattern) => {
            // Escape regex metacharacters so a keyword is always treated literally.
            const literal = pattern.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            const segmentRegex = new RegExp(`(^|_|-)${literal}s?($|_|-)`);
            return segmentRegex.test(toolName);
        });
        // Check description for external API indicators
        const descriptionMatch = this.EXTERNAL_API_DESCRIPTION_PATTERNS.some((regex) => regex.test(description));
        return nameMatch || descriptionMatch;
    }
    /**
     * Get the list of name patterns used for detection.
     * Useful for debugging and documentation.
     */
    getNamePatterns() {
        return this.EXTERNAL_API_PATTERNS;
    }
    /**
     * Get the list of description patterns used for detection.
     * Useful for debugging and documentation.
     */
    getDescriptionPatterns() {
        return this.EXTERNAL_API_DESCRIPTION_PATTERNS;
    }
}
@@ -46,6 +46,12 @@ export declare class ErrorHandlingAssessor extends BaseAssessor {
46
46
  private calculateMetrics;
47
47
  private determineErrorHandlingStatus;
48
48
  private generateExplanation;
49
+ /**
50
+ * Check if an error indicates an external service failure
51
+ * Issue #168: External API tools may fail due to service unavailability,
52
+ * which should not count as validation failure
53
+ */
54
+ private isExternalServiceError;
49
55
  private generateRecommendations;
50
56
  }
51
57
  //# sourceMappingURL=ErrorHandlingAssessor.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAKxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,iBAAiB,CAA4B;IACrD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,eAAe,CAAkB;gBAE7B,MAAM,EAAE,uBAAuB;IAOrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IA4G1E,OAAO,CAAC,qBAAqB;YAuDf,qBAAqB;YA0BrB,qBAAqB;YA0HrB,cAAc;YA0Gd,iBAAiB;YAqFjB,kBAAkB;IAoFhC,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,0BAA0B;IAkClC,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,4BAA4B;IAgEpC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAc/B;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAetC,OAAO,CAAC,gBAAgB;IA8GxB,OAAO,CAAC,4BAA4B;IAapC,OAAO,CAAC,mBAAmB;IAuE3B,OAAO,CAAC,uBAAuB;CA4ChC"}
1
+ {"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAKxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAU9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,iBAAiB,CAA4B;IACrD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,eAAe,CAAkB;gBAE7B,MAAM,EAAE,uBAAuB;IAOrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IA6G1E,OAAO,CAAC,qBAAqB;YAuDf,qBAAqB;YAiCrB,qBAAqB;YA+IrB,cAAc;YA8Hd,iBAAiB;YAyGjB,kBAAkB;IAwGhC,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,uBAAuB;IAkC/B,OAAO,CAAC,0BAA0B;IAkClC,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,4BAA4B;IAgEpC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAc/B;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAetC,OAAO,CAAC,gBAAgB;IA8GxB,OAAO,CAAC,4BAA4B;IAapC,OAAO,CAAC,mBAAmB;IAuE3B;;;;OAIG;IACH,OAAO,CAAC,sBAAsB;IAgB9B,OAAO,CAAC,uBAAuB;CA4ChC"}
@@ -28,7 +28,7 @@ export class ErrorHandlingAssessor extends BaseAssessor {
28
28
  const limit = createConcurrencyLimit(concurrency, this.logger);
29
29
  this.logger.info(`Testing ${toolsToTest.length} tools for error handling with concurrency limit of ${concurrency}`);
30
30
  const allToolTests = await Promise.all(toolsToTest.map((tool) => limit(async () => {
31
- const toolTests = await this.testToolErrorHandling(tool, context.callTool);
31
+ const toolTests = await this.testToolErrorHandling(tool, context.callTool, context);
32
32
  // Emit per-tool validation summary for auditor UI (Phase 7)
33
33
  if (context.onProgress) {
34
34
  // Count failures by test type (failed = tool didn't reject invalid input)
@@ -125,21 +125,23 @@ export class ErrorHandlingAssessor extends BaseAssessor {
125
125
  this.logger.info(`Testing ${maxTools} out of ${tools.length} tools for error handling`);
126
126
  return tools.slice(0, maxTools);
127
127
  }
128
- async testToolErrorHandling(tool, callTool) {
128
+ async testToolErrorHandling(tool, callTool, context) {
129
129
  const tests = [];
130
+ // Issue #168: Check if tool depends on external API
131
+ const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name) ?? false;
130
132
  // Scored tests first (affect compliance score)
131
133
  // Test 1: Missing required parameters
132
- tests.push(await this.testMissingParameters(tool, callTool));
134
+ tests.push(await this.testMissingParameters(tool, callTool, isExternalAPI));
133
135
  // Test 2: Wrong parameter types
134
- tests.push(await this.testWrongTypes(tool, callTool));
136
+ tests.push(await this.testWrongTypes(tool, callTool, isExternalAPI));
135
137
  // Test 3: Excessive input size
136
- tests.push(await this.testExcessiveInput(tool, callTool));
138
+ tests.push(await this.testExcessiveInput(tool, callTool, isExternalAPI));
137
139
  // Informational tests last (do not affect compliance score)
138
140
  // Test 4: Invalid parameter values (edge case handling)
139
- tests.push(await this.testInvalidValues(tool, callTool));
141
+ tests.push(await this.testInvalidValues(tool, callTool, isExternalAPI));
140
142
  return tests;
141
143
  }
142
- async testMissingParameters(tool, callTool) {
144
+ async testMissingParameters(tool, callTool, isExternalAPI = false) {
143
145
  const testInput = {}; // Empty params
144
146
  // Check if tool has any required parameters
145
147
  const schema = this.getToolSchema(tool);
@@ -178,6 +180,24 @@ export class ErrorHandlingAssessor extends BaseAssessor {
178
180
  messageLower.includes("must specify") ||
179
181
  // Also accept field-specific errors (even better!)
180
182
  /\b(query|field|parameter|argument|value|input)\b/i.test(errorInfo.message ?? ""));
183
+ // Issue #168: For external API tools, check if error is an external service error
184
+ // External service errors should be treated as passed (validation can't be tested)
185
+ if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
186
+ return {
187
+ toolName: tool.name,
188
+ testType: "missing_required",
189
+ testInput,
190
+ expectedError: "Missing required parameters",
191
+ actualResponse: {
192
+ isError,
193
+ errorCode: errorInfo.code,
194
+ errorMessage: errorInfo.message,
195
+ rawResponse: response,
196
+ },
197
+ passed: true,
198
+ reason: "External API service error (validation cannot be tested when service unavailable)",
199
+ };
200
+ }
181
201
  return {
182
202
  toolName: tool.name,
183
203
  testType: "missing_required",
@@ -239,7 +259,7 @@ export class ErrorHandlingAssessor extends BaseAssessor {
239
259
  };
240
260
  }
241
261
  }
242
- async testWrongTypes(tool, callTool) {
262
+ async testWrongTypes(tool, callTool, isExternalAPI = false) {
243
263
  const schema = this.getToolSchema(tool);
244
264
  const testInput = this.generateWrongTypeParams(schema);
245
265
  try {
@@ -264,6 +284,23 @@ export class ErrorHandlingAssessor extends BaseAssessor {
264
284
  messageLower.includes("object") ||
265
285
  // Also accept validation framework messages
266
286
  /\b(validation|validate|schema|format)\b/i.test(errorInfo.message ?? ""));
287
+ // Issue #168: For external API tools, check if error is an external service error
288
+ if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
289
+ return {
290
+ toolName: tool.name,
291
+ testType: "wrong_type",
292
+ testInput,
293
+ expectedError: "Type validation error",
294
+ actualResponse: {
295
+ isError,
296
+ errorCode: errorInfo.code,
297
+ errorMessage: errorInfo.message,
298
+ rawResponse: response,
299
+ },
300
+ passed: true,
301
+ reason: "External API service error (validation cannot be tested when service unavailable)",
302
+ };
303
+ }
267
304
  return {
268
305
  toolName: tool.name,
269
306
  testType: "wrong_type",
@@ -326,13 +363,30 @@ export class ErrorHandlingAssessor extends BaseAssessor {
326
363
  };
327
364
  }
328
365
  }
329
- async testInvalidValues(tool, callTool) {
366
+ async testInvalidValues(tool, callTool, isExternalAPI = false) {
330
367
  const schema = this.getToolSchema(tool);
331
368
  const testInput = this.generateInvalidValueParams(schema);
332
369
  try {
333
370
  const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
334
371
  const isError = this.isErrorResponse(response);
335
372
  const errorInfo = this.extractErrorInfo(response);
373
+ // Issue #168: For external API tools, check if error is an external service error
374
+ if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
375
+ return {
376
+ toolName: tool.name,
377
+ testType: "invalid_values",
378
+ testInput,
379
+ expectedError: "Invalid parameter values",
380
+ actualResponse: {
381
+ isError,
382
+ errorCode: errorInfo.code,
383
+ errorMessage: errorInfo.message,
384
+ rawResponse: response,
385
+ },
386
+ passed: true,
387
+ reason: "External API service error (validation cannot be tested when service unavailable)",
388
+ };
389
+ }
336
390
  // For invalid values, any error response is good
337
391
  // The server is validating inputs properly
338
392
  return {
@@ -396,13 +450,30 @@ export class ErrorHandlingAssessor extends BaseAssessor {
396
450
  };
397
451
  }
398
452
  }
399
- async testExcessiveInput(tool, callTool) {
453
+ async testExcessiveInput(tool, callTool, isExternalAPI = false) {
400
454
  const largeString = "x".repeat(100000); // 100KB string
401
455
  const testInput = this.generateParamsWithValue(tool, largeString);
402
456
  try {
403
457
  const response = await this.executeWithTimeout(callTool(tool.name, testInput), 5000);
404
458
  const isError = this.isErrorResponse(response);
405
459
  const errorInfo = this.extractErrorInfo(response);
460
+ // Issue #168: For external API tools, check if error is an external service error
461
+ if (isExternalAPI && isError && this.isExternalServiceError(errorInfo)) {
462
+ return {
463
+ toolName: tool.name,
464
+ testType: "excessive_input",
465
+ testInput: { ...testInput, value: "[100KB string]" },
466
+ expectedError: "Input size limit exceeded",
467
+ actualResponse: {
468
+ isError,
469
+ errorCode: errorInfo.code,
470
+ errorMessage: errorInfo.message,
471
+ rawResponse: response ? "[response omitted]" : undefined,
472
+ },
473
+ passed: true,
474
+ reason: "External API service error (validation cannot be tested when service unavailable)",
475
+ };
476
+ }
406
477
  return {
407
478
  toolName: tool.name,
408
479
  testType: "excessive_input",
@@ -780,6 +851,18 @@ export class ErrorHandlingAssessor extends BaseAssessor {
780
851
  parts.push(`Tested ${toolsTested} tools with ${totalScoredTests} scored scenarios (${totalTests} total including informational).`);
781
852
  return parts.join(" ");
782
853
  }
854
+ /**
855
+ * Check if an error indicates an external service failure
856
+ * Issue #168: External API tools may fail due to service unavailability,
857
+ * which should not count as validation failure
858
+ */
859
+ isExternalServiceError(errorInfo) {
860
+ const message = errorInfo.message?.toLowerCase() ?? "";
861
+ const code = String(errorInfo.code ?? "").toLowerCase();
862
+ // Common external service error patterns
863
+ const externalErrorPatterns = /rate\s*limit|429|503|502|504|service\s*unavailable|temporarily|timeout|connection\s*refused|network\s*error|api\s*error|external\s*service|upstream|gateway|unreachable|econnrefused|enotfound|etimedout|socket\s*hang\s*up/i;
864
+ return (externalErrorPatterns.test(message) || externalErrorPatterns.test(code));
865
+ }
783
866
  generateRecommendations(metrics, tests) {
784
867
  const recommendations = [];
785
868
  if (!metrics.hasProperErrorCodes) {
@@ -31,5 +31,15 @@ export declare class FunctionalityAssessor extends BaseAssessor {
31
31
  private determineStrategy;
32
32
  generateTestInput(schema: JSONSchema7): unknown;
33
33
  private generateExplanation;
34
+ /**
35
+ * Issue #168: Check if an error response indicates an expected external API error.
36
+ * External APIs may return rate limit (429), service unavailable (503), timeout,
37
+ * or similar errors that are expected behavior, not broken functionality.
38
+ */
39
+ private isExpectedAPIError;
40
+ /**
41
+ * Extract text content from a response for pattern matching.
42
+ */
43
+ private extractResponseText;
34
44
  }
35
45
  //# sourceMappingURL=FunctionalityAssessor.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAGvB,WAAW,EACZ,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAc9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,cAAc,CAAwB;IAE9C;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YAoI5D,QAAQ;IAoGtB,OAAO,CAAC,qBAAqB;IAoE7B,OAAO,CAAC,kBAAkB;IAoH1B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAe7C;IAEF;;;OAGG;IACH,OAAO,CAAC,mCAAmC;IAsF3C;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWlB,iBAAiB,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO;IAItD,OAAO,CAAC,mBAAmB;CA+B5B"}
1
+ {"version":3,"file":"FunctionalityAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/FunctionalityAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAGvB,WAAW,EACZ,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAc9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,cAAc,CAAwB;IAE9C;;OAEG;IACH,OAAO,CAAC,qBAAqB;IAoCvB,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;YAoI5D,QAAQ;IA6HtB,OAAO,CAAC,qBAAqB;IAoE7B,OAAO,CAAC,kBAAkB;IAoH1B;;;OAGG;IACH,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,uBAAuB,CAe7C;IAEF;;;OAGG;IACH,OAAO,CAAC,mCAAmC;IAsF3C;;OAEG;IACH,OAAO,CAAC,iBAAiB;IAWlB,iBAAiB,CAAC,MAAM,EAAE,WAAW,GAAG,OAAO;IAItD,OAAO,CAAC,mBAAmB;IAgC3B;;;;OAIG;IACH,OAAO,CAAC,kBAAkB;IAW1B;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAyB5B"}
@@ -76,7 +76,7 @@ export class FunctionalityAssessor extends BaseAssessor {
76
76
  this.testCount++;
77
77
  completedTests++;
78
78
  batchCount++;
79
- const result = await this.testTool(tool, context.callTool);
79
+ const result = await this.testTool(tool, context.callTool, context);
80
80
  // Emit progress batch if threshold reached
81
81
  const timeSinceLastBatch = Date.now() - lastBatchTime;
82
82
  if (batchCount >= BATCH_SIZE ||
@@ -131,7 +131,7 @@ export class FunctionalityAssessor extends BaseAssessor {
131
131
  tools,
132
132
  };
133
133
  }
134
- async testTool(tool, callTool) {
134
+ async testTool(tool, callTool, context) {
135
135
  const startTime = Date.now();
136
136
  // Generate minimal valid parameters with metadata
137
137
  const { params: testParams, metadata } = this.generateMinimalParams(tool);
@@ -173,7 +173,25 @@ export class FunctionalityAssessor extends BaseAssessor {
173
173
  responseMetadata,
174
174
  };
175
175
  }
176
- // Real tool failure (not just validation)
176
+ // Issue #168: Check for expected external API errors
177
+ // External API tools may fail due to rate limits, service unavailability, etc.
178
+ // These are expected behaviors, not broken functionality
179
+ const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name);
180
+ if (isExternalAPI && this.isExpectedAPIError(response)) {
181
+ this.logger.info(`${tool.name}: External API error (expected behavior for external API tool)`);
182
+ return {
183
+ toolName: tool.name,
184
+ tested: true,
185
+ status: "working",
186
+ executionTime,
187
+ testParameters: cleanedParams,
188
+ response,
189
+ testInputMetadata: metadata,
190
+ responseMetadata,
191
+ note: "External API returned error (expected behavior)",
192
+ };
193
+ }
194
+ // Real tool failure (not just validation or expected API error)
177
195
  return {
178
196
  toolName: tool.name,
179
197
  tested: true,
@@ -472,4 +490,48 @@ export class FunctionalityAssessor extends BaseAssessor {
472
490
  }
473
491
  return parts.join(" ");
474
492
  }
493
+ /**
494
+ * Issue #168: Check if an error response indicates an expected external API error.
495
+ * External APIs may return rate limit (429), service unavailable (503), timeout,
496
+ * or similar errors that are expected behavior, not broken functionality.
497
+ */
498
+ isExpectedAPIError(response) {
499
+ const content = this.extractResponseText(response);
500
+ if (!content)
501
+ return false;
502
+ // Match common external API error patterns
503
+ const expectedErrorPatterns = /rate\s*limit|429|503|service\s*unavailable|temporarily|timeout|connection\s*refused|network\s*error|api\s*error|external\s*service|upstream/i;
504
+ return expectedErrorPatterns.test(content);
505
+ }
506
+ /**
507
+ * Extract text content from a response for pattern matching.
508
+ */
509
+ extractResponseText(response) {
510
+ if (typeof response === "string")
511
+ return response;
512
+ if (!response || typeof response !== "object")
513
+ return "";
514
+ const obj = response;
515
+ // Check common response content locations
516
+ if (typeof obj.content === "string")
517
+ return obj.content;
518
+ if (typeof obj.message === "string")
519
+ return obj.message;
520
+ if (typeof obj.error === "string")
521
+ return obj.error;
522
+ // Handle MCP response format with content array
523
+ if (Array.isArray(obj.content)) {
524
+ return obj.content
525
+ .map((item) => {
526
+ if (typeof item === "string")
527
+ return item;
528
+ if (typeof item?.text === "string")
529
+ return item.text;
530
+ return "";
531
+ })
532
+ .join(" ");
533
+ }
534
+ // Fallback to JSON stringify for deep search
535
+ return JSON.stringify(response);
536
+ }
475
537
  }
@@ -1 +1 @@
1
- {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAGnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAiB9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IACnC,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,kBAAkB,CAAqB;IAG/C,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAGjD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAK;gBAE5B,MAAM,EAAE,uBAAuB;IAQrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAuHxB,OAAO,CAAC,gBAAgB;IAkKxB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,uBAAuB;IAa/B;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
1
+ {"version":3,"file":"TemporalAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/TemporalAssessor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EACL,uBAAuB,EAEvB,kBAAkB,EAGnB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAiB9C,qBAAa,gBAAiB,SAAQ,YAAY;IAChD,OAAO,CAAC,kBAAkB,CAAS;IACnC,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,kBAAkB,CAAqB;IAG/C,OAAO,CAAC,QAAQ,CAAC,sBAAsB,CAAU;IAGjD,OAAO,CAAC,QAAQ,CAAC,kBAAkB,CAAK;gBAE5B,MAAM,EAAE,uBAAuB;IAQrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,kBAAkB,CAAC;YAqEvD,UAAU;IAwHxB,OAAO,CAAC,gBAAgB;IA2LxB;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,uBAAuB;IAa/B;;;OAGG;IACH,OAAO,CAAC,mBAAmB;IAsC3B,OAAO,CAAC,uBAAuB;IAa/B,OAAO,CAAC,mBAAmB;IA+C3B,OAAO,CAAC,uBAAuB;CA+DhC"}
@@ -144,7 +144,8 @@ export class TemporalAssessor extends BaseAssessor {
144
144
  }
145
145
  }
146
146
  // Analyze responses for temporal behavior changes
147
- const result = this.analyzeResponses(tool, responses);
147
+ // Issue #168: Pass context for external API dependency awareness
148
+ const result = this.analyzeResponses(tool, responses, context);
148
149
  // Analyze definitions for mutation (rug pull via description change)
149
150
  const definitionMutation = this.mutationDetector.detectDefinitionMutation(definitionSnapshots);
150
151
  return {
@@ -167,7 +168,7 @@ export class TemporalAssessor extends BaseAssessor {
167
168
  severity: definitionMutation !== null || result.vulnerable ? "HIGH" : "NONE",
168
169
  };
169
170
  }
170
- analyzeResponses(tool, responses) {
171
+ analyzeResponses(tool, responses, context) {
171
172
  if (responses.length === 0) {
172
173
  return {
173
174
  tool: tool.name,
@@ -205,6 +206,17 @@ export class TemporalAssessor extends BaseAssessor {
205
206
  else if (isStateful) {
206
207
  // Original stateful tool logic: schema comparison + behavioral content check
207
208
  // Content variance is allowed as long as schema is consistent
209
+ // Issue #166: Check for isError variance first (external API behavior)
210
+ // For stateful tools, error vs success responses are expected from external APIs
211
+ const baselineIsError = responses[0].response?.isError === true;
212
+ const currentIsError = responses[i].response?.isError === true;
213
+ // Issue #168: Check context-based detection first, fall back to VarianceClassifier
214
+ const isExternalAPI = context.externalAPIDependencies?.toolsWithExternalAPIDependency.has(tool.name) ?? this.varianceClassifier.isExternalAPITool(tool);
215
+ if (baselineIsError !== currentIsError && isExternalAPI) {
216
+ // External API tool with error vs success variance - LEGITIMATE, not a deviation
217
+ this.logger.info(`${tool.name}: API error vs success variance at invocation ${i + 1} (expected for external API)`);
218
+ continue; // Skip to next invocation, don't count as deviation
219
+ }
208
220
  let isDifferent = !this.varianceClassifier.compareSchemas(responses[0].response, responses[i].response);
209
221
  // Secondary detection: Check for content semantic changes (rug pull patterns)
210
222
  // This catches cases where schema is same but content shifts from helpful to harmful
@@ -222,7 +234,8 @@ export class TemporalAssessor extends BaseAssessor {
222
234
  else if (isResourceCreating) {
223
235
  // Issue #69: Use variance classification for resource-creating tools
224
236
  // These need intelligent classification to distinguish ID variance from rug pulls
225
- const classification = this.varianceClassifier.classifyVariance(responses[0].response, responses[i].response);
237
+ // Issue #166: Pass tool for external API error variance handling
238
+ const classification = this.varianceClassifier.classifyVariance(responses[0].response, responses[i].response, tool);
226
239
  varianceDetails.push({
227
240
  invocation: i + 1,
228
241
  classification,
@@ -1 +1 @@
1
- {"version":3,"file":"AlignmentChecker.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/AlignmentChecker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,gBAAgB,EAEhB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACzB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,gCAAgC,CAAC;AAuFxC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,kBAAkB,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AA0CD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAE7D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,OAAO,CAElD;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,IAAI,GAAG,oBAAoB,CAiNnE;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,IAAI,GACT,oBAAoB,CAAC,kBAAkB,CAAC,CA6D1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,iBAAiB,EAAE,CAqBtE;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,CAmD3E;AAqCD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,EACV,gBAAgB,EAAE,gBAAgB,EAClC,kBAAkB,CAAC,EAAE,wBAAwB,GAC5C,oBAAoB,CA0JtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,gBAAgB,CA8BlB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,sBAAsB,CA2BxB"}
1
+ {"version":3,"file":"AlignmentChecker.d.ts","sourceRoot":"","sources":["../../../../../src/services/assessment/modules/annotations/AlignmentChecker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,oCAAoC,CAAC;AAC/D,OAAO,KAAK,EACV,oBAAoB,EACpB,gBAAgB,EAEhB,iBAAiB,EACjB,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,gBAAgB,EAChB,wBAAwB,EACzB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAEL,KAAK,mBAAmB,EACzB,MAAM,gCAAgC,CAAC;AAuFxC;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,YAAY,CAAC,EAAE,OAAO,CAAC;IACvB,eAAe,CAAC,EAAE,OAAO,CAAC;IAC1B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,cAAc,CAAC,EAAE,OAAO,CAAC;IACzB,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,MAAM,EAAE,gBAAgB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,OAAO,EAAE;QACP,QAAQ,EAAE,MAAM,CAAC;QACjB,WAAW,EAAE,MAAM,CAAC;QACpB,WAAW,EAAE,MAAM,CAAC;QACpB,cAAc,EAAE,MAAM,CAAC;KACxB,CAAC;IACF,kBAAkB,EAAE;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,UAAU,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,OAAO,EAAE,MAAM,CAAC;KACjB,CAAC;CACH;AA0CD;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAE7D;AAED;;GAEG;AACH,wBAAgB,wBAAwB,IAAI,OAAO,CAElD;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,IAAI,GAAG,oBAAoB,CAiNnE;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,IAAI,GACT,oBAAoB,CAAC,kBAAkB,CAAC,CA6D1C;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,OAAO,GAAG,iBAAiB,EAAE,CAqBtE;AAED;;;;;;GAMG;AACH,wBAAgB,2BAA2B,CAAC,IAAI,EAAE,IAAI,GAAG,mBAAmB,CAmD3E;AAqCD;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,IAAI,EACV,gBAAgB,EAAE,gBAAgB,EAClC,kBAAkB,CAAC,EAAE,wBAAwB,GAC5C,oBAAoB,CA0JtB;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,gBAAgB,CAoClB;AAED;;GAEG;AACH,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,EAAE,EAC/B,UAAU,EAAE,MAAM,GACjB,sBAAsB,CA2BxB"}
@@ -433,8 +433,12 @@ export function determineAnnotationStatus(results, totalTools) {
433
433
  if (totalTools === 0)
434
434
  return "PASS";
435
435
  const annotatedCount = results.filter((r) => r.hasAnnotations).length;
436
- const poisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true).length;
437
- if (poisonedCount > 0)
436
+ // Issue #167: Only fail for actionable poisoning (MEDIUM or HIGH risk)
437
+ // LOW risk (e.g., length-only) is informational and should not cause FAIL
438
+ const actionablePoisonedCount = results.filter((r) => r.descriptionPoisoning?.detected === true &&
439
+ (r.descriptionPoisoning.riskLevel === "MEDIUM" ||
440
+ r.descriptionPoisoning.riskLevel === "HIGH")).length;
441
+ if (actionablePoisonedCount > 0)
438
442
  return "FAIL";
439
443
  const misalignedCount = results.filter((r) => r.alignmentStatus === "MISALIGNED").length;
440
444
  const destructiveWithoutHint = results.filter((r) => r.inferredBehavior?.expectedDestructive === true &&