@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -735,6 +735,34 @@ interface AuthServerMetadata {
735
735
  */
736
736
  issuer: string;
737
737
  }
738
+ /**
739
+ * Configuration for token refresh
740
+ */
741
+ interface TokenRefreshConfig {
742
+ /**
743
+ * Authorization server metadata
744
+ */
745
+ authServer: AuthServerMetadata;
746
+ /**
747
+ * Client ID
748
+ */
749
+ clientId: string;
750
+ /**
751
+ * Client secret (for confidential clients)
752
+ */
753
+ clientSecret?: string;
754
+ /**
755
+ * Refresh token
756
+ */
757
+ refreshToken: string;
758
+ }
759
+ /**
760
+ * Refreshes an access token using a refresh token
761
+ *
762
+ * @param config - Token refresh configuration
763
+ * @returns New token result
764
+ */
765
+ declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
738
766
  /**
739
767
  * Configuration for client credentials grant
740
768
  */
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
2131
2159
  /**
2132
2160
  * Breakdown of expectation types used in a run
2133
2161
  */
2134
- type ExpectationBreakdown = Record<ExpectationType, number>;
2162
+ type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
2135
2163
 
2136
2164
  /**
2137
2165
  * Options for creating an MCP fixture
@@ -4455,4 +4483,4 @@ interface MCPEvalReporterConfig {
4455
4483
  includeAutoTracking?: boolean;
4456
4484
  }
4457
4485
 
4458
- export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
4486
+ export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
package/dist/index.d.ts CHANGED
@@ -735,6 +735,34 @@ interface AuthServerMetadata {
735
735
  */
736
736
  issuer: string;
737
737
  }
738
+ /**
739
+ * Configuration for token refresh
740
+ */
741
+ interface TokenRefreshConfig {
742
+ /**
743
+ * Authorization server metadata
744
+ */
745
+ authServer: AuthServerMetadata;
746
+ /**
747
+ * Client ID
748
+ */
749
+ clientId: string;
750
+ /**
751
+ * Client secret (for confidential clients)
752
+ */
753
+ clientSecret?: string;
754
+ /**
755
+ * Refresh token
756
+ */
757
+ refreshToken: string;
758
+ }
759
+ /**
760
+ * Refreshes an access token using a refresh token
761
+ *
762
+ * @param config - Token refresh configuration
763
+ * @returns New token result
764
+ */
765
+ declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
738
766
  /**
739
767
  * Configuration for client credentials grant
740
768
  */
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
2131
2159
  /**
2132
2160
  * Breakdown of expectation types used in a run
2133
2161
  */
2134
- type ExpectationBreakdown = Record<ExpectationType, number>;
2162
+ type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
2135
2163
 
2136
2164
  /**
2137
2165
  * Options for creating an MCP fixture
@@ -4455,4 +4483,4 @@ interface MCPEvalReporterConfig {
4455
4483
  includeAutoTracking?: boolean;
4456
4484
  }
4457
4485
 
4458
- export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
4486
+ export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
package/dist/index.js CHANGED
@@ -4384,7 +4384,7 @@ function escapeHtml(text) {
4384
4384
 
4385
4385
  // package.json
4386
4386
  var package_default = {
4387
- version: "1.0.0-beta.8"};
4387
+ version: "1.0.0"};
4388
4388
 
4389
4389
  // src/mcp/clientFactory.ts
4390
4390
  function getRetryAfterDelayMs(err) {
@@ -4603,6 +4603,17 @@ async function createMCPClientForConfig(config, options) {
4603
4603
  }
4604
4604
  async function closeMCPClient(client) {
4605
4605
  try {
4606
+ const transport = client.transport;
4607
+ if (transport instanceof StreamableHTTPClientTransport) {
4608
+ try {
4609
+ await transport.terminateSession();
4610
+ } catch (sessionError) {
4611
+ debugClient(
4612
+ "Error terminating session: %s",
4613
+ sessionError instanceof Error ? sessionError.message : String(sessionError)
4614
+ );
4615
+ }
4616
+ }
4606
4617
  await client.close();
4607
4618
  } catch (error) {
4608
4619
  debugClient(
@@ -4831,11 +4842,13 @@ function validateSchema(response, schema, options = {}) {
4831
4842
  } catch (error) {
4832
4843
  const zodError = error;
4833
4844
  const issues = formatZodIssues(zodError);
4845
+ const text = stringifyResponse(response);
4834
4846
  return {
4835
4847
  pass: false,
4836
4848
  message: `Response does not match schema: ${issues}`,
4837
4849
  details: {
4838
- issues: zodError.issues
4850
+ issues: zodError.issues,
4851
+ textPreview: truncateForDisplay2(text)
4839
4852
  }
4840
4853
  };
4841
4854
  }
@@ -4888,6 +4901,12 @@ function formatZodIssues(error) {
4888
4901
  });
4889
4902
  return issues.join("; ");
4890
4903
  }
4904
+ function truncateForDisplay2(str, maxLength = 200) {
4905
+ if (str.length <= maxLength) {
4906
+ return str;
4907
+ }
4908
+ return str.slice(0, maxLength) + "... (truncated)";
4909
+ }
4891
4910
 
4892
4911
  // src/assertions/validators/text.ts
4893
4912
  function validateText(response, expected, options = {}) {
@@ -4914,11 +4933,11 @@ function validateText(response, expected, options = {}) {
4914
4933
  details: {
4915
4934
  missing,
4916
4935
  textLength: text.length,
4917
- textPreview: truncateForDisplay2(text)
4936
+ textPreview: truncateForDisplay3(text)
4918
4937
  }
4919
4938
  };
4920
4939
  }
4921
- function truncateForDisplay2(str, maxLength = 200) {
4940
+ function truncateForDisplay3(str, maxLength = 200) {
4922
4941
  if (str.length <= maxLength) {
4923
4942
  return str;
4924
4943
  }
@@ -4950,7 +4969,7 @@ function validatePattern(response, patterns, options = {}) {
4950
4969
  details: {
4951
4970
  unmatched,
4952
4971
  textLength: text.length,
4953
- textPreview: truncateForDisplay3(text)
4972
+ textPreview: truncateForDisplay4(text)
4954
4973
  }
4955
4974
  };
4956
4975
  }
@@ -4970,7 +4989,7 @@ function patternToString(pattern) {
4970
4989
  }
4971
4990
  return `/${pattern}/`;
4972
4991
  }
4973
- function truncateForDisplay3(str, maxLength = 200) {
4992
+ function truncateForDisplay4(str, maxLength = 200) {
4974
4993
  if (str.length <= maxLength) {
4975
4994
  return str;
4976
4995
  }
@@ -4993,7 +5012,7 @@ function validateError(response, expected = true) {
4993
5012
  pass: false,
4994
5013
  message: "Expected an error response but got success",
4995
5014
  details: {
4996
- textPreview: truncateForDisplay4(extractText2(response))
5015
+ textPreview: truncateForDisplay5(extractText2(response))
4997
5016
  }
4998
5017
  };
4999
5018
  } else {
@@ -5005,7 +5024,7 @@ function validateError(response, expected = true) {
5005
5024
  }
5006
5025
  return {
5007
5026
  pass: false,
5008
- message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
5027
+ message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
5009
5028
  details: {
5010
5029
  errorMessage
5011
5030
  }
@@ -5018,7 +5037,7 @@ function validateError(response, expected = true) {
5018
5037
  pass: false,
5019
5038
  message: `Expected an error containing "${expectedMessages[0]}" but got success`,
5020
5039
  details: {
5021
- textPreview: truncateForDisplay4(extractText2(response))
5040
+ textPreview: truncateForDisplay5(extractText2(response))
5022
5041
  }
5023
5042
  };
5024
5043
  }
@@ -5040,7 +5059,7 @@ function validateError(response, expected = true) {
5040
5059
  }
5041
5060
  };
5042
5061
  }
5043
- function truncateForDisplay4(str, maxLength = 200) {
5062
+ function truncateForDisplay5(str, maxLength = 200) {
5044
5063
  if (str.length <= maxLength) {
5045
5064
  return str;
5046
5065
  }
@@ -5158,6 +5177,10 @@ function validateToolCalls(response, expectation) {
5158
5177
  return {
5159
5178
  pass: false,
5160
5179
  message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
5180
+ details: {
5181
+ actual: actual.map((c) => c.name),
5182
+ expected: expected.name
5183
+ },
5161
5184
  metrics
5162
5185
  };
5163
5186
  }
@@ -5174,6 +5197,10 @@ function validateToolCalls(response, expectation) {
5174
5197
  return {
5175
5198
  pass: false,
5176
5199
  message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
5200
+ details: {
5201
+ actual: actual.map((c) => c.name),
5202
+ expected: expected.name
5203
+ },
5177
5204
  metrics
5178
5205
  };
5179
5206
  }
@@ -5186,6 +5213,10 @@ function validateToolCalls(response, expectation) {
5186
5213
  return {
5187
5214
  pass: false,
5188
5215
  message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
5216
+ details: {
5217
+ actual: actual.map((c) => c.name),
5218
+ unexpected: unexpected.map((c) => c.name)
5219
+ },
5189
5220
  metrics
5190
5221
  };
5191
5222
  }
@@ -5204,19 +5235,22 @@ function validateToolCallCount(response, options) {
5204
5235
  if (exact !== void 0 && count !== exact) {
5205
5236
  return {
5206
5237
  pass: false,
5207
- message: `Expected exactly ${exact} tool call(s), but got ${count}`
5238
+ message: `Expected exactly ${exact} tool call(s), but got ${count}`,
5239
+ details: { actual: count, expected: exact }
5208
5240
  };
5209
5241
  }
5210
5242
  if (min !== void 0 && count < min) {
5211
5243
  return {
5212
5244
  pass: false,
5213
- message: `Expected at least ${min} tool call(s), but got ${count}`
5245
+ message: `Expected at least ${min} tool call(s), but got ${count}`,
5246
+ details: { actual: count, min }
5214
5247
  };
5215
5248
  }
5216
5249
  if (max !== void 0 && count > max) {
5217
5250
  return {
5218
5251
  pass: false,
5219
- message: `Expected at most ${max} tool call(s), but got ${count}`
5252
+ message: `Expected at most ${max} tool call(s), but got ${count}`,
5253
+ details: { actual: count, max }
5220
5254
  };
5221
5255
  }
5222
5256
  return {
@@ -5730,7 +5764,9 @@ function createJudge(config = {}) {
5730
5764
  case "google":
5731
5765
  return createGoogleJudge(config);
5732
5766
  default:
5733
- throw new Error(`Unsupported LLM provider: ${String(provider)}`);
5767
+ throw new Error(
5768
+ `Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
5769
+ );
5734
5770
  }
5735
5771
  }
5736
5772
 
@@ -6055,12 +6091,19 @@ function toMatchToolResponse(received, expected) {
6055
6091
  // src/assertions/matchers/toMatchToolSchema.ts
6056
6092
  function toMatchToolSchema(received, schema, options = {}) {
6057
6093
  const result = validateSchema(received, schema, options);
6094
+ const preview = result.details?.textPreview;
6058
6095
  return {
6059
6096
  pass: result.pass,
6060
6097
  message: () => {
6061
6098
  if (this.isNot) {
6062
6099
  return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
6063
6100
  }
6101
+ if (!result.pass && preview) {
6102
+ return `${result.message}
6103
+
6104
+ Actual response (truncated):
6105
+ ${preview}`;
6106
+ }
6064
6107
  return result.message;
6065
6108
  }
6066
6109
  };
@@ -6069,6 +6112,7 @@ function toMatchToolSchema(received, schema, options = {}) {
6069
6112
  // src/assertions/matchers/toContainToolText.ts
6070
6113
  function toContainToolText(received, expected, options = {}) {
6071
6114
  const result = validateText(received, expected, options);
6115
+ const preview = result.details?.textPreview;
6072
6116
  return {
6073
6117
  pass: result.pass,
6074
6118
  message: () => {
@@ -6076,6 +6120,12 @@ function toContainToolText(received, expected, options = {}) {
6076
6120
  const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
6077
6121
  return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
6078
6122
  }
6123
+ if (!result.pass && preview) {
6124
+ return `${result.message}
6125
+
6126
+ Actual response (truncated):
6127
+ ${preview}`;
6128
+ }
6079
6129
  return result.message;
6080
6130
  }
6081
6131
  };
@@ -6084,12 +6134,19 @@ function toContainToolText(received, expected, options = {}) {
6084
6134
  // src/assertions/matchers/toMatchToolPattern.ts
6085
6135
  function toMatchToolPattern(received, patterns, options = {}) {
6086
6136
  const result = validatePattern(received, patterns, options);
6137
+ const preview = result.details?.textPreview;
6087
6138
  return {
6088
6139
  pass: result.pass,
6089
6140
  message: () => {
6090
6141
  if (this.isNot) {
6091
6142
  return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
6092
6143
  }
6144
+ if (!result.pass && preview) {
6145
+ return `${result.message}
6146
+
6147
+ Actual response (truncated):
6148
+ ${preview}`;
6149
+ }
6093
6150
  return result.message;
6094
6151
  }
6095
6152
  };
@@ -7498,7 +7555,7 @@ function isInfrastructureError(err) {
7498
7555
  } else {
7499
7556
  return false;
7500
7557
  }
7501
- return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
7558
+ return name15?.toLowerCase() === "aborterror" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
7502
7559
  msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
7503
7560
  }
7504
7561
  async function runEvalCase(evalCase, context, options = {}) {
@@ -7969,6 +8026,6 @@ function formatCapabilities(capabilities) {
7969
8026
  return parts.length > 0 ? parts.join(", ") : "none declared";
7970
8027
  }
7971
8028
 
7972
- export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
8029
+ export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
7973
8030
  //# sourceMappingURL=index.js.map
7974
8031
  //# sourceMappingURL=index.js.map