@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/cli/index.js +12 -1
- package/dist/fixtures/mcp.js +71 -14
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +73 -15
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +30 -2
- package/dist/index.d.ts +30 -2
- package/dist/index.js +73 -16
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -735,6 +735,34 @@ interface AuthServerMetadata {
|
|
|
735
735
|
*/
|
|
736
736
|
issuer: string;
|
|
737
737
|
}
|
|
738
|
+
/**
|
|
739
|
+
* Configuration for token refresh
|
|
740
|
+
*/
|
|
741
|
+
interface TokenRefreshConfig {
|
|
742
|
+
/**
|
|
743
|
+
* Authorization server metadata
|
|
744
|
+
*/
|
|
745
|
+
authServer: AuthServerMetadata;
|
|
746
|
+
/**
|
|
747
|
+
* Client ID
|
|
748
|
+
*/
|
|
749
|
+
clientId: string;
|
|
750
|
+
/**
|
|
751
|
+
* Client secret (for confidential clients)
|
|
752
|
+
*/
|
|
753
|
+
clientSecret?: string;
|
|
754
|
+
/**
|
|
755
|
+
* Refresh token
|
|
756
|
+
*/
|
|
757
|
+
refreshToken: string;
|
|
758
|
+
}
|
|
759
|
+
/**
|
|
760
|
+
* Refreshes an access token using a refresh token
|
|
761
|
+
*
|
|
762
|
+
* @param config - Token refresh configuration
|
|
763
|
+
* @returns New token result
|
|
764
|
+
*/
|
|
765
|
+
declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
|
|
738
766
|
/**
|
|
739
767
|
* Configuration for client credentials grant
|
|
740
768
|
*/
|
|
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
|
|
|
2131
2159
|
/**
|
|
2132
2160
|
* Breakdown of expectation types used in a run
|
|
2133
2161
|
*/
|
|
2134
|
-
type ExpectationBreakdown = Record<ExpectationType, number>;
|
|
2162
|
+
type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
|
|
2135
2163
|
|
|
2136
2164
|
/**
|
|
2137
2165
|
* Options for creating an MCP fixture
|
|
@@ -4455,4 +4483,4 @@ interface MCPEvalReporterConfig {
|
|
|
4455
4483
|
includeAutoTracking?: boolean;
|
|
4456
4484
|
}
|
|
4457
4485
|
|
|
4458
|
-
export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type 
ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
4486
|
+
export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type 
ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
package/dist/index.d.ts
CHANGED
|
@@ -735,6 +735,34 @@ interface AuthServerMetadata {
|
|
|
735
735
|
*/
|
|
736
736
|
issuer: string;
|
|
737
737
|
}
|
|
738
|
+
/**
|
|
739
|
+
* Configuration for token refresh
|
|
740
|
+
*/
|
|
741
|
+
interface TokenRefreshConfig {
|
|
742
|
+
/**
|
|
743
|
+
* Authorization server metadata
|
|
744
|
+
*/
|
|
745
|
+
authServer: AuthServerMetadata;
|
|
746
|
+
/**
|
|
747
|
+
* Client ID
|
|
748
|
+
*/
|
|
749
|
+
clientId: string;
|
|
750
|
+
/**
|
|
751
|
+
* Client secret (for confidential clients)
|
|
752
|
+
*/
|
|
753
|
+
clientSecret?: string;
|
|
754
|
+
/**
|
|
755
|
+
* Refresh token
|
|
756
|
+
*/
|
|
757
|
+
refreshToken: string;
|
|
758
|
+
}
|
|
759
|
+
/**
|
|
760
|
+
* Refreshes an access token using a refresh token
|
|
761
|
+
*
|
|
762
|
+
* @param config - Token refresh configuration
|
|
763
|
+
* @returns New token result
|
|
764
|
+
*/
|
|
765
|
+
declare function refreshAccessToken(config: TokenRefreshConfig): Promise<TokenResult>;
|
|
738
766
|
/**
|
|
739
767
|
* Configuration for client credentials grant
|
|
740
768
|
*/
|
|
@@ -2131,7 +2159,7 @@ type ExpectationResultMap = Partial<Record<ExpectationType, EvalExpectationResul
|
|
|
2131
2159
|
/**
|
|
2132
2160
|
* Breakdown of expectation types used in a run
|
|
2133
2161
|
*/
|
|
2134
|
-
type ExpectationBreakdown = Record<ExpectationType, number>;
|
|
2162
|
+
type ExpectationBreakdown = Partial<Record<ExpectationType, number>>;
|
|
2135
2163
|
|
|
2136
2164
|
/**
|
|
2137
2165
|
* Options for creating an MCP fixture
|
|
@@ -4455,4 +4483,4 @@ interface MCPEvalReporterConfig {
|
|
|
4455
4483
|
includeAutoTracking?: boolean;
|
|
4456
4484
|
}
|
|
4457
4485
|
|
|
4458
|
-
export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type ServerComparisonOptions, type 
ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
4486
|
+
export { type AuthType, BUILT_IN_RUBRICS, type BuiltInRubric, type BuiltInSanitizer, type CLIConfig, CLIOAuthClient, type CLIOAuthClientConfig, type CLIOAuthResult, type CLIOutputFormat, type CaseComparisonResult, type ClientCredentialsConfig, type ComparisonOutcome, type ContentBlock, type CreateMCPClientOptions, type CustomJudgeExecutor, type CustomJudgeResult, DiscoveryError, ENV_VAR_NAMES, type EvalCase, type EvalCaseRequest, type EvalCaseResult, EvalCaseSchema, type EvalContext, type EvalDataset, EvalDatasetSchema, type EvalExpectBlock, type EvalExpectationResult, type EvalMode, type EvalRunMetadata, type EvalRunnerOptions, type EvalRunnerResult, type ExpectationBreakdown, type ExpectationResultMap, type ExpectationType, type FieldRemovalSanitizer, type HostType, type HttpMCPConfig, type IterationResult, type Judge, type JudgeConfig, type JudgeExpectConfig, type JudgeMatcherOptions, type JudgeResult, type JudgeValidatorConfig, type LLMProvider, type LLMToolCall, type LoadDatasetOptions, type MCPAuthConfig, type MCPAuthFixtures, type MCPClientCredentialsConfig, type MCPConfig, MCPConfigSchema, type MCPConformanceCheck, type MCPConformanceOptions, type MCPConformanceRaw, type MCPConformanceResult, type MCPConformanceResultData, type MCPEvalData, type MCPEvalHistoricalSummary, type MCPEvalReporterConfig, type MCPEvalRunData, type MCPFixtureApi, type MCPFixtureOptions, type MCPHostCapabilities, type MCPHostConfig, type MCPHostSimulationResult, type MCPHostSimulator, type MCPOAuthConfig, type MCPServerCapabilitiesData, MCP_PROTOCOL_VERSION, type NormalizedToolResponse, type OAuthSetupConfig, type PatternValidatorOptions, PlaywrightOAuthClientProvider, type PlaywrightOAuthClientProviderConfig, type PredicateResult, type ProtectedResourceDiscoveryResult, type ProtectedResourceMetadata, type ProviderKind, type RegexSanitizer, type ResultSource, type RubricSpec, type SaveBaselineOptions, type SchemaRegistry, type SchemaValidatorOptions, type SerializedEvalDataset, type 
ServerComparisonOptions, type ServerComparisonResult, type SizeValidatorOptions, type SnapshotSanitizer, SnapshotSanitizers, type StdioMCPConfig, type StoredClientInfo, type StoredOAuthState, type StoredServerMetadata, type StoredTokens, type TextValidatorOptions, type TokenResult, type ToolCallCountOptions, type ToolCallExpectation, type ToolPredicate, type UsageMetrics, type ValidationResult, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test$1 as test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
package/dist/index.js
CHANGED
|
@@ -4384,7 +4384,7 @@ function escapeHtml(text) {
|
|
|
4384
4384
|
|
|
4385
4385
|
// package.json
|
|
4386
4386
|
var package_default = {
|
|
4387
|
-
version: "1.0.0-beta.8"};
|
|
4387
|
+
version: "1.0.0"};
|
|
4388
4388
|
|
|
4389
4389
|
// src/mcp/clientFactory.ts
|
|
4390
4390
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4603,6 +4603,17 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4603
4603
|
}
|
|
4604
4604
|
async function closeMCPClient(client) {
|
|
4605
4605
|
try {
|
|
4606
|
+
const transport = client.transport;
|
|
4607
|
+
if (transport instanceof StreamableHTTPClientTransport) {
|
|
4608
|
+
try {
|
|
4609
|
+
await transport.terminateSession();
|
|
4610
|
+
} catch (sessionError) {
|
|
4611
|
+
debugClient(
|
|
4612
|
+
"Error terminating session: %s",
|
|
4613
|
+
sessionError instanceof Error ? sessionError.message : String(sessionError)
|
|
4614
|
+
);
|
|
4615
|
+
}
|
|
4616
|
+
}
|
|
4606
4617
|
await client.close();
|
|
4607
4618
|
} catch (error) {
|
|
4608
4619
|
debugClient(
|
|
@@ -4831,11 +4842,13 @@ function validateSchema(response, schema, options = {}) {
|
|
|
4831
4842
|
} catch (error) {
|
|
4832
4843
|
const zodError = error;
|
|
4833
4844
|
const issues = formatZodIssues(zodError);
|
|
4845
|
+
const text = stringifyResponse(response);
|
|
4834
4846
|
return {
|
|
4835
4847
|
pass: false,
|
|
4836
4848
|
message: `Response does not match schema: ${issues}`,
|
|
4837
4849
|
details: {
|
|
4838
|
-
issues: zodError.issues
|
|
4850
|
+
issues: zodError.issues,
|
|
4851
|
+
textPreview: truncateForDisplay2(text)
|
|
4839
4852
|
}
|
|
4840
4853
|
};
|
|
4841
4854
|
}
|
|
@@ -4888,6 +4901,12 @@ function formatZodIssues(error) {
|
|
|
4888
4901
|
});
|
|
4889
4902
|
return issues.join("; ");
|
|
4890
4903
|
}
|
|
4904
|
+
function truncateForDisplay2(str, maxLength = 200) {
|
|
4905
|
+
if (str.length <= maxLength) {
|
|
4906
|
+
return str;
|
|
4907
|
+
}
|
|
4908
|
+
return str.slice(0, maxLength) + "... (truncated)";
|
|
4909
|
+
}
|
|
4891
4910
|
|
|
4892
4911
|
// src/assertions/validators/text.ts
|
|
4893
4912
|
function validateText(response, expected, options = {}) {
|
|
@@ -4914,11 +4933,11 @@ function validateText(response, expected, options = {}) {
|
|
|
4914
4933
|
details: {
|
|
4915
4934
|
missing,
|
|
4916
4935
|
textLength: text.length,
|
|
4917
|
-
textPreview:
|
|
4936
|
+
textPreview: truncateForDisplay3(text)
|
|
4918
4937
|
}
|
|
4919
4938
|
};
|
|
4920
4939
|
}
|
|
4921
|
-
function
|
|
4940
|
+
function truncateForDisplay3(str, maxLength = 200) {
|
|
4922
4941
|
if (str.length <= maxLength) {
|
|
4923
4942
|
return str;
|
|
4924
4943
|
}
|
|
@@ -4950,7 +4969,7 @@ function validatePattern(response, patterns, options = {}) {
|
|
|
4950
4969
|
details: {
|
|
4951
4970
|
unmatched,
|
|
4952
4971
|
textLength: text.length,
|
|
4953
|
-
textPreview:
|
|
4972
|
+
textPreview: truncateForDisplay4(text)
|
|
4954
4973
|
}
|
|
4955
4974
|
};
|
|
4956
4975
|
}
|
|
@@ -4970,7 +4989,7 @@ function patternToString(pattern) {
|
|
|
4970
4989
|
}
|
|
4971
4990
|
return `/${pattern}/`;
|
|
4972
4991
|
}
|
|
4973
|
-
function
|
|
4992
|
+
function truncateForDisplay4(str, maxLength = 200) {
|
|
4974
4993
|
if (str.length <= maxLength) {
|
|
4975
4994
|
return str;
|
|
4976
4995
|
}
|
|
@@ -4993,7 +5012,7 @@ function validateError(response, expected = true) {
|
|
|
4993
5012
|
pass: false,
|
|
4994
5013
|
message: "Expected an error response but got success",
|
|
4995
5014
|
details: {
|
|
4996
|
-
textPreview:
|
|
5015
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
4997
5016
|
}
|
|
4998
5017
|
};
|
|
4999
5018
|
} else {
|
|
@@ -5005,7 +5024,7 @@ function validateError(response, expected = true) {
|
|
|
5005
5024
|
}
|
|
5006
5025
|
return {
|
|
5007
5026
|
pass: false,
|
|
5008
|
-
message: `Expected a success response but got error: "${
|
|
5027
|
+
message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
|
|
5009
5028
|
details: {
|
|
5010
5029
|
errorMessage
|
|
5011
5030
|
}
|
|
@@ -5018,7 +5037,7 @@ function validateError(response, expected = true) {
|
|
|
5018
5037
|
pass: false,
|
|
5019
5038
|
message: `Expected an error containing "${expectedMessages[0]}" but got success`,
|
|
5020
5039
|
details: {
|
|
5021
|
-
textPreview:
|
|
5040
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
5022
5041
|
}
|
|
5023
5042
|
};
|
|
5024
5043
|
}
|
|
@@ -5040,7 +5059,7 @@ function validateError(response, expected = true) {
|
|
|
5040
5059
|
}
|
|
5041
5060
|
};
|
|
5042
5061
|
}
|
|
5043
|
-
function
|
|
5062
|
+
function truncateForDisplay5(str, maxLength = 200) {
|
|
5044
5063
|
if (str.length <= maxLength) {
|
|
5045
5064
|
return str;
|
|
5046
5065
|
}
|
|
@@ -5158,6 +5177,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5158
5177
|
return {
|
|
5159
5178
|
pass: false,
|
|
5160
5179
|
message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
|
|
5180
|
+
details: {
|
|
5181
|
+
actual: actual.map((c) => c.name),
|
|
5182
|
+
expected: expected.name
|
|
5183
|
+
},
|
|
5161
5184
|
metrics
|
|
5162
5185
|
};
|
|
5163
5186
|
}
|
|
@@ -5174,6 +5197,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5174
5197
|
return {
|
|
5175
5198
|
pass: false,
|
|
5176
5199
|
message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
|
|
5200
|
+
details: {
|
|
5201
|
+
actual: actual.map((c) => c.name),
|
|
5202
|
+
expected: expected.name
|
|
5203
|
+
},
|
|
5177
5204
|
metrics
|
|
5178
5205
|
};
|
|
5179
5206
|
}
|
|
@@ -5186,6 +5213,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5186
5213
|
return {
|
|
5187
5214
|
pass: false,
|
|
5188
5215
|
message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
|
|
5216
|
+
details: {
|
|
5217
|
+
actual: actual.map((c) => c.name),
|
|
5218
|
+
unexpected: unexpected.map((c) => c.name)
|
|
5219
|
+
},
|
|
5189
5220
|
metrics
|
|
5190
5221
|
};
|
|
5191
5222
|
}
|
|
@@ -5204,19 +5235,22 @@ function validateToolCallCount(response, options) {
|
|
|
5204
5235
|
if (exact !== void 0 && count !== exact) {
|
|
5205
5236
|
return {
|
|
5206
5237
|
pass: false,
|
|
5207
|
-
message: `Expected exactly ${exact} tool call(s), but got ${count}`
|
|
5238
|
+
message: `Expected exactly ${exact} tool call(s), but got ${count}`,
|
|
5239
|
+
details: { actual: count, expected: exact }
|
|
5208
5240
|
};
|
|
5209
5241
|
}
|
|
5210
5242
|
if (min !== void 0 && count < min) {
|
|
5211
5243
|
return {
|
|
5212
5244
|
pass: false,
|
|
5213
|
-
message: `Expected at least ${min} tool call(s), but got ${count}`
|
|
5245
|
+
message: `Expected at least ${min} tool call(s), but got ${count}`,
|
|
5246
|
+
details: { actual: count, min }
|
|
5214
5247
|
};
|
|
5215
5248
|
}
|
|
5216
5249
|
if (max !== void 0 && count > max) {
|
|
5217
5250
|
return {
|
|
5218
5251
|
pass: false,
|
|
5219
|
-
message: `Expected at most ${max} tool call(s), but got ${count}`
|
|
5252
|
+
message: `Expected at most ${max} tool call(s), but got ${count}`,
|
|
5253
|
+
details: { actual: count, max }
|
|
5220
5254
|
};
|
|
5221
5255
|
}
|
|
5222
5256
|
return {
|
|
@@ -5730,7 +5764,9 @@ function createJudge(config = {}) {
|
|
|
5730
5764
|
case "google":
|
|
5731
5765
|
return createGoogleJudge(config);
|
|
5732
5766
|
default:
|
|
5733
|
-
throw new Error(
|
|
5767
|
+
throw new Error(
|
|
5768
|
+
`Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
|
|
5769
|
+
);
|
|
5734
5770
|
}
|
|
5735
5771
|
}
|
|
5736
5772
|
|
|
@@ -6055,12 +6091,19 @@ function toMatchToolResponse(received, expected) {
|
|
|
6055
6091
|
// src/assertions/matchers/toMatchToolSchema.ts
|
|
6056
6092
|
function toMatchToolSchema(received, schema, options = {}) {
|
|
6057
6093
|
const result = validateSchema(received, schema, options);
|
|
6094
|
+
const preview = result.details?.textPreview;
|
|
6058
6095
|
return {
|
|
6059
6096
|
pass: result.pass,
|
|
6060
6097
|
message: () => {
|
|
6061
6098
|
if (this.isNot) {
|
|
6062
6099
|
return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
|
|
6063
6100
|
}
|
|
6101
|
+
if (!result.pass && preview) {
|
|
6102
|
+
return `${result.message}
|
|
6103
|
+
|
|
6104
|
+
Actual response (truncated):
|
|
6105
|
+
${preview}`;
|
|
6106
|
+
}
|
|
6064
6107
|
return result.message;
|
|
6065
6108
|
}
|
|
6066
6109
|
};
|
|
@@ -6069,6 +6112,7 @@ function toMatchToolSchema(received, schema, options = {}) {
|
|
|
6069
6112
|
// src/assertions/matchers/toContainToolText.ts
|
|
6070
6113
|
function toContainToolText(received, expected, options = {}) {
|
|
6071
6114
|
const result = validateText(received, expected, options);
|
|
6115
|
+
const preview = result.details?.textPreview;
|
|
6072
6116
|
return {
|
|
6073
6117
|
pass: result.pass,
|
|
6074
6118
|
message: () => {
|
|
@@ -6076,6 +6120,12 @@ function toContainToolText(received, expected, options = {}) {
|
|
|
6076
6120
|
const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
|
|
6077
6121
|
return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
|
|
6078
6122
|
}
|
|
6123
|
+
if (!result.pass && preview) {
|
|
6124
|
+
return `${result.message}
|
|
6125
|
+
|
|
6126
|
+
Actual response (truncated):
|
|
6127
|
+
${preview}`;
|
|
6128
|
+
}
|
|
6079
6129
|
return result.message;
|
|
6080
6130
|
}
|
|
6081
6131
|
};
|
|
@@ -6084,12 +6134,19 @@ function toContainToolText(received, expected, options = {}) {
|
|
|
6084
6134
|
// src/assertions/matchers/toMatchToolPattern.ts
|
|
6085
6135
|
function toMatchToolPattern(received, patterns, options = {}) {
|
|
6086
6136
|
const result = validatePattern(received, patterns, options);
|
|
6137
|
+
const preview = result.details?.textPreview;
|
|
6087
6138
|
return {
|
|
6088
6139
|
pass: result.pass,
|
|
6089
6140
|
message: () => {
|
|
6090
6141
|
if (this.isNot) {
|
|
6091
6142
|
return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
|
|
6092
6143
|
}
|
|
6144
|
+
if (!result.pass && preview) {
|
|
6145
|
+
return `${result.message}
|
|
6146
|
+
|
|
6147
|
+
Actual response (truncated):
|
|
6148
|
+
${preview}`;
|
|
6149
|
+
}
|
|
6093
6150
|
return result.message;
|
|
6094
6151
|
}
|
|
6095
6152
|
};
|
|
@@ -7498,7 +7555,7 @@ function isInfrastructureError(err) {
|
|
|
7498
7555
|
} else {
|
|
7499
7556
|
return false;
|
|
7500
7557
|
}
|
|
7501
|
-
return name15 === "
|
|
7558
|
+
return name15?.toLowerCase() === "aborterror" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
|
|
7502
7559
|
msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
7503
7560
|
}
|
|
7504
7561
|
async function runEvalCase(evalCase, context, options = {}) {
|
|
@@ -7969,6 +8026,6 @@ function formatCapabilities(capabilities) {
|
|
|
7969
8026
|
return parts.length > 0 ? parts.join(", ") : "none declared";
|
|
7970
8027
|
}
|
|
7971
8028
|
|
|
7972
|
-
export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
8029
|
+
export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
|
|
7973
8030
|
//# sourceMappingURL=index.js.map
|
|
7974
8031
|
//# sourceMappingURL=index.js.map
|