@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -1
- package/dist/cli/index.js +12 -1
- package/dist/fixtures/mcp.js +71 -14
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +73 -15
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +30 -2
- package/dist/index.d.ts +30 -2
- package/dist/index.js +73 -16
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -4411,7 +4411,7 @@ function escapeHtml(text) {
|
|
|
4411
4411
|
|
|
4412
4412
|
// package.json
|
|
4413
4413
|
var package_default = {
|
|
4414
|
-
version: "1.0.0
|
|
4414
|
+
version: "1.0.0"};
|
|
4415
4415
|
|
|
4416
4416
|
// src/mcp/clientFactory.ts
|
|
4417
4417
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4630,6 +4630,17 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4630
4630
|
}
|
|
4631
4631
|
async function closeMCPClient(client) {
|
|
4632
4632
|
try {
|
|
4633
|
+
const transport = client.transport;
|
|
4634
|
+
if (transport instanceof streamableHttp_js.StreamableHTTPClientTransport) {
|
|
4635
|
+
try {
|
|
4636
|
+
await transport.terminateSession();
|
|
4637
|
+
} catch (sessionError) {
|
|
4638
|
+
debugClient(
|
|
4639
|
+
"Error terminating session: %s",
|
|
4640
|
+
sessionError instanceof Error ? sessionError.message : String(sessionError)
|
|
4641
|
+
);
|
|
4642
|
+
}
|
|
4643
|
+
}
|
|
4633
4644
|
await client.close();
|
|
4634
4645
|
} catch (error) {
|
|
4635
4646
|
debugClient(
|
|
@@ -4858,11 +4869,13 @@ function validateSchema(response, schema, options = {}) {
|
|
|
4858
4869
|
} catch (error) {
|
|
4859
4870
|
const zodError = error;
|
|
4860
4871
|
const issues = formatZodIssues(zodError);
|
|
4872
|
+
const text = stringifyResponse(response);
|
|
4861
4873
|
return {
|
|
4862
4874
|
pass: false,
|
|
4863
4875
|
message: `Response does not match schema: ${issues}`,
|
|
4864
4876
|
details: {
|
|
4865
|
-
issues: zodError.issues
|
|
4877
|
+
issues: zodError.issues,
|
|
4878
|
+
textPreview: truncateForDisplay2(text)
|
|
4866
4879
|
}
|
|
4867
4880
|
};
|
|
4868
4881
|
}
|
|
@@ -4915,6 +4928,12 @@ function formatZodIssues(error) {
|
|
|
4915
4928
|
});
|
|
4916
4929
|
return issues.join("; ");
|
|
4917
4930
|
}
|
|
4931
|
+
function truncateForDisplay2(str, maxLength = 200) {
|
|
4932
|
+
if (str.length <= maxLength) {
|
|
4933
|
+
return str;
|
|
4934
|
+
}
|
|
4935
|
+
return str.slice(0, maxLength) + "... (truncated)";
|
|
4936
|
+
}
|
|
4918
4937
|
|
|
4919
4938
|
// src/assertions/validators/text.ts
|
|
4920
4939
|
function validateText(response, expected, options = {}) {
|
|
@@ -4941,11 +4960,11 @@ function validateText(response, expected, options = {}) {
|
|
|
4941
4960
|
details: {
|
|
4942
4961
|
missing,
|
|
4943
4962
|
textLength: text.length,
|
|
4944
|
-
textPreview:
|
|
4963
|
+
textPreview: truncateForDisplay3(text)
|
|
4945
4964
|
}
|
|
4946
4965
|
};
|
|
4947
4966
|
}
|
|
4948
|
-
function
|
|
4967
|
+
function truncateForDisplay3(str, maxLength = 200) {
|
|
4949
4968
|
if (str.length <= maxLength) {
|
|
4950
4969
|
return str;
|
|
4951
4970
|
}
|
|
@@ -4977,7 +4996,7 @@ function validatePattern(response, patterns, options = {}) {
|
|
|
4977
4996
|
details: {
|
|
4978
4997
|
unmatched,
|
|
4979
4998
|
textLength: text.length,
|
|
4980
|
-
textPreview:
|
|
4999
|
+
textPreview: truncateForDisplay4(text)
|
|
4981
5000
|
}
|
|
4982
5001
|
};
|
|
4983
5002
|
}
|
|
@@ -4997,7 +5016,7 @@ function patternToString(pattern) {
|
|
|
4997
5016
|
}
|
|
4998
5017
|
return `/${pattern}/`;
|
|
4999
5018
|
}
|
|
5000
|
-
function
|
|
5019
|
+
function truncateForDisplay4(str, maxLength = 200) {
|
|
5001
5020
|
if (str.length <= maxLength) {
|
|
5002
5021
|
return str;
|
|
5003
5022
|
}
|
|
@@ -5020,7 +5039,7 @@ function validateError(response, expected = true) {
|
|
|
5020
5039
|
pass: false,
|
|
5021
5040
|
message: "Expected an error response but got success",
|
|
5022
5041
|
details: {
|
|
5023
|
-
textPreview:
|
|
5042
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
5024
5043
|
}
|
|
5025
5044
|
};
|
|
5026
5045
|
} else {
|
|
@@ -5032,7 +5051,7 @@ function validateError(response, expected = true) {
|
|
|
5032
5051
|
}
|
|
5033
5052
|
return {
|
|
5034
5053
|
pass: false,
|
|
5035
|
-
message: `Expected a success response but got error: "${
|
|
5054
|
+
message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
|
|
5036
5055
|
details: {
|
|
5037
5056
|
errorMessage
|
|
5038
5057
|
}
|
|
@@ -5045,7 +5064,7 @@ function validateError(response, expected = true) {
|
|
|
5045
5064
|
pass: false,
|
|
5046
5065
|
message: `Expected an error containing "${expectedMessages[0]}" but got success`,
|
|
5047
5066
|
details: {
|
|
5048
|
-
textPreview:
|
|
5067
|
+
textPreview: truncateForDisplay5(extractText2(response))
|
|
5049
5068
|
}
|
|
5050
5069
|
};
|
|
5051
5070
|
}
|
|
@@ -5067,7 +5086,7 @@ function validateError(response, expected = true) {
|
|
|
5067
5086
|
}
|
|
5068
5087
|
};
|
|
5069
5088
|
}
|
|
5070
|
-
function
|
|
5089
|
+
function truncateForDisplay5(str, maxLength = 200) {
|
|
5071
5090
|
if (str.length <= maxLength) {
|
|
5072
5091
|
return str;
|
|
5073
5092
|
}
|
|
@@ -5185,6 +5204,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5185
5204
|
return {
|
|
5186
5205
|
pass: false,
|
|
5187
5206
|
message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
|
|
5207
|
+
details: {
|
|
5208
|
+
actual: actual.map((c) => c.name),
|
|
5209
|
+
expected: expected.name
|
|
5210
|
+
},
|
|
5188
5211
|
metrics
|
|
5189
5212
|
};
|
|
5190
5213
|
}
|
|
@@ -5201,6 +5224,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5201
5224
|
return {
|
|
5202
5225
|
pass: false,
|
|
5203
5226
|
message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
|
|
5227
|
+
details: {
|
|
5228
|
+
actual: actual.map((c) => c.name),
|
|
5229
|
+
expected: expected.name
|
|
5230
|
+
},
|
|
5204
5231
|
metrics
|
|
5205
5232
|
};
|
|
5206
5233
|
}
|
|
@@ -5213,6 +5240,10 @@ function validateToolCalls(response, expectation) {
|
|
|
5213
5240
|
return {
|
|
5214
5241
|
pass: false,
|
|
5215
5242
|
message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
|
|
5243
|
+
details: {
|
|
5244
|
+
actual: actual.map((c) => c.name),
|
|
5245
|
+
unexpected: unexpected.map((c) => c.name)
|
|
5246
|
+
},
|
|
5216
5247
|
metrics
|
|
5217
5248
|
};
|
|
5218
5249
|
}
|
|
@@ -5231,19 +5262,22 @@ function validateToolCallCount(response, options) {
|
|
|
5231
5262
|
if (exact !== void 0 && count !== exact) {
|
|
5232
5263
|
return {
|
|
5233
5264
|
pass: false,
|
|
5234
|
-
message: `Expected exactly ${exact} tool call(s), but got ${count}
|
|
5265
|
+
message: `Expected exactly ${exact} tool call(s), but got ${count}`,
|
|
5266
|
+
details: { actual: count, expected: exact }
|
|
5235
5267
|
};
|
|
5236
5268
|
}
|
|
5237
5269
|
if (min !== void 0 && count < min) {
|
|
5238
5270
|
return {
|
|
5239
5271
|
pass: false,
|
|
5240
|
-
message: `Expected at least ${min} tool call(s), but got ${count}
|
|
5272
|
+
message: `Expected at least ${min} tool call(s), but got ${count}`,
|
|
5273
|
+
details: { actual: count, min }
|
|
5241
5274
|
};
|
|
5242
5275
|
}
|
|
5243
5276
|
if (max !== void 0 && count > max) {
|
|
5244
5277
|
return {
|
|
5245
5278
|
pass: false,
|
|
5246
|
-
message: `Expected at most ${max} tool call(s), but got ${count}
|
|
5279
|
+
message: `Expected at most ${max} tool call(s), but got ${count}`,
|
|
5280
|
+
details: { actual: count, max }
|
|
5247
5281
|
};
|
|
5248
5282
|
}
|
|
5249
5283
|
return {
|
|
@@ -5757,7 +5791,9 @@ function createJudge(config = {}) {
|
|
|
5757
5791
|
case "google":
|
|
5758
5792
|
return createGoogleJudge(config);
|
|
5759
5793
|
default:
|
|
5760
|
-
throw new Error(
|
|
5794
|
+
throw new Error(
|
|
5795
|
+
`Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
|
|
5796
|
+
);
|
|
5761
5797
|
}
|
|
5762
5798
|
}
|
|
5763
5799
|
|
|
@@ -6082,12 +6118,19 @@ function toMatchToolResponse(received, expected) {
|
|
|
6082
6118
|
// src/assertions/matchers/toMatchToolSchema.ts
|
|
6083
6119
|
function toMatchToolSchema(received, schema, options = {}) {
|
|
6084
6120
|
const result = validateSchema(received, schema, options);
|
|
6121
|
+
const preview = result.details?.textPreview;
|
|
6085
6122
|
return {
|
|
6086
6123
|
pass: result.pass,
|
|
6087
6124
|
message: () => {
|
|
6088
6125
|
if (this.isNot) {
|
|
6089
6126
|
return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
|
|
6090
6127
|
}
|
|
6128
|
+
if (!result.pass && preview) {
|
|
6129
|
+
return `${result.message}
|
|
6130
|
+
|
|
6131
|
+
Actual response (truncated):
|
|
6132
|
+
${preview}`;
|
|
6133
|
+
}
|
|
6091
6134
|
return result.message;
|
|
6092
6135
|
}
|
|
6093
6136
|
};
|
|
@@ -6096,6 +6139,7 @@ function toMatchToolSchema(received, schema, options = {}) {
|
|
|
6096
6139
|
// src/assertions/matchers/toContainToolText.ts
|
|
6097
6140
|
function toContainToolText(received, expected, options = {}) {
|
|
6098
6141
|
const result = validateText(received, expected, options);
|
|
6142
|
+
const preview = result.details?.textPreview;
|
|
6099
6143
|
return {
|
|
6100
6144
|
pass: result.pass,
|
|
6101
6145
|
message: () => {
|
|
@@ -6103,6 +6147,12 @@ function toContainToolText(received, expected, options = {}) {
|
|
|
6103
6147
|
const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
|
|
6104
6148
|
return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
|
|
6105
6149
|
}
|
|
6150
|
+
if (!result.pass && preview) {
|
|
6151
|
+
return `${result.message}
|
|
6152
|
+
|
|
6153
|
+
Actual response (truncated):
|
|
6154
|
+
${preview}`;
|
|
6155
|
+
}
|
|
6106
6156
|
return result.message;
|
|
6107
6157
|
}
|
|
6108
6158
|
};
|
|
@@ -6111,12 +6161,19 @@ function toContainToolText(received, expected, options = {}) {
|
|
|
6111
6161
|
// src/assertions/matchers/toMatchToolPattern.ts
|
|
6112
6162
|
function toMatchToolPattern(received, patterns, options = {}) {
|
|
6113
6163
|
const result = validatePattern(received, patterns, options);
|
|
6164
|
+
const preview = result.details?.textPreview;
|
|
6114
6165
|
return {
|
|
6115
6166
|
pass: result.pass,
|
|
6116
6167
|
message: () => {
|
|
6117
6168
|
if (this.isNot) {
|
|
6118
6169
|
return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
|
|
6119
6170
|
}
|
|
6171
|
+
if (!result.pass && preview) {
|
|
6172
|
+
return `${result.message}
|
|
6173
|
+
|
|
6174
|
+
Actual response (truncated):
|
|
6175
|
+
${preview}`;
|
|
6176
|
+
}
|
|
6120
6177
|
return result.message;
|
|
6121
6178
|
}
|
|
6122
6179
|
};
|
|
@@ -7525,7 +7582,7 @@ function isInfrastructureError(err) {
|
|
|
7525
7582
|
} else {
|
|
7526
7583
|
return false;
|
|
7527
7584
|
}
|
|
7528
|
-
return name15 === "
|
|
7585
|
+
return name15?.toLowerCase() === "aborterror" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
|
|
7529
7586
|
msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
7530
7587
|
}
|
|
7531
7588
|
async function runEvalCase(evalCase, context, options = {}) {
|
|
@@ -8037,6 +8094,7 @@ exports.normalizeWhitespace = normalizeWhitespace;
|
|
|
8037
8094
|
exports.performClientCredentialsFlow = performClientCredentialsFlow;
|
|
8038
8095
|
exports.performOAuthSetup = performOAuthSetup;
|
|
8039
8096
|
exports.performOAuthSetupIfNeeded = performOAuthSetupIfNeeded;
|
|
8097
|
+
exports.refreshAccessToken = refreshAccessToken;
|
|
8040
8098
|
exports.registerJudge = registerJudge;
|
|
8041
8099
|
exports.resolveRubric = resolveRubric;
|
|
8042
8100
|
exports.runConformanceChecks = runConformanceChecks;
|