@gleanwork/mcp-server-tester 1.0.0-beta.2 → 1.0.0-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -12
- package/dist/cli/index.js +5 -2
- package/dist/fixtures/mcp.d.ts +8 -0
- package/dist/fixtures/mcp.js +5 -2
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +6 -5
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +9 -7
- package/dist/index.d.ts +9 -7
- package/dist/index.js +6 -5
- package/dist/index.js.map +1 -1
- package/dist/reporters/ui-dist/app.js +4 -4
- package/package.json +3 -3
- package/src/reporters/ui-dist/app.js +4 -4
package/dist/index.d.cts
CHANGED
|
@@ -2486,6 +2486,14 @@ declare function toMatchToolPattern(this: {
|
|
|
2486
2486
|
/**
|
|
2487
2487
|
* Creates the toMatchToolSnapshot matcher function
|
|
2488
2488
|
*
|
|
2489
|
+
* @remarks
|
|
2490
|
+
* **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
|
|
2491
|
+
* internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
|
|
2492
|
+
* Calling it outside a Playwright test will throw a cryptic context error.
|
|
2493
|
+
*
|
|
2494
|
+
* To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
|
|
2495
|
+
* function directly.
|
|
2496
|
+
*
|
|
2489
2497
|
* Note: This is an async matcher that uses Playwright's snapshot testing.
|
|
2490
2498
|
*/
|
|
2491
2499
|
declare function toMatchToolSnapshot(this: {
|
|
@@ -2896,7 +2904,7 @@ interface EvalCase {
|
|
|
2896
2904
|
metadata?: Record<string, unknown>;
|
|
2897
2905
|
/**
|
|
2898
2906
|
* Number of times to run this case and compute an accuracy score.
|
|
2899
|
-
* When > 1, `EvalCaseResult.accuracy` is populated and `pass` is determined
|
|
2907
|
+
* When > 1, `EvalCaseResult.assertionPassRate` is populated and `pass` is determined
|
|
2900
2908
|
* by `accuracyThreshold` rather than a single run.
|
|
2901
2909
|
* @default 1
|
|
2902
2910
|
*/
|
|
@@ -4282,12 +4290,6 @@ interface EvalCaseResult {
|
|
|
4282
4290
|
* Only present when the case was run with `iterations > 1`.
|
|
4283
4291
|
*/
|
|
4284
4292
|
infrastructureErrorRate?: number;
|
|
4285
|
-
/**
|
|
4286
|
-
* Accuracy score (0–1) across all iterations.
|
|
4287
|
-
* Alias for `assertionPassRate`. Only present when the case was run with `iterations > 1`.
|
|
4288
|
-
* @deprecated Use `assertionPassRate` for clarity; this field is kept for backward compatibility.
|
|
4289
|
-
*/
|
|
4290
|
-
accuracy?: number;
|
|
4291
4293
|
/**
|
|
4292
4294
|
* Per-iteration pass/fail breakdown.
|
|
4293
4295
|
* Only present when the case was run with `iterations > 1`.
|
package/dist/index.d.ts
CHANGED
|
@@ -2486,6 +2486,14 @@ declare function toMatchToolPattern(this: {
|
|
|
2486
2486
|
/**
|
|
2487
2487
|
* Creates the toMatchToolSnapshot matcher function
|
|
2488
2488
|
*
|
|
2489
|
+
* @remarks
|
|
2490
|
+
* **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
|
|
2491
|
+
* internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
|
|
2492
|
+
* Calling it outside a Playwright test will throw a cryptic context error.
|
|
2493
|
+
*
|
|
2494
|
+
* To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
|
|
2495
|
+
* function directly.
|
|
2496
|
+
*
|
|
2489
2497
|
* Note: This is an async matcher that uses Playwright's snapshot testing.
|
|
2490
2498
|
*/
|
|
2491
2499
|
declare function toMatchToolSnapshot(this: {
|
|
@@ -2896,7 +2904,7 @@ interface EvalCase {
|
|
|
2896
2904
|
metadata?: Record<string, unknown>;
|
|
2897
2905
|
/**
|
|
2898
2906
|
* Number of times to run this case and compute an accuracy score.
|
|
2899
|
-
* When > 1, `EvalCaseResult.accuracy` is populated and `pass` is determined
|
|
2907
|
+
* When > 1, `EvalCaseResult.assertionPassRate` is populated and `pass` is determined
|
|
2900
2908
|
* by `accuracyThreshold` rather than a single run.
|
|
2901
2909
|
* @default 1
|
|
2902
2910
|
*/
|
|
@@ -4282,12 +4290,6 @@ interface EvalCaseResult {
|
|
|
4282
4290
|
* Only present when the case was run with `iterations > 1`.
|
|
4283
4291
|
*/
|
|
4284
4292
|
infrastructureErrorRate?: number;
|
|
4285
|
-
/**
|
|
4286
|
-
* Accuracy score (0–1) across all iterations.
|
|
4287
|
-
* Alias for `assertionPassRate`. Only present when the case was run with `iterations > 1`.
|
|
4288
|
-
* @deprecated Use `assertionPassRate` for clarity; this field is kept for backward compatibility.
|
|
4289
|
-
*/
|
|
4290
|
-
accuracy?: number;
|
|
4291
4293
|
/**
|
|
4292
4294
|
* Per-iteration pass/fail breakdown.
|
|
4293
4295
|
* Only present when the case was run with `iterations > 1`.
|
package/dist/index.js
CHANGED
|
@@ -4380,7 +4380,7 @@ function escapeHtml(text) {
|
|
|
4380
4380
|
|
|
4381
4381
|
// package.json
|
|
4382
4382
|
var package_default = {
|
|
4383
|
-
version: "1.0.0-beta.2"};
|
|
4383
|
+
version: "1.0.0-beta.3"};
|
|
4384
4384
|
|
|
4385
4385
|
// src/mcp/clientFactory.ts
|
|
4386
4386
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4471,7 +4471,10 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4471
4471
|
validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
|
|
4472
4472
|
);
|
|
4473
4473
|
} else if (isHttpConfig(validatedConfig)) {
|
|
4474
|
-
const headers = { ...validatedConfig.headers };
|
|
4474
|
+
const headers = {
|
|
4475
|
+
"User-Agent": `@gleanwork/mcp-server-tester/${package_default.version}`,
|
|
4476
|
+
...validatedConfig.headers
|
|
4477
|
+
};
|
|
4475
4478
|
if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
|
|
4476
4479
|
const ccConfig = validatedConfig.auth.clientCredentials;
|
|
4477
4480
|
const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
|
|
@@ -6924,7 +6927,6 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
6924
6927
|
const passCount = assertionResults.filter((r) => r.pass).length;
|
|
6925
6928
|
const assertionPassRate = assertionResults.length > 0 ? passCount / assertionResults.length : 0;
|
|
6926
6929
|
const infrastructureErrorRate = infraErrors.length / iterations;
|
|
6927
|
-
const accuracy = assertionPassRate;
|
|
6928
6930
|
const threshold = evalCase.accuracyThreshold ?? 1;
|
|
6929
6931
|
const baseResult = lastResult ?? {
|
|
6930
6932
|
id: evalCase.id,
|
|
@@ -6941,10 +6943,9 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
6941
6943
|
};
|
|
6942
6944
|
return {
|
|
6943
6945
|
...baseResult,
|
|
6944
|
-
pass: accuracy >= threshold,
|
|
6946
|
+
pass: assertionPassRate >= threshold,
|
|
6945
6947
|
assertionPassRate,
|
|
6946
6948
|
infrastructureErrorRate,
|
|
6947
|
-
accuracy,
|
|
6948
6949
|
iterationResults,
|
|
6949
6950
|
infrastructureErrorCount: infraErrors.length,
|
|
6950
6951
|
durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0)
|