@gleanwork/mcp-server-tester 1.0.0-beta.2 → 1.0.0-beta.3

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -2486,6 +2486,14 @@ declare function toMatchToolPattern(this: {
2486
2486
  /**
2487
2487
  * Creates the toMatchToolSnapshot matcher function
2488
2488
  *
2489
+ * @remarks
2490
+ * **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
2491
+ * internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
2492
+ * Calling it outside a Playwright test will throw a cryptic context error.
2493
+ *
2494
+ * To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
2495
+ * function directly.
2496
+ *
2489
2497
  * Note: This is an async matcher that uses Playwright's snapshot testing.
2490
2498
  */
2491
2499
  declare function toMatchToolSnapshot(this: {
@@ -2896,7 +2904,7 @@ interface EvalCase {
2896
2904
  metadata?: Record<string, unknown>;
2897
2905
  /**
2898
2906
  * Number of times to run this case and compute an accuracy score.
2899
- * When > 1, `EvalCaseResult.accuracy` is populated and `pass` is determined
2907
+ * When > 1, `EvalCaseResult.assertionPassRate` is populated and `pass` is determined
2900
2908
  * by `accuracyThreshold` rather than a single run.
2901
2909
  * @default 1
2902
2910
  */
@@ -4282,12 +4290,6 @@ interface EvalCaseResult {
4282
4290
  * Only present when the case was run with `iterations > 1`.
4283
4291
  */
4284
4292
  infrastructureErrorRate?: number;
4285
- /**
4286
- * Accuracy score (0–1) across all iterations.
4287
- * Alias for `assertionPassRate`. Only present when the case was run with `iterations > 1`.
4288
- * @deprecated Use `assertionPassRate` for clarity; this field is kept for backward compatibility.
4289
- */
4290
- accuracy?: number;
4291
4293
  /**
4292
4294
  * Per-iteration pass/fail breakdown.
4293
4295
  * Only present when the case was run with `iterations > 1`.
package/dist/index.d.ts CHANGED
@@ -2486,6 +2486,14 @@ declare function toMatchToolPattern(this: {
2486
2486
  /**
2487
2487
  * Creates the toMatchToolSnapshot matcher function
2488
2488
  *
2489
+ * @remarks
2490
+ * **Requires Playwright test context.** This matcher calls `expect(content).toMatchSnapshot()`
2491
+ * internally, which only works inside a Playwright test (i.e., when `testInfo` is available).
2492
+ * Calling it outside a Playwright test will throw a cryptic context error.
2493
+ *
2494
+ * To test sanitizer logic without a Playwright context, use the exported `applySanitizers`
2495
+ * function directly.
2496
+ *
2489
2497
  * Note: This is an async matcher that uses Playwright's snapshot testing.
2490
2498
  */
2491
2499
  declare function toMatchToolSnapshot(this: {
@@ -2896,7 +2904,7 @@ interface EvalCase {
2896
2904
  metadata?: Record<string, unknown>;
2897
2905
  /**
2898
2906
  * Number of times to run this case and compute an accuracy score.
2899
- * When > 1, `EvalCaseResult.accuracy` is populated and `pass` is determined
2907
+ * When > 1, `EvalCaseResult.assertionPassRate` is populated and `pass` is determined
2900
2908
  * by `accuracyThreshold` rather than a single run.
2901
2909
  * @default 1
2902
2910
  */
@@ -4282,12 +4290,6 @@ interface EvalCaseResult {
4282
4290
  * Only present when the case was run with `iterations > 1`.
4283
4291
  */
4284
4292
  infrastructureErrorRate?: number;
4285
- /**
4286
- * Accuracy score (0–1) across all iterations.
4287
- * Alias for `assertionPassRate`. Only present when the case was run with `iterations > 1`.
4288
- * @deprecated Use `assertionPassRate` for clarity; this field is kept for backward compatibility.
4289
- */
4290
- accuracy?: number;
4291
4293
  /**
4292
4294
  * Per-iteration pass/fail breakdown.
4293
4295
  * Only present when the case was run with `iterations > 1`.
package/dist/index.js CHANGED
@@ -4380,7 +4380,7 @@ function escapeHtml(text) {
4380
4380
 
4381
4381
  // package.json
4382
4382
  var package_default = {
4383
- version: "1.0.0-beta.2"};
4383
+ version: "1.0.0-beta.3"};
4384
4384
 
4385
4385
  // src/mcp/clientFactory.ts
4386
4386
  function getRetryAfterDelayMs(err) {
@@ -4471,7 +4471,10 @@ async function createMCPClientForConfig(config, options) {
4471
4471
  validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
4472
4472
  );
4473
4473
  } else if (isHttpConfig(validatedConfig)) {
4474
- const headers = { ...validatedConfig.headers };
4474
+ const headers = {
4475
+ "User-Agent": `@gleanwork/mcp-server-tester/${package_default.version}`,
4476
+ ...validatedConfig.headers
4477
+ };
4475
4478
  if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
4476
4479
  const ccConfig = validatedConfig.auth.clientCredentials;
4477
4480
  const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
@@ -6924,7 +6927,6 @@ async function runEvalCase(evalCase, context, options = {}) {
6924
6927
  const passCount = assertionResults.filter((r) => r.pass).length;
6925
6928
  const assertionPassRate = assertionResults.length > 0 ? passCount / assertionResults.length : 0;
6926
6929
  const infrastructureErrorRate = infraErrors.length / iterations;
6927
- const accuracy = assertionPassRate;
6928
6930
  const threshold = evalCase.accuracyThreshold ?? 1;
6929
6931
  const baseResult = lastResult ?? {
6930
6932
  id: evalCase.id,
@@ -6941,10 +6943,9 @@ async function runEvalCase(evalCase, context, options = {}) {
6941
6943
  };
6942
6944
  return {
6943
6945
  ...baseResult,
6944
- pass: accuracy >= threshold,
6946
+ pass: assertionPassRate >= threshold,
6945
6947
  assertionPassRate,
6946
6948
  infrastructureErrorRate,
6947
- accuracy,
6948
6949
  iterationResults,
6949
6950
  infrastructureErrorCount: infraErrors.length,
6950
6951
  durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0)