@gleanwork/mcp-server-tester 1.0.0-beta.5 → 1.0.0-beta.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -4407,7 +4407,7 @@ function escapeHtml(text) {
4407
4407
 
4408
4408
  // package.json
4409
4409
  var package_default = {
4410
- version: "1.0.0-beta.5"};
4410
+ version: "1.0.0-beta.6"};
4411
4411
 
4412
4412
  // src/mcp/clientFactory.ts
4413
4413
  function getRetryAfterDelayMs(err) {
@@ -6669,9 +6669,16 @@ function getMissingDependencyMessage(provider) {
6669
6669
  const pkg = packageMap[provider];
6670
6670
  return pkg ? `${String(provider)} provider requires: ${pkg}` : `Unknown provider: ${String(provider)}`;
6671
6671
  }
6672
- async function saveBaseline(result, filePath) {
6672
+ async function saveBaseline(result, filePath, options = {}) {
6673
+ const { omitResponses = true } = options;
6674
+ const toSave = omitResponses ? {
6675
+ ...result,
6676
+ caseResults: result.caseResults.map(
6677
+ ({ response: _response, ...rest }) => rest
6678
+ )
6679
+ } : result;
6673
6680
  await fs$1.mkdir(path2.dirname(filePath), { recursive: true });
6674
- await fs$1.writeFile(filePath, JSON.stringify(result, null, 2), "utf8");
6681
+ await fs$1.writeFile(filePath, JSON.stringify(toSave, null, 2), "utf8");
6675
6682
  }
6676
6683
  async function loadBaseline(filePath) {
6677
6684
  const raw = await fs$1.readFile(filePath, "utf8");
@@ -6939,7 +6946,8 @@ function isInfrastructureError(err) {
6939
6946
  } else {
6940
6947
  return false;
6941
6948
  }
6942
- return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
6949
+ return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow LLM couldn't run, not a tool discoverability failure
6950
+ msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
6943
6951
  }
6944
6952
  async function runEvalCase(evalCase, context, options = {}) {
6945
6953
  const iterations = evalCase.iterations ?? 1;
@@ -6957,7 +6965,8 @@ async function runEvalCase(evalCase, context, options = {}) {
6957
6965
  pass: result.pass,
6958
6966
  durationMs: result.durationMs,
6959
6967
  error: result.error,
6960
- isInfrastructureError: infraError
6968
+ isInfrastructureError: infraError,
6969
+ mcpHostTrace: result.mcpHostTrace
6961
6970
  });
6962
6971
  } catch (err) {
6963
6972
  const errorMessage = err instanceof Error ? err.message : String(err);
@@ -7041,6 +7050,7 @@ async function runEvalDataset(options, context) {
7041
7050
  onCaseComplete,
7042
7051
  filterTags,
7043
7052
  saveResultsTo,
7053
+ omitResponsesFromBaseline = true,
7044
7054
  baselineResultsFrom,
7045
7055
  mcpHostModel,
7046
7056
  judgeModel
@@ -7155,7 +7165,9 @@ async function runEvalDataset(options, context) {
7155
7165
  result.datasetToolF1 = avgPrec + avgRecall > 0 ? 2 * avgPrec * avgRecall / (avgPrec + avgRecall) : 0;
7156
7166
  }
7157
7167
  if (saveResultsTo) {
7158
- await saveBaseline(result, saveResultsTo);
7168
+ await saveBaseline(result, saveResultsTo, {
7169
+ omitResponses: omitResponsesFromBaseline
7170
+ });
7159
7171
  }
7160
7172
  if (context.testInfo) {
7161
7173
  await context.testInfo.attach("mcp-test-results", {