@gleanwork/mcp-server-tester 1.0.0-beta.5 → 1.0.0-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1 -1
- package/dist/fixtures/mcp.js +1 -1
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +18 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +43 -2
- package/dist/index.d.ts +43 -2
- package/dist/index.js +18 -6
- package/dist/index.js.map +1 -1
- package/dist/reporters/ui-dist/app.js +5 -5
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -4407,7 +4407,7 @@ function escapeHtml(text) {
|
|
|
4407
4407
|
|
|
4408
4408
|
// package.json
|
|
4409
4409
|
var package_default = {
|
|
4410
|
-
version: "1.0.0-beta.5"};
|
|
4410
|
+
version: "1.0.0-beta.6"};
|
|
4411
4411
|
|
|
4412
4412
|
// src/mcp/clientFactory.ts
|
|
4413
4413
|
function getRetryAfterDelayMs(err) {
|
|
@@ -6669,9 +6669,16 @@ function getMissingDependencyMessage(provider) {
|
|
|
6669
6669
|
const pkg = packageMap[provider];
|
|
6670
6670
|
return pkg ? `${String(provider)} provider requires: ${pkg}` : `Unknown provider: ${String(provider)}`;
|
|
6671
6671
|
}
|
|
6672
|
-
async function saveBaseline(result, filePath) {
|
|
6672
|
+
async function saveBaseline(result, filePath, options = {}) {
|
|
6673
|
+
const { omitResponses = true } = options;
|
|
6674
|
+
const toSave = omitResponses ? {
|
|
6675
|
+
...result,
|
|
6676
|
+
caseResults: result.caseResults.map(
|
|
6677
|
+
({ response: _response, ...rest }) => rest
|
|
6678
|
+
)
|
|
6679
|
+
} : result;
|
|
6673
6680
|
await fs$1.mkdir(path2.dirname(filePath), { recursive: true });
|
|
6674
|
-
await fs$1.writeFile(filePath, JSON.stringify(result, null, 2), "utf8");
|
|
6681
|
+
await fs$1.writeFile(filePath, JSON.stringify(toSave, null, 2), "utf8");
|
|
6675
6682
|
}
|
|
6676
6683
|
async function loadBaseline(filePath) {
|
|
6677
6684
|
const raw = await fs$1.readFile(filePath, "utf8");
|
|
@@ -6939,7 +6946,8 @@ function isInfrastructureError(err) {
|
|
|
6939
6946
|
} else {
|
|
6940
6947
|
return false;
|
|
6941
6948
|
}
|
|
6942
|
-
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
6949
|
+
return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
|
|
6950
|
+
msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
|
|
6943
6951
|
}
|
|
6944
6952
|
async function runEvalCase(evalCase, context, options = {}) {
|
|
6945
6953
|
const iterations = evalCase.iterations ?? 1;
|
|
@@ -6957,7 +6965,8 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
6957
6965
|
pass: result.pass,
|
|
6958
6966
|
durationMs: result.durationMs,
|
|
6959
6967
|
error: result.error,
|
|
6960
|
-
isInfrastructureError: infraError
|
|
6968
|
+
isInfrastructureError: infraError,
|
|
6969
|
+
mcpHostTrace: result.mcpHostTrace
|
|
6961
6970
|
});
|
|
6962
6971
|
} catch (err) {
|
|
6963
6972
|
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
@@ -7041,6 +7050,7 @@ async function runEvalDataset(options, context) {
|
|
|
7041
7050
|
onCaseComplete,
|
|
7042
7051
|
filterTags,
|
|
7043
7052
|
saveResultsTo,
|
|
7053
|
+
omitResponsesFromBaseline = true,
|
|
7044
7054
|
baselineResultsFrom,
|
|
7045
7055
|
mcpHostModel,
|
|
7046
7056
|
judgeModel
|
|
@@ -7155,7 +7165,9 @@ async function runEvalDataset(options, context) {
|
|
|
7155
7165
|
result.datasetToolF1 = avgPrec + avgRecall > 0 ? 2 * avgPrec * avgRecall / (avgPrec + avgRecall) : 0;
|
|
7156
7166
|
}
|
|
7157
7167
|
if (saveResultsTo) {
|
|
7158
|
-
await saveBaseline(result, saveResultsTo);
|
|
7168
|
+
await saveBaseline(result, saveResultsTo, {
|
|
7169
|
+
omitResponses: omitResponsesFromBaseline
|
|
7170
|
+
});
|
|
7159
7171
|
}
|
|
7160
7172
|
if (context.testInfo) {
|
|
7161
7173
|
await context.testInfo.attach("mcp-test-results", {
|