@gleanwork/mcp-server-tester 1.0.0-beta.2 → 1.0.0-beta.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -22
- package/dist/cli/index.js +38 -12
- package/dist/fixtures/mcp.d.ts +14 -6
- package/dist/fixtures/mcp.js +9 -6
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +69 -47
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +208 -1175
- package/dist/index.d.ts +208 -1175
- package/dist/index.js +69 -47
- package/dist/index.js.map +1 -1
- package/dist/reporters/mcpReporter.cjs.map +1 -1
- package/dist/reporters/mcpReporter.js.map +1 -1
- package/dist/reporters/ui-dist/app.js +107 -7
- package/dist/reporters/ui-dist/styles.css +1 -1
- package/package.json +11 -8
- package/src/reporters/ui-dist/app.js +0 -174
- package/src/reporters/ui-dist/index.html +0 -28
- package/src/reporters/ui-dist/styles.css +0 -1
package/dist/index.cjs
CHANGED
|
@@ -3127,7 +3127,7 @@ var init_dist3 = __esm({
|
|
|
3127
3127
|
}
|
|
3128
3128
|
});
|
|
3129
3129
|
var MCPHostCapabilitiesSchema = zod.z.object({
|
|
3130
|
-
sampling: zod.z.record(zod.z.unknown()).optional(),
|
|
3130
|
+
sampling: zod.z.record(zod.z.string(), zod.z.unknown()).optional(),
|
|
3131
3131
|
roots: zod.z.object({
|
|
3132
3132
|
listChanged: zod.z.boolean()
|
|
3133
3133
|
}).optional()
|
|
@@ -3186,7 +3186,7 @@ var HttpConfigSchema = zod.z.object({
|
|
|
3186
3186
|
}
|
|
3187
3187
|
return true;
|
|
3188
3188
|
}),
|
|
3189
|
-
headers: zod.z.record(zod.z.string()).optional(),
|
|
3189
|
+
headers: zod.z.record(zod.z.string(), zod.z.string()).optional(),
|
|
3190
3190
|
capabilities: MCPHostCapabilitiesSchema.optional(),
|
|
3191
3191
|
connectTimeoutMs: zod.z.number().positive().optional(),
|
|
3192
3192
|
requestTimeoutMs: zod.z.number().positive().optional(),
|
|
@@ -4407,7 +4407,7 @@ function escapeHtml(text) {
|
|
|
4407
4407
|
|
|
4408
4408
|
// package.json
|
|
4409
4409
|
var package_default = {
|
|
4410
|
-
version: "1.0.0-beta.2"};
|
|
4410
|
+
version: "1.0.0-beta.4"};
|
|
4411
4411
|
|
|
4412
4412
|
// src/mcp/clientFactory.ts
|
|
4413
4413
|
function getRetryAfterDelayMs(err) {
|
|
@@ -4498,7 +4498,10 @@ async function createMCPClientForConfig(config, options) {
|
|
|
4498
4498
|
validatedConfig.connectTimeoutMs !== void 0 ? { timeout: validatedConfig.connectTimeoutMs } : void 0
|
|
4499
4499
|
);
|
|
4500
4500
|
} else if (isHttpConfig(validatedConfig)) {
|
|
4501
|
-
const headers = {
|
|
4501
|
+
const headers = {
|
|
4502
|
+
"User-Agent": `@gleanwork/mcp-server-tester/${package_default.version}`,
|
|
4503
|
+
...validatedConfig.headers
|
|
4504
|
+
};
|
|
4502
4505
|
if (validatedConfig.auth?.clientCredentials && !options?.authProvider) {
|
|
4503
4506
|
const ccConfig = validatedConfig.auth.clientCredentials;
|
|
4504
4507
|
const clientId = ccConfig.clientId ?? process.env["MCP_CLIENT_ID"];
|
|
@@ -5148,7 +5151,7 @@ function validateToolCalls(response, expectation) {
|
|
|
5148
5151
|
if (!isSimulationResult(response)) {
|
|
5149
5152
|
return {
|
|
5150
5153
|
pass: false,
|
|
5151
|
-
message: "toolsTriggered expectation requires
|
|
5154
|
+
message: "toolsTriggered expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
|
|
5152
5155
|
};
|
|
5153
5156
|
}
|
|
5154
5157
|
const actual = response.toolCalls;
|
|
@@ -5208,7 +5211,7 @@ function validateToolCallCount(response, options) {
|
|
|
5208
5211
|
if (!isSimulationResult(response)) {
|
|
5209
5212
|
return {
|
|
5210
5213
|
pass: false,
|
|
5211
|
-
message: "toolCallCount expectation requires
|
|
5214
|
+
message: "toolCallCount expectation requires mcp_host mode \u2014 response must be an MCPHostSimulationResult"
|
|
5212
5215
|
};
|
|
5213
5216
|
}
|
|
5214
5217
|
const count = response.toolCalls.length;
|
|
@@ -6330,7 +6333,7 @@ function getAuthConfigFromEnv() {
|
|
|
6330
6333
|
}
|
|
6331
6334
|
return void 0;
|
|
6332
6335
|
}
|
|
6333
|
-
var
|
|
6336
|
+
var MCPHostConfigSchema = zod.z.object({
|
|
6334
6337
|
provider: zod.z.enum([
|
|
6335
6338
|
"openai",
|
|
6336
6339
|
"anthropic",
|
|
@@ -6399,7 +6402,7 @@ var EvalExpectBlockSchema = zod.z.object({
|
|
|
6399
6402
|
calls: zod.z.array(
|
|
6400
6403
|
zod.z.object({
|
|
6401
6404
|
name: zod.z.string(),
|
|
6402
|
-
arguments: zod.z.record(zod.z.unknown()).optional(),
|
|
6405
|
+
arguments: zod.z.record(zod.z.string(), zod.z.unknown()).optional(),
|
|
6403
6406
|
required: zod.z.boolean().optional()
|
|
6404
6407
|
})
|
|
6405
6408
|
),
|
|
@@ -6415,12 +6418,12 @@ var EvalExpectBlockSchema = zod.z.object({
|
|
|
6415
6418
|
var EvalCaseSchema = zod.z.object({
|
|
6416
6419
|
id: zod.z.string().min(1, "id must not be empty"),
|
|
6417
6420
|
description: zod.z.string().optional(),
|
|
6418
|
-
mode: zod.z.enum(["direct", "
|
|
6421
|
+
mode: zod.z.enum(["direct", "mcp_host"]).optional(),
|
|
6419
6422
|
toolName: zod.z.string().min(1, "toolName must not be empty").optional(),
|
|
6420
|
-
args: zod.z.record(zod.z.unknown()).optional(),
|
|
6423
|
+
args: zod.z.record(zod.z.string(), zod.z.unknown()).optional(),
|
|
6421
6424
|
scenario: zod.z.string().optional(),
|
|
6422
|
-
|
|
6423
|
-
metadata: zod.z.record(zod.z.unknown()).optional(),
|
|
6425
|
+
mcpHostConfig: MCPHostConfigSchema.optional(),
|
|
6426
|
+
metadata: zod.z.record(zod.z.string(), zod.z.unknown()).optional(),
|
|
6424
6427
|
iterations: zod.z.number().int().min(1).optional(),
|
|
6425
6428
|
accuracyThreshold: zod.z.number().min(0).max(1).optional(),
|
|
6426
6429
|
judgeReps: zod.z.number().int().min(1).optional(),
|
|
@@ -6432,7 +6435,7 @@ var EvalDatasetSchema = zod.z.object({
|
|
|
6432
6435
|
name: zod.z.string().min(1, "name must not be empty"),
|
|
6433
6436
|
description: zod.z.string().optional(),
|
|
6434
6437
|
cases: zod.z.array(EvalCaseSchema).min(1, "dataset must have at least one case"),
|
|
6435
|
-
metadata: zod.z.record(zod.z.unknown()).optional()
|
|
6438
|
+
metadata: zod.z.record(zod.z.string(), zod.z.unknown()).optional()
|
|
6436
6439
|
});
|
|
6437
6440
|
function validateEvalCase(evalCase) {
|
|
6438
6441
|
return EvalCaseSchema.parse(evalCase);
|
|
@@ -6470,30 +6473,30 @@ function loadEvalDatasetFromObject(data, options = {}) {
|
|
|
6470
6473
|
return dataset;
|
|
6471
6474
|
}
|
|
6472
6475
|
|
|
6473
|
-
// src/evals/
|
|
6476
|
+
// src/evals/mcpHost/adapters/vercel.ts
|
|
6474
6477
|
function enrichErrorMessage(err, provider) {
|
|
6475
6478
|
const raw = err instanceof Error ? err.message : String(err);
|
|
6476
6479
|
if (raw.includes("Cannot find module") || raw.includes("ERR_MODULE_NOT_FOUND")) {
|
|
6477
|
-
return `
|
|
6478
|
-
Hint: run \`getMissingDependencyMessage('${provider}')\` or check docs/
|
|
6480
|
+
return `MCP host simulation failed: required package not installed.
|
|
6481
|
+
Hint: run \`getMissingDependencyMessage('${provider}')\` or check docs/mcp-host.md for install instructions.`;
|
|
6479
6482
|
}
|
|
6480
6483
|
if (raw.includes("401") || raw.includes("Unauthorized") || raw.includes("API key") || raw.includes("api_key")) {
|
|
6481
|
-
return `
|
|
6484
|
+
return `MCP host simulation failed: authentication error.
|
|
6482
6485
|
Hint: check your API key environment variable (e.g. ANTHROPIC_API_KEY, GOOGLE_APPLICATION_CREDENTIALS).`;
|
|
6483
6486
|
}
|
|
6484
6487
|
if (raw.includes("404") || raw.includes("Not Found") || raw.toLowerCase().includes("model") && raw.toLowerCase().includes("not found")) {
|
|
6485
|
-
return `
|
|
6488
|
+
return `MCP host simulation failed: model not found.
|
|
6486
6489
|
Hint: check the model name format for your provider. For vertex-anthropic use 'claude-3-5-haiku@20241022' (with @).`;
|
|
6487
6490
|
}
|
|
6488
6491
|
if (raw.includes("ENOTFOUND") || raw.includes("fetch failed") || raw.includes("ECONNREFUSED")) {
|
|
6489
|
-
return `
|
|
6492
|
+
return `MCP host simulation failed: network error.
|
|
6490
6493
|
Hint: check network connectivity and whether the provider's API endpoint is reachable from this machine.`;
|
|
6491
6494
|
}
|
|
6492
6495
|
if (raw.includes("429") || raw.toLowerCase().includes("rate limit") || raw.includes("Too Many Requests")) {
|
|
6493
|
-
return `
|
|
6496
|
+
return `MCP host simulation failed: rate limited.
|
|
6494
6497
|
Hint: reduce concurrency, add delays between iterations, or upgrade your API plan.`;
|
|
6495
6498
|
}
|
|
6496
|
-
return `
|
|
6499
|
+
return `MCP host simulation failed: ${raw}`;
|
|
6497
6500
|
}
|
|
6498
6501
|
async function loadModel(provider, model) {
|
|
6499
6502
|
switch (provider) {
|
|
@@ -6623,7 +6626,7 @@ function createVercelOrchestrator() {
|
|
|
6623
6626
|
};
|
|
6624
6627
|
}
|
|
6625
6628
|
|
|
6626
|
-
// src/evals/
|
|
6629
|
+
// src/evals/mcpHost/mcpHostSimulation.ts
|
|
6627
6630
|
var vercelOrchestrator = createVercelOrchestrator();
|
|
6628
6631
|
var allProviders = [
|
|
6629
6632
|
"openai",
|
|
@@ -6639,7 +6642,7 @@ var allProviders = [
|
|
|
6639
6642
|
var simulatorRegistry = new Map(
|
|
6640
6643
|
allProviders.map((p) => [p, vercelOrchestrator])
|
|
6641
6644
|
);
|
|
6642
|
-
async function
|
|
6645
|
+
async function simulateMCPHost(mcp, scenario, config) {
|
|
6643
6646
|
const simulator = simulatorRegistry.get(config.provider);
|
|
6644
6647
|
if (!simulator) {
|
|
6645
6648
|
throw new Error(
|
|
@@ -6661,7 +6664,7 @@ function getMissingDependencyMessage(provider) {
|
|
|
6661
6664
|
deepseek: "npm install ai @ai-sdk/deepseek",
|
|
6662
6665
|
openrouter: "npm install ai @openrouter/ai-sdk-provider",
|
|
6663
6666
|
xai: "npm install ai @ai-sdk/xai",
|
|
6664
|
-
"vertex-anthropic": "npm install ai @ai-sdk/google-vertex (requires Application Default Credentials \u2014 see docs/
|
|
6667
|
+
"vertex-anthropic": "npm install ai @ai-sdk/google-vertex (requires Application Default Credentials \u2014 see docs/mcp-host.md)"
|
|
6665
6668
|
};
|
|
6666
6669
|
const pkg = packageMap[provider];
|
|
6667
6670
|
return pkg ? `${String(provider)} provider requires: ${pkg}` : `Unknown provider: ${String(provider)}`;
|
|
@@ -6704,24 +6707,24 @@ async function execFileNoThrow(file, args) {
|
|
|
6704
6707
|
async function executeToolCall(evalCase, mcp) {
|
|
6705
6708
|
const mode = evalCase.mode || "direct";
|
|
6706
6709
|
try {
|
|
6707
|
-
if (mode === "
|
|
6710
|
+
if (mode === "mcp_host") {
|
|
6708
6711
|
if (!evalCase.scenario) {
|
|
6709
6712
|
throw new Error(
|
|
6710
|
-
`Eval case ${evalCase.id}: scenario is required for
|
|
6713
|
+
`Eval case ${evalCase.id}: scenario is required for mcp_host mode`
|
|
6711
6714
|
);
|
|
6712
6715
|
}
|
|
6713
|
-
if (!evalCase.
|
|
6716
|
+
if (!evalCase.mcpHostConfig) {
|
|
6714
6717
|
throw new Error(
|
|
6715
|
-
`Eval case ${evalCase.id}:
|
|
6718
|
+
`Eval case ${evalCase.id}: mcpHostConfig is required for mcp_host mode`
|
|
6716
6719
|
);
|
|
6717
6720
|
}
|
|
6718
|
-
const simulationResult = await
|
|
6721
|
+
const simulationResult = await simulateMCPHost(
|
|
6719
6722
|
mcp,
|
|
6720
6723
|
evalCase.scenario,
|
|
6721
|
-
evalCase.
|
|
6724
|
+
evalCase.mcpHostConfig
|
|
6722
6725
|
);
|
|
6723
6726
|
if (!simulationResult.success) {
|
|
6724
|
-
throw new Error(simulationResult.error || "
|
|
6727
|
+
throw new Error(simulationResult.error || "MCP host simulation failed");
|
|
6725
6728
|
}
|
|
6726
6729
|
return { response: simulationResult };
|
|
6727
6730
|
} else {
|
|
@@ -6863,12 +6866,16 @@ async function runExpectBlockValidations(expectBlock, response, config) {
|
|
|
6863
6866
|
}
|
|
6864
6867
|
return { expectations: results, toolPrecision, toolRecall };
|
|
6865
6868
|
}
|
|
6869
|
+
function isMCPHostSimulationResult(value) {
|
|
6870
|
+
return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
|
|
6871
|
+
}
|
|
6866
6872
|
async function runSingleIteration(evalCase, context, options) {
|
|
6867
6873
|
const startTime = Date.now();
|
|
6868
6874
|
const { response, error } = await executeToolCall(evalCase, context.mcp);
|
|
6869
6875
|
let expectationResults = {};
|
|
6870
6876
|
let toolPrecision;
|
|
6871
6877
|
let toolRecall;
|
|
6878
|
+
let mcpHostTrace;
|
|
6872
6879
|
if (!error && evalCase.expect) {
|
|
6873
6880
|
const {
|
|
6874
6881
|
expectations,
|
|
@@ -6883,6 +6890,23 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
6883
6890
|
expectationResults = expectations;
|
|
6884
6891
|
toolPrecision = tp;
|
|
6885
6892
|
toolRecall = tr;
|
|
6893
|
+
if (evalCase.expect.toolsTriggered !== void 0 && isMCPHostSimulationResult(response)) {
|
|
6894
|
+
const expectedNames = new Set(
|
|
6895
|
+
evalCase.expect.toolsTriggered.calls.map((c) => c.name)
|
|
6896
|
+
);
|
|
6897
|
+
const requiredNames = new Set(
|
|
6898
|
+
evalCase.expect.toolsTriggered.calls.filter((c) => c.required !== false).map((c) => c.name)
|
|
6899
|
+
);
|
|
6900
|
+
const calledNames = new Set(response.toolCalls.map((c) => c.name));
|
|
6901
|
+
mcpHostTrace = {
|
|
6902
|
+
calls: response.toolCalls.map((call) => ({
|
|
6903
|
+
name: call.name,
|
|
6904
|
+
arguments: call.arguments,
|
|
6905
|
+
status: expectedNames.has(call.name) ? "expected" : "unexpected"
|
|
6906
|
+
})),
|
|
6907
|
+
missed: Array.from(requiredNames).filter((name15) => !calledNames.has(name15)).map((name15) => ({ name: name15 }))
|
|
6908
|
+
};
|
|
6909
|
+
}
|
|
6886
6910
|
}
|
|
6887
6911
|
return {
|
|
6888
6912
|
id: evalCase.id,
|
|
@@ -6898,7 +6922,8 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
6898
6922
|
durationMs: Date.now() - startTime,
|
|
6899
6923
|
tags: evalCase.tags,
|
|
6900
6924
|
toolPrecision,
|
|
6901
|
-
toolRecall
|
|
6925
|
+
toolRecall,
|
|
6926
|
+
mcpHostTrace
|
|
6902
6927
|
};
|
|
6903
6928
|
}
|
|
6904
6929
|
function isInfrastructureError(err) {
|
|
@@ -6951,7 +6976,6 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
6951
6976
|
const passCount = assertionResults.filter((r) => r.pass).length;
|
|
6952
6977
|
const assertionPassRate = assertionResults.length > 0 ? passCount / assertionResults.length : 0;
|
|
6953
6978
|
const infrastructureErrorRate = infraErrors.length / iterations;
|
|
6954
|
-
const accuracy = assertionPassRate;
|
|
6955
6979
|
const threshold = evalCase.accuracyThreshold ?? 1;
|
|
6956
6980
|
const baseResult = lastResult ?? {
|
|
6957
6981
|
id: evalCase.id,
|
|
@@ -6968,10 +6992,9 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
6968
6992
|
};
|
|
6969
6993
|
return {
|
|
6970
6994
|
...baseResult,
|
|
6971
|
-
pass:
|
|
6995
|
+
pass: assertionPassRate >= threshold,
|
|
6972
6996
|
assertionPassRate,
|
|
6973
6997
|
infrastructureErrorRate,
|
|
6974
|
-
accuracy,
|
|
6975
6998
|
iterationResults,
|
|
6976
6999
|
infrastructureErrorCount: infraErrors.length,
|
|
6977
7000
|
durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0)
|
|
@@ -7006,7 +7029,7 @@ async function runEvalDataset(options, context) {
|
|
|
7006
7029
|
filterTags,
|
|
7007
7030
|
saveResultsTo,
|
|
7008
7031
|
baselineResultsFrom,
|
|
7009
|
-
|
|
7032
|
+
mcpHostModel,
|
|
7010
7033
|
judgeModel
|
|
7011
7034
|
} = options;
|
|
7012
7035
|
const startTime = Date.now();
|
|
@@ -7016,7 +7039,7 @@ async function runEvalDataset(options, context) {
|
|
|
7016
7039
|
};
|
|
7017
7040
|
const casesToRun = filterTags && filterTags.length > 0 ? dataset.cases.filter((c) => c.tags?.some((t) => filterTags.includes(t))) : dataset.cases;
|
|
7018
7041
|
const estimatedJudgeCalls = casesToRun.reduce((sum, c) => {
|
|
7019
|
-
const effectiveIterations = c.mode === "
|
|
7042
|
+
const effectiveIterations = c.mode === "mcp_host" ? c.iterations ?? defaultLlmIterations ?? 1 : c.iterations ?? 1;
|
|
7020
7043
|
const judgeReps = c.expect?.passesJudge != null ? c.expect.passesJudge.reps ?? c.judgeReps ?? defaultJudgeReps ?? 1 : 0;
|
|
7021
7044
|
return sum + effectiveIterations * judgeReps;
|
|
7022
7045
|
}, 0);
|
|
@@ -7026,12 +7049,12 @@ async function runEvalDataset(options, context) {
|
|
|
7026
7049
|
);
|
|
7027
7050
|
}
|
|
7028
7051
|
const tasks = casesToRun.map((evalCase) => async () => {
|
|
7029
|
-
const withIterations = evalCase.mode === "
|
|
7030
|
-
if (evalCase.mode === "
|
|
7052
|
+
const withIterations = evalCase.mode === "mcp_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
|
|
7053
|
+
if (evalCase.mode === "mcp_host") {
|
|
7031
7054
|
const effectiveIterations = withIterations.iterations ?? 1;
|
|
7032
7055
|
if (effectiveIterations > 1 && effectiveIterations < 10) {
|
|
7033
7056
|
console.warn(
|
|
7034
|
-
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in
|
|
7057
|
+
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in mcp_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
|
|
7035
7058
|
);
|
|
7036
7059
|
}
|
|
7037
7060
|
}
|
|
@@ -7063,7 +7086,7 @@ async function runEvalDataset(options, context) {
|
|
|
7063
7086
|
gitHash,
|
|
7064
7087
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7065
7088
|
packageVersion: package_default.version,
|
|
7066
|
-
...
|
|
7089
|
+
...mcpHostModel !== void 0 && { mcpHostModel },
|
|
7067
7090
|
...judgeModel !== void 0 && { judgeModel }
|
|
7068
7091
|
};
|
|
7069
7092
|
const result = {
|
|
@@ -7108,12 +7131,12 @@ async function runEvalDataset(options, context) {
|
|
|
7108
7131
|
);
|
|
7109
7132
|
}
|
|
7110
7133
|
}
|
|
7111
|
-
const
|
|
7134
|
+
const mcpHostCases = caseResults.filter(
|
|
7112
7135
|
(r) => r.toolPrecision !== void 0 || r.toolRecall !== void 0
|
|
7113
7136
|
);
|
|
7114
|
-
if (
|
|
7115
|
-
const avgPrec =
|
|
7116
|
-
const avgRecall =
|
|
7137
|
+
if (mcpHostCases.length > 0) {
|
|
7138
|
+
const avgPrec = mcpHostCases.reduce((s, r) => s + (r.toolPrecision ?? 0), 0) / mcpHostCases.length;
|
|
7139
|
+
const avgRecall = mcpHostCases.reduce((s, r) => s + (r.toolRecall ?? 0), 0) / mcpHostCases.length;
|
|
7117
7140
|
result.datasetToolPrecision = avgPrec;
|
|
7118
7141
|
result.datasetToolRecall = avgRecall;
|
|
7119
7142
|
result.datasetToolF1 = avgPrec + avgRecall > 0 ? 2 * avgPrec * avgRecall / (avgPrec + avgRecall) : 0;
|
|
@@ -7181,7 +7204,6 @@ async function runServerComparison(options, contextA, contextB) {
|
|
|
7181
7204
|
bWins,
|
|
7182
7205
|
ties,
|
|
7183
7206
|
bothFail,
|
|
7184
|
-
bothFailCount: bothFail,
|
|
7185
7207
|
decidedCases,
|
|
7186
7208
|
failureAlignment: total > 0 ? bothFail / total : 0,
|
|
7187
7209
|
aWinRate: decidedCases > 0 ? aWins / decidedCases : 0,
|
|
@@ -7410,7 +7432,7 @@ exports.runEvalCase = runEvalCase;
|
|
|
7410
7432
|
exports.runEvalDataset = runEvalDataset;
|
|
7411
7433
|
exports.runServerComparison = runServerComparison;
|
|
7412
7434
|
exports.saveBaseline = saveBaseline;
|
|
7413
|
-
exports.
|
|
7435
|
+
exports.simulateMCPHost = simulateMCPHost;
|
|
7414
7436
|
exports.test = test;
|
|
7415
7437
|
exports.validateAccessToken = validateAccessToken;
|
|
7416
7438
|
exports.validateError = validateError;
|