@gleanwork/mcp-server-tester 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1 -1
- package/dist/fixtures/mcp.js +1 -1
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +52 -6
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +88 -14
- package/dist/index.d.ts +88 -14
- package/dist/index.js +52 -6
- package/dist/index.js.map +1 -1
- package/dist/reporters/mcpReporter.cjs +34 -1
- package/dist/reporters/mcpReporter.cjs.map +1 -1
- package/dist/reporters/mcpReporter.d.cts +90 -0
- package/dist/reporters/mcpReporter.d.ts +90 -0
- package/dist/reporters/mcpReporter.js +34 -1
- package/dist/reporters/mcpReporter.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -4411,7 +4411,7 @@ function escapeHtml(text) {
|
|
|
4411
4411
|
|
|
4412
4412
|
// package.json
|
|
4413
4413
|
var package_default = {
|
|
4414
|
-
version: "1.0.0"};
|
|
4414
|
+
version: "1.0.1"};
|
|
4415
4415
|
|
|
4416
4416
|
// src/mcp/clientFactory.ts
|
|
4417
4417
|
function getRetryAfterDelayMs(err) {
|
|
@@ -6958,6 +6958,12 @@ function createVercelOrchestrator() {
|
|
|
6958
6958
|
});
|
|
6959
6959
|
const totalDurationMs = Date.now() - llmStart;
|
|
6960
6960
|
const llmDurationMs = totalDurationMs - mcpDurationMs;
|
|
6961
|
+
const hostUsage = result.usage ? {
|
|
6962
|
+
inputTokens: result.usage.promptTokens ?? 0,
|
|
6963
|
+
outputTokens: result.usage.completionTokens ?? 0,
|
|
6964
|
+
totalCostUsd: 0,
|
|
6965
|
+
durationMs: llmDurationMs
|
|
6966
|
+
} : void 0;
|
|
6961
6967
|
const conversationHistory = (result.steps ?? []).map((step) => ({
|
|
6962
6968
|
role: step.toolCalls?.length > 0 ? "tool" : "assistant",
|
|
6963
6969
|
content: step.toolCalls?.length > 0 ? JSON.stringify(step.toolResults) : step.text ?? ""
|
|
@@ -6969,7 +6975,8 @@ function createVercelOrchestrator() {
|
|
|
6969
6975
|
scenario,
|
|
6970
6976
|
llmDurationMs,
|
|
6971
6977
|
mcpDurationMs,
|
|
6972
|
-
conversationHistory
|
|
6978
|
+
conversationHistory,
|
|
6979
|
+
usage: hostUsage
|
|
6973
6980
|
};
|
|
6974
6981
|
} catch (err) {
|
|
6975
6982
|
return {
|
|
@@ -7305,6 +7312,32 @@ async function execFileNoThrow(file, args) {
|
|
|
7305
7312
|
}
|
|
7306
7313
|
}
|
|
7307
7314
|
|
|
7315
|
+
// src/utils/usageUtils.ts
|
|
7316
|
+
/**
 * Adds two optional numeric metrics.
 *
 * A side that is `null` or `undefined` is treated as "not reported". When
 * neither side is reported the result stays `undefined` instead of
 * collapsing to `0`, so callers can tell "no data" apart from a real zero.
 *
 * @param {number | null | undefined} a - First value, if reported.
 * @param {number | null | undefined} b - Second value, if reported.
 * @returns {number | undefined} `undefined` when both inputs are missing;
 *   otherwise the sum, with a missing side counted as 0.
 */
function optionalSum(a, b) {
  // `== null` matches both null and undefined, the same set of values the
  // `??` fallback below treats as missing. Checking only `=== undefined`
  // would turn (null, null) into 0.
  if (a == null && b == null) return void 0;
  return (a ?? 0) + (b ?? 0);
}
|
|
7320
|
+
/**
 * Combines two usage records field by field.
 *
 * - Both operands falsy: returns `undefined`.
 * - Exactly one operand present: returns a shallow copy of it, so the
 *   result never aliases the input object.
 * - Both present: `inputTokens`, `outputTokens`, `totalCostUsd` and
 *   `durationMs` are added; `durationApiMs`, `cacheReadInputTokens` and
 *   `cacheCreationInputTokens` go through `optionalSum`, so they remain
 *   `undefined` when neither record reports them.
 *
 * @param {object | undefined} a - First usage record.
 * @param {object | undefined} b - Second usage record.
 * @returns {object | undefined} The combined record, or `undefined` when
 *   there is nothing to combine.
 */
function sumUsage(a, b) {
  if (!a && !b) return void 0;
  // Past the guard above, at least one operand is truthy.
  if (!a) return { ...b };
  if (!b) return { ...a };
  return {
    // Default each counter to 0: a record missing one of these fields would
    // otherwise produce NaN, which then poisons every later aggregation.
    inputTokens: (a.inputTokens ?? 0) + (b.inputTokens ?? 0),
    outputTokens: (a.outputTokens ?? 0) + (b.outputTokens ?? 0),
    totalCostUsd: (a.totalCostUsd ?? 0) + (b.totalCostUsd ?? 0),
    durationMs: (a.durationMs ?? 0) + (b.durationMs ?? 0),
    durationApiMs: optionalSum(a.durationApiMs, b.durationApiMs),
    cacheReadInputTokens: optionalSum(
      a.cacheReadInputTokens,
      b.cacheReadInputTokens
    ),
    cacheCreationInputTokens: optionalSum(
      a.cacheCreationInputTokens,
      b.cacheCreationInputTokens
    )
  };
}
|
|
7340
|
+
|
|
7308
7341
|
// src/evals/evalRunner.ts
|
|
7309
7342
|
async function executeToolCall(evalCase, mcp) {
|
|
7310
7343
|
const mode = evalCase.mode || "direct";
|
|
@@ -7550,6 +7583,7 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
7550
7583
|
};
|
|
7551
7584
|
}
|
|
7552
7585
|
}
|
|
7586
|
+
const hostUsage = isMCPHostSimulationResult(response) && response.usage ? response.usage : void 0;
|
|
7553
7587
|
return {
|
|
7554
7588
|
id: evalCase.id,
|
|
7555
7589
|
datasetName: options.datasetName ?? "single-case",
|
|
@@ -7566,7 +7600,8 @@ async function runSingleIteration(evalCase, context, options) {
|
|
|
7566
7600
|
tags: evalCase.tags,
|
|
7567
7601
|
toolPrecision,
|
|
7568
7602
|
toolRecall,
|
|
7569
|
-
mcpHostTrace
|
|
7603
|
+
mcpHostTrace,
|
|
7604
|
+
hostUsage
|
|
7570
7605
|
};
|
|
7571
7606
|
}
|
|
7572
7607
|
function isInfrastructureError(err) {
|
|
@@ -7602,7 +7637,8 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
7602
7637
|
durationMs: result.durationMs,
|
|
7603
7638
|
error: result.error,
|
|
7604
7639
|
isInfrastructureError: infraError,
|
|
7605
|
-
mcpHostTrace: result.mcpHostTrace
|
|
7640
|
+
mcpHostTrace: result.mcpHostTrace,
|
|
7641
|
+
hostUsage: result.hostUsage
|
|
7606
7642
|
});
|
|
7607
7643
|
} catch (err) {
|
|
7608
7644
|
const errorMessage = err instanceof Error ? err.message : String(err);
|
|
@@ -7635,6 +7671,10 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
7635
7671
|
durationMs: 0,
|
|
7636
7672
|
tags: evalCase.tags
|
|
7637
7673
|
};
|
|
7674
|
+
const totalHostUsage = iterationResults.reduce(
|
|
7675
|
+
(acc, r) => sumUsage(acc, r.hostUsage),
|
|
7676
|
+
void 0
|
|
7677
|
+
);
|
|
7638
7678
|
return {
|
|
7639
7679
|
...baseResult,
|
|
7640
7680
|
pass: assertionPassRate >= threshold,
|
|
@@ -7643,7 +7683,8 @@ async function runEvalCase(evalCase, context, options = {}) {
|
|
|
7643
7683
|
infrastructureErrorRate,
|
|
7644
7684
|
iterationResults,
|
|
7645
7685
|
infrastructureErrorCount: infraErrors.length,
|
|
7646
|
-
durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0)
|
|
7686
|
+
durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0),
|
|
7687
|
+
hostUsage: totalHostUsage
|
|
7647
7688
|
};
|
|
7648
7689
|
}
|
|
7649
7690
|
function wilsonCI(k, n) {
|
|
@@ -7753,13 +7794,18 @@ async function runEvalDataset(options, context) {
|
|
|
7753
7794
|
...mcpHostModel !== void 0 && { mcpHostModel },
|
|
7754
7795
|
...judgeModel !== void 0 && { judgeModel }
|
|
7755
7796
|
};
|
|
7797
|
+
const runHostUsage = caseResults.reduce(
|
|
7798
|
+
(acc, r) => sumUsage(acc, r.hostUsage),
|
|
7799
|
+
void 0
|
|
7800
|
+
);
|
|
7756
7801
|
const result = {
|
|
7757
7802
|
total,
|
|
7758
7803
|
passed,
|
|
7759
7804
|
failed: total - passed,
|
|
7760
7805
|
caseResults,
|
|
7761
7806
|
durationMs: Date.now() - startTime,
|
|
7762
|
-
metadata
|
|
7807
|
+
metadata,
|
|
7808
|
+
totalHostUsage: runHostUsage
|
|
7763
7809
|
};
|
|
7764
7810
|
if (baselineResultsFrom) {
|
|
7765
7811
|
try {
|