@gleanwork/mcp-server-tester 1.0.0-beta.1 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1 -1
- package/dist/fixtures/mcp.js +13 -2
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +25 -4
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +25 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -4380,7 +4380,7 @@ function escapeHtml(text) {
|
|
|
4380
4380
|
|
|
4381
4381
|
// package.json
|
|
4382
4382
|
var package_default = {
|
|
4383
|
-
version: "1.0.0-beta.1"};
|
|
4383
|
+
version: "1.0.0-beta.2"};
|
|
4384
4384
|
|
|
4385
4385
|
// src/mcp/clientFactory.ts
|
|
4386
4386
|
function getRetryAfterDelayMs(err) {
|
|
@@ -5888,7 +5888,18 @@ function applySanitizers(value, sanitizers) {
|
|
|
5888
5888
|
continue;
|
|
5889
5889
|
}
|
|
5890
5890
|
if (isRegexSanitizer(sanitizer)) {
|
|
5891
|
-
|
|
5891
|
+
let pattern;
|
|
5892
|
+
if (sanitizer.pattern instanceof RegExp) {
|
|
5893
|
+
pattern = sanitizer.pattern;
|
|
5894
|
+
} else {
|
|
5895
|
+
try {
|
|
5896
|
+
pattern = new RegExp(sanitizer.pattern, "g");
|
|
5897
|
+
} catch {
|
|
5898
|
+
throw new Error(
|
|
5899
|
+
`toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
|
|
5900
|
+
);
|
|
5901
|
+
}
|
|
5902
|
+
}
|
|
5892
5903
|
const replacement = sanitizer.replacement ?? "[SANITIZED]";
|
|
5893
5904
|
result = result.replace(pattern, replacement);
|
|
5894
5905
|
continue;
|
|
@@ -6991,9 +7002,9 @@ async function runEvalDataset(options, context) {
|
|
|
6991
7002
|
const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
|
|
6992
7003
|
if (evalCase.mode === "llm_host") {
|
|
6993
7004
|
const effectiveIterations = withIterations.iterations ?? 1;
|
|
6994
|
-
if (effectiveIterations < 10) {
|
|
7005
|
+
if (effectiveIterations > 1 && effectiveIterations < 10) {
|
|
6995
7006
|
console.warn(
|
|
6996
|
-
`[mcp-server-tester] Eval case "${evalCase.id}"
|
|
7007
|
+
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
|
|
6997
7008
|
);
|
|
6998
7009
|
}
|
|
6999
7010
|
}
|
|
@@ -7041,6 +7052,16 @@ async function runEvalDataset(options, context) {
|
|
|
7041
7052
|
const baseline = await loadBaseline(baselineResultsFrom);
|
|
7042
7053
|
const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
|
|
7043
7054
|
const baselineMap = buildBaselinePassMap(baseline);
|
|
7055
|
+
const currentCaseIds = result.caseResults.map((cr) => cr.id);
|
|
7056
|
+
const unmatchedCount = currentCaseIds.filter(
|
|
7057
|
+
(id) => !baselineMap.has(id)
|
|
7058
|
+
).length;
|
|
7059
|
+
const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
|
|
7060
|
+
if (unmatchedRatio > 0.2) {
|
|
7061
|
+
console.warn(
|
|
7062
|
+
`[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
|
|
7063
|
+
);
|
|
7064
|
+
}
|
|
7044
7065
|
for (const cr of result.caseResults) {
|
|
7045
7066
|
const baselinePass = baselineMap.get(cr.id);
|
|
7046
7067
|
if (baselinePass !== void 0) {
|