@gleanwork/mcp-server-tester 1.0.0-beta.1 → 1.0.0-beta.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +1 -1
- package/dist/fixtures/mcp.js +13 -2
- package/dist/fixtures/mcp.js.map +1 -1
- package/dist/index.cjs +25 -4
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +25 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -4407,7 +4407,7 @@ function escapeHtml(text) {
|
|
|
4407
4407
|
|
|
4408
4408
|
// package.json
|
|
4409
4409
|
var package_default = {
|
|
4410
|
-
version: "1.0.0-beta.1"};
|
|
4410
|
+
version: "1.0.0-beta.2"};
|
|
4411
4411
|
|
|
4412
4412
|
// src/mcp/clientFactory.ts
|
|
4413
4413
|
function getRetryAfterDelayMs(err) {
|
|
@@ -5915,7 +5915,18 @@ function applySanitizers(value, sanitizers) {
|
|
|
5915
5915
|
continue;
|
|
5916
5916
|
}
|
|
5917
5917
|
if (isRegexSanitizer(sanitizer)) {
|
|
5918
|
-
|
|
5918
|
+
let pattern;
|
|
5919
|
+
if (sanitizer.pattern instanceof RegExp) {
|
|
5920
|
+
pattern = sanitizer.pattern;
|
|
5921
|
+
} else {
|
|
5922
|
+
try {
|
|
5923
|
+
pattern = new RegExp(sanitizer.pattern, "g");
|
|
5924
|
+
} catch {
|
|
5925
|
+
throw new Error(
|
|
5926
|
+
`toMatchToolSnapshot: invalid regex pattern "${sanitizer.pattern}" in sanitizer`
|
|
5927
|
+
);
|
|
5928
|
+
}
|
|
5929
|
+
}
|
|
5919
5930
|
const replacement = sanitizer.replacement ?? "[SANITIZED]";
|
|
5920
5931
|
result = result.replace(pattern, replacement);
|
|
5921
5932
|
continue;
|
|
@@ -7018,9 +7029,9 @@ async function runEvalDataset(options, context) {
|
|
|
7018
7029
|
const withIterations = evalCase.mode === "llm_host" && evalCase.iterations === void 0 && defaultLlmIterations !== void 0 ? { ...evalCase, iterations: defaultLlmIterations } : evalCase;
|
|
7019
7030
|
if (evalCase.mode === "llm_host") {
|
|
7020
7031
|
const effectiveIterations = withIterations.iterations ?? 1;
|
|
7021
|
-
if (effectiveIterations < 10) {
|
|
7032
|
+
if (effectiveIterations > 1 && effectiveIterations < 10) {
|
|
7022
7033
|
console.warn(
|
|
7023
|
-
`[mcp-server-tester] Eval case "${evalCase.id}"
|
|
7034
|
+
`[mcp-server-tester] Eval case "${evalCase.id}": running ${effectiveIterations} iterations in llm_host mode may not be statistically reliable. Consider using 10+ iterations for accuracy measurements you can trust.`
|
|
7024
7035
|
);
|
|
7025
7036
|
}
|
|
7026
7037
|
}
|
|
@@ -7068,6 +7079,16 @@ async function runEvalDataset(options, context) {
|
|
|
7068
7079
|
const baseline = await loadBaseline(baselineResultsFrom);
|
|
7069
7080
|
const baselinePassRate = baseline.total > 0 ? baseline.passed / baseline.total : 0;
|
|
7070
7081
|
const baselineMap = buildBaselinePassMap(baseline);
|
|
7082
|
+
const currentCaseIds = result.caseResults.map((cr) => cr.id);
|
|
7083
|
+
const unmatchedCount = currentCaseIds.filter(
|
|
7084
|
+
(id) => !baselineMap.has(id)
|
|
7085
|
+
).length;
|
|
7086
|
+
const unmatchedRatio = currentCaseIds.length > 0 ? unmatchedCount / currentCaseIds.length : 0;
|
|
7087
|
+
if (unmatchedRatio > 0.2) {
|
|
7088
|
+
console.warn(
|
|
7089
|
+
`[mcp-server-tester] Baseline comparison: ${unmatchedCount} of ${currentCaseIds.length} cases (${Math.round(unmatchedRatio * 100)}%) have no baseline entry. This may indicate the dataset structure has changed. Results for unmatched cases cannot be compared.`
|
|
7090
|
+
);
|
|
7091
|
+
}
|
|
7071
7092
|
for (const cr of result.caseResults) {
|
|
7072
7093
|
const baselinePass = baselineMap.get(cr.id);
|
|
7073
7094
|
if (baselinePass !== void 0) {
|