npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.8 → 1.0.1-beta.0 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.8 → 1.0.1-beta.0

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (17)

package/README.md +20 -1
package/dist/cli/index.js +12 -1
package/dist/fixtures/mcp.js +71 -14
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +142 -24
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +118 -16
package/dist/index.d.ts +118 -16
package/dist/index.js +142 -25
package/dist/index.js.map +1 -1
package/dist/reporters/mcpReporter.cjs +34 -1
package/dist/reporters/mcpReporter.cjs.map +1 -1
package/dist/reporters/mcpReporter.d.cts +90 -0
package/dist/reporters/mcpReporter.d.ts +90 -0
package/dist/reporters/mcpReporter.js +34 -1
package/dist/reporters/mcpReporter.js.map +1 -1
package/package.json +1 -1

package/dist/index.js CHANGED

@@ -4384,7 +4384,7 @@ function escapeHtml(text) {
 // package.json
 var package_default = {
-  version: "1.0.0-beta.8"};
+  version: "1.0.1-beta.0"};
 // src/mcp/clientFactory.ts
 function getRetryAfterDelayMs(err) {
@@ -4603,6 +4603,17 @@ async function createMCPClientForConfig(config, options) {
 }
 async function closeMCPClient(client) {
   try {
+    const transport = client.transport;
+    if (transport instanceof StreamableHTTPClientTransport) {
+      try {
+        await transport.terminateSession();
+      } catch (sessionError) {
+        debugClient(
+          "Error terminating session: %s",
+          sessionError instanceof Error ? sessionError.message : String(sessionError)
+        );
+      }
+    }
     await client.close();
   } catch (error) {
     debugClient(
@@ -4831,11 +4842,13 @@ function validateSchema(response, schema, options = {}) {
   } catch (error) {
     const zodError = error;
     const issues = formatZodIssues(zodError);
+    const text = stringifyResponse(response);
     return {
       pass: false,
       message: `Response does not match schema: ${issues}`,
       details: {
-        issues: zodError.issues
+        issues: zodError.issues,
+        textPreview: truncateForDisplay2(text)
       }
     };
   }
@@ -4888,6 +4901,12 @@ function formatZodIssues(error) {
   });
   return issues.join("; ");
 }
+function truncateForDisplay2(str, maxLength = 200) {
+  if (str.length <= maxLength) {
+    return str;
+  }
+  return str.slice(0, maxLength) + "... (truncated)";
+}
 // src/assertions/validators/text.ts
 function validateText(response, expected, options = {}) {
@@ -4914,11 +4933,11 @@ function validateText(response, expected, options = {}) {
     details: {
       missing,
       textLength: text.length,
-      textPreview: truncateForDisplay2(text)
+      textPreview: truncateForDisplay3(text)
     }
   };
 }
-function truncateForDisplay2(str, maxLength = 200) {
+function truncateForDisplay3(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -4950,7 +4969,7 @@ function validatePattern(response, patterns, options = {}) {
     details: {
       unmatched,
       textLength: text.length,
-      textPreview: truncateForDisplay3(text)
+      textPreview: truncateForDisplay4(text)
     }
   };
 }
@@ -4970,7 +4989,7 @@ function patternToString(pattern) {
   }
   return `/${pattern}/`;
 }
-function truncateForDisplay3(str, maxLength = 200) {
+function truncateForDisplay4(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -4993,7 +5012,7 @@ function validateError(response, expected = true) {
         pass: false,
         message: "Expected an error response but got success",
         details: {
-          textPreview: truncateForDisplay4(extractText2(response))
+          textPreview: truncateForDisplay5(extractText2(response))
         }
       };
     } else {
@@ -5005,7 +5024,7 @@ function validateError(response, expected = true) {
       }
       return {
         pass: false,
-        message: `Expected a success response but got error: "${truncateForDisplay4(errorMessage)}"`,
+        message: `Expected a success response but got error: "${truncateForDisplay5(errorMessage)}"`,
         details: {
           errorMessage
         }
@@ -5018,7 +5037,7 @@ function validateError(response, expected = true) {
       pass: false,
       message: `Expected an error containing "${expectedMessages[0]}" but got success`,
       details: {
-        textPreview: truncateForDisplay4(extractText2(response))
+        textPreview: truncateForDisplay5(extractText2(response))
       }
     };
   }
@@ -5040,7 +5059,7 @@ function validateError(response, expected = true) {
     }
   };
 }
-function truncateForDisplay4(str, maxLength = 200) {
+function truncateForDisplay5(str, maxLength = 200) {
   if (str.length <= maxLength) {
     return str;
   }
@@ -5158,6 +5177,10 @@ function validateToolCalls(response, expectation) {
           return {
             pass: false,
             message: `Expected tool '${expected.name}' to be called in sequence (starting from position ${searchFrom}), but it was not found`,
+            details: {
+              actual: actual.map((c) => c.name),
+              expected: expected.name
+            },
             metrics
           };
         }
@@ -5174,6 +5197,10 @@ function validateToolCalls(response, expectation) {
         return {
           pass: false,
           message: `Expected tool '${expected.name}'${argsNote} to be called, but it was not`,
+          details: {
+            actual: actual.map((c) => c.name),
+            expected: expected.name
+          },
           metrics
         };
       }
@@ -5186,6 +5213,10 @@ function validateToolCalls(response, expectation) {
       return {
         pass: false,
         message: `Unexpected tool calls: ${names}. Only ${[...allowedNames].map((n) => `'${n}'`).join(", ")} are allowed`,
+        details: {
+          actual: actual.map((c) => c.name),
+          unexpected: unexpected.map((c) => c.name)
+        },
         metrics
       };
     }
@@ -5204,19 +5235,22 @@ function validateToolCallCount(response, options) {
   if (exact !== void 0 && count !== exact) {
     return {
       pass: false,
-      message: `Expected exactly ${exact} tool call(s), but got ${count}`
+      message: `Expected exactly ${exact} tool call(s), but got ${count}`,
+      details: { actual: count, expected: exact }
     };
   }
   if (min !== void 0 && count < min) {
     return {
       pass: false,
-      message: `Expected at least ${min} tool call(s), but got ${count}`
+      message: `Expected at least ${min} tool call(s), but got ${count}`,
+      details: { actual: count, min }
     };
   }
   if (max !== void 0 && count > max) {
     return {
       pass: false,
-      message: `Expected at most ${max} tool call(s), but got ${count}`
+      message: `Expected at most ${max} tool call(s), but got ${count}`,
+      details: { actual: count, max }
     };
   }
   return {
@@ -5730,7 +5764,9 @@ function createJudge(config = {}) {
     case "google":
       return createGoogleJudge(config);
     default:
-      throw new Error(`Unsupported LLM provider: ${String(provider)}`);
+      throw new Error(
+        `Unsupported LLM provider: ${String(provider)}. Valid providers: 'anthropic', 'vertex-anthropic', 'anthropic-agent-sdk', 'openai', 'google'`
+      );
   }
 }
@@ -6055,12 +6091,19 @@ function toMatchToolResponse(received, expected) {
 // src/assertions/matchers/toMatchToolSchema.ts
 function toMatchToolSchema(received, schema, options = {}) {
   const result = validateSchema(received, schema, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
       if (this.isNot) {
         return result.pass ? "Expected response NOT to match schema, but it did" : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -6069,6 +6112,7 @@ function toMatchToolSchema(received, schema, options = {}) {
 // src/assertions/matchers/toContainToolText.ts
 function toContainToolText(received, expected, options = {}) {
   const result = validateText(received, expected, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
@@ -6076,6 +6120,12 @@ function toContainToolText(received, expected, options = {}) {
         const expectedStr = Array.isArray(expected) ? expected.map((s) => `"${s}"`).join(", ") : `"${expected}"`;
         return result.pass ? `Expected response NOT to contain ${expectedStr}, but it did` : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -6084,12 +6134,19 @@ function toContainToolText(received, expected, options = {}) {
 // src/assertions/matchers/toMatchToolPattern.ts
 function toMatchToolPattern(received, patterns, options = {}) {
   const result = validatePattern(received, patterns, options);
+  const preview = result.details?.textPreview;
   return {
     pass: result.pass,
     message: () => {
       if (this.isNot) {
         return result.pass ? "Expected response NOT to match pattern(s), but it did" : result.message;
       }
+      if (!result.pass && preview) {
+        return `${result.message}
+
+Actual response (truncated):
+${preview}`;
+      }
       return result.message;
     }
   };
@@ -6874,6 +6931,12 @@ function createVercelOrchestrator() {
         });
         const totalDurationMs = Date.now() - llmStart;
         const llmDurationMs = totalDurationMs - mcpDurationMs;
+        const hostUsage = result.usage ? {
+          inputTokens: result.usage.promptTokens ?? 0,
+          outputTokens: result.usage.completionTokens ?? 0,
+          totalCostUsd: 0,
+          durationMs: llmDurationMs
+        } : void 0;
         const conversationHistory = (result.steps ?? []).map((step) => ({
           role: step.toolCalls?.length > 0 ? "tool" : "assistant",
           content: step.toolCalls?.length > 0 ? JSON.stringify(step.toolResults) : step.text ?? ""
@@ -6885,7 +6948,8 @@ function createVercelOrchestrator() {
           scenario,
           llmDurationMs,
           mcpDurationMs,
-          conversationHistory
+          conversationHistory,
+          usage: hostUsage
         };
       } catch (err) {
         return {
@@ -6903,6 +6967,7 @@ function parseStreamJson(stdout) {
   const lines = stdout.split("\n").filter((line) => line.trim().length > 0);
   const toolCalls = [];
   const textParts = [];
+  let usage;
   const conversationHistory = [];
   for (const line of lines) {
     let event;
@@ -6935,16 +7000,28 @@ function parseStreamJson(stdout) {
         }
       }
     }
-    if (event.type === "result" && typeof event.result === "string") {
-      if (textParts.length === 0) {
+    if (event.type === "result") {
+      if (typeof event.result === "string" && textParts.length === 0) {
         textParts.push(event.result);
       }
+      if (event.usage) {
+        usage = {
+          inputTokens: event.usage.input_tokens ?? 0,
+          outputTokens: event.usage.output_tokens ?? 0,
+          totalCostUsd: event.total_cost_usd ?? 0,
+          durationMs: event.duration_ms ?? 0,
+          durationApiMs: event.duration_api_ms,
+          cacheReadInputTokens: event.usage.cache_read_input_tokens,
+          cacheCreationInputTokens: event.usage.cache_creation_input_tokens
+        };
+      }
     }
     if (event.type === "result" && event.is_error === true) {
       return {
         success: false,
         toolCalls,
-        error: typeof event.result === "string" ? event.result : "CLI host reported an error"
+        error: typeof event.result === "string" ? event.result : "CLI host reported an error",
+        usage
       };
     }
   }
@@ -6956,7 +7033,8 @@ function parseStreamJson(stdout) {
     success: true,
     toolCalls,
     response: response || void 0,
-    conversationHistory: conversationHistory.length > 0 ? conversationHistory : void 0
+    conversationHistory: conversationHistory.length > 0 ? conversationHistory : void 0,
+    usage
   };
 }
 function createJsonParser(paths) {
@@ -7221,6 +7299,32 @@ async function execFileNoThrow(file, args) {
   }
 }
+// src/utils/usageUtils.ts
+function optionalSum(a, b) {
+  if (a === void 0 && b === void 0) return void 0;
+  return (a ?? 0) + (b ?? 0);
+}
+function sumUsage(a, b) {
+  if (!a && !b) return void 0;
+  if (!a) return b ? { ...b } : void 0;
+  if (!b) return { ...a };
+  return {
+    inputTokens: a.inputTokens + b.inputTokens,
+    outputTokens: a.outputTokens + b.outputTokens,
+    totalCostUsd: a.totalCostUsd + b.totalCostUsd,
+    durationMs: a.durationMs + b.durationMs,
+    durationApiMs: optionalSum(a.durationApiMs, b.durationApiMs),
+    cacheReadInputTokens: optionalSum(
+      a.cacheReadInputTokens,
+      b.cacheReadInputTokens
+    ),
+    cacheCreationInputTokens: optionalSum(
+      a.cacheCreationInputTokens,
+      b.cacheCreationInputTokens
+    )
+  };
+}
 // src/evals/evalRunner.ts
 async function executeToolCall(evalCase, mcp) {
   const mode = evalCase.mode || "direct";
@@ -7466,6 +7570,7 @@ async function runSingleIteration(evalCase, context, options) {
       };
     }
   }
+  const hostUsage = isMCPHostSimulationResult(response) && response.usage ? response.usage : void 0;
   return {
     id: evalCase.id,
     datasetName: options.datasetName ?? "single-case",
@@ -7482,7 +7587,8 @@ async function runSingleIteration(evalCase, context, options) {
     tags: evalCase.tags,
     toolPrecision,
     toolRecall,
-    mcpHostTrace
+    mcpHostTrace,
+    hostUsage
   };
 }
 function isInfrastructureError(err) {
@@ -7498,7 +7604,7 @@ function isInfrastructureError(err) {
   } else {
     return false;
   }
-  return name15 === "AbortError" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
+  return name15?.toLowerCase() === "aborterror" || msg.includes("econnreset") || msg.includes("etimedout") || msg.includes("econnrefused") || msg.includes("rate limit") || msg.includes("429") || msg.includes("503") || msg.includes("network") || // Prompt/context overflow — LLM couldn't run, not a tool discoverability failure
   msg.includes("prompt is too long") || msg.includes("context length exceeded") || msg.includes("maximum context length") || msg.includes("context_length_exceeded") || msg.includes("tokens > ") || code.includes("econnreset") || code.includes("etimedout") || code.includes("econnrefused");
 }
 async function runEvalCase(evalCase, context, options = {}) {
@@ -7518,7 +7624,8 @@ async function runEvalCase(evalCase, context, options = {}) {
         durationMs: result.durationMs,
         error: result.error,
         isInfrastructureError: infraError,
-        mcpHostTrace: result.mcpHostTrace
+        mcpHostTrace: result.mcpHostTrace,
+        hostUsage: result.hostUsage
       });
     } catch (err) {
       const errorMessage = err instanceof Error ? err.message : String(err);
@@ -7551,6 +7658,10 @@ async function runEvalCase(evalCase, context, options = {}) {
     durationMs: 0,
     tags: evalCase.tags
   };
+  const totalHostUsage = iterationResults.reduce(
+    (acc, r) => sumUsage(acc, r.hostUsage),
+    void 0
+  );
   return {
     ...baseResult,
     pass: assertionPassRate >= threshold,
@@ -7559,7 +7670,8 @@ async function runEvalCase(evalCase, context, options = {}) {
     infrastructureErrorRate,
     iterationResults,
     infrastructureErrorCount: infraErrors.length,
-    durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0)
+    durationMs: iterationResults.reduce((sum, r) => sum + r.durationMs, 0),
+    hostUsage: totalHostUsage
   };
 }
 function wilsonCI(k, n) {
@@ -7669,13 +7781,18 @@ async function runEvalDataset(options, context) {
     ...mcpHostModel !== void 0 && { mcpHostModel },
     ...judgeModel !== void 0 && { judgeModel }
   };
+  const runHostUsage = caseResults.reduce(
+    (acc, r) => sumUsage(acc, r.hostUsage),
+    void 0
+  );
   const result = {
     total,
     passed,
     failed: total - passed,
     caseResults,
     durationMs: Date.now() - startTime,
-    metadata
+    metadata,
+    totalHostUsage: runHostUsage
   };
   if (baselineResultsFrom) {
     try {
@@ -7969,6 +8086,6 @@ function formatCapabilities(capabilities) {
   return parts.length > 0 ? parts.join(", ") : "none declared";
 }
-export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
+export { BUILT_IN_RUBRICS, CLIOAuthClient, DiscoveryError, ENV_VAR_NAMES, EvalCaseSchema, EvalDatasetSchema, MCPConfigSchema, MCP_PROTOCOL_VERSION, PlaywrightOAuthClientProvider, SnapshotSanitizers, clearJudgeRegistry, closeMCPClient, createJudge, createMCPClientForConfig, createMCPFixture, createTokenAuthHeaders, discoverAuthorizationServer, discoverProtectedResource, expect, extractText, getMissingDependencyMessage, getRegisteredJudge, getResponseSizeBytes, hasValidTokens, injectTokens, isBuiltInRubric, isHttpConfig, isProviderAvailable, isStdioConfig, isTokenExpired, isTokenExpiringSoon, loadBaseline, loadEvalDataset, loadEvalDatasetFromObject, loadTokens, loadTokensFromEnv, test2 as mcpAuthTest, normalizeToolResponse, normalizeWhitespace, performClientCredentialsFlow, performOAuthSetup, performOAuthSetupIfNeeded, refreshAccessToken, registerJudge, resolveRubric, runConformanceChecks, runEvalCase, runEvalDataset, runServerComparison, saveBaseline, simulateMCPHost, test, validateAccessToken, validateError, validateEvalCase, validateEvalDataset, validateJudge, validateMCPConfig, validatePattern, validateResponse, validateSchema, validateSize, validateText, validateToolCallCount, validateToolCalls };
 //# sourceMappingURL=index.js.map
 //# sourceMappingURL=index.js.map