npm - @gleanwork/mcp-server-tester - Versions diffs - 1.0.0-beta.7 → 1.0.0 - Mend

@gleanwork/mcp-server-tester 1.0.0-beta.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +20 -1
package/dist/cli/index.js +12 -1
package/dist/fixtures/mcp.d.ts +33 -8
package/dist/fixtures/mcp.js +354 -37
package/dist/fixtures/mcp.js.map +1 -1
package/dist/index.cjs +721 -76
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +533 -116
package/dist/index.d.ts +533 -116
package/dist/index.js +719 -78
package/dist/index.js.map +1 -1
package/dist/reporters/ui-dist/app.js +8 -134
package/dist/reporters/ui-dist/styles.css +1 -1
package/package.json +11 -6
package/dist/reporters/mcpReporter.d.cts +0 -90
package/dist/reporters/mcpReporter.d.ts +0 -90

package/README.md CHANGED Viewed

@@ -126,7 +126,7 @@ LLM host mode makes real API calls and produces non-deterministic results. Use `
 Requires Node.js 22+.
 ```bash
-npm install --save-dev @gleanwork/mcp-server-tester @playwright/test zod
+npm install --save-dev @gleanwork/mcp-server-tester @playwright/test
 ```
 The Anthropic SDK is only needed for LLM-as-judge assertions or LLM host mode with the Anthropic provider:
@@ -182,6 +182,25 @@ For HTTP servers, set `transport: 'http'` and `serverUrl`. For servers that requ
 - [Development](./docs/development.md) — contributing and building
 - [Migration Guide (v0.12 → v1.0)](./docs/migrations/migration-1.0.md) — upgrading from pre-1.0 releases
+## AI Skills
+Install AI skills to help your coding assistant generate tests, eval datasets, and MCP host evals:
+```bash
+npx skills add -g gleanwork/mcp-server-tester
+```
+This installs skills globally so they're available across all your projects. Four skills are included:
+| Skill                 | Description                                                 |
+| --------------------- | ----------------------------------------------------------- |
+| `mcp-tester-guide`    | Framework reference — matchers, config, auth, anti-patterns |
+| `write-mcp-test`      | Generate direct-mode Playwright tests                       |
+| `write-mcp-eval`      | Generate data-driven eval datasets                          |
+| `write-mcp-host-eval` | Generate LLM host simulation evals                          |
+Compatible with Claude Code, Cursor, Windsurf, Copilot, and [40+ other AI agents](https://github.com/nicepkg/nice-skills).
 ## Examples
 The `examples/` directory contains complete working examples:

package/dist/cli/index.js CHANGED Viewed

@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
 // package.json
 var package_default = {
-  version: "1.0.0-beta.7"};
+  version: "1.0.0"};
 // src/cli/templates/index.ts
 function getPlaywrightConfigTemplate(answers) {
@@ -1005,6 +1005,17 @@ async function createMCPClientForConfig(config, options) {
 }
 async function closeMCPClient(client) {
   try {
+    const transport = client.transport;
+    if (transport instanceof StreamableHTTPClientTransport) {
+      try {
+        await transport.terminateSession();
+      } catch (sessionError) {
+        debugClient(
+          "Error terminating session: %s",
+          sessionError instanceof Error ? sessionError.message : String(sessionError)
+        );
+      }
+    }
     await client.close();
   } catch (error) {
     debugClient(

package/dist/fixtures/mcp.d.ts CHANGED Viewed

@@ -214,7 +214,7 @@ type RubricSpec = BuiltInRubric | {
 };
 /** Valid LLM judge provider kinds. */
-type ProviderKind = 'anthropic' | 'openai' | 'google';
+type ProviderKind = 'anthropic' | 'vertex-anthropic' | 'anthropic-agent-sdk' | 'openai' | 'google';
 /**
  * Tool call validators for mcp_host simulation results.
@@ -258,6 +258,12 @@ interface JudgeMatcherOptions {
     provider?: ProviderKind;
     /** Override the judge model */
     model?: string;
+    /**
+     * Name of a registered custom judge executor.
+     * When set, the named judge handles the entire evaluation pipeline
+     * and its `pass` result is authoritative.
+     */
+    judge?: string;
 }
 /**
  * Declaration merging for Playwright matchers
@@ -348,21 +354,30 @@ declare global {
              */
             toBeToolError(expected?: boolean | string | string[]): R;
             /**
-             * Validates that a response passes LLM-as-judge evaluation
+             * Validates that a response passes LLM-as-judge evaluation.
              *
-             * @param rubric - Evaluation rubric/criteria
-             * @param options - Judge options
+             * Common call signatures (an array of judges is also accepted; all must pass):
+             * - With rubric: `toPassToolJudge(rubric, options?)` — built-in LLM judge
+             * - With named judge: `toPassToolJudge({ judge: 'name' })` — custom judge executor
              *
              * @example
              * ```typescript
+             * // Built-in LLM judge with rubric
              * expect(result).toPassToolJudge('Response should be helpful and accurate');
-             * expect(result).toPassToolJudge('Response should match reference', {
+             * expect(result).toPassToolJudge('correctness', {
              *   reference: expectedOutput,
              *   passingThreshold: 0.8,
              * });
+             *
+             * // Named custom judge (registered via registerJudge)
+             * expect(result).toPassToolJudge({ judge: 'glean-completeness' });
              * ```
              */
             toPassToolJudge(rubric: RubricSpec, options?: JudgeMatcherOptions): Promise<R>;
+            toPassToolJudge(options: JudgeMatcherOptions): Promise<R>;
+            toPassToolJudge(judges: Array<JudgeMatcherOptions & {
+                rubric?: RubricSpec;
+            }>): Promise<R>;
             /**
              * Validates that a response meets size constraints
              *
@@ -452,16 +467,26 @@ type ToolPredicate = (response: unknown, text: string) => boolean | PredicateRes
  * Validates that a response passes LLM-as-judge evaluation.
  * Delegates evaluation logic to validateJudge() for consistency
  * with the validator/matcher duality pattern.
+ *
+ * Supports three call signatures:
+ *   - toPassToolJudge(rubric, options?)        — built-in LLM judge with rubric
+ *   - toPassToolJudge({ judge: 'name', ... })  — named custom judge
+ *   - toPassToolJudge([...judges])             — multi-judge (all must pass)
  */
 /**
- * Creates the toPassToolJudge matcher function
+ * The toPassToolJudge matcher function.
  *
- * Note: This is an async matcher that calls an LLM for evaluation.
+ * Accepts one of:
+ *   (received, rubric, options?) — rubric-based LLM judge
+ *   (received, options)          — named custom judge (options.judge required)
+ *   (received, judges[])         — multi-judge (all must pass)
  */
 declare function toPassToolJudge(this: {
     isNot: boolean;
-}, received: unknown, rubric: RubricSpec, options?: JudgeMatcherOptions): Promise<{
+}, received: unknown, rubricOrOptions: RubricSpec | JudgeMatcherOptions | Array<JudgeMatcherOptions & {
+    rubric?: RubricSpec;
+}>, maybeOptions?: JudgeMatcherOptions): Promise<{
     pass: boolean;
     message: () => string;
 }>;