@gleanwork/mcp-server-tester 1.0.0-beta.7 → 1.0.0-beta.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -80,7 +80,7 @@ function JsonPreview({ data, maxLines = 15 }) {
80
80
 
81
81
  // package.json
82
82
  var package_default = {
83
- version: "1.0.0-beta.7"};
83
+ version: "1.0.0-beta.8"};
84
84
 
85
85
  // src/cli/templates/index.ts
86
86
  function getPlaywrightConfigTemplate(answers) {
@@ -214,7 +214,7 @@ type RubricSpec = BuiltInRubric | {
214
214
  };
215
215
 
216
216
  /** Valid LLM judge provider kinds. */
217
- type ProviderKind = 'anthropic' | 'openai' | 'google';
217
+ type ProviderKind = 'anthropic' | 'vertex-anthropic' | 'anthropic-agent-sdk' | 'openai' | 'google';
218
218
 
219
219
  /**
220
220
  * Tool call validators for mcp_host simulation results.
@@ -258,6 +258,12 @@ interface JudgeMatcherOptions {
258
258
  provider?: ProviderKind;
259
259
  /** Override the judge model */
260
260
  model?: string;
261
+ /**
262
+ * Name of a registered custom judge executor.
263
+ * When set, the named judge handles the entire evaluation pipeline
264
+ * and its `pass` result is authoritative.
265
+ */
266
+ judge?: string;
261
267
  }
262
268
  /**
263
269
  * Declaration merging for Playwright matchers
@@ -348,21 +354,30 @@ declare global {
348
354
  */
349
355
  toBeToolError(expected?: boolean | string | string[]): R;
350
356
  /**
351
- * Validates that a response passes LLM-as-judge evaluation
357
+ * Validates that a response passes LLM-as-judge evaluation.
352
358
  *
353
- * @param rubric - Evaluation rubric/criteria
354
- * @param options - Judge options
359
+ * Two call signatures:
360
+ * - With rubric: `toPassToolJudge(rubric, options?)` — built-in LLM judge
361
+ * - With named judge: `toPassToolJudge({ judge: 'name' })` — custom judge executor
355
362
  *
356
363
  * @example
357
364
  * ```typescript
365
+ * // Built-in LLM judge with rubric
358
366
  * expect(result).toPassToolJudge('Response should be helpful and accurate');
359
- * expect(result).toPassToolJudge('Response should match reference', {
367
+ * expect(result).toPassToolJudge('correctness', {
360
368
  * reference: expectedOutput,
361
369
  * passingThreshold: 0.8,
362
370
  * });
371
+ *
372
+ * // Named custom judge (registered via registerJudge)
373
+ * expect(result).toPassToolJudge({ judge: 'glean-completeness' });
363
374
  * ```
364
375
  */
365
376
  toPassToolJudge(rubric: RubricSpec, options?: JudgeMatcherOptions): Promise<R>;
377
+ toPassToolJudge(options: JudgeMatcherOptions): Promise<R>;
378
+ toPassToolJudge(judges: Array<JudgeMatcherOptions & {
379
+ rubric?: RubricSpec;
380
+ }>): Promise<R>;
366
381
  /**
367
382
  * Validates that a response meets size constraints
368
383
  *
@@ -452,16 +467,26 @@ type ToolPredicate = (response: unknown, text: string) => boolean | PredicateRes
452
467
  * Validates that a response passes LLM-as-judge evaluation.
453
468
  * Delegates evaluation logic to validateJudge() for consistency
454
469
  * with the validator/matcher duality pattern.
470
+ *
471
+ * Supports three call signatures:
472
+ * - toPassToolJudge(rubric, options?) — built-in LLM judge with rubric
473
+ * - toPassToolJudge({ judge: 'name', ... }) — named custom judge
474
+ * - toPassToolJudge([...judges]) — multi-judge (all must pass)
455
475
  */
456
476
 
457
477
  /**
458
- * Creates the toPassToolJudge matcher function
478
+ * The toPassToolJudge matcher function.
459
479
  *
460
- * Note: This is an async matcher that calls an LLM for evaluation.
480
+ * Accepts either:
481
+ * (received, rubric, options?) — rubric-based LLM judge
482
+ * (received, options) — named custom judge (options.judge required)
483
+ * (received, judges[]) — multi-judge (all must pass)
461
484
  */
462
485
  declare function toPassToolJudge(this: {
463
486
  isNot: boolean;
464
- }, received: unknown, rubric: RubricSpec, options?: JudgeMatcherOptions): Promise<{
487
+ }, received: unknown, rubricOrOptions: RubricSpec | JudgeMatcherOptions | Array<JudgeMatcherOptions & {
488
+ rubric?: RubricSpec;
489
+ }>, maybeOptions?: JudgeMatcherOptions): Promise<{
465
490
  pass: boolean;
466
491
  message: () => string;
467
492
  }>;
@@ -1,6 +1,6 @@
1
1
  import { expect as expect$1, test as test$1 } from '@playwright/test';
2
- import { query } from '@anthropic-ai/claude-agent-sdk';
3
2
  import { z } from 'zod';
3
+ import { query } from '@anthropic-ai/claude-agent-sdk';
4
4
  import { Client } from '@modelcontextprotocol/sdk/client/index.js';
5
5
  import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
6
6
  import { StreamableHTTPClientTransport } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
@@ -662,7 +662,175 @@ var JudgeResponseSchema = z.object({
662
662
  reasoning: z.string()
663
663
  });
664
664
 
665
- // src/judge/claudeAgentJudge.ts
665
+ // src/judge/anthropicJudge.ts
666
+ function createAnthropicJudge(config = {}) {
667
+ const apiKeyEnvVar = config.apiKeyEnvVar ?? "ANTHROPIC_API_KEY";
668
+ const apiKey = process.env[apiKeyEnvVar];
669
+ if (!apiKey) {
670
+ throw new Error(
671
+ `Anthropic judge requires an API key. Set the ${apiKeyEnvVar} environment variable.`
672
+ );
673
+ }
674
+ const model = config.model ?? "claude-sonnet-4-20250514";
675
+ const maxTokens = config.maxTokens ?? 1e3;
676
+ const temperature = config.temperature ?? 0;
677
+ return {
678
+ async evaluate(candidate, reference, rubric) {
679
+ let anthropicModule;
680
+ try {
681
+ anthropicModule = await import('@anthropic-ai/sdk');
682
+ } catch (err) {
683
+ throw new Error(
684
+ `Anthropic judge requires the \`@anthropic-ai/sdk\` package. Install it with: npm install @anthropic-ai/sdk
685
+ Original error: ${err instanceof Error ? err.message : String(err)}`
686
+ );
687
+ }
688
+ const client = new anthropicModule.default({ apiKey });
689
+ const prompt = buildJudgePrompt(candidate, reference, rubric);
690
+ const startTime = Date.now();
691
+ const response = await client.messages.create({
692
+ model,
693
+ max_tokens: maxTokens,
694
+ temperature,
695
+ system: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}',
696
+ messages: [{ role: "user", content: prompt }]
697
+ });
698
+ const durationMs = Date.now() - startTime;
699
+ const textBlock = response.content.find(
700
+ (b) => b.type === "text"
701
+ );
702
+ const text = textBlock?.text ?? "";
703
+ const parsed = parseJudgeResponse(text);
704
+ return {
705
+ pass: parsed.pass,
706
+ score: parsed.score,
707
+ reasoning: parsed.reasoning,
708
+ usage: {
709
+ inputTokens: response.usage?.input_tokens ?? 0,
710
+ outputTokens: response.usage?.output_tokens ?? 0,
711
+ totalCostUsd: 0,
712
+ durationMs
713
+ }
714
+ };
715
+ }
716
+ };
717
+ }
718
+ function buildJudgePrompt(candidate, reference, rubric) {
719
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
720
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
721
+ return `Rubric:
722
+ ${rubric}
723
+
724
+ <candidate_response>
725
+ ${candidateStr}
726
+ </candidate_response>
727
+
728
+ <reference_answer>
729
+ ${referenceStr ?? "No reference provided."}
730
+ </reference_answer>
731
+
732
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
733
+ }
734
+ function parseJudgeResponse(text) {
735
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
736
+ let parsed;
737
+ try {
738
+ parsed = JSON.parse(cleaned);
739
+ } catch {
740
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
741
+ }
742
+ const result = JudgeResponseSchema.safeParse(parsed);
743
+ if (!result.success) {
744
+ throw new Error(
745
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
746
+ Validation errors: ${JSON.stringify(result.error.issues)}`
747
+ );
748
+ }
749
+ return result.data;
750
+ }
751
+
752
+ // src/judge/vertexAnthropicJudge.ts
753
+ function createVertexAnthropicJudge(config = {}) {
754
+ const model = config.model ?? "claude-sonnet-4-20250514";
755
+ const maxTokens = config.maxTokens ?? 1e3;
756
+ const temperature = config.temperature ?? 0;
757
+ return {
758
+ async evaluate(candidate, reference, rubric) {
759
+ let vertexModule;
760
+ try {
761
+ vertexModule = await import('@anthropic-ai/vertex-sdk');
762
+ } catch (err) {
763
+ throw new Error(
764
+ `Vertex Anthropic judge requires the \`@anthropic-ai/vertex-sdk\` package. Install it with: npm install @anthropic-ai/vertex-sdk
765
+ Original error: ${err instanceof Error ? err.message : String(err)}`
766
+ );
767
+ }
768
+ const client = new vertexModule.AnthropicVertex({
769
+ projectId: process.env.GOOGLE_VERTEX_PROJECT ?? process.env.CLOUD_ML_PROJECT_ID,
770
+ region: process.env.GOOGLE_VERTEX_LOCATION ?? "us-east5"
771
+ });
772
+ const prompt = buildJudgePrompt2(candidate, reference, rubric);
773
+ const startTime = Date.now();
774
+ const response = await client.messages.create({
775
+ model,
776
+ max_tokens: maxTokens,
777
+ temperature,
778
+ system: 'You are an expert evaluator. Respond with valid JSON only: {"pass": true|false, "score": 0.0-1.0, "reasoning": "explanation"}',
779
+ messages: [{ role: "user", content: prompt }]
780
+ });
781
+ const durationMs = Date.now() - startTime;
782
+ const textBlock = response.content.find(
783
+ (b) => b.type === "text"
784
+ );
785
+ const text = textBlock?.text ?? "";
786
+ const parsed = parseJudgeResponse2(text);
787
+ return {
788
+ pass: parsed.pass,
789
+ score: parsed.score,
790
+ reasoning: parsed.reasoning,
791
+ usage: {
792
+ inputTokens: response.usage?.input_tokens ?? 0,
793
+ outputTokens: response.usage?.output_tokens ?? 0,
794
+ totalCostUsd: 0,
795
+ durationMs
796
+ }
797
+ };
798
+ }
799
+ };
800
+ }
801
+ function buildJudgePrompt2(candidate, reference, rubric) {
802
+ const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
803
+ const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
804
+ return `Rubric:
805
+ ${rubric}
806
+
807
+ <candidate_response>
808
+ ${candidateStr}
809
+ </candidate_response>
810
+
811
+ <reference_answer>
812
+ ${referenceStr ?? "No reference provided."}
813
+ </reference_answer>
814
+
815
+ Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
816
+ }
817
+ function parseJudgeResponse2(text) {
818
+ const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
819
+ let parsed;
820
+ try {
821
+ parsed = JSON.parse(cleaned);
822
+ } catch {
823
+ throw new Error(`Failed to parse judge response as JSON: ${text}`);
824
+ }
825
+ const result = JudgeResponseSchema.safeParse(parsed);
826
+ if (!result.success) {
827
+ throw new Error(
828
+ `Judge returned invalid response. Expected {pass, score, reasoning} but got: ${cleaned.slice(0, 500)}
829
+ Validation errors: ${JSON.stringify(result.error.issues)}`
830
+ );
831
+ }
832
+ return result.data;
833
+ }
666
834
  function createClaudeAgentJudge(config) {
667
835
  const model = config.model ?? "claude-sonnet-4-20250514";
668
836
  const maxBudgetUsd = config.maxBudgetUsd ?? 0.1;
@@ -680,7 +848,7 @@ function createClaudeAgentJudge(config) {
680
848
  exceedsMaxToolOutputSize: true
681
849
  };
682
850
  }
683
- const prompt = buildJudgePrompt(candidate, reference, rubric);
851
+ const prompt = buildJudgePrompt3(candidate, reference, rubric);
684
852
  try {
685
853
  let resultMessage;
686
854
  for await (const message of query({
@@ -712,7 +880,7 @@ function createClaudeAgentJudge(config) {
712
880
  );
713
881
  }
714
882
  const responseText = resultMessage.result ?? "";
715
- const parsed = parseJudgeResponse(responseText);
883
+ const parsed = parseJudgeResponse3(responseText);
716
884
  const usage = {
717
885
  inputTokens: resultMessage.usage?.input_tokens ?? 0,
718
886
  outputTokens: resultMessage.usage?.output_tokens ?? 0,
@@ -741,7 +909,7 @@ function createClaudeAgentJudge(config) {
741
909
  function buildSystemPrompt() {
742
910
  return 'You are an expert evaluator. Evaluate the candidate response based on the rubric provided. Respond ONLY with valid JSON in this exact format: {"pass": boolean, "score": number (0-1), "reasoning": string}. Do not include any other text, markdown formatting, or code blocks.';
743
911
  }
744
- function buildJudgePrompt(candidate, reference, rubric) {
912
+ function buildJudgePrompt3(candidate, reference, rubric) {
745
913
  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
746
914
  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
747
915
  const parts = [];
@@ -758,7 +926,7 @@ function buildJudgePrompt(candidate, reference, rubric) {
758
926
  );
759
927
  return parts.join("");
760
928
  }
761
- function parseJudgeResponse(text) {
929
+ function parseJudgeResponse3(text) {
762
930
  let jsonText = text.trim();
763
931
  if (jsonText.startsWith("```json")) {
764
932
  jsonText = jsonText.slice(7);
@@ -815,7 +983,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
815
983
  );
816
984
  }
817
985
  const client = new openaiModule.default({ apiKey });
818
- const prompt = buildJudgePrompt2(candidate, reference, rubric);
986
+ const prompt = buildJudgePrompt4(candidate, reference, rubric);
819
987
  const startTime = Date.now();
820
988
  const completion = await client.chat.completions.create({
821
989
  model,
@@ -831,7 +999,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
831
999
  });
832
1000
  const durationMs = Date.now() - startTime;
833
1001
  const text = completion.choices[0]?.message.content ?? "";
834
- const parsed = parseJudgeResponse2(text);
1002
+ const parsed = parseJudgeResponse4(text);
835
1003
  return {
836
1004
  pass: parsed.pass,
837
1005
  score: parsed.score,
@@ -846,7 +1014,7 @@ Original error: ${err instanceof Error ? err.message : String(err)}`
846
1014
  }
847
1015
  };
848
1016
  }
849
- function buildJudgePrompt2(candidate, reference, rubric) {
1017
+ function buildJudgePrompt4(candidate, reference, rubric) {
850
1018
  const candidateStr = typeof candidate === "string" ? candidate : JSON.stringify(candidate, null, 2);
851
1019
  const referenceStr = reference !== null && reference !== void 0 ? typeof reference === "string" ? reference : JSON.stringify(reference, null, 2) : null;
852
1020
  return `Rubric:
@@ -862,7 +1030,7 @@ ${referenceStr ?? "No reference provided."}
862
1030
 
863
1031
  Evaluate and return JSON: {"pass": boolean, "score": number (0-1), "reasoning": string}`;
864
1032
  }
865
- function parseJudgeResponse2(text) {
1033
+ function parseJudgeResponse4(text) {
866
1034
  const cleaned = text.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
867
1035
  let parsed;
868
1036
  try {
@@ -964,6 +1132,10 @@ function createJudge(config = {}) {
964
1132
  const provider = config.provider ?? "anthropic";
965
1133
  switch (provider) {
966
1134
  case "anthropic":
1135
+ return createAnthropicJudge(config);
1136
+ case "vertex-anthropic":
1137
+ return createVertexAnthropicJudge(config);
1138
+ case "anthropic-agent-sdk":
967
1139
  return createClaudeAgentJudge(config);
968
1140
  case "openai":
969
1141
  return createOpenAIJudge(config);
@@ -974,6 +1146,19 @@ function createJudge(config = {}) {
974
1146
  }
975
1147
  }
976
1148
 
1149
+ // src/judge/judgeRegistry.ts
1150
+ var registry = /* @__PURE__ */ new Map();
1151
+ function getRegisteredJudge(name) {
1152
+ const executor = registry.get(name);
1153
+ if (!executor) {
1154
+ const available = registry.size > 0 ? ` Available judges: ${[...registry.keys()].join(", ")}` : " No judges are registered.";
1155
+ throw new Error(
1156
+ `Judge "${name}" is not registered.${available} Register it with registerJudge() before tests run.`
1157
+ );
1158
+ }
1159
+ return executor;
1160
+ }
1161
+
977
1162
  // src/assertions/validators/judge.ts
978
1163
  function computeStdDev(scores, mean) {
979
1164
  if (scores.length <= 1) return 0;
@@ -982,6 +1167,7 @@ function computeStdDev(scores, mean) {
982
1167
  }
983
1168
  async function validateJudge(response, config) {
984
1169
  const {
1170
+ judge: judgeName,
985
1171
  rubric,
986
1172
  reference,
987
1173
  threshold = 0.7,
@@ -994,6 +1180,29 @@ async function validateJudge(response, config) {
994
1180
  maxBudgetUsd,
995
1181
  maxToolOutputSize
996
1182
  } = config;
1183
+ if (judgeName !== void 0) {
1184
+ try {
1185
+ const executor = getRegisteredJudge(judgeName);
1186
+ const judgeResult = await executor(response, reference ?? void 0);
1187
+ const score = judgeResult.score;
1188
+ const passed = score >= threshold;
1189
+ return {
1190
+ pass: passed,
1191
+ message: passed ? `Custom judge "${judgeName}" passed with score ${score.toFixed(2)}` : `Custom judge "${judgeName}" failed with score ${score.toFixed(2)} (threshold: ${threshold}). ${judgeResult.reasoning ?? ""}`
1192
+ };
1193
+ } catch (err) {
1194
+ return {
1195
+ pass: false,
1196
+ message: `Custom judge "${judgeName}" error: ${err instanceof Error ? err.message : String(err)}`
1197
+ };
1198
+ }
1199
+ }
1200
+ if (rubric === void 0) {
1201
+ return {
1202
+ pass: false,
1203
+ message: 'Judge evaluation failed: either "judge" or "rubric" must be provided'
1204
+ };
1205
+ }
997
1206
  const resolvedRubric = resolveRubric(rubric);
998
1207
  const judgeConfig = {
999
1208
  ...provider !== void 0 && { provider },
@@ -1040,11 +1249,17 @@ async function validateJudge(response, config) {
1040
1249
  return {
1041
1250
  pass: passed,
1042
1251
  message: passed ? `Judge passed with score ${meanScore.toFixed(2)}${repNote}` : `Judge failed with score ${meanScore.toFixed(2)} (threshold: ${threshold})${repNote}. ${lastReasoning ?? ""}`,
1043
- details: reps > 1 ? {
1044
- scores,
1045
- scoreStdDev: stdDev,
1046
- highVariance
1047
- } : void 0
1252
+ details: {
1253
+ score: meanScore,
1254
+ reasoning: lastReasoning,
1255
+ judgeProvider: provider ?? "anthropic",
1256
+ judgeModel: model,
1257
+ ...reps > 1 && {
1258
+ scores,
1259
+ scoreStdDev: stdDev,
1260
+ highVariance
1261
+ }
1262
+ }
1048
1263
  };
1049
1264
  } catch (err) {
1050
1265
  return {
@@ -1056,31 +1271,68 @@ async function validateJudge(response, config) {
1056
1271
 
1057
1272
  // src/assertions/matchers/toPassToolJudge.ts
1058
1273
  var DEFAULT_PASSING_THRESHOLD = 0.7;
1059
- async function toPassToolJudge(received, rubric, options = {}) {
1274
+ async function runSingleJudge(received, rubric, options) {
1060
1275
  const {
1061
1276
  reference = null,
1062
1277
  passingThreshold = DEFAULT_PASSING_THRESHOLD,
1063
1278
  reps,
1064
1279
  provider,
1065
- model
1280
+ model,
1281
+ judge
1066
1282
  } = options;
1067
1283
  const validation = await validateJudge(received, {
1068
- rubric,
1284
+ ...rubric !== void 0 && { rubric },
1069
1285
  reference: reference ?? void 0,
1070
1286
  threshold: passingThreshold,
1071
1287
  ...reps !== void 0 && { reps },
1072
1288
  ...provider !== void 0 && { provider },
1073
- ...model !== void 0 && { model }
1289
+ ...model !== void 0 && { model },
1290
+ ...judge !== void 0 && { judge }
1074
1291
  });
1292
+ return { pass: validation.pass, message: validation.message };
1293
+ }
1294
+ async function toPassToolJudge(received, rubricOrOptions, maybeOptions) {
1295
+ if (Array.isArray(rubricOrOptions)) {
1296
+ const results = await Promise.all(
1297
+ rubricOrOptions.map(async (judgeConfig) => {
1298
+ const { rubric: r, ...opts } = judgeConfig;
1299
+ return runSingleJudge(received, r, opts);
1300
+ })
1301
+ );
1302
+ const allPassed = results.every((r) => r.pass);
1303
+ const passCount = results.filter((r) => r.pass).length;
1304
+ const summary = `${passCount}/${results.length} judges passed`;
1305
+ const details = results.map((r) => r.message).join("\n");
1306
+ if (this.isNot) {
1307
+ return {
1308
+ pass: !allPassed,
1309
+ message: () => allPassed ? `Expected all judges to fail, but ${summary}` : `Judges failed as expected: ${summary}`
1310
+ };
1311
+ }
1312
+ return {
1313
+ pass: allPassed,
1314
+ message: () => `${summary}
1315
+ ${details}`
1316
+ };
1317
+ }
1318
+ let rubric;
1319
+ let options;
1320
+ if (typeof rubricOrOptions === "string" || typeof rubricOrOptions === "object" && rubricOrOptions !== null && "text" in rubricOrOptions) {
1321
+ rubric = rubricOrOptions;
1322
+ options = maybeOptions ?? {};
1323
+ } else {
1324
+ options = rubricOrOptions;
1325
+ }
1326
+ const result = await runSingleJudge(received, rubric, options);
1075
1327
  if (this.isNot) {
1076
1328
  return {
1077
- pass: !validation.pass,
1078
- message: () => validation.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
1329
+ pass: !result.pass,
1330
+ message: () => result.pass ? `Expected judge evaluation to fail, but it passed` : `Judge evaluation failed as expected`
1079
1331
  };
1080
1332
  }
1081
1333
  return {
1082
- pass: validation.pass,
1083
- message: () => validation.message
1334
+ pass: result.pass,
1335
+ message: () => result.message
1084
1336
  };
1085
1337
  }
1086
1338
 
@@ -1188,9 +1440,17 @@ async function toSatisfyToolPredicate(received, predicate, description) {
1188
1440
  function isSimulationResult(value) {
1189
1441
  return typeof value === "object" && value !== null && "success" in value && "toolCalls" in value && Array.isArray(value.toolCalls);
1190
1442
  }
1443
+ function isPatternMatcher(v) {
1444
+ return typeof v === "object" && v !== null && "$pattern" in v && typeof v["$pattern"] === "string";
1445
+ }
1191
1446
  function partialMatch(actual, expected) {
1192
1447
  return Object.entries(expected).every(([k, v]) => {
1193
1448
  const actualVal = actual[k];
1449
+ if (isPatternMatcher(v)) {
1450
+ if (typeof actualVal !== "string") return false;
1451
+ const re = new RegExp(v.$pattern, v.$flags);
1452
+ return re.test(actualVal);
1453
+ }
1194
1454
  if (typeof v === "object" && v !== null && typeof actualVal === "object" && actualVal !== null) {
1195
1455
  return partialMatch(
1196
1456
  actualVal,
@@ -1434,7 +1694,7 @@ var debugHttp = createDebug(`${NAMESPACE}:http`);
1434
1694
 
1435
1695
  // package.json
1436
1696
  var package_default = {
1437
- version: "1.0.0-beta.7"};
1697
+ version: "1.0.0-beta.8"};
1438
1698
  var debug = createDebug("mcp-server-tester:oauth-flow");
1439
1699
  async function generatePKCE() {
1440
1700
  const codeVerifier = oauth.generateRandomCodeVerifier();