@mcpjam/inspector 0.9.63 → 1.0.0

This diff compares publicly released versions of the package as published to a supported registry; it is provided for informational purposes only and reflects the package contents as they appear in that registry.
@@ -543,9 +543,14 @@ import { Hono as Hono6 } from "hono";
543
543
  import { streamText } from "ai";
544
544
 
545
545
  // ../shared/types.ts
546
- var isMCPJamProvidedModel = (provider) => {
547
- const MCPJAM_PROVIDERS = ["meta"];
548
- return MCPJAM_PROVIDERS.includes(provider);
546
+ var MCPJAM_PROVIDED_MODEL_IDS = [
547
+ "meta-llama/llama-3.3-70b-instruct",
548
+ "openai/gpt-oss-120b",
549
+ "x-ai/grok-4-fast",
550
+ "openai/gpt-5-nano"
551
+ ];
552
+ var isMCPJamProvidedModel = (modelId) => {
553
+ return MCPJAM_PROVIDED_MODEL_IDS.includes(modelId);
549
554
  };
550
555
 
551
556
  // routes/mcp/chat.ts
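
In this hunk, MCPJam-provided models are identified by an explicit list of model IDs instead of by provider name. A minimal sketch of how the new check is consumed, based on this hunk and the chat-route change later in the diff; the ChatRequest shape and the shouldSendToBackend helper name are illustrative, not part of the bundle:

const MCPJAM_PROVIDED_MODEL_IDS = [
  "meta-llama/llama-3.3-70b-instruct",
  "openai/gpt-oss-120b",
  "x-ai/grok-4-fast",
  "openai/gpt-5-nano",
];
const isMCPJamProvidedModel = (modelId: string): boolean =>
  MCPJAM_PROVIDED_MODEL_IDS.includes(modelId);

interface ChatRequest {
  model?: { id?: string };
  sendMessagesToBackend?: boolean;
}
const shouldSendToBackend = (req: ChatRequest): boolean =>
  Boolean(req.model?.id && isMCPJamProvidedModel(req.model.id) && req.sendMessagesToBackend);
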
@@ -671,13 +676,6 @@ function ensureOutputSchema(schema) {
671
676
  }
672
677
  return schema;
673
678
  }
674
- function extractPureToolName(toolKey) {
675
- const separatorIndex = toolKey.indexOf("_");
676
- if (separatorIndex === -1 || separatorIndex === toolKey.length - 1) {
677
- return toolKey;
678
- }
679
- return toolKey.slice(separatorIndex + 1);
680
- }
681
679
  function convertMastraToolToVercelTool(toolName, mastraTool, options) {
682
680
  const inputSchema = ensureInputSchema(mastraTool.inputSchema);
683
681
  const outputSchema = ensureOutputSchema(mastraTool.outputSchema);
@@ -743,10 +741,9 @@ function convertMastraToolToVercelTool(toolName, mastraTool, options) {
743
741
  function convertMastraToolsToVercelTools(mastraTools) {
744
742
  return Object.fromEntries(
745
743
  Object.entries(mastraTools).map(([name, mastraTool]) => {
746
- const pureToolName = extractPureToolName(name);
747
744
  return [
748
- pureToolName,
749
- convertMastraToolToVercelTool(pureToolName, mastraTool, {
745
+ name,
746
+ convertMastraToolToVercelTool(name, mastraTool, {
750
747
  originalName: name
751
748
  })
752
749
  ];
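
With extractPureToolName removed, tool keys are no longer split at the first underscore; the full key is used as both the map key and the tool name. A small sketch of the behavioral difference, with simplified types and the converter passed in as a parameter rather than the bundle's convertMastraToolToVercelTool:

// Before 1.0.0: "filesystem_read_file" was renamed to "read_file" (everything after the first "_").
// From 1.0.0 the key is passed through unchanged.
type Convert = (toolName: string, tool: unknown, options: { originalName: string }) => unknown;

const toVercelTools = (mastraTools: Record<string, unknown>, convert: Convert) =>
  Object.fromEntries(
    Object.entries(mastraTools).map(([name, mastraTool]) => [
      name, // previously: extractPureToolName(name)
      convert(name, mastraTool, { originalName: name }),
    ]),
  );
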
@@ -889,7 +886,8 @@ var runBackendConversation = async (options) => {
889
886
  while (step < options.maxSteps) {
890
887
  const payload = {
891
888
  tools: options.toolDefinitions,
892
- messages: JSON.stringify(options.messageHistory)
889
+ messages: JSON.stringify(options.messageHistory),
890
+ model: options.modelId
893
891
  };
894
892
  const data = await options.fetchBackend(payload);
895
893
  if (!data || !data.ok || !Array.isArray(data.messages)) {
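
runBackendConversation now forwards the model ID with every backend request. A sketch of the resulting payload shape, with field names taken from this hunk and the toolDefinitions type simplified:

interface BackendChatPayload {
  tools: unknown[]; // options.toolDefinitions
  messages: string; // JSON.stringify(options.messageHistory)
  model: string;    // options.modelId, new in 1.0.0
}

const buildPayload = (options: {
  toolDefinitions: unknown[];
  messageHistory: unknown[];
  modelId: string;
}): BackendChatPayload => ({
  tools: options.toolDefinitions,
  messages: JSON.stringify(options.messageHistory),
  model: options.modelId,
});
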
@@ -1208,7 +1206,7 @@ var createStreamingResponse = async (model, aiSdkTools, messages, streamingConte
1208
1206
  "[DONE]"
1209
1207
  );
1210
1208
  };
1211
- var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager, baseUrl, authHeader, selectedServers) => {
1209
+ var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager, baseUrl, modelId, authHeader, selectedServers) => {
1212
1210
  const messageHistory = (messages || []).map((m) => {
1213
1211
  switch (m.role) {
1214
1212
  case "system":
@@ -1262,6 +1260,7 @@ var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager,
1262
1260
  await runBackendConversation({
1263
1261
  maxSteps: MAX_AGENT_STEPS,
1264
1262
  messageHistory,
1263
+ modelId,
1265
1264
  toolDefinitions: toolDefs,
1266
1265
  fetchBackend: async (payload) => {
1267
1266
  const data = await sendBackendRequest(
@@ -1367,7 +1366,7 @@ chat.post("/", async (c) => {
1367
1366
  400
1368
1367
  );
1369
1368
  }
1370
- const sendToBackend = isMCPJamProvidedModel(provider) && Boolean(requestData.sendMessagesToBackend);
1369
+ const sendToBackend = model?.id && isMCPJamProvidedModel(model.id) && Boolean(requestData.sendMessagesToBackend);
1371
1370
  if (!sendToBackend && (!model?.id || !apiKey)) {
1372
1371
  return c.json(
1373
1372
  {
@@ -1438,6 +1437,7 @@ chat.post("/", async (c) => {
1438
1437
  streamingContext,
1439
1438
  mcpClientManager,
1440
1439
  process.env.CONVEX_HTTP_URL,
1440
+ model.id,
1441
1441
  authHeader,
1442
1442
  requestData.selectedServers
1443
1443
  );
@@ -1985,13 +1985,13 @@ async function handleProxy(c) {
1985
1985
  const xfProto = req.headers.get("x-forwarded-proto");
1986
1986
  const xfHost = req.headers.get("x-forwarded-host");
1987
1987
  const host = xfHost || req.headers.get("host");
1988
- let proto2 = xfProto;
1989
- if (!proto2) {
1988
+ let proto = xfProto;
1989
+ if (!proto) {
1990
1990
  const originHeader = req.headers.get("origin");
1991
- if (originHeader && /^https:/i.test(originHeader)) proto2 = "https";
1991
+ if (originHeader && /^https:/i.test(originHeader)) proto = "https";
1992
1992
  }
1993
- if (!proto2) proto2 = "http";
1994
- const proxyOrigin = host ? `${proto2}://${host}` : new URL(req.url).origin;
1993
+ if (!proto) proto = "http";
1994
+ const proxyOrigin = host ? `${proto}://${host}` : new URL(req.url).origin;
1995
1995
  const sessionId = crypto.randomUUID();
1996
1996
  interceptorStore.setSessionEndpoint(
1997
1997
  id,
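
Behavior is unchanged here; the bundler-generated proto2 alias is simply renamed to proto. A condensed sketch of the resolution order in handleProxy (x-forwarded-proto, then the origin header's scheme, then http), using a hypothetical helper name:

// Hypothetical helper illustrating the proxy-origin resolution in handleProxy.
const resolveProxyOrigin = (req: Request): string => {
  const host = req.headers.get("x-forwarded-host") || req.headers.get("host");
  let proto = req.headers.get("x-forwarded-proto");
  if (!proto) {
    const origin = req.headers.get("origin");
    if (origin && /^https:/i.test(origin)) proto = "https";
  }
  if (!proto) proto = "http";
  return host ? `${proto}://${host}` : new URL(req.url).origin;
};
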
@@ -3688,14 +3688,14 @@ var require_node_gyp_build = __commonJS({
3688
3688
  "../common/temp/node_modules/.pnpm/node-gyp-build@4.8.4/node_modules/node-gyp-build/node-gyp-build.js"(exports, module) {
3689
3689
  var fs = __require("fs");
3690
3690
  var path = __require("path");
3691
- var os2 = __require("os");
3691
+ var os = __require("os");
3692
3692
  var runtimeRequire = typeof __webpack_require__ === "function" ? __non_webpack_require__ : __require;
3693
3693
  var vars = process.config && process.config.variables || {};
3694
3694
  var prebuildsOnly = !!process.env.PREBUILDS_ONLY;
3695
3695
  var abi = process.versions.modules;
3696
3696
  var runtime = isElectron() ? "electron" : isNwjs() ? "node-webkit" : "node";
3697
- var arch = process.env.npm_config_arch || os2.arch();
3698
- var platform = process.env.npm_config_platform || os2.platform();
3697
+ var arch = process.env.npm_config_arch || os.arch();
3698
+ var platform = process.env.npm_config_platform || os.platform();
3699
3699
  var libc = process.env.LIBC || (isAlpine(platform) ? "musl" : "glibc");
3700
3700
  var armv = process.env.ARM_VERSION || (arch === "arm64" ? "8" : vars.arm_version) || "";
3701
3701
  var uv = (process.versions.uv || "").split(".")[0];
@@ -7402,502 +7402,8 @@ var wrapper_default = import_websocket.default;
7402
7402
  var nodeWebSocket = wrapper_default;
7403
7403
  setDefaultWebSocketConstructor(nodeWebSocket);
7404
7404
 
7405
- // ../evals-cli/node_modules/chalk/source/vendor/ansi-styles/index.js
7406
- var ANSI_BACKGROUND_OFFSET = 10;
7407
- var wrapAnsi16 = (offset = 0) => (code2) => `\x1B[${code2 + offset}m`;
7408
- var wrapAnsi256 = (offset = 0) => (code2) => `\x1B[${38 + offset};5;${code2}m`;
7409
- var wrapAnsi16m = (offset = 0) => (red, green, blue) => `\x1B[${38 + offset};2;${red};${green};${blue}m`;
7410
- var styles = {
7411
- modifier: {
7412
- reset: [0, 0],
7413
- // 21 isn't widely supported and 22 does the same thing
7414
- bold: [1, 22],
7415
- dim: [2, 22],
7416
- italic: [3, 23],
7417
- underline: [4, 24],
7418
- overline: [53, 55],
7419
- inverse: [7, 27],
7420
- hidden: [8, 28],
7421
- strikethrough: [9, 29]
7422
- },
7423
- color: {
7424
- black: [30, 39],
7425
- red: [31, 39],
7426
- green: [32, 39],
7427
- yellow: [33, 39],
7428
- blue: [34, 39],
7429
- magenta: [35, 39],
7430
- cyan: [36, 39],
7431
- white: [37, 39],
7432
- // Bright color
7433
- blackBright: [90, 39],
7434
- gray: [90, 39],
7435
- // Alias of `blackBright`
7436
- grey: [90, 39],
7437
- // Alias of `blackBright`
7438
- redBright: [91, 39],
7439
- greenBright: [92, 39],
7440
- yellowBright: [93, 39],
7441
- blueBright: [94, 39],
7442
- magentaBright: [95, 39],
7443
- cyanBright: [96, 39],
7444
- whiteBright: [97, 39]
7445
- },
7446
- bgColor: {
7447
- bgBlack: [40, 49],
7448
- bgRed: [41, 49],
7449
- bgGreen: [42, 49],
7450
- bgYellow: [43, 49],
7451
- bgBlue: [44, 49],
7452
- bgMagenta: [45, 49],
7453
- bgCyan: [46, 49],
7454
- bgWhite: [47, 49],
7455
- // Bright color
7456
- bgBlackBright: [100, 49],
7457
- bgGray: [100, 49],
7458
- // Alias of `bgBlackBright`
7459
- bgGrey: [100, 49],
7460
- // Alias of `bgBlackBright`
7461
- bgRedBright: [101, 49],
7462
- bgGreenBright: [102, 49],
7463
- bgYellowBright: [103, 49],
7464
- bgBlueBright: [104, 49],
7465
- bgMagentaBright: [105, 49],
7466
- bgCyanBright: [106, 49],
7467
- bgWhiteBright: [107, 49]
7468
- }
7469
- };
7470
- var modifierNames = Object.keys(styles.modifier);
7471
- var foregroundColorNames = Object.keys(styles.color);
7472
- var backgroundColorNames = Object.keys(styles.bgColor);
7473
- var colorNames = [...foregroundColorNames, ...backgroundColorNames];
7474
- function assembleStyles() {
7475
- const codes = /* @__PURE__ */ new Map();
7476
- for (const [groupName, group] of Object.entries(styles)) {
7477
- for (const [styleName, style] of Object.entries(group)) {
7478
- styles[styleName] = {
7479
- open: `\x1B[${style[0]}m`,
7480
- close: `\x1B[${style[1]}m`
7481
- };
7482
- group[styleName] = styles[styleName];
7483
- codes.set(style[0], style[1]);
7484
- }
7485
- Object.defineProperty(styles, groupName, {
7486
- value: group,
7487
- enumerable: false
7488
- });
7489
- }
7490
- Object.defineProperty(styles, "codes", {
7491
- value: codes,
7492
- enumerable: false
7493
- });
7494
- styles.color.close = "\x1B[39m";
7495
- styles.bgColor.close = "\x1B[49m";
7496
- styles.color.ansi = wrapAnsi16();
7497
- styles.color.ansi256 = wrapAnsi256();
7498
- styles.color.ansi16m = wrapAnsi16m();
7499
- styles.bgColor.ansi = wrapAnsi16(ANSI_BACKGROUND_OFFSET);
7500
- styles.bgColor.ansi256 = wrapAnsi256(ANSI_BACKGROUND_OFFSET);
7501
- styles.bgColor.ansi16m = wrapAnsi16m(ANSI_BACKGROUND_OFFSET);
7502
- Object.defineProperties(styles, {
7503
- rgbToAnsi256: {
7504
- value(red, green, blue) {
7505
- if (red === green && green === blue) {
7506
- if (red < 8) {
7507
- return 16;
7508
- }
7509
- if (red > 248) {
7510
- return 231;
7511
- }
7512
- return Math.round((red - 8) / 247 * 24) + 232;
7513
- }
7514
- return 16 + 36 * Math.round(red / 255 * 5) + 6 * Math.round(green / 255 * 5) + Math.round(blue / 255 * 5);
7515
- },
7516
- enumerable: false
7517
- },
7518
- hexToRgb: {
7519
- value(hex) {
7520
- const matches = /[a-f\d]{6}|[a-f\d]{3}/i.exec(hex.toString(16));
7521
- if (!matches) {
7522
- return [0, 0, 0];
7523
- }
7524
- let [colorString] = matches;
7525
- if (colorString.length === 3) {
7526
- colorString = [...colorString].map((character) => character + character).join("");
7527
- }
7528
- const integer = Number.parseInt(colorString, 16);
7529
- return [
7530
- /* eslint-disable no-bitwise */
7531
- integer >> 16 & 255,
7532
- integer >> 8 & 255,
7533
- integer & 255
7534
- /* eslint-enable no-bitwise */
7535
- ];
7536
- },
7537
- enumerable: false
7538
- },
7539
- hexToAnsi256: {
7540
- value: (hex) => styles.rgbToAnsi256(...styles.hexToRgb(hex)),
7541
- enumerable: false
7542
- },
7543
- ansi256ToAnsi: {
7544
- value(code2) {
7545
- if (code2 < 8) {
7546
- return 30 + code2;
7547
- }
7548
- if (code2 < 16) {
7549
- return 90 + (code2 - 8);
7550
- }
7551
- let red;
7552
- let green;
7553
- let blue;
7554
- if (code2 >= 232) {
7555
- red = ((code2 - 232) * 10 + 8) / 255;
7556
- green = red;
7557
- blue = red;
7558
- } else {
7559
- code2 -= 16;
7560
- const remainder = code2 % 36;
7561
- red = Math.floor(code2 / 36) / 5;
7562
- green = Math.floor(remainder / 6) / 5;
7563
- blue = remainder % 6 / 5;
7564
- }
7565
- const value = Math.max(red, green, blue) * 2;
7566
- if (value === 0) {
7567
- return 30;
7568
- }
7569
- let result = 30 + (Math.round(blue) << 2 | Math.round(green) << 1 | Math.round(red));
7570
- if (value === 2) {
7571
- result += 60;
7572
- }
7573
- return result;
7574
- },
7575
- enumerable: false
7576
- },
7577
- rgbToAnsi: {
7578
- value: (red, green, blue) => styles.ansi256ToAnsi(styles.rgbToAnsi256(red, green, blue)),
7579
- enumerable: false
7580
- },
7581
- hexToAnsi: {
7582
- value: (hex) => styles.ansi256ToAnsi(styles.hexToAnsi256(hex)),
7583
- enumerable: false
7584
- }
7585
- });
7586
- return styles;
7587
- }
7588
- var ansiStyles = assembleStyles();
7589
- var ansi_styles_default = ansiStyles;
7590
-
7591
- // ../evals-cli/node_modules/chalk/source/vendor/supports-color/index.js
7592
- import process2 from "process";
7593
- import os from "os";
7594
- import tty from "tty";
7595
- function hasFlag(flag, argv = globalThis.Deno ? globalThis.Deno.args : process2.argv) {
7596
- const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
7597
- const position = argv.indexOf(prefix + flag);
7598
- const terminatorPosition = argv.indexOf("--");
7599
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
7600
- }
7601
- var { env } = process2;
7602
- var flagForceColor;
7603
- if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
7604
- flagForceColor = 0;
7605
- } else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
7606
- flagForceColor = 1;
7607
- }
7608
- function envForceColor() {
7609
- if ("FORCE_COLOR" in env) {
7610
- if (env.FORCE_COLOR === "true") {
7611
- return 1;
7612
- }
7613
- if (env.FORCE_COLOR === "false") {
7614
- return 0;
7615
- }
7616
- return env.FORCE_COLOR.length === 0 ? 1 : Math.min(Number.parseInt(env.FORCE_COLOR, 10), 3);
7617
- }
7618
- }
7619
- function translateLevel(level) {
7620
- if (level === 0) {
7621
- return false;
7622
- }
7623
- return {
7624
- level,
7625
- hasBasic: true,
7626
- has256: level >= 2,
7627
- has16m: level >= 3
7628
- };
7629
- }
7630
- function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
7631
- const noFlagForceColor = envForceColor();
7632
- if (noFlagForceColor !== void 0) {
7633
- flagForceColor = noFlagForceColor;
7634
- }
7635
- const forceColor = sniffFlags ? flagForceColor : noFlagForceColor;
7636
- if (forceColor === 0) {
7637
- return 0;
7638
- }
7639
- if (sniffFlags) {
7640
- if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
7641
- return 3;
7642
- }
7643
- if (hasFlag("color=256")) {
7644
- return 2;
7645
- }
7646
- }
7647
- if ("TF_BUILD" in env && "AGENT_NAME" in env) {
7648
- return 1;
7649
- }
7650
- if (haveStream && !streamIsTTY && forceColor === void 0) {
7651
- return 0;
7652
- }
7653
- const min = forceColor || 0;
7654
- if (env.TERM === "dumb") {
7655
- return min;
7656
- }
7657
- if (process2.platform === "win32") {
7658
- const osRelease = os.release().split(".");
7659
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
7660
- return Number(osRelease[2]) >= 14931 ? 3 : 2;
7661
- }
7662
- return 1;
7663
- }
7664
- if ("CI" in env) {
7665
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => key in env)) {
7666
- return 3;
7667
- }
7668
- if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => sign in env) || env.CI_NAME === "codeship") {
7669
- return 1;
7670
- }
7671
- return min;
7672
- }
7673
- if ("TEAMCITY_VERSION" in env) {
7674
- return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env.TEAMCITY_VERSION) ? 1 : 0;
7675
- }
7676
- if (env.COLORTERM === "truecolor") {
7677
- return 3;
7678
- }
7679
- if (env.TERM === "xterm-kitty") {
7680
- return 3;
7681
- }
7682
- if (env.TERM === "xterm-ghostty") {
7683
- return 3;
7684
- }
7685
- if (env.TERM === "wezterm") {
7686
- return 3;
7687
- }
7688
- if ("TERM_PROGRAM" in env) {
7689
- const version2 = Number.parseInt((env.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
7690
- switch (env.TERM_PROGRAM) {
7691
- case "iTerm.app": {
7692
- return version2 >= 3 ? 3 : 2;
7693
- }
7694
- case "Apple_Terminal": {
7695
- return 2;
7696
- }
7697
- }
7698
- }
7699
- if (/-256(color)?$/i.test(env.TERM)) {
7700
- return 2;
7701
- }
7702
- if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env.TERM)) {
7703
- return 1;
7704
- }
7705
- if ("COLORTERM" in env) {
7706
- return 1;
7707
- }
7708
- return min;
7709
- }
7710
- function createSupportsColor(stream, options = {}) {
7711
- const level = _supportsColor(stream, {
7712
- streamIsTTY: stream && stream.isTTY,
7713
- ...options
7714
- });
7715
- return translateLevel(level);
7716
- }
7717
- var supportsColor = {
7718
- stdout: createSupportsColor({ isTTY: tty.isatty(1) }),
7719
- stderr: createSupportsColor({ isTTY: tty.isatty(2) })
7720
- };
7721
- var supports_color_default = supportsColor;
7722
-
7723
- // ../evals-cli/node_modules/chalk/source/utilities.js
7724
- function stringReplaceAll(string, substring, replacer) {
7725
- let index = string.indexOf(substring);
7726
- if (index === -1) {
7727
- return string;
7728
- }
7729
- const substringLength = substring.length;
7730
- let endIndex = 0;
7731
- let returnValue = "";
7732
- do {
7733
- returnValue += string.slice(endIndex, index) + substring + replacer;
7734
- endIndex = index + substringLength;
7735
- index = string.indexOf(substring, endIndex);
7736
- } while (index !== -1);
7737
- returnValue += string.slice(endIndex);
7738
- return returnValue;
7739
- }
7740
- function stringEncaseCRLFWithFirstIndex(string, prefix, postfix, index) {
7741
- let endIndex = 0;
7742
- let returnValue = "";
7743
- do {
7744
- const gotCR = string[index - 1] === "\r";
7745
- returnValue += string.slice(endIndex, gotCR ? index - 1 : index) + prefix + (gotCR ? "\r\n" : "\n") + postfix;
7746
- endIndex = index + 1;
7747
- index = string.indexOf("\n", endIndex);
7748
- } while (index !== -1);
7749
- returnValue += string.slice(endIndex);
7750
- return returnValue;
7751
- }
7752
-
7753
- // ../evals-cli/node_modules/chalk/source/index.js
7754
- var { stdout: stdoutColor, stderr: stderrColor } = supports_color_default;
7755
- var GENERATOR = Symbol("GENERATOR");
7756
- var STYLER = Symbol("STYLER");
7757
- var IS_EMPTY = Symbol("IS_EMPTY");
7758
- var levelMapping = [
7759
- "ansi",
7760
- "ansi",
7761
- "ansi256",
7762
- "ansi16m"
7763
- ];
7764
- var styles2 = /* @__PURE__ */ Object.create(null);
7765
- var applyOptions = (object, options = {}) => {
7766
- if (options.level && !(Number.isInteger(options.level) && options.level >= 0 && options.level <= 3)) {
7767
- throw new Error("The `level` option should be an integer from 0 to 3");
7768
- }
7769
- const colorLevel = stdoutColor ? stdoutColor.level : 0;
7770
- object.level = options.level === void 0 ? colorLevel : options.level;
7771
- };
7772
- var chalkFactory = (options) => {
7773
- const chalk2 = (...strings) => strings.join(" ");
7774
- applyOptions(chalk2, options);
7775
- Object.setPrototypeOf(chalk2, createChalk.prototype);
7776
- return chalk2;
7777
- };
7778
- function createChalk(options) {
7779
- return chalkFactory(options);
7780
- }
7781
- Object.setPrototypeOf(createChalk.prototype, Function.prototype);
7782
- for (const [styleName, style] of Object.entries(ansi_styles_default)) {
7783
- styles2[styleName] = {
7784
- get() {
7785
- const builder = createBuilder(this, createStyler(style.open, style.close, this[STYLER]), this[IS_EMPTY]);
7786
- Object.defineProperty(this, styleName, { value: builder });
7787
- return builder;
7788
- }
7789
- };
7790
- }
7791
- styles2.visible = {
7792
- get() {
7793
- const builder = createBuilder(this, this[STYLER], true);
7794
- Object.defineProperty(this, "visible", { value: builder });
7795
- return builder;
7796
- }
7797
- };
7798
- var getModelAnsi = (model, level, type, ...arguments_) => {
7799
- if (model === "rgb") {
7800
- if (level === "ansi16m") {
7801
- return ansi_styles_default[type].ansi16m(...arguments_);
7802
- }
7803
- if (level === "ansi256") {
7804
- return ansi_styles_default[type].ansi256(ansi_styles_default.rgbToAnsi256(...arguments_));
7805
- }
7806
- return ansi_styles_default[type].ansi(ansi_styles_default.rgbToAnsi(...arguments_));
7807
- }
7808
- if (model === "hex") {
7809
- return getModelAnsi("rgb", level, type, ...ansi_styles_default.hexToRgb(...arguments_));
7810
- }
7811
- return ansi_styles_default[type][model](...arguments_);
7812
- };
7813
- var usedModels = ["rgb", "hex", "ansi256"];
7814
- for (const model of usedModels) {
7815
- styles2[model] = {
7816
- get() {
7817
- const { level } = this;
7818
- return function(...arguments_) {
7819
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "color", ...arguments_), ansi_styles_default.color.close, this[STYLER]);
7820
- return createBuilder(this, styler, this[IS_EMPTY]);
7821
- };
7822
- }
7823
- };
7824
- const bgModel = "bg" + model[0].toUpperCase() + model.slice(1);
7825
- styles2[bgModel] = {
7826
- get() {
7827
- const { level } = this;
7828
- return function(...arguments_) {
7829
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "bgColor", ...arguments_), ansi_styles_default.bgColor.close, this[STYLER]);
7830
- return createBuilder(this, styler, this[IS_EMPTY]);
7831
- };
7832
- }
7833
- };
7834
- }
7835
- var proto = Object.defineProperties(() => {
7836
- }, {
7837
- ...styles2,
7838
- level: {
7839
- enumerable: true,
7840
- get() {
7841
- return this[GENERATOR].level;
7842
- },
7843
- set(level) {
7844
- this[GENERATOR].level = level;
7845
- }
7846
- }
7847
- });
7848
- var createStyler = (open, close, parent) => {
7849
- let openAll;
7850
- let closeAll;
7851
- if (parent === void 0) {
7852
- openAll = open;
7853
- closeAll = close;
7854
- } else {
7855
- openAll = parent.openAll + open;
7856
- closeAll = close + parent.closeAll;
7857
- }
7858
- return {
7859
- open,
7860
- close,
7861
- openAll,
7862
- closeAll,
7863
- parent
7864
- };
7865
- };
7866
- var createBuilder = (self, _styler, _isEmpty) => {
7867
- const builder = (...arguments_) => applyStyle(builder, arguments_.length === 1 ? "" + arguments_[0] : arguments_.join(" "));
7868
- Object.setPrototypeOf(builder, proto);
7869
- builder[GENERATOR] = self;
7870
- builder[STYLER] = _styler;
7871
- builder[IS_EMPTY] = _isEmpty;
7872
- return builder;
7873
- };
7874
- var applyStyle = (self, string) => {
7875
- if (self.level <= 0 || !string) {
7876
- return self[IS_EMPTY] ? "" : string;
7877
- }
7878
- let styler = self[STYLER];
7879
- if (styler === void 0) {
7880
- return string;
7881
- }
7882
- const { openAll, closeAll } = styler;
7883
- if (string.includes("\x1B")) {
7884
- while (styler !== void 0) {
7885
- string = stringReplaceAll(string, styler.close, styler.open);
7886
- styler = styler.parent;
7887
- }
7888
- }
7889
- const lfIndex = string.indexOf("\n");
7890
- if (lfIndex !== -1) {
7891
- string = stringEncaseCRLFWithFirstIndex(string, closeAll, openAll, lfIndex);
7892
- }
7893
- return openAll + string + closeAll;
7894
- };
7895
- Object.defineProperties(createChalk.prototype, styles2);
7896
- var chalk = createChalk();
7897
- var chalkStderr = createChalk({ level: stderrColor ? stderrColor.level : 0 });
7898
- var source_default = chalk;
7899
-
7900
7405
  // ../evals-cli/src/utils/logger.ts
7406
+ import chalk from "chalk";
7901
7407
  var MAX_CONTENT_LENGTH = 160;
7902
7408
  var Logger = class {
7903
7409
  static activeStream = null;
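
Roughly 500 lines of vendored chalk, ansi-styles, and supports-color are dropped from the bundle; the evals CLI logger now imports chalk as a regular dependency. A minimal usage sketch, assuming chalk v5's default export:

import chalk from "chalk";

// Equivalent of the former source_default.bold.blue(...) calls in the Logger.
console.log(chalk.bold.blue("MCPJAM CLI 1.0.0"));
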
@@ -7929,7 +7435,7 @@ var Logger = class {
7929
7435
  static suiteIntro(options) {
7930
7436
  const { testCount, startedAt } = options;
7931
7437
  this.logLine(
7932
- source_default.bold.blue(
7438
+ chalk.bold.blue(
7933
7439
  `\u25B8 Running ${testCount} test${testCount === 1 ? "" : "s"} (${startedAt.toISOString()})`
7934
7440
  )
7935
7441
  );
@@ -7938,42 +7444,42 @@ var Logger = class {
7938
7444
  const { durationMs, passed, failed } = options;
7939
7445
  const total = passed + failed;
7940
7446
  const summary = `${passed}/${total} passed \u2022 ${this.formatDuration(durationMs)}`;
7941
- const prefix = failed === 0 ? source_default.green("\u2714") : source_default.red("\u2716");
7447
+ const prefix = failed === 0 ? chalk.green("\u2714") : chalk.red("\u2716");
7942
7448
  this.logLine(`${prefix} ${summary}`);
7943
7449
  }
7944
7450
  static header(version2) {
7945
- this.logLine(source_default.bold.blue(`MCPJAM CLI ${version2}`));
7451
+ this.logLine(chalk.bold.blue(`MCPJAM CLI ${version2}`));
7946
7452
  this.logLine("");
7947
7453
  }
7948
7454
  static initiateTestMessage(serverCount, toolCount, serverNames, testCount) {
7949
7455
  this.logLine("");
7950
- this.logLine(source_default.bold.blue("Running tests"));
7456
+ this.logLine(chalk.bold.blue("Running tests"));
7951
7457
  const serverLabel = serverCount === 1 ? "server" : "servers";
7952
7458
  const serverList = serverNames.length > 0 ? serverNames.join(", ") : "none";
7953
7459
  this.logLine(
7954
- `Connected to ${source_default.white.bold(serverCount)} ${serverLabel}: ${source_default.gray(serverList)}`
7460
+ `Connected to ${chalk.white.bold(serverCount)} ${serverLabel}: ${chalk.gray(serverList)}`
7955
7461
  );
7956
7462
  const toolLabel = toolCount === 1 ? "tool" : "tools";
7957
- this.logLine(`Found ${source_default.white.bold(toolCount)} total ${toolLabel}`);
7463
+ this.logLine(`Found ${chalk.white.bold(toolCount)} total ${toolLabel}`);
7958
7464
  const testLabel = testCount === 1 ? "test" : "tests";
7959
- this.logLine(`Running ${source_default.white.bold(testCount)} ${testLabel}`);
7465
+ this.logLine(`Running ${chalk.white.bold(testCount)} ${testLabel}`);
7960
7466
  this.logLine("");
7961
7467
  }
7962
7468
  static logTestGroupTitle(testNumber, testName, modelProvider, modelId) {
7963
- this.logLine(source_default.cyan.bold(`Test ${testNumber}: ${testName}`));
7964
- this.logLine(source_default.gray(`Using ${modelProvider}:${modelId}`));
7469
+ this.logLine(chalk.cyan.bold(`Test ${testNumber}: ${testName}`));
7470
+ this.logLine(chalk.gray(`Using ${modelProvider}:${modelId}`));
7965
7471
  this.logLine("");
7966
7472
  }
7967
7473
  static testRunStart(options) {
7968
7474
  const { runNumber, totalRuns } = options;
7969
7475
  const parts = [`run ${runNumber}/${totalRuns}`];
7970
- this.logLine(source_default.cyanBright(parts.join(" \u2022 ")));
7476
+ this.logLine(chalk.cyanBright(parts.join(" \u2022 ")));
7971
7477
  }
7972
7478
  static conversation(options) {
7973
7479
  const { messages } = options;
7974
7480
  this.closeActiveStream();
7975
7481
  if (!messages.length) {
7976
- this.logLine(source_default.dim("(no messages)"));
7482
+ this.logLine(chalk.dim("(no messages)"));
7977
7483
  return;
7978
7484
  }
7979
7485
  messages.forEach((message, index) => {
@@ -8005,7 +7511,7 @@ var Logger = class {
8005
7511
  }
8006
7512
  static testRunResult(options) {
8007
7513
  const { passed, durationMs, usage } = options;
8008
- const status = passed ? source_default.green("PASS") : source_default.red("FAIL");
7514
+ const status = passed ? chalk.green("PASS") : chalk.red("FAIL");
8009
7515
  this.logLine(`${status} (${this.formatDuration(durationMs)})`);
8010
7516
  if (usage) {
8011
7517
  const usageParts = [];
@@ -8019,49 +7525,49 @@ var Logger = class {
8019
7525
  usageParts.push(`total ${usage.totalTokens}`);
8020
7526
  }
8021
7527
  if (usageParts.length > 0) {
8022
- this.logLine(source_default.gray(`Tokens \u2022 ${usageParts.join(" \u2022 ")}`));
7528
+ this.logLine(chalk.gray(`Tokens \u2022 ${usageParts.join(" \u2022 ")}`));
8023
7529
  }
8024
7530
  }
8025
7531
  this.logLine("");
8026
7532
  this.logLine("");
8027
7533
  }
8028
7534
  static info(message) {
8029
- this.logLine(source_default.blue(`\u2139 ${message}`));
7535
+ this.logLine(chalk.blue(`\u2139 ${message}`));
8030
7536
  }
8031
7537
  static warn(message) {
8032
- this.logLine(source_default.yellow(`\u26A0 ${message}`));
7538
+ this.logLine(chalk.yellow(`\u26A0 ${message}`));
8033
7539
  }
8034
7540
  static success(message) {
8035
- this.logLine(source_default.green(`\u2713 ${message}`));
7541
+ this.logLine(chalk.green(`\u2713 ${message}`));
8036
7542
  }
8037
7543
  static error(message) {
8038
7544
  this.logLine("");
8039
- this.logLine(source_default.red(`\u2715 Error: ${message}`));
7545
+ this.logLine(chalk.red(`\u2715 Error: ${message}`));
8040
7546
  }
8041
7547
  static errorWithExit(message) {
8042
7548
  this.logLine("");
8043
- this.logLine(source_default.red(`\u2715 Error: ${message}`));
7549
+ this.logLine(chalk.red(`\u2715 Error: ${message}`));
8044
7550
  process.exit(1);
8045
7551
  }
8046
7552
  static progress(current, total, testName) {
8047
7553
  const progress = `[${current}/${total}]`;
8048
- this.logLine(source_default.gray(`${progress} ${testName}...`));
7554
+ this.logLine(chalk.gray(`${progress} ${testName}...`));
8049
7555
  }
8050
7556
  static testStarting(testName) {
8051
- this.logLine(source_default.gray(` Running ${testName}...`));
7557
+ this.logLine(chalk.gray(` Running ${testName}...`));
8052
7558
  }
8053
7559
  static testError(testName, error) {
8054
- this.logLine(source_default.red(` \u2715 ${testName} failed: ${error}`));
7560
+ this.logLine(chalk.red(` \u2715 ${testName} failed: ${error}`));
8055
7561
  }
8056
7562
  static connectionError(serverName, error) {
8057
- this.logLine(source_default.red(` \u2715 Failed to connect to ${serverName}: ${error}`));
7563
+ this.logLine(chalk.red(` \u2715 Failed to connect to ${serverName}: ${error}`));
8058
7564
  }
8059
7565
  static apiKeyError(provider, error) {
8060
- this.logLine(source_default.red(` \u2715 API key error for ${provider}: ${error}`));
7566
+ this.logLine(chalk.red(` \u2715 API key error for ${provider}: ${error}`));
8061
7567
  }
8062
7568
  static modelCreationError(provider, modelId, error) {
8063
7569
  this.logLine(
8064
- source_default.red(
7570
+ chalk.red(
8065
7571
  ` \u2715 Failed to create ${provider} model "${modelId}": ${error}`
8066
7572
  )
8067
7573
  );
@@ -8176,11 +7682,11 @@ var Logger = class {
8176
7682
  }
8177
7683
  }
8178
7684
  static logToolCall(toolCall) {
8179
- const header = source_default.whiteBright(`[tool-call] ${toolCall.toolName}`);
7685
+ const header = chalk.whiteBright(`[tool-call] ${toolCall.toolName}`);
8180
7686
  this.logLine(header);
8181
7687
  const jsonArgs = toolCall.args ? JSON.parse(toolCall.args) : null;
8182
7688
  if (toolCall.args) {
8183
- this.logLine(source_default.gray(this.truncate(toolCall.args)));
7689
+ this.logLine(chalk.gray(this.truncate(toolCall.args)));
8184
7690
  }
8185
7691
  }
8186
7692
  static beginStreamingMessage(role) {
@@ -8202,18 +7708,18 @@ var Logger = class {
8202
7708
  }
8203
7709
  static streamToolResult(toolName, output) {
8204
7710
  this.closeActiveStream();
8205
- const header = source_default.whiteBright(`[tool-result] ${toolName}`);
7711
+ const header = chalk.whiteBright(`[tool-result] ${toolName}`);
8206
7712
  this.logLine(header);
8207
7713
  if (output !== void 0) {
8208
- this.logLine(source_default.gray(this.truncate(this.stringify(output))));
7714
+ this.logLine(chalk.gray(this.truncate(this.stringify(output))));
8209
7715
  }
8210
7716
  }
8211
7717
  static streamToolError(toolName, error) {
8212
7718
  this.closeActiveStream();
8213
- const header = source_default.whiteBright(`[tool-error] ${toolName}`);
7719
+ const header = chalk.whiteBright(`[tool-error] ${toolName}`);
8214
7720
  this.logLine(header);
8215
7721
  this.logLine(
8216
- source_default.red(this.truncate(this.stringify(error ?? "Unknown error")))
7722
+ chalk.red(this.truncate(this.stringify(error ?? "Unknown error")))
8217
7723
  );
8218
7724
  }
8219
7725
  static renderBox(lines, options) {
@@ -8228,7 +7734,7 @@ var Logger = class {
8228
7734
  lines.forEach((line, index) => {
8229
7735
  const padded = line.padEnd(width, " ");
8230
7736
  const isStatusLine = index === statusIndex;
8231
- const colouredContent = isStatusLine ? statusColor(padded) : source_default.white(padded);
7737
+ const colouredContent = isStatusLine ? statusColor(padded) : chalk.white(padded);
8232
7738
  this.logLine(
8233
7739
  `${borderColor("| ")}${colouredContent}${borderColor(" |")}`
8234
7740
  );
@@ -8270,15 +7776,15 @@ var Logger = class {
8270
7776
  static colorRole(role) {
8271
7777
  switch (role) {
8272
7778
  case "user":
8273
- return source_default.bold.whiteBright("user");
7779
+ return chalk.bold.whiteBright("user");
8274
7780
  case "assistant":
8275
- return source_default.bold.whiteBright("assistant");
7781
+ return chalk.bold.whiteBright("assistant");
8276
7782
  case "tool":
8277
- return source_default.bold.whiteBright("tool");
7783
+ return chalk.bold.whiteBright("tool");
8278
7784
  case "system":
8279
- return source_default.bold.whiteBright("system");
7785
+ return chalk.bold.whiteBright("system");
8280
7786
  default:
8281
- return source_default.cyan(role);
7787
+ return chalk.cyan(role);
8282
7788
  }
8283
7789
  }
8284
7790
  };
@@ -8600,8 +8106,14 @@ var accumulateTokenCount = (current, increment) => {
8600
8106
  };
8601
8107
  var prepareSuite = async (validatedTests, mcpClientOptions, validatedLlms) => {
8602
8108
  const mcpClient = new MCPClient(mcpClientOptions);
8603
- const availableTools = await mcpClient.getTools();
8604
- const vercelTools = convertMastraToolsToVercelTools(availableTools);
8109
+ const toolsets = await mcpClient.getToolsets();
8110
+ const availableTools = {};
8111
+ Object.values(toolsets).forEach((serverTools) => {
8112
+ Object.assign(availableTools, serverTools);
8113
+ });
8114
+ const vercelTools = convertMastraToolsToVercelTools(
8115
+ availableTools
8116
+ );
8605
8117
  const serverNames = Object.keys(mcpClientOptions.servers);
8606
8118
  Logger.initiateTestMessage(
8607
8119
  serverNames.length,
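
prepareSuite switches from mcpClient.getTools() to getToolsets() and flattens the per-server maps into a single tool map. A sketch of the flattening with simplified types; note that Object.assign overwrites, so a later server's tool wins on a name collision:

type Toolsets = Record<string, Record<string, unknown>>; // serverName -> toolName -> tool

const flattenToolsets = (toolsets: Toolsets): Record<string, unknown> => {
  const availableTools: Record<string, unknown> = {};
  Object.values(toolsets).forEach((serverTools) => {
    Object.assign(availableTools, serverTools); // later servers overwrite same-named tools
  });
  return availableTools;
};
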
@@ -8661,6 +8173,7 @@ var runIterationViaBackend = async ({
8661
8173
  await runBackendConversation({
8662
8174
  maxSteps: MAX_STEPS,
8663
8175
  messageHistory,
8176
+ modelId: test.model,
8664
8177
  toolDefinitions: toolDefs,
8665
8178
  fetchBackend: async (payload) => {
8666
8179
  try {
@@ -8782,41 +8295,60 @@ var runIteration = async ({
8782
8295
  });
8783
8296
  while (stepCount < MAX_STEPS) {
8784
8297
  let assistantStreaming = false;
8785
- const streamResult = await streamText2({
8786
- model: createLlmModel2(provider, model, llms),
8787
- system,
8788
- temperature,
8789
- tools: tools2,
8790
- toolChoice,
8791
- messages: messageHistory,
8792
- onChunk: async (chunk) => {
8793
- switch (chunk.chunk.type) {
8794
- case "text-delta":
8795
- case "reasoning-delta": {
8796
- if (!assistantStreaming) {
8797
- Logger.beginStreamingMessage("assistant");
8798
- assistantStreaming = true;
8298
+ let streamResult;
8299
+ try {
8300
+ streamResult = await streamText2({
8301
+ model: createLlmModel2(provider, model, llms),
8302
+ system,
8303
+ temperature,
8304
+ tools: tools2,
8305
+ toolChoice,
8306
+ messages: messageHistory,
8307
+ onChunk: async (chunk) => {
8308
+ switch (chunk.chunk.type) {
8309
+ case "text-delta":
8310
+ case "reasoning-delta": {
8311
+ if (!assistantStreaming) {
8312
+ Logger.beginStreamingMessage("assistant");
8313
+ assistantStreaming = true;
8314
+ }
8315
+ Logger.appendStreamingText(chunk.chunk.text);
8316
+ break;
8799
8317
  }
8800
- Logger.appendStreamingText(chunk.chunk.text);
8801
- break;
8802
- }
8803
- case "tool-call": {
8804
- if (assistantStreaming) {
8805
- Logger.finishStreamingMessage();
8806
- assistantStreaming = false;
8318
+ case "tool-call": {
8319
+ if (assistantStreaming) {
8320
+ Logger.finishStreamingMessage();
8321
+ assistantStreaming = false;
8322
+ }
8323
+ Logger.streamToolCall(chunk.chunk.toolName, chunk.chunk.input);
8324
+ break;
8807
8325
  }
8808
- Logger.streamToolCall(chunk.chunk.toolName, chunk.chunk.input);
8809
- break;
8810
- }
8811
- case "tool-result": {
8812
- Logger.streamToolResult(chunk.chunk.toolName, chunk.chunk.output);
8813
- break;
8326
+ case "tool-result": {
8327
+ Logger.streamToolResult(chunk.chunk.toolName, chunk.chunk.output);
8328
+ break;
8329
+ }
8330
+ default:
8331
+ break;
8814
8332
  }
8815
- default:
8816
- break;
8817
8333
  }
8818
- }
8819
- });
8334
+ });
8335
+ } catch (error) {
8336
+ const errorMessage = error?.message || String(error);
8337
+ Logger.error(errorMessage);
8338
+ const evaluation2 = evaluateResults(test.expectedToolCalls, []);
8339
+ await recorder.finishIteration({
8340
+ iterationId,
8341
+ passed: false,
8342
+ toolsCalled: [],
8343
+ usage: {
8344
+ inputTokens: void 0,
8345
+ outputTokens: void 0,
8346
+ totalTokens: void 0
8347
+ },
8348
+ messages: messageHistory
8349
+ });
8350
+ return evaluation2;
8351
+ }
8820
8352
  await streamResult.consumeStream();
8821
8353
  if (assistantStreaming) {
8822
8354
  Logger.finishStreamingMessage();
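
runIteration now wraps the streamText call in try/catch: a provider error logs the message, records a failed iteration with no tool calls, and returns the (empty) evaluation instead of aborting the suite. A condensed sketch of that pattern; the dependency types below are simplified assumptions, not the bundle's signatures:

type IterationDeps = {
  stream: () => Promise<{ consumeStream: () => Promise<void> }>; // wraps streamText({...})
  evaluateEmptyRun: () => unknown;    // evaluateResults(test.expectedToolCalls, [])
  recordFailure: () => Promise<void>; // recorder.finishIteration({ passed: false, toolsCalled: [], ... })
  logError: (message: string) => void; // Logger.error
};

async function streamOrRecordFailure(deps: IterationDeps): Promise<unknown | null> {
  let streamResult;
  try {
    streamResult = await deps.stream();
  } catch (error) {
    deps.logError(error instanceof Error ? error.message : String(error));
    await deps.recordFailure();
    return deps.evaluateEmptyRun(); // iteration ends here, marked as failed
  }
  await streamResult.consumeStream();
  return null; // null means: continue the normal step loop
}
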
@@ -8893,7 +8425,7 @@ var runTestCase = async ({
8893
8425
  let failedRuns = 0;
8894
8426
  const testCaseId = await recorder.recordTestCase(test, testIndex);
8895
8427
  for (let runIndex = 0; runIndex < runs; runIndex++) {
8896
- const usesBackend = isMCPJamProvidedModel(provider);
8428
+ const usesBackend = isMCPJamProvidedModel(model);
8897
8429
  const evaluation = usesBackend && convexUrl && authToken ? await runIterationViaBackend({
8898
8430
  test,
8899
8431
  runIndex,
@@ -9021,9 +8553,10 @@ function transformServerConfigsToEnvironment(serverIds, clientManager) {
9021
8553
  servers: servers2
9022
8554
  };
9023
8555
  }
9024
- function transformLLMConfigToLlmsConfig(llmConfig) {
8556
+ function transformLLMConfigToLlmsConfig(llmConfig, modelId) {
9025
8557
  const llms = {};
9026
- if (isMCPJamProvidedModel(llmConfig.provider)) {
8558
+ const isMCPJamModel = modelId && isMCPJamProvidedModel(modelId);
8559
+ if (isMCPJamModel) {
9027
8560
  llms.openrouter = "BACKEND_EXECUTION";
9028
8561
  } else {
9029
8562
  const providerKey = llmConfig.provider.toLowerCase();
@@ -9036,6 +8569,154 @@ function transformLLMConfigToLlmsConfig(llmConfig) {
9036
8569
  return validated.data;
9037
8570
  }
9038
8571
 
8572
+ // services/eval-agent.ts
8573
+ var AGENT_SYSTEM_PROMPT = `You are an AI agent specialized in creating realistic test cases for MCP (Model Context Protocol) servers.
8574
+
8575
+ **About MCP:**
8576
+ The Model Context Protocol enables AI assistants to securely access external data and tools. MCP servers expose tools, resources, and prompts that AI models can use to accomplish user tasks. Your test cases should reflect real-world usage patterns where users ask an AI assistant to perform tasks, and the assistant uses MCP tools to fulfill those requests.
8577
+
8578
+ **Your Task:**
8579
+ Generate 6 test cases with varying complexity levels that mimic how real users would interact with an AI assistant using these MCP tools.
8580
+
8581
+ **Test Case Distribution:**
8582
+ - **2 EASY tests** (single tool): Simple, straightforward tasks using one tool
8583
+ - **2 MEDIUM tests** (2+ tools): Multi-step workflows requiring 2-3 tools in sequence or parallel
8584
+ - **2 HARD tests** (3+ tools): Complex scenarios requiring 3+ tools, conditional logic, or cross-server operations
8585
+
8586
+ **Guidelines:**
8587
+ 1. **Realistic User Queries**: Write queries as if a real user is talking to an AI assistant (e.g., "Help me find all tasks due this week" not "Call the list_tasks tool")
8588
+ 2. **Natural Workflows**: Chain tools together in logical sequences that solve real problems
8589
+ 3. **Cross-Server Tests**: If multiple servers are available, create tests that use tools from different servers together
8590
+ 4. **Specific Details**: Include concrete examples (dates, names, values) to make tests actionable
8591
+ 5. **Judge Requirements**: Clearly define what success looks like for each test
8592
+ 6. **Test Titles**: Write clear, descriptive titles WITHOUT difficulty prefixes (e.g., "Read project configuration" not "EASY: Read project configuration")
8593
+
8594
+ **Output Format (CRITICAL):**
8595
+ Respond with ONLY a valid JSON array. No explanations, no markdown code blocks, just the raw JSON array.
8596
+
8597
+ Example:
8598
+ [
8599
+ {
8600
+ "title": "Read project configuration",
8601
+ "query": "Show me the contents of config.json in the current project",
8602
+ "runs": 1,
8603
+ "expectedToolCalls": ["read_file"],
8604
+ "judgeRequirement": "Successfully reads and returns the file contents"
8605
+ },
8606
+ {
8607
+ "title": "Find and analyze recent tasks",
8608
+ "query": "Find all tasks created this week and summarize their status",
8609
+ "runs": 1,
8610
+ "expectedToolCalls": ["list_tasks", "get_task_details"],
8611
+ "judgeRequirement": "First lists tasks filtered by date, then retrieves details for each task found"
8612
+ },
8613
+ {
8614
+ "title": "Cross-server project setup",
8615
+ "query": "Create a new project folder, initialize a git repository, and create a task to track the project setup",
8616
+ "runs": 1,
8617
+ "expectedToolCalls": ["create_directory", "git_init", "create_task"],
8618
+ "judgeRequirement": "Successfully creates directory, initializes git, and creates a tracking task with appropriate details"
8619
+ }
8620
+ ]`;
8621
+ async function generateTestCases(tools2, convexHttpUrl, convexAuthToken) {
8622
+ const serverGroups = tools2.reduce(
8623
+ (acc, tool2) => {
8624
+ if (!acc[tool2.serverId]) {
8625
+ acc[tool2.serverId] = [];
8626
+ }
8627
+ acc[tool2.serverId].push(tool2);
8628
+ return acc;
8629
+ },
8630
+ {}
8631
+ );
8632
+ const serverCount = Object.keys(serverGroups).length;
8633
+ const totalTools = tools2.length;
8634
+ const toolsContext = Object.entries(serverGroups).map(([serverId, serverTools]) => {
8635
+ const toolsList = serverTools.map((tool2) => {
8636
+ return ` - ${tool2.name}: ${tool2.description || "No description"}
8637
+ Input: ${JSON.stringify(tool2.inputSchema)}`;
8638
+ }).join("\n");
8639
+ return `**Server: ${serverId}** (${serverTools.length} tools)
8640
+ ${toolsList}`;
8641
+ }).join("\n\n");
8642
+ const crossServerGuidance = serverCount > 1 ? `
8643
+ **IMPORTANT**: You have ${serverCount} servers available. Create at least 2 test cases that use tools from MULTIPLE servers to test cross-server workflows.` : "";
8644
+ const userPrompt = `Generate 6 test cases for the following MCP server tools:
8645
+
8646
+ ${toolsContext}
8647
+
8648
+ **Available Resources:**
8649
+ - ${serverCount} MCP server(s)
8650
+ - ${totalTools} total tools${crossServerGuidance}
8651
+
8652
+ **Remember:**
8653
+ 1. Create exactly 6 tests: 2 EASY (1 tool), 2 MEDIUM (2-3 tools), 2 HARD (3+ tools)
8654
+ 2. Write realistic user queries that sound natural
8655
+ 3. Use specific examples (dates, filenames, values)
8656
+ 4. Chain tools in logical sequences
8657
+ 5. Respond with ONLY a JSON array - no other text or markdown`;
8658
+ const messageHistory = [
8659
+ { role: "system", content: AGENT_SYSTEM_PROMPT },
8660
+ { role: "user", content: userPrompt }
8661
+ ];
8662
+ const response = await fetch(`${convexHttpUrl}/streaming`, {
8663
+ method: "POST",
8664
+ headers: {
8665
+ "Content-Type": "application/json",
8666
+ Authorization: `Bearer ${convexAuthToken}`
8667
+ },
8668
+ body: JSON.stringify({
8669
+ model: "meta-llama/llama-3.3-70b-instruct",
8670
+ tools: [],
8671
+ messages: JSON.stringify(messageHistory)
8672
+ })
8673
+ });
8674
+ if (!response.ok) {
8675
+ const errorText = await response.text();
8676
+ throw new Error(`Failed to generate test cases: ${errorText}`);
8677
+ }
8678
+ const data = await response.json();
8679
+ if (!data.ok || !Array.isArray(data.messages)) {
8680
+ throw new Error("Invalid response from backend LLM");
8681
+ }
8682
+ let assistantResponse = "";
8683
+ for (const msg of data.messages) {
8684
+ if (msg.role === "assistant") {
8685
+ const content = msg.content;
8686
+ if (typeof content === "string") {
8687
+ assistantResponse += content;
8688
+ } else if (Array.isArray(content)) {
8689
+ for (const item of content) {
8690
+ if (item.type === "text" && item.text) {
8691
+ assistantResponse += item.text;
8692
+ }
8693
+ }
8694
+ }
8695
+ }
8696
+ }
8697
+ try {
8698
+ const jsonMatch = assistantResponse.match(/```(?:json)?\s*([\s\S]*?)```/);
8699
+ const jsonText = jsonMatch ? jsonMatch[1].trim() : assistantResponse.trim();
8700
+ const testCases = JSON.parse(jsonText);
8701
+ if (!Array.isArray(testCases)) {
8702
+ throw new Error("Response is not an array");
8703
+ }
8704
+ const validatedTests = testCases.map((tc) => ({
8705
+ title: tc.title || "Untitled Test",
8706
+ query: tc.query || "",
8707
+ runs: typeof tc.runs === "number" ? tc.runs : 1,
8708
+ expectedToolCalls: Array.isArray(tc.expectedToolCalls) ? tc.expectedToolCalls : [],
8709
+ judgeRequirement: tc.judgeRequirement
8710
+ }));
8711
+ return validatedTests;
8712
+ } catch (parseError) {
8713
+ console.error("Failed to parse LLM response:", assistantResponse);
8714
+ throw new Error(
8715
+ `Failed to parse test cases from LLM response: ${parseError instanceof Error ? parseError.message : "Unknown error"}`
8716
+ );
8717
+ }
8718
+ }
8719
+
9039
8720
  // routes/mcp/evals.ts
9040
8721
  var evals = new Hono10();
9041
8722
  var RunEvalsRequestSchema = z3.object({
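
The new services/eval-agent.ts asks the backend LLM for six test cases and normalizes the returned JSON field by field. The accepted shape can be read off that validation code; a sketch of the inferred type and per-item normalization, with optionality of judgeRequirement assumed:

interface GeneratedTestCase {
  title: string;
  query: string;
  runs: number;
  expectedToolCalls: string[];
  judgeRequirement?: string;
}

const normalizeTestCase = (tc: Partial<GeneratedTestCase>): GeneratedTestCase => ({
  title: tc.title || "Untitled Test",
  query: tc.query || "",
  runs: typeof tc.runs === "number" ? tc.runs : 1,
  expectedToolCalls: Array.isArray(tc.expectedToolCalls) ? tc.expectedToolCalls : [],
  judgeRequirement: tc.judgeRequirement,
});
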
@@ -9081,7 +8762,8 @@ evals.post("/run", async (c) => {
9081
8762
  serverIds,
9082
8763
  clientManager
9083
8764
  );
9084
- const llms = transformLLMConfigToLlmsConfig(llmConfig);
8765
+ const modelId = tests.length > 0 ? tests[0].model : void 0;
8766
+ const llms = transformLLMConfigToLlmsConfig(llmConfig, modelId);
9085
8767
  const convexUrl = process.env.CONVEX_URL;
9086
8768
  if (!convexUrl) {
9087
8769
  throw new Error("CONVEX_URL is not set");
@@ -9100,7 +8782,8 @@ evals.post("/run", async (c) => {
9100
8782
  convexHttpUrl,
9101
8783
  convexAuthToken
9102
8784
  ).catch((error) => {
9103
- console.error("[Hono:Evals] Error running evals:", error);
8785
+ const errorMessage = error instanceof Error ? error.message : String(error);
8786
+ console.error("[Error running evals:", errorMessage);
9104
8787
  });
9105
8788
  return c.json({
9106
8789
  success: true,
@@ -9116,6 +8799,63 @@ evals.post("/run", async (c) => {
9116
8799
  );
9117
8800
  }
9118
8801
  });
8802
+ var GenerateTestsRequestSchema = z3.object({
8803
+ serverIds: z3.array(z3.string()).min(1, "At least one server must be selected"),
8804
+ convexAuthToken: z3.string()
8805
+ });
8806
+ evals.post("/generate-tests", async (c) => {
8807
+ try {
8808
+ const body = await c.req.json();
8809
+ const validationResult = GenerateTestsRequestSchema.safeParse(body);
8810
+ if (!validationResult.success) {
8811
+ return c.json(
8812
+ {
8813
+ error: "Invalid request body",
8814
+ details: validationResult.error.errors
8815
+ },
8816
+ 400
8817
+ );
8818
+ }
8819
+ const { serverIds, convexAuthToken } = validationResult.data;
8820
+ const clientManager = c.mcpJamClientManager;
8821
+ const allTools = clientManager.getAvailableTools();
8822
+ const serverIdSet = new Set(
8823
+ serverIds.map((name) => clientManager.getServerIdForName(name)).filter(Boolean)
8824
+ );
8825
+ const filteredTools = allTools.filter(
8826
+ (tool2) => serverIdSet.has(tool2.serverId)
8827
+ );
8828
+ if (filteredTools.length === 0) {
8829
+ return c.json(
8830
+ {
8831
+ error: "No tools found for selected servers"
8832
+ },
8833
+ 400
8834
+ );
8835
+ }
8836
+ const convexHttpUrl = process.env.CONVEX_HTTP_URL;
8837
+ if (!convexHttpUrl) {
8838
+ throw new Error("CONVEX_HTTP_URL is not set");
8839
+ }
8840
+ const testCases = await generateTestCases(
8841
+ filteredTools,
8842
+ convexHttpUrl,
8843
+ convexAuthToken
8844
+ );
8845
+ return c.json({
8846
+ success: true,
8847
+ tests: testCases
8848
+ });
8849
+ } catch (error) {
8850
+ console.error("Error in /evals/generate-tests:", error);
8851
+ return c.json(
8852
+ {
8853
+ error: error instanceof Error ? error.message : "Unknown error"
8854
+ },
8855
+ 500
8856
+ );
8857
+ }
8858
+ });
9119
8859
  var evals_default = evals;
9120
8860
 
9121
8861
  // routes/mcp/http-adapters.ts
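
The new /generate-tests route validates serverIds and convexAuthToken, filters the client manager's tools to the selected servers, and returns the generated tests. A hypothetical client-side call; the base URL and the mount prefix of the evals router are not visible in this diff, so both are placeholders:

const BASE_URL = "http://localhost:3001"; // assumption
const response = await fetch(`${BASE_URL}/evals/generate-tests`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    serverIds: ["filesystem"],  // at least one server name is required
    convexAuthToken: "<token>", // forwarded to the backend LLM call
  }),
});
const { success, tests } = await response.json();
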
@@ -9360,13 +9100,13 @@ function createHttpHandler(mode, routePrefix) {
9360
9100
  const xfProto = c.req.header("x-forwarded-proto");
9361
9101
  const xfHost = c.req.header("x-forwarded-host");
9362
9102
  const host = xfHost || c.req.header("host");
9363
- let proto2 = xfProto;
9364
- if (!proto2) {
9103
+ let proto = xfProto;
9104
+ if (!proto) {
9365
9105
  const originHeader = c.req.header("origin");
9366
- if (originHeader && /^https:/i.test(originHeader)) proto2 = "https";
9106
+ if (originHeader && /^https:/i.test(originHeader)) proto = "https";
9367
9107
  }
9368
- if (!proto2) proto2 = "http";
9369
- const origin = host ? `${proto2}://${host}` : incomingUrl.origin;
9108
+ if (!proto) proto = "http";
9109
+ const origin = host ? `${proto}://${host}` : incomingUrl.origin;
9370
9110
  endpointBase = `${origin}/api/mcp/${routePrefix}/${serverId}/messages`;
9371
9111
  }
9372
9112
  const sessionId = crypto.randomUUID();
@@ -9588,23 +9328,22 @@ function validateServerConfig(serverConfig) {
9588
9328
  };
9589
9329
  }
9590
9330
  if (config.oauth?.access_token) {
9331
+ const accessToken = config.oauth.access_token;
9591
9332
  const authHeaders = {
9592
- Authorization: `Bearer ${config.oauth.access_token}`,
9333
+ Authorization: `Bearer ${accessToken}`,
9593
9334
  ...config.requestInit?.headers || {}
9594
9335
  };
9595
9336
  config.requestInit = {
9596
9337
  ...config.requestInit,
9597
9338
  headers: authHeaders
9598
9339
  };
9340
+ const requestInitHeaders = config.requestInit?.headers;
9599
9341
  config.eventSourceInit = {
9600
9342
  fetch(input, init2) {
9601
9343
  const headers = new Headers(init2?.headers || {});
9602
- headers.set(
9603
- "Authorization",
9604
- `Bearer ${config.oauth.access_token}`
9605
- );
9606
- if (config.requestInit?.headers) {
9607
- const requestHeaders = new Headers(config.requestInit.headers);
9344
+ headers.set("Authorization", `Bearer ${accessToken}`);
9345
+ if (requestInitHeaders) {
9346
+ const requestHeaders = new Headers(requestInitHeaders);
9608
9347
  requestHeaders.forEach((value, key) => {
9609
9348
  if (key.toLowerCase() !== "authorization") {
9610
9349
  headers.set(key, value);
@@ -9617,6 +9356,7 @@ function validateServerConfig(serverConfig) {
9617
9356
  });
9618
9357
  }
9619
9358
  };
9359
+ delete config.oauth;
9620
9360
  } else if (config.requestInit?.headers) {
9621
9361
  config.eventSourceInit = {
9622
9362
  fetch(input, init2) {
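
validateServerConfig now captures the OAuth access token and the merged request headers in local variables before building eventSourceInit, and then deletes config.oauth. A condensed sketch of the SSE header merge under simplified types; the final fetch call is assumed from context, since the hunk elides the function's tail:

const buildEventSourceFetch =
  (accessToken: string, requestInitHeaders?: HeadersInit) =>
  (input: RequestInfo | URL, init?: RequestInit) => {
    const headers = new Headers(init?.headers || {});
    headers.set("Authorization", `Bearer ${accessToken}`);
    if (requestInitHeaders) {
      new Headers(requestInitHeaders).forEach((value, key) => {
        if (key.toLowerCase() !== "authorization") headers.set(key, value);
      });
    }
    return fetch(input, { ...init, headers }); // assumed: the original returns a fetch with the merged headers
  };
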