@mcpjam/inspector 0.9.63 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -272,6 +272,8 @@ tools.post("/list", async (c) => {
272
272
  }
273
273
  });
274
274
  tools.post("/execute", async (c) => {
275
+ const mcp2 = c.mcpJamClientManager;
276
+ let state = null;
275
277
  try {
276
278
  const { serverId, toolName, parameters } = await c.req.json();
277
279
  if (!serverId) return c.json({ error: "serverId is required" }, 400);
@@ -279,19 +281,20 @@ tools.post("/execute", async (c) => {
279
281
  if (activeExecution) {
280
282
  return c.json({ error: "Another execution is already in progress" }, 409);
281
283
  }
282
- const mcp2 = c.mcpJamClientManager;
283
284
  const status = mcp2.getConnectionStatus(serverId);
284
285
  if (status !== "connected") {
285
286
  return c.json({ error: `Server '${serverId}' is not connected` }, 400);
286
287
  }
287
288
  const executionId = `exec_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
288
- const state = {
289
+ const execPromise = Promise.resolve().then(() => mcp2.executeToolDirect(toolName, parameters || {})).catch((error) => {
290
+ if (state) state.error = error;
291
+ throw error;
292
+ });
293
+ state = {
289
294
  id: executionId,
290
295
  serverId,
291
296
  toolName,
292
- execPromise: Promise.resolve().then(
293
- () => mcp2.executeToolDirect(toolName, parameters || {})
294
- ),
297
+ execPromise,
295
298
  completed: false,
296
299
  queue: [],
297
300
  waiters: []
@@ -360,20 +363,27 @@ tools.post("/execute", async (c) => {
360
363
  202
361
364
  );
362
365
  } catch (err) {
366
+ if (state && activeExecution === state) {
367
+ state.error = state.error ?? err;
368
+ state.waiters.length = 0;
369
+ state.queue.length = 0;
370
+ activeExecution = null;
371
+ mcp2.clearElicitationCallback();
372
+ }
363
373
  const msg = err instanceof Error ? err.message : String(err);
364
374
  return c.json({ error: msg }, 500);
365
375
  }
366
376
  });
367
377
  tools.post("/respond", async (c) => {
378
+ const mcp2 = c.mcpJamClientManager;
379
+ const state = activeExecution;
368
380
  try {
369
381
  const { requestId, response } = await c.req.json();
370
382
  if (!requestId) return c.json({ error: "requestId is required" }, 400);
371
- if (!activeExecution) return c.json({ error: "No active execution" }, 404);
372
- const mcp2 = c.mcpJamClientManager;
383
+ if (!state) return c.json({ error: "No active execution" }, 404);
373
384
  const ok = mcp2.respondToElicitation(requestId, response);
374
385
  if (!ok)
375
386
  return c.json({ error: "No pending elicitation for requestId" }, 404);
376
- const state = activeExecution;
377
387
  try {
378
388
  const race = await Promise.race([
379
389
  state.execPromise.then((res) => ({ kind: "done", res })),
@@ -408,10 +418,24 @@ tools.post("/respond", async (c) => {
408
418
  202
409
419
  );
410
420
  } catch (e) {
421
+ state.error = state.error ?? e;
422
+ state.waiters.length = 0;
423
+ state.queue.length = 0;
424
+ if (activeExecution === state) {
425
+ activeExecution = null;
426
+ mcp2.clearElicitationCallback();
427
+ }
411
428
  const msg = e instanceof Error ? e.message : String(e);
412
429
  return c.json({ error: msg }, 500);
413
430
  }
414
431
  } catch (err) {
432
+ if (state && activeExecution === state) {
433
+ state.error = state.error ?? err;
434
+ state.waiters.length = 0;
435
+ state.queue.length = 0;
436
+ activeExecution = null;
437
+ mcp2.clearElicitationCallback();
438
+ }
415
439
  const msg = err instanceof Error ? err.message : String(err);
416
440
  return c.json({ error: msg }, 500);
417
441
  }
@@ -543,9 +567,14 @@ import { Hono as Hono6 } from "hono";
543
567
  import { streamText } from "ai";
544
568
 
545
569
  // ../shared/types.ts
546
- var isMCPJamProvidedModel = (provider) => {
547
- const MCPJAM_PROVIDERS = ["meta"];
548
- return MCPJAM_PROVIDERS.includes(provider);
570
+ var MCPJAM_PROVIDED_MODEL_IDS = [
571
+ "meta-llama/llama-3.3-70b-instruct",
572
+ "openai/gpt-oss-120b",
573
+ "x-ai/grok-4-fast",
574
+ "openai/gpt-5-nano"
575
+ ];
576
+ var isMCPJamProvidedModel = (modelId) => {
577
+ return MCPJAM_PROVIDED_MODEL_IDS.includes(modelId);
549
578
  };
550
579
 
551
580
  // routes/mcp/chat.ts
@@ -671,13 +700,6 @@ function ensureOutputSchema(schema) {
671
700
  }
672
701
  return schema;
673
702
  }
674
- function extractPureToolName(toolKey) {
675
- const separatorIndex = toolKey.indexOf("_");
676
- if (separatorIndex === -1 || separatorIndex === toolKey.length - 1) {
677
- return toolKey;
678
- }
679
- return toolKey.slice(separatorIndex + 1);
680
- }
681
703
  function convertMastraToolToVercelTool(toolName, mastraTool, options) {
682
704
  const inputSchema = ensureInputSchema(mastraTool.inputSchema);
683
705
  const outputSchema = ensureOutputSchema(mastraTool.outputSchema);
@@ -743,10 +765,9 @@ function convertMastraToolToVercelTool(toolName, mastraTool, options) {
743
765
  function convertMastraToolsToVercelTools(mastraTools) {
744
766
  return Object.fromEntries(
745
767
  Object.entries(mastraTools).map(([name, mastraTool]) => {
746
- const pureToolName = extractPureToolName(name);
747
768
  return [
748
- pureToolName,
749
- convertMastraToolToVercelTool(pureToolName, mastraTool, {
769
+ name,
770
+ convertMastraToolToVercelTool(name, mastraTool, {
750
771
  originalName: name
751
772
  })
752
773
  ];
@@ -889,7 +910,8 @@ var runBackendConversation = async (options) => {
889
910
  while (step < options.maxSteps) {
890
911
  const payload = {
891
912
  tools: options.toolDefinitions,
892
- messages: JSON.stringify(options.messageHistory)
913
+ messages: JSON.stringify(options.messageHistory),
914
+ model: options.modelId
893
915
  };
894
916
  const data = await options.fetchBackend(payload);
895
917
  if (!data || !data.ok || !Array.isArray(data.messages)) {
@@ -1208,7 +1230,7 @@ var createStreamingResponse = async (model, aiSdkTools, messages, streamingConte
1208
1230
  "[DONE]"
1209
1231
  );
1210
1232
  };
1211
- var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager, baseUrl, authHeader, selectedServers) => {
1233
+ var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager, baseUrl, modelId, authHeader, selectedServers) => {
1212
1234
  const messageHistory = (messages || []).map((m) => {
1213
1235
  switch (m.role) {
1214
1236
  case "system":
@@ -1262,6 +1284,7 @@ var sendMessagesToBackend = async (messages, streamingContext, mcpClientManager,
1262
1284
  await runBackendConversation({
1263
1285
  maxSteps: MAX_AGENT_STEPS,
1264
1286
  messageHistory,
1287
+ modelId,
1265
1288
  toolDefinitions: toolDefs,
1266
1289
  fetchBackend: async (payload) => {
1267
1290
  const data = await sendBackendRequest(
@@ -1367,7 +1390,7 @@ chat.post("/", async (c) => {
1367
1390
  400
1368
1391
  );
1369
1392
  }
1370
- const sendToBackend = isMCPJamProvidedModel(provider) && Boolean(requestData.sendMessagesToBackend);
1393
+ const sendToBackend = model?.id && isMCPJamProvidedModel(model.id) && Boolean(requestData.sendMessagesToBackend);
1371
1394
  if (!sendToBackend && (!model?.id || !apiKey)) {
1372
1395
  return c.json(
1373
1396
  {
@@ -1438,6 +1461,7 @@ chat.post("/", async (c) => {
1438
1461
  streamingContext,
1439
1462
  mcpClientManager,
1440
1463
  process.env.CONVEX_HTTP_URL,
1464
+ model.id,
1441
1465
  authHeader,
1442
1466
  requestData.selectedServers
1443
1467
  );
@@ -1985,13 +2009,13 @@ async function handleProxy(c) {
1985
2009
  const xfProto = req.headers.get("x-forwarded-proto");
1986
2010
  const xfHost = req.headers.get("x-forwarded-host");
1987
2011
  const host = xfHost || req.headers.get("host");
1988
- let proto2 = xfProto;
1989
- if (!proto2) {
2012
+ let proto = xfProto;
2013
+ if (!proto) {
1990
2014
  const originHeader = req.headers.get("origin");
1991
- if (originHeader && /^https:/i.test(originHeader)) proto2 = "https";
2015
+ if (originHeader && /^https:/i.test(originHeader)) proto = "https";
1992
2016
  }
1993
- if (!proto2) proto2 = "http";
1994
- const proxyOrigin = host ? `${proto2}://${host}` : new URL(req.url).origin;
2017
+ if (!proto) proto = "http";
2018
+ const proxyOrigin = host ? `${proto}://${host}` : new URL(req.url).origin;
1995
2019
  const sessionId = crypto.randomUUID();
1996
2020
  interceptorStore.setSessionEndpoint(
1997
2021
  id,
@@ -3688,14 +3712,14 @@ var require_node_gyp_build = __commonJS({
3688
3712
  "../common/temp/node_modules/.pnpm/node-gyp-build@4.8.4/node_modules/node-gyp-build/node-gyp-build.js"(exports, module) {
3689
3713
  var fs = __require("fs");
3690
3714
  var path = __require("path");
3691
- var os2 = __require("os");
3715
+ var os = __require("os");
3692
3716
  var runtimeRequire = typeof __webpack_require__ === "function" ? __non_webpack_require__ : __require;
3693
3717
  var vars = process.config && process.config.variables || {};
3694
3718
  var prebuildsOnly = !!process.env.PREBUILDS_ONLY;
3695
3719
  var abi = process.versions.modules;
3696
3720
  var runtime = isElectron() ? "electron" : isNwjs() ? "node-webkit" : "node";
3697
- var arch = process.env.npm_config_arch || os2.arch();
3698
- var platform = process.env.npm_config_platform || os2.platform();
3721
+ var arch = process.env.npm_config_arch || os.arch();
3722
+ var platform = process.env.npm_config_platform || os.platform();
3699
3723
  var libc = process.env.LIBC || (isAlpine(platform) ? "musl" : "glibc");
3700
3724
  var armv = process.env.ARM_VERSION || (arch === "arm64" ? "8" : vars.arm_version) || "";
3701
3725
  var uv = (process.versions.uv || "").split(".")[0];
@@ -7402,502 +7426,8 @@ var wrapper_default = import_websocket.default;
7402
7426
  var nodeWebSocket = wrapper_default;
7403
7427
  setDefaultWebSocketConstructor(nodeWebSocket);
7404
7428
 
7405
- // ../evals-cli/node_modules/chalk/source/vendor/ansi-styles/index.js
7406
- var ANSI_BACKGROUND_OFFSET = 10;
7407
- var wrapAnsi16 = (offset = 0) => (code2) => `\x1B[${code2 + offset}m`;
7408
- var wrapAnsi256 = (offset = 0) => (code2) => `\x1B[${38 + offset};5;${code2}m`;
7409
- var wrapAnsi16m = (offset = 0) => (red, green, blue) => `\x1B[${38 + offset};2;${red};${green};${blue}m`;
7410
- var styles = {
7411
- modifier: {
7412
- reset: [0, 0],
7413
- // 21 isn't widely supported and 22 does the same thing
7414
- bold: [1, 22],
7415
- dim: [2, 22],
7416
- italic: [3, 23],
7417
- underline: [4, 24],
7418
- overline: [53, 55],
7419
- inverse: [7, 27],
7420
- hidden: [8, 28],
7421
- strikethrough: [9, 29]
7422
- },
7423
- color: {
7424
- black: [30, 39],
7425
- red: [31, 39],
7426
- green: [32, 39],
7427
- yellow: [33, 39],
7428
- blue: [34, 39],
7429
- magenta: [35, 39],
7430
- cyan: [36, 39],
7431
- white: [37, 39],
7432
- // Bright color
7433
- blackBright: [90, 39],
7434
- gray: [90, 39],
7435
- // Alias of `blackBright`
7436
- grey: [90, 39],
7437
- // Alias of `blackBright`
7438
- redBright: [91, 39],
7439
- greenBright: [92, 39],
7440
- yellowBright: [93, 39],
7441
- blueBright: [94, 39],
7442
- magentaBright: [95, 39],
7443
- cyanBright: [96, 39],
7444
- whiteBright: [97, 39]
7445
- },
7446
- bgColor: {
7447
- bgBlack: [40, 49],
7448
- bgRed: [41, 49],
7449
- bgGreen: [42, 49],
7450
- bgYellow: [43, 49],
7451
- bgBlue: [44, 49],
7452
- bgMagenta: [45, 49],
7453
- bgCyan: [46, 49],
7454
- bgWhite: [47, 49],
7455
- // Bright color
7456
- bgBlackBright: [100, 49],
7457
- bgGray: [100, 49],
7458
- // Alias of `bgBlackBright`
7459
- bgGrey: [100, 49],
7460
- // Alias of `bgBlackBright`
7461
- bgRedBright: [101, 49],
7462
- bgGreenBright: [102, 49],
7463
- bgYellowBright: [103, 49],
7464
- bgBlueBright: [104, 49],
7465
- bgMagentaBright: [105, 49],
7466
- bgCyanBright: [106, 49],
7467
- bgWhiteBright: [107, 49]
7468
- }
7469
- };
7470
- var modifierNames = Object.keys(styles.modifier);
7471
- var foregroundColorNames = Object.keys(styles.color);
7472
- var backgroundColorNames = Object.keys(styles.bgColor);
7473
- var colorNames = [...foregroundColorNames, ...backgroundColorNames];
7474
- function assembleStyles() {
7475
- const codes = /* @__PURE__ */ new Map();
7476
- for (const [groupName, group] of Object.entries(styles)) {
7477
- for (const [styleName, style] of Object.entries(group)) {
7478
- styles[styleName] = {
7479
- open: `\x1B[${style[0]}m`,
7480
- close: `\x1B[${style[1]}m`
7481
- };
7482
- group[styleName] = styles[styleName];
7483
- codes.set(style[0], style[1]);
7484
- }
7485
- Object.defineProperty(styles, groupName, {
7486
- value: group,
7487
- enumerable: false
7488
- });
7489
- }
7490
- Object.defineProperty(styles, "codes", {
7491
- value: codes,
7492
- enumerable: false
7493
- });
7494
- styles.color.close = "\x1B[39m";
7495
- styles.bgColor.close = "\x1B[49m";
7496
- styles.color.ansi = wrapAnsi16();
7497
- styles.color.ansi256 = wrapAnsi256();
7498
- styles.color.ansi16m = wrapAnsi16m();
7499
- styles.bgColor.ansi = wrapAnsi16(ANSI_BACKGROUND_OFFSET);
7500
- styles.bgColor.ansi256 = wrapAnsi256(ANSI_BACKGROUND_OFFSET);
7501
- styles.bgColor.ansi16m = wrapAnsi16m(ANSI_BACKGROUND_OFFSET);
7502
- Object.defineProperties(styles, {
7503
- rgbToAnsi256: {
7504
- value(red, green, blue) {
7505
- if (red === green && green === blue) {
7506
- if (red < 8) {
7507
- return 16;
7508
- }
7509
- if (red > 248) {
7510
- return 231;
7511
- }
7512
- return Math.round((red - 8) / 247 * 24) + 232;
7513
- }
7514
- return 16 + 36 * Math.round(red / 255 * 5) + 6 * Math.round(green / 255 * 5) + Math.round(blue / 255 * 5);
7515
- },
7516
- enumerable: false
7517
- },
7518
- hexToRgb: {
7519
- value(hex) {
7520
- const matches = /[a-f\d]{6}|[a-f\d]{3}/i.exec(hex.toString(16));
7521
- if (!matches) {
7522
- return [0, 0, 0];
7523
- }
7524
- let [colorString] = matches;
7525
- if (colorString.length === 3) {
7526
- colorString = [...colorString].map((character) => character + character).join("");
7527
- }
7528
- const integer = Number.parseInt(colorString, 16);
7529
- return [
7530
- /* eslint-disable no-bitwise */
7531
- integer >> 16 & 255,
7532
- integer >> 8 & 255,
7533
- integer & 255
7534
- /* eslint-enable no-bitwise */
7535
- ];
7536
- },
7537
- enumerable: false
7538
- },
7539
- hexToAnsi256: {
7540
- value: (hex) => styles.rgbToAnsi256(...styles.hexToRgb(hex)),
7541
- enumerable: false
7542
- },
7543
- ansi256ToAnsi: {
7544
- value(code2) {
7545
- if (code2 < 8) {
7546
- return 30 + code2;
7547
- }
7548
- if (code2 < 16) {
7549
- return 90 + (code2 - 8);
7550
- }
7551
- let red;
7552
- let green;
7553
- let blue;
7554
- if (code2 >= 232) {
7555
- red = ((code2 - 232) * 10 + 8) / 255;
7556
- green = red;
7557
- blue = red;
7558
- } else {
7559
- code2 -= 16;
7560
- const remainder = code2 % 36;
7561
- red = Math.floor(code2 / 36) / 5;
7562
- green = Math.floor(remainder / 6) / 5;
7563
- blue = remainder % 6 / 5;
7564
- }
7565
- const value = Math.max(red, green, blue) * 2;
7566
- if (value === 0) {
7567
- return 30;
7568
- }
7569
- let result = 30 + (Math.round(blue) << 2 | Math.round(green) << 1 | Math.round(red));
7570
- if (value === 2) {
7571
- result += 60;
7572
- }
7573
- return result;
7574
- },
7575
- enumerable: false
7576
- },
7577
- rgbToAnsi: {
7578
- value: (red, green, blue) => styles.ansi256ToAnsi(styles.rgbToAnsi256(red, green, blue)),
7579
- enumerable: false
7580
- },
7581
- hexToAnsi: {
7582
- value: (hex) => styles.ansi256ToAnsi(styles.hexToAnsi256(hex)),
7583
- enumerable: false
7584
- }
7585
- });
7586
- return styles;
7587
- }
7588
- var ansiStyles = assembleStyles();
7589
- var ansi_styles_default = ansiStyles;
7590
-
7591
- // ../evals-cli/node_modules/chalk/source/vendor/supports-color/index.js
7592
- import process2 from "process";
7593
- import os from "os";
7594
- import tty from "tty";
7595
- function hasFlag(flag, argv = globalThis.Deno ? globalThis.Deno.args : process2.argv) {
7596
- const prefix = flag.startsWith("-") ? "" : flag.length === 1 ? "-" : "--";
7597
- const position = argv.indexOf(prefix + flag);
7598
- const terminatorPosition = argv.indexOf("--");
7599
- return position !== -1 && (terminatorPosition === -1 || position < terminatorPosition);
7600
- }
7601
- var { env } = process2;
7602
- var flagForceColor;
7603
- if (hasFlag("no-color") || hasFlag("no-colors") || hasFlag("color=false") || hasFlag("color=never")) {
7604
- flagForceColor = 0;
7605
- } else if (hasFlag("color") || hasFlag("colors") || hasFlag("color=true") || hasFlag("color=always")) {
7606
- flagForceColor = 1;
7607
- }
7608
- function envForceColor() {
7609
- if ("FORCE_COLOR" in env) {
7610
- if (env.FORCE_COLOR === "true") {
7611
- return 1;
7612
- }
7613
- if (env.FORCE_COLOR === "false") {
7614
- return 0;
7615
- }
7616
- return env.FORCE_COLOR.length === 0 ? 1 : Math.min(Number.parseInt(env.FORCE_COLOR, 10), 3);
7617
- }
7618
- }
7619
- function translateLevel(level) {
7620
- if (level === 0) {
7621
- return false;
7622
- }
7623
- return {
7624
- level,
7625
- hasBasic: true,
7626
- has256: level >= 2,
7627
- has16m: level >= 3
7628
- };
7629
- }
7630
- function _supportsColor(haveStream, { streamIsTTY, sniffFlags = true } = {}) {
7631
- const noFlagForceColor = envForceColor();
7632
- if (noFlagForceColor !== void 0) {
7633
- flagForceColor = noFlagForceColor;
7634
- }
7635
- const forceColor = sniffFlags ? flagForceColor : noFlagForceColor;
7636
- if (forceColor === 0) {
7637
- return 0;
7638
- }
7639
- if (sniffFlags) {
7640
- if (hasFlag("color=16m") || hasFlag("color=full") || hasFlag("color=truecolor")) {
7641
- return 3;
7642
- }
7643
- if (hasFlag("color=256")) {
7644
- return 2;
7645
- }
7646
- }
7647
- if ("TF_BUILD" in env && "AGENT_NAME" in env) {
7648
- return 1;
7649
- }
7650
- if (haveStream && !streamIsTTY && forceColor === void 0) {
7651
- return 0;
7652
- }
7653
- const min = forceColor || 0;
7654
- if (env.TERM === "dumb") {
7655
- return min;
7656
- }
7657
- if (process2.platform === "win32") {
7658
- const osRelease = os.release().split(".");
7659
- if (Number(osRelease[0]) >= 10 && Number(osRelease[2]) >= 10586) {
7660
- return Number(osRelease[2]) >= 14931 ? 3 : 2;
7661
- }
7662
- return 1;
7663
- }
7664
- if ("CI" in env) {
7665
- if (["GITHUB_ACTIONS", "GITEA_ACTIONS", "CIRCLECI"].some((key) => key in env)) {
7666
- return 3;
7667
- }
7668
- if (["TRAVIS", "APPVEYOR", "GITLAB_CI", "BUILDKITE", "DRONE"].some((sign) => sign in env) || env.CI_NAME === "codeship") {
7669
- return 1;
7670
- }
7671
- return min;
7672
- }
7673
- if ("TEAMCITY_VERSION" in env) {
7674
- return /^(9\.(0*[1-9]\d*)\.|\d{2,}\.)/.test(env.TEAMCITY_VERSION) ? 1 : 0;
7675
- }
7676
- if (env.COLORTERM === "truecolor") {
7677
- return 3;
7678
- }
7679
- if (env.TERM === "xterm-kitty") {
7680
- return 3;
7681
- }
7682
- if (env.TERM === "xterm-ghostty") {
7683
- return 3;
7684
- }
7685
- if (env.TERM === "wezterm") {
7686
- return 3;
7687
- }
7688
- if ("TERM_PROGRAM" in env) {
7689
- const version2 = Number.parseInt((env.TERM_PROGRAM_VERSION || "").split(".")[0], 10);
7690
- switch (env.TERM_PROGRAM) {
7691
- case "iTerm.app": {
7692
- return version2 >= 3 ? 3 : 2;
7693
- }
7694
- case "Apple_Terminal": {
7695
- return 2;
7696
- }
7697
- }
7698
- }
7699
- if (/-256(color)?$/i.test(env.TERM)) {
7700
- return 2;
7701
- }
7702
- if (/^screen|^xterm|^vt100|^vt220|^rxvt|color|ansi|cygwin|linux/i.test(env.TERM)) {
7703
- return 1;
7704
- }
7705
- if ("COLORTERM" in env) {
7706
- return 1;
7707
- }
7708
- return min;
7709
- }
7710
- function createSupportsColor(stream, options = {}) {
7711
- const level = _supportsColor(stream, {
7712
- streamIsTTY: stream && stream.isTTY,
7713
- ...options
7714
- });
7715
- return translateLevel(level);
7716
- }
7717
- var supportsColor = {
7718
- stdout: createSupportsColor({ isTTY: tty.isatty(1) }),
7719
- stderr: createSupportsColor({ isTTY: tty.isatty(2) })
7720
- };
7721
- var supports_color_default = supportsColor;
7722
-
7723
- // ../evals-cli/node_modules/chalk/source/utilities.js
7724
- function stringReplaceAll(string, substring, replacer) {
7725
- let index = string.indexOf(substring);
7726
- if (index === -1) {
7727
- return string;
7728
- }
7729
- const substringLength = substring.length;
7730
- let endIndex = 0;
7731
- let returnValue = "";
7732
- do {
7733
- returnValue += string.slice(endIndex, index) + substring + replacer;
7734
- endIndex = index + substringLength;
7735
- index = string.indexOf(substring, endIndex);
7736
- } while (index !== -1);
7737
- returnValue += string.slice(endIndex);
7738
- return returnValue;
7739
- }
7740
- function stringEncaseCRLFWithFirstIndex(string, prefix, postfix, index) {
7741
- let endIndex = 0;
7742
- let returnValue = "";
7743
- do {
7744
- const gotCR = string[index - 1] === "\r";
7745
- returnValue += string.slice(endIndex, gotCR ? index - 1 : index) + prefix + (gotCR ? "\r\n" : "\n") + postfix;
7746
- endIndex = index + 1;
7747
- index = string.indexOf("\n", endIndex);
7748
- } while (index !== -1);
7749
- returnValue += string.slice(endIndex);
7750
- return returnValue;
7751
- }
7752
-
7753
- // ../evals-cli/node_modules/chalk/source/index.js
7754
- var { stdout: stdoutColor, stderr: stderrColor } = supports_color_default;
7755
- var GENERATOR = Symbol("GENERATOR");
7756
- var STYLER = Symbol("STYLER");
7757
- var IS_EMPTY = Symbol("IS_EMPTY");
7758
- var levelMapping = [
7759
- "ansi",
7760
- "ansi",
7761
- "ansi256",
7762
- "ansi16m"
7763
- ];
7764
- var styles2 = /* @__PURE__ */ Object.create(null);
7765
- var applyOptions = (object, options = {}) => {
7766
- if (options.level && !(Number.isInteger(options.level) && options.level >= 0 && options.level <= 3)) {
7767
- throw new Error("The `level` option should be an integer from 0 to 3");
7768
- }
7769
- const colorLevel = stdoutColor ? stdoutColor.level : 0;
7770
- object.level = options.level === void 0 ? colorLevel : options.level;
7771
- };
7772
- var chalkFactory = (options) => {
7773
- const chalk2 = (...strings) => strings.join(" ");
7774
- applyOptions(chalk2, options);
7775
- Object.setPrototypeOf(chalk2, createChalk.prototype);
7776
- return chalk2;
7777
- };
7778
- function createChalk(options) {
7779
- return chalkFactory(options);
7780
- }
7781
- Object.setPrototypeOf(createChalk.prototype, Function.prototype);
7782
- for (const [styleName, style] of Object.entries(ansi_styles_default)) {
7783
- styles2[styleName] = {
7784
- get() {
7785
- const builder = createBuilder(this, createStyler(style.open, style.close, this[STYLER]), this[IS_EMPTY]);
7786
- Object.defineProperty(this, styleName, { value: builder });
7787
- return builder;
7788
- }
7789
- };
7790
- }
7791
- styles2.visible = {
7792
- get() {
7793
- const builder = createBuilder(this, this[STYLER], true);
7794
- Object.defineProperty(this, "visible", { value: builder });
7795
- return builder;
7796
- }
7797
- };
7798
- var getModelAnsi = (model, level, type, ...arguments_) => {
7799
- if (model === "rgb") {
7800
- if (level === "ansi16m") {
7801
- return ansi_styles_default[type].ansi16m(...arguments_);
7802
- }
7803
- if (level === "ansi256") {
7804
- return ansi_styles_default[type].ansi256(ansi_styles_default.rgbToAnsi256(...arguments_));
7805
- }
7806
- return ansi_styles_default[type].ansi(ansi_styles_default.rgbToAnsi(...arguments_));
7807
- }
7808
- if (model === "hex") {
7809
- return getModelAnsi("rgb", level, type, ...ansi_styles_default.hexToRgb(...arguments_));
7810
- }
7811
- return ansi_styles_default[type][model](...arguments_);
7812
- };
7813
- var usedModels = ["rgb", "hex", "ansi256"];
7814
- for (const model of usedModels) {
7815
- styles2[model] = {
7816
- get() {
7817
- const { level } = this;
7818
- return function(...arguments_) {
7819
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "color", ...arguments_), ansi_styles_default.color.close, this[STYLER]);
7820
- return createBuilder(this, styler, this[IS_EMPTY]);
7821
- };
7822
- }
7823
- };
7824
- const bgModel = "bg" + model[0].toUpperCase() + model.slice(1);
7825
- styles2[bgModel] = {
7826
- get() {
7827
- const { level } = this;
7828
- return function(...arguments_) {
7829
- const styler = createStyler(getModelAnsi(model, levelMapping[level], "bgColor", ...arguments_), ansi_styles_default.bgColor.close, this[STYLER]);
7830
- return createBuilder(this, styler, this[IS_EMPTY]);
7831
- };
7832
- }
7833
- };
7834
- }
7835
- var proto = Object.defineProperties(() => {
7836
- }, {
7837
- ...styles2,
7838
- level: {
7839
- enumerable: true,
7840
- get() {
7841
- return this[GENERATOR].level;
7842
- },
7843
- set(level) {
7844
- this[GENERATOR].level = level;
7845
- }
7846
- }
7847
- });
7848
- var createStyler = (open, close, parent) => {
7849
- let openAll;
7850
- let closeAll;
7851
- if (parent === void 0) {
7852
- openAll = open;
7853
- closeAll = close;
7854
- } else {
7855
- openAll = parent.openAll + open;
7856
- closeAll = close + parent.closeAll;
7857
- }
7858
- return {
7859
- open,
7860
- close,
7861
- openAll,
7862
- closeAll,
7863
- parent
7864
- };
7865
- };
7866
- var createBuilder = (self, _styler, _isEmpty) => {
7867
- const builder = (...arguments_) => applyStyle(builder, arguments_.length === 1 ? "" + arguments_[0] : arguments_.join(" "));
7868
- Object.setPrototypeOf(builder, proto);
7869
- builder[GENERATOR] = self;
7870
- builder[STYLER] = _styler;
7871
- builder[IS_EMPTY] = _isEmpty;
7872
- return builder;
7873
- };
7874
- var applyStyle = (self, string) => {
7875
- if (self.level <= 0 || !string) {
7876
- return self[IS_EMPTY] ? "" : string;
7877
- }
7878
- let styler = self[STYLER];
7879
- if (styler === void 0) {
7880
- return string;
7881
- }
7882
- const { openAll, closeAll } = styler;
7883
- if (string.includes("\x1B")) {
7884
- while (styler !== void 0) {
7885
- string = stringReplaceAll(string, styler.close, styler.open);
7886
- styler = styler.parent;
7887
- }
7888
- }
7889
- const lfIndex = string.indexOf("\n");
7890
- if (lfIndex !== -1) {
7891
- string = stringEncaseCRLFWithFirstIndex(string, closeAll, openAll, lfIndex);
7892
- }
7893
- return openAll + string + closeAll;
7894
- };
7895
- Object.defineProperties(createChalk.prototype, styles2);
7896
- var chalk = createChalk();
7897
- var chalkStderr = createChalk({ level: stderrColor ? stderrColor.level : 0 });
7898
- var source_default = chalk;
7899
-
7900
7429
  // ../evals-cli/src/utils/logger.ts
7430
+ import chalk from "chalk";
7901
7431
  var MAX_CONTENT_LENGTH = 160;
7902
7432
  var Logger = class {
7903
7433
  static activeStream = null;
@@ -7929,7 +7459,7 @@ var Logger = class {
7929
7459
  static suiteIntro(options) {
7930
7460
  const { testCount, startedAt } = options;
7931
7461
  this.logLine(
7932
- source_default.bold.blue(
7462
+ chalk.bold.blue(
7933
7463
  `\u25B8 Running ${testCount} test${testCount === 1 ? "" : "s"} (${startedAt.toISOString()})`
7934
7464
  )
7935
7465
  );
@@ -7938,42 +7468,42 @@ var Logger = class {
7938
7468
  const { durationMs, passed, failed } = options;
7939
7469
  const total = passed + failed;
7940
7470
  const summary = `${passed}/${total} passed \u2022 ${this.formatDuration(durationMs)}`;
7941
- const prefix = failed === 0 ? source_default.green("\u2714") : source_default.red("\u2716");
7471
+ const prefix = failed === 0 ? chalk.green("\u2714") : chalk.red("\u2716");
7942
7472
  this.logLine(`${prefix} ${summary}`);
7943
7473
  }
7944
7474
  static header(version2) {
7945
- this.logLine(source_default.bold.blue(`MCPJAM CLI ${version2}`));
7475
+ this.logLine(chalk.bold.blue(`MCPJAM CLI ${version2}`));
7946
7476
  this.logLine("");
7947
7477
  }
7948
7478
  static initiateTestMessage(serverCount, toolCount, serverNames, testCount) {
7949
7479
  this.logLine("");
7950
- this.logLine(source_default.bold.blue("Running tests"));
7480
+ this.logLine(chalk.bold.blue("Running tests"));
7951
7481
  const serverLabel = serverCount === 1 ? "server" : "servers";
7952
7482
  const serverList = serverNames.length > 0 ? serverNames.join(", ") : "none";
7953
7483
  this.logLine(
7954
- `Connected to ${source_default.white.bold(serverCount)} ${serverLabel}: ${source_default.gray(serverList)}`
7484
+ `Connected to ${chalk.white.bold(serverCount)} ${serverLabel}: ${chalk.gray(serverList)}`
7955
7485
  );
7956
7486
  const toolLabel = toolCount === 1 ? "tool" : "tools";
7957
- this.logLine(`Found ${source_default.white.bold(toolCount)} total ${toolLabel}`);
7487
+ this.logLine(`Found ${chalk.white.bold(toolCount)} total ${toolLabel}`);
7958
7488
  const testLabel = testCount === 1 ? "test" : "tests";
7959
- this.logLine(`Running ${source_default.white.bold(testCount)} ${testLabel}`);
7489
+ this.logLine(`Running ${chalk.white.bold(testCount)} ${testLabel}`);
7960
7490
  this.logLine("");
7961
7491
  }
7962
7492
  static logTestGroupTitle(testNumber, testName, modelProvider, modelId) {
7963
- this.logLine(source_default.cyan.bold(`Test ${testNumber}: ${testName}`));
7964
- this.logLine(source_default.gray(`Using ${modelProvider}:${modelId}`));
7493
+ this.logLine(chalk.cyan.bold(`Test ${testNumber}: ${testName}`));
7494
+ this.logLine(chalk.gray(`Using ${modelProvider}:${modelId}`));
7965
7495
  this.logLine("");
7966
7496
  }
7967
7497
  static testRunStart(options) {
7968
7498
  const { runNumber, totalRuns } = options;
7969
7499
  const parts = [`run ${runNumber}/${totalRuns}`];
7970
- this.logLine(source_default.cyanBright(parts.join(" \u2022 ")));
7500
+ this.logLine(chalk.cyanBright(parts.join(" \u2022 ")));
7971
7501
  }
7972
7502
  static conversation(options) {
7973
7503
  const { messages } = options;
7974
7504
  this.closeActiveStream();
7975
7505
  if (!messages.length) {
7976
- this.logLine(source_default.dim("(no messages)"));
7506
+ this.logLine(chalk.dim("(no messages)"));
7977
7507
  return;
7978
7508
  }
7979
7509
  messages.forEach((message, index) => {
@@ -8005,7 +7535,7 @@ var Logger = class {
8005
7535
  }
8006
7536
  static testRunResult(options) {
8007
7537
  const { passed, durationMs, usage } = options;
8008
- const status = passed ? source_default.green("PASS") : source_default.red("FAIL");
7538
+ const status = passed ? chalk.green("PASS") : chalk.red("FAIL");
8009
7539
  this.logLine(`${status} (${this.formatDuration(durationMs)})`);
8010
7540
  if (usage) {
8011
7541
  const usageParts = [];
@@ -8019,49 +7549,49 @@ var Logger = class {
8019
7549
  usageParts.push(`total ${usage.totalTokens}`);
8020
7550
  }
8021
7551
  if (usageParts.length > 0) {
8022
- this.logLine(source_default.gray(`Tokens \u2022 ${usageParts.join(" \u2022 ")}`));
7552
+ this.logLine(chalk.gray(`Tokens \u2022 ${usageParts.join(" \u2022 ")}`));
8023
7553
  }
8024
7554
  }
8025
7555
  this.logLine("");
8026
7556
  this.logLine("");
8027
7557
  }
8028
7558
  static info(message) {
8029
- this.logLine(source_default.blue(`\u2139 ${message}`));
7559
+ this.logLine(chalk.blue(`\u2139 ${message}`));
8030
7560
  }
8031
7561
  static warn(message) {
8032
- this.logLine(source_default.yellow(`\u26A0 ${message}`));
7562
+ this.logLine(chalk.yellow(`\u26A0 ${message}`));
8033
7563
  }
8034
7564
  static success(message) {
8035
- this.logLine(source_default.green(`\u2713 ${message}`));
7565
+ this.logLine(chalk.green(`\u2713 ${message}`));
8036
7566
  }
8037
7567
  static error(message) {
8038
7568
  this.logLine("");
8039
- this.logLine(source_default.red(`\u2715 Error: ${message}`));
7569
+ this.logLine(chalk.red(`\u2715 Error: ${message}`));
8040
7570
  }
8041
7571
  static errorWithExit(message) {
8042
7572
  this.logLine("");
8043
- this.logLine(source_default.red(`\u2715 Error: ${message}`));
7573
+ this.logLine(chalk.red(`\u2715 Error: ${message}`));
8044
7574
  process.exit(1);
8045
7575
  }
8046
7576
  static progress(current, total, testName) {
8047
7577
  const progress = `[${current}/${total}]`;
8048
- this.logLine(source_default.gray(`${progress} ${testName}...`));
7578
+ this.logLine(chalk.gray(`${progress} ${testName}...`));
8049
7579
  }
8050
7580
  static testStarting(testName) {
8051
- this.logLine(source_default.gray(` Running ${testName}...`));
7581
+ this.logLine(chalk.gray(` Running ${testName}...`));
8052
7582
  }
8053
7583
  static testError(testName, error) {
8054
- this.logLine(source_default.red(` \u2715 ${testName} failed: ${error}`));
7584
+ this.logLine(chalk.red(` \u2715 ${testName} failed: ${error}`));
8055
7585
  }
8056
7586
  static connectionError(serverName, error) {
8057
- this.logLine(source_default.red(` \u2715 Failed to connect to ${serverName}: ${error}`));
7587
+ this.logLine(chalk.red(` \u2715 Failed to connect to ${serverName}: ${error}`));
8058
7588
  }
8059
7589
  static apiKeyError(provider, error) {
8060
- this.logLine(source_default.red(` \u2715 API key error for ${provider}: ${error}`));
7590
+ this.logLine(chalk.red(` \u2715 API key error for ${provider}: ${error}`));
8061
7591
  }
8062
7592
  static modelCreationError(provider, modelId, error) {
8063
7593
  this.logLine(
8064
- source_default.red(
7594
+ chalk.red(
8065
7595
  ` \u2715 Failed to create ${provider} model "${modelId}": ${error}`
8066
7596
  )
8067
7597
  );
@@ -8176,11 +7706,11 @@ var Logger = class {
8176
7706
  }
8177
7707
  }
8178
7708
  static logToolCall(toolCall) {
8179
- const header = source_default.whiteBright(`[tool-call] ${toolCall.toolName}`);
7709
+ const header = chalk.whiteBright(`[tool-call] ${toolCall.toolName}`);
8180
7710
  this.logLine(header);
8181
7711
  const jsonArgs = toolCall.args ? JSON.parse(toolCall.args) : null;
8182
7712
  if (toolCall.args) {
8183
- this.logLine(source_default.gray(this.truncate(toolCall.args)));
7713
+ this.logLine(chalk.gray(this.truncate(toolCall.args)));
8184
7714
  }
8185
7715
  }
8186
7716
  static beginStreamingMessage(role) {
@@ -8202,18 +7732,18 @@ var Logger = class {
8202
7732
  }
8203
7733
  static streamToolResult(toolName, output) {
8204
7734
  this.closeActiveStream();
8205
- const header = source_default.whiteBright(`[tool-result] ${toolName}`);
7735
+ const header = chalk.whiteBright(`[tool-result] ${toolName}`);
8206
7736
  this.logLine(header);
8207
7737
  if (output !== void 0) {
8208
- this.logLine(source_default.gray(this.truncate(this.stringify(output))));
7738
+ this.logLine(chalk.gray(this.truncate(this.stringify(output))));
8209
7739
  }
8210
7740
  }
8211
7741
  static streamToolError(toolName, error) {
8212
7742
  this.closeActiveStream();
8213
- const header = source_default.whiteBright(`[tool-error] ${toolName}`);
7743
+ const header = chalk.whiteBright(`[tool-error] ${toolName}`);
8214
7744
  this.logLine(header);
8215
7745
  this.logLine(
8216
- source_default.red(this.truncate(this.stringify(error ?? "Unknown error")))
7746
+ chalk.red(this.truncate(this.stringify(error ?? "Unknown error")))
8217
7747
  );
8218
7748
  }
8219
7749
  static renderBox(lines, options) {
@@ -8228,7 +7758,7 @@ var Logger = class {
8228
7758
  lines.forEach((line, index) => {
8229
7759
  const padded = line.padEnd(width, " ");
8230
7760
  const isStatusLine = index === statusIndex;
8231
- const colouredContent = isStatusLine ? statusColor(padded) : source_default.white(padded);
7761
+ const colouredContent = isStatusLine ? statusColor(padded) : chalk.white(padded);
8232
7762
  this.logLine(
8233
7763
  `${borderColor("| ")}${colouredContent}${borderColor(" |")}`
8234
7764
  );
@@ -8270,15 +7800,15 @@ var Logger = class {
8270
7800
  static colorRole(role) {
8271
7801
  switch (role) {
8272
7802
  case "user":
8273
- return source_default.bold.whiteBright("user");
7803
+ return chalk.bold.whiteBright("user");
8274
7804
  case "assistant":
8275
- return source_default.bold.whiteBright("assistant");
7805
+ return chalk.bold.whiteBright("assistant");
8276
7806
  case "tool":
8277
- return source_default.bold.whiteBright("tool");
7807
+ return chalk.bold.whiteBright("tool");
8278
7808
  case "system":
8279
- return source_default.bold.whiteBright("system");
7809
+ return chalk.bold.whiteBright("system");
8280
7810
  default:
8281
- return source_default.cyan(role);
7811
+ return chalk.cyan(role);
8282
7812
  }
8283
7813
  }
8284
7814
  };
@@ -8600,8 +8130,14 @@ var accumulateTokenCount = (current, increment) => {
8600
8130
  };
8601
8131
  var prepareSuite = async (validatedTests, mcpClientOptions, validatedLlms) => {
8602
8132
  const mcpClient = new MCPClient(mcpClientOptions);
8603
- const availableTools = await mcpClient.getTools();
8604
- const vercelTools = convertMastraToolsToVercelTools(availableTools);
8133
+ const toolsets = await mcpClient.getToolsets();
8134
+ const availableTools = {};
8135
+ Object.values(toolsets).forEach((serverTools) => {
8136
+ Object.assign(availableTools, serverTools);
8137
+ });
8138
+ const vercelTools = convertMastraToolsToVercelTools(
8139
+ availableTools
8140
+ );
8605
8141
  const serverNames = Object.keys(mcpClientOptions.servers);
8606
8142
  Logger.initiateTestMessage(
8607
8143
  serverNames.length,
@@ -8661,6 +8197,7 @@ var runIterationViaBackend = async ({
8661
8197
  await runBackendConversation({
8662
8198
  maxSteps: MAX_STEPS,
8663
8199
  messageHistory,
8200
+ modelId: test.model,
8664
8201
  toolDefinitions: toolDefs,
8665
8202
  fetchBackend: async (payload) => {
8666
8203
  try {
@@ -8782,41 +8319,60 @@ var runIteration = async ({
8782
8319
  });
8783
8320
  while (stepCount < MAX_STEPS) {
8784
8321
  let assistantStreaming = false;
8785
- const streamResult = await streamText2({
8786
- model: createLlmModel2(provider, model, llms),
8787
- system,
8788
- temperature,
8789
- tools: tools2,
8790
- toolChoice,
8791
- messages: messageHistory,
8792
- onChunk: async (chunk) => {
8793
- switch (chunk.chunk.type) {
8794
- case "text-delta":
8795
- case "reasoning-delta": {
8796
- if (!assistantStreaming) {
8797
- Logger.beginStreamingMessage("assistant");
8798
- assistantStreaming = true;
8322
+ let streamResult;
8323
+ try {
8324
+ streamResult = await streamText2({
8325
+ model: createLlmModel2(provider, model, llms),
8326
+ system,
8327
+ temperature,
8328
+ tools: tools2,
8329
+ toolChoice,
8330
+ messages: messageHistory,
8331
+ onChunk: async (chunk) => {
8332
+ switch (chunk.chunk.type) {
8333
+ case "text-delta":
8334
+ case "reasoning-delta": {
8335
+ if (!assistantStreaming) {
8336
+ Logger.beginStreamingMessage("assistant");
8337
+ assistantStreaming = true;
8338
+ }
8339
+ Logger.appendStreamingText(chunk.chunk.text);
8340
+ break;
8799
8341
  }
8800
- Logger.appendStreamingText(chunk.chunk.text);
8801
- break;
8802
- }
8803
- case "tool-call": {
8804
- if (assistantStreaming) {
8805
- Logger.finishStreamingMessage();
8806
- assistantStreaming = false;
8342
+ case "tool-call": {
8343
+ if (assistantStreaming) {
8344
+ Logger.finishStreamingMessage();
8345
+ assistantStreaming = false;
8346
+ }
8347
+ Logger.streamToolCall(chunk.chunk.toolName, chunk.chunk.input);
8348
+ break;
8807
8349
  }
8808
- Logger.streamToolCall(chunk.chunk.toolName, chunk.chunk.input);
8809
- break;
8810
- }
8811
- case "tool-result": {
8812
- Logger.streamToolResult(chunk.chunk.toolName, chunk.chunk.output);
8813
- break;
8350
+ case "tool-result": {
8351
+ Logger.streamToolResult(chunk.chunk.toolName, chunk.chunk.output);
8352
+ break;
8353
+ }
8354
+ default:
8355
+ break;
8814
8356
  }
8815
- default:
8816
- break;
8817
8357
  }
8818
- }
8819
- });
8358
+ });
8359
+ } catch (error) {
8360
+ const errorMessage = error?.message || String(error);
8361
+ Logger.error(errorMessage);
8362
+ const evaluation2 = evaluateResults(test.expectedToolCalls, []);
8363
+ await recorder.finishIteration({
8364
+ iterationId,
8365
+ passed: false,
8366
+ toolsCalled: [],
8367
+ usage: {
8368
+ inputTokens: void 0,
8369
+ outputTokens: void 0,
8370
+ totalTokens: void 0
8371
+ },
8372
+ messages: messageHistory
8373
+ });
8374
+ return evaluation2;
8375
+ }
8820
8376
  await streamResult.consumeStream();
8821
8377
  if (assistantStreaming) {
8822
8378
  Logger.finishStreamingMessage();
@@ -8893,7 +8449,7 @@ var runTestCase = async ({
8893
8449
  let failedRuns = 0;
8894
8450
  const testCaseId = await recorder.recordTestCase(test, testIndex);
8895
8451
  for (let runIndex = 0; runIndex < runs; runIndex++) {
8896
- const usesBackend = isMCPJamProvidedModel(provider);
8452
+ const usesBackend = isMCPJamProvidedModel(model);
8897
8453
  const evaluation = usesBackend && convexUrl && authToken ? await runIterationViaBackend({
8898
8454
  test,
8899
8455
  runIndex,
@@ -9021,9 +8577,10 @@ function transformServerConfigsToEnvironment(serverIds, clientManager) {
9021
8577
  servers: servers2
9022
8578
  };
9023
8579
  }
9024
- function transformLLMConfigToLlmsConfig(llmConfig) {
8580
+ function transformLLMConfigToLlmsConfig(llmConfig, modelId) {
9025
8581
  const llms = {};
9026
- if (isMCPJamProvidedModel(llmConfig.provider)) {
8582
+ const isMCPJamModel = modelId && isMCPJamProvidedModel(modelId);
8583
+ if (isMCPJamModel) {
9027
8584
  llms.openrouter = "BACKEND_EXECUTION";
9028
8585
  } else {
9029
8586
  const providerKey = llmConfig.provider.toLowerCase();
@@ -9036,6 +8593,154 @@ function transformLLMConfigToLlmsConfig(llmConfig) {
9036
8593
  return validated.data;
9037
8594
  }
9038
8595
 
8596
+ // services/eval-agent.ts
8597
+ var AGENT_SYSTEM_PROMPT = `You are an AI agent specialized in creating realistic test cases for MCP (Model Context Protocol) servers.
8598
+
8599
+ **About MCP:**
8600
+ The Model Context Protocol enables AI assistants to securely access external data and tools. MCP servers expose tools, resources, and prompts that AI models can use to accomplish user tasks. Your test cases should reflect real-world usage patterns where users ask an AI assistant to perform tasks, and the assistant uses MCP tools to fulfill those requests.
8601
+
8602
+ **Your Task:**
8603
+ Generate 6 test cases with varying complexity levels that mimic how real users would interact with an AI assistant using these MCP tools.
8604
+
8605
+ **Test Case Distribution:**
8606
+ - **2 EASY tests** (single tool): Simple, straightforward tasks using one tool
8607
+ - **2 MEDIUM tests** (2+ tools): Multi-step workflows requiring 2-3 tools in sequence or parallel
8608
+ - **2 HARD tests** (3+ tools): Complex scenarios requiring 3+ tools, conditional logic, or cross-server operations
8609
+
8610
+ **Guidelines:**
8611
+ 1. **Realistic User Queries**: Write queries as if a real user is talking to an AI assistant (e.g., "Help me find all tasks due this week" not "Call the list_tasks tool")
8612
+ 2. **Natural Workflows**: Chain tools together in logical sequences that solve real problems
8613
+ 3. **Cross-Server Tests**: If multiple servers are available, create tests that use tools from different servers together
8614
+ 4. **Specific Details**: Include concrete examples (dates, names, values) to make tests actionable
8615
+ 5. **Judge Requirements**: Clearly define what success looks like for each test
8616
+ 6. **Test Titles**: Write clear, descriptive titles WITHOUT difficulty prefixes (e.g., "Read project configuration" not "EASY: Read project configuration")
8617
+
8618
+ **Output Format (CRITICAL):**
8619
+ Respond with ONLY a valid JSON array. No explanations, no markdown code blocks, just the raw JSON array.
8620
+
8621
+ Example:
8622
+ [
8623
+ {
8624
+ "title": "Read project configuration",
8625
+ "query": "Show me the contents of config.json in the current project",
8626
+ "runs": 1,
8627
+ "expectedToolCalls": ["read_file"],
8628
+ "judgeRequirement": "Successfully reads and returns the file contents"
8629
+ },
8630
+ {
8631
+ "title": "Find and analyze recent tasks",
8632
+ "query": "Find all tasks created this week and summarize their status",
8633
+ "runs": 1,
8634
+ "expectedToolCalls": ["list_tasks", "get_task_details"],
8635
+ "judgeRequirement": "First lists tasks filtered by date, then retrieves details for each task found"
8636
+ },
8637
+ {
8638
+ "title": "Cross-server project setup",
8639
+ "query": "Create a new project folder, initialize a git repository, and create a task to track the project setup",
8640
+ "runs": 1,
8641
+ "expectedToolCalls": ["create_directory", "git_init", "create_task"],
8642
+ "judgeRequirement": "Successfully creates directory, initializes git, and creates a tracking task with appropriate details"
8643
+ }
8644
+ ]`;
8645
+ async function generateTestCases(tools2, convexHttpUrl, convexAuthToken) {
8646
+ const serverGroups = tools2.reduce(
8647
+ (acc, tool2) => {
8648
+ if (!acc[tool2.serverId]) {
8649
+ acc[tool2.serverId] = [];
8650
+ }
8651
+ acc[tool2.serverId].push(tool2);
8652
+ return acc;
8653
+ },
8654
+ {}
8655
+ );
8656
+ const serverCount = Object.keys(serverGroups).length;
8657
+ const totalTools = tools2.length;
8658
+ const toolsContext = Object.entries(serverGroups).map(([serverId, serverTools]) => {
8659
+ const toolsList = serverTools.map((tool2) => {
8660
+ return ` - ${tool2.name}: ${tool2.description || "No description"}
8661
+ Input: ${JSON.stringify(tool2.inputSchema)}`;
8662
+ }).join("\n");
8663
+ return `**Server: ${serverId}** (${serverTools.length} tools)
8664
+ ${toolsList}`;
8665
+ }).join("\n\n");
8666
+ const crossServerGuidance = serverCount > 1 ? `
8667
+ **IMPORTANT**: You have ${serverCount} servers available. Create at least 2 test cases that use tools from MULTIPLE servers to test cross-server workflows.` : "";
8668
+ const userPrompt = `Generate 6 test cases for the following MCP server tools:
8669
+
8670
+ ${toolsContext}
8671
+
8672
+ **Available Resources:**
8673
+ - ${serverCount} MCP server(s)
8674
+ - ${totalTools} total tools${crossServerGuidance}
8675
+
8676
+ **Remember:**
8677
+ 1. Create exactly 6 tests: 2 EASY (1 tool), 2 MEDIUM (2-3 tools), 2 HARD (3+ tools)
8678
+ 2. Write realistic user queries that sound natural
8679
+ 3. Use specific examples (dates, filenames, values)
8680
+ 4. Chain tools in logical sequences
8681
+ 5. Respond with ONLY a JSON array - no other text or markdown`;
8682
+ const messageHistory = [
8683
+ { role: "system", content: AGENT_SYSTEM_PROMPT },
8684
+ { role: "user", content: userPrompt }
8685
+ ];
8686
+ const response = await fetch(`${convexHttpUrl}/streaming`, {
8687
+ method: "POST",
8688
+ headers: {
8689
+ "Content-Type": "application/json",
8690
+ Authorization: `Bearer ${convexAuthToken}`
8691
+ },
8692
+ body: JSON.stringify({
8693
+ model: "meta-llama/llama-3.3-70b-instruct",
8694
+ tools: [],
8695
+ messages: JSON.stringify(messageHistory)
8696
+ })
8697
+ });
8698
+ if (!response.ok) {
8699
+ const errorText = await response.text();
8700
+ throw new Error(`Failed to generate test cases: ${errorText}`);
8701
+ }
8702
+ const data = await response.json();
8703
+ if (!data.ok || !Array.isArray(data.messages)) {
8704
+ throw new Error("Invalid response from backend LLM");
8705
+ }
8706
+ let assistantResponse = "";
8707
+ for (const msg of data.messages) {
8708
+ if (msg.role === "assistant") {
8709
+ const content = msg.content;
8710
+ if (typeof content === "string") {
8711
+ assistantResponse += content;
8712
+ } else if (Array.isArray(content)) {
8713
+ for (const item of content) {
8714
+ if (item.type === "text" && item.text) {
8715
+ assistantResponse += item.text;
8716
+ }
8717
+ }
8718
+ }
8719
+ }
8720
+ }
8721
+ try {
8722
+ const jsonMatch = assistantResponse.match(/```(?:json)?\s*([\s\S]*?)```/);
8723
+ const jsonText = jsonMatch ? jsonMatch[1].trim() : assistantResponse.trim();
8724
+ const testCases = JSON.parse(jsonText);
8725
+ if (!Array.isArray(testCases)) {
8726
+ throw new Error("Response is not an array");
8727
+ }
8728
+ const validatedTests = testCases.map((tc) => ({
8729
+ title: tc.title || "Untitled Test",
8730
+ query: tc.query || "",
8731
+ runs: typeof tc.runs === "number" ? tc.runs : 1,
8732
+ expectedToolCalls: Array.isArray(tc.expectedToolCalls) ? tc.expectedToolCalls : [],
8733
+ judgeRequirement: tc.judgeRequirement
8734
+ }));
8735
+ return validatedTests;
8736
+ } catch (parseError) {
8737
+ console.error("Failed to parse LLM response:", assistantResponse);
8738
+ throw new Error(
8739
+ `Failed to parse test cases from LLM response: ${parseError instanceof Error ? parseError.message : "Unknown error"}`
8740
+ );
8741
+ }
8742
+ }
8743
+
9039
8744
  // routes/mcp/evals.ts
9040
8745
  var evals = new Hono10();
9041
8746
  var RunEvalsRequestSchema = z3.object({
@@ -9081,7 +8786,8 @@ evals.post("/run", async (c) => {
9081
8786
  serverIds,
9082
8787
  clientManager
9083
8788
  );
9084
- const llms = transformLLMConfigToLlmsConfig(llmConfig);
8789
+ const modelId = tests.length > 0 ? tests[0].model : void 0;
8790
+ const llms = transformLLMConfigToLlmsConfig(llmConfig, modelId);
9085
8791
  const convexUrl = process.env.CONVEX_URL;
9086
8792
  if (!convexUrl) {
9087
8793
  throw new Error("CONVEX_URL is not set");
@@ -9100,7 +8806,8 @@ evals.post("/run", async (c) => {
9100
8806
  convexHttpUrl,
9101
8807
  convexAuthToken
9102
8808
  ).catch((error) => {
9103
- console.error("[Hono:Evals] Error running evals:", error);
8809
+ const errorMessage = error instanceof Error ? error.message : String(error);
8810
+ console.error("[Error running evals:", errorMessage);
9104
8811
  });
9105
8812
  return c.json({
9106
8813
  success: true,
@@ -9116,6 +8823,63 @@ evals.post("/run", async (c) => {
9116
8823
  );
9117
8824
  }
9118
8825
  });
8826
+ var GenerateTestsRequestSchema = z3.object({
8827
+ serverIds: z3.array(z3.string()).min(1, "At least one server must be selected"),
8828
+ convexAuthToken: z3.string()
8829
+ });
8830
+ evals.post("/generate-tests", async (c) => {
8831
+ try {
8832
+ const body = await c.req.json();
8833
+ const validationResult = GenerateTestsRequestSchema.safeParse(body);
8834
+ if (!validationResult.success) {
8835
+ return c.json(
8836
+ {
8837
+ error: "Invalid request body",
8838
+ details: validationResult.error.errors
8839
+ },
8840
+ 400
8841
+ );
8842
+ }
8843
+ const { serverIds, convexAuthToken } = validationResult.data;
8844
+ const clientManager = c.mcpJamClientManager;
8845
+ const allTools = clientManager.getAvailableTools();
8846
+ const serverIdSet = new Set(
8847
+ serverIds.map((name) => clientManager.getServerIdForName(name)).filter(Boolean)
8848
+ );
8849
+ const filteredTools = allTools.filter(
8850
+ (tool2) => serverIdSet.has(tool2.serverId)
8851
+ );
8852
+ if (filteredTools.length === 0) {
8853
+ return c.json(
8854
+ {
8855
+ error: "No tools found for selected servers"
8856
+ },
8857
+ 400
8858
+ );
8859
+ }
8860
+ const convexHttpUrl = process.env.CONVEX_HTTP_URL;
8861
+ if (!convexHttpUrl) {
8862
+ throw new Error("CONVEX_HTTP_URL is not set");
8863
+ }
8864
+ const testCases = await generateTestCases(
8865
+ filteredTools,
8866
+ convexHttpUrl,
8867
+ convexAuthToken
8868
+ );
8869
+ return c.json({
8870
+ success: true,
8871
+ tests: testCases
8872
+ });
8873
+ } catch (error) {
8874
+ console.error("Error in /evals/generate-tests:", error);
8875
+ return c.json(
8876
+ {
8877
+ error: error instanceof Error ? error.message : "Unknown error"
8878
+ },
8879
+ 500
8880
+ );
8881
+ }
8882
+ });
9119
8883
  var evals_default = evals;
9120
8884
 
9121
8885
  // routes/mcp/http-adapters.ts
@@ -9360,13 +9124,13 @@ function createHttpHandler(mode, routePrefix) {
9360
9124
  const xfProto = c.req.header("x-forwarded-proto");
9361
9125
  const xfHost = c.req.header("x-forwarded-host");
9362
9126
  const host = xfHost || c.req.header("host");
9363
- let proto2 = xfProto;
9364
- if (!proto2) {
9127
+ let proto = xfProto;
9128
+ if (!proto) {
9365
9129
  const originHeader = c.req.header("origin");
9366
- if (originHeader && /^https:/i.test(originHeader)) proto2 = "https";
9130
+ if (originHeader && /^https:/i.test(originHeader)) proto = "https";
9367
9131
  }
9368
- if (!proto2) proto2 = "http";
9369
- const origin = host ? `${proto2}://${host}` : incomingUrl.origin;
9132
+ if (!proto) proto = "http";
9133
+ const origin = host ? `${proto}://${host}` : incomingUrl.origin;
9370
9134
  endpointBase = `${origin}/api/mcp/${routePrefix}/${serverId}/messages`;
9371
9135
  }
9372
9136
  const sessionId = crypto.randomUUID();
@@ -9588,23 +9352,22 @@ function validateServerConfig(serverConfig) {
9588
9352
  };
9589
9353
  }
9590
9354
  if (config.oauth?.access_token) {
9355
+ const accessToken = config.oauth.access_token;
9591
9356
  const authHeaders = {
9592
- Authorization: `Bearer ${config.oauth.access_token}`,
9357
+ Authorization: `Bearer ${accessToken}`,
9593
9358
  ...config.requestInit?.headers || {}
9594
9359
  };
9595
9360
  config.requestInit = {
9596
9361
  ...config.requestInit,
9597
9362
  headers: authHeaders
9598
9363
  };
9364
+ const requestInitHeaders = config.requestInit?.headers;
9599
9365
  config.eventSourceInit = {
9600
9366
  fetch(input, init2) {
9601
9367
  const headers = new Headers(init2?.headers || {});
9602
- headers.set(
9603
- "Authorization",
9604
- `Bearer ${config.oauth.access_token}`
9605
- );
9606
- if (config.requestInit?.headers) {
9607
- const requestHeaders = new Headers(config.requestInit.headers);
9368
+ headers.set("Authorization", `Bearer ${accessToken}`);
9369
+ if (requestInitHeaders) {
9370
+ const requestHeaders = new Headers(requestInitHeaders);
9608
9371
  requestHeaders.forEach((value, key) => {
9609
9372
  if (key.toLowerCase() !== "authorization") {
9610
9373
  headers.set(key, value);
@@ -9617,6 +9380,7 @@ function validateServerConfig(serverConfig) {
9617
9380
  });
9618
9381
  }
9619
9382
  };
9383
+ delete config.oauth;
9620
9384
  } else if (config.requestInit?.headers) {
9621
9385
  config.eventSourceInit = {
9622
9386
  fetch(input, init2) {