@wix/evalforge-evaluator 0.62.0 → 0.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6451,6 +6451,7 @@ var import_crypto = require("crypto");
6451
6451
 
6452
6452
  // src/run-scenario/agents/claude-code/write-mcp.ts
6453
6453
  var import_promises4 = require("fs/promises");
6454
+ var import_child_process = require("child_process");
6454
6455
  var import_path6 = require("path");
6455
6456
  var import_evalforge_types2 = require("@wix/evalforge-types");
6456
6457
  async function writeMcpToFilesystem(cwd, mcps) {
@@ -6476,6 +6477,73 @@ async function writeMcpToFilesystem(cwd, mcps) {
6476
6477
  await (0, import_promises4.writeFile)(filePath, content, "utf8");
6477
6478
  console.log(`[MCP] Written to ${filePath}`);
6478
6479
  }
6480
/**
 * Probe every stdio-style MCP server declared in the given MCP definitions.
 *
 * Each definition's `config` is treated as a map of server name -> server
 * settings. Only settings that carry a string `command` and an array `args`
 * are probed; everything else is skipped. Servers are probed one at a time,
 * in declaration order.
 *
 * This is a diagnostic helper, so malformed input (a missing or non-object
 * `config`, a null entry) is skipped instead of throwing.
 *
 * @param {Array<{config?: object}>} mcps - MCP definitions to inspect.
 * @param {number} [probeMs=5000] - Per-server probe timeout in milliseconds.
 * @returns {Promise<Array<object>>} One probe result per probed server.
 */
async function probeMcpServers(mcps, probeMs = 5e3) {
  const results = [];
  if (!Array.isArray(mcps)) return results;
  for (const mcp of mcps) {
    const config = mcp === null || mcp === undefined ? undefined : mcp.config;
    // Object.entries() throws on null/undefined; a definition without a
    // usable config simply has nothing to probe.
    if (typeof config !== "object" || config === null) continue;
    for (const [name2, value] of Object.entries(config)) {
      if (typeof value !== "object" || value === null) continue;
      const { command, args } = value;
      if (typeof command !== "string" || !Array.isArray(args)) continue;
      // Sequential on purpose: results keep declaration order and only one
      // probe process is alive at a time.
      const result = await probeOneServer(name2, command, args, probeMs);
      results.push(result);
    }
  }
  return results;
}
6501
/**
 * Spawn a single MCP server command and observe it for up to `probeMs`.
 *
 * The returned promise always resolves (never rejects) with a summary:
 * `{ name, command, args, exitCode, signal, stdout, stderr, durationMs }`.
 * `stdout` / `stderr` hold at most the last 2000 characters captured.
 *
 * Outcomes:
 *  - process closed on its own: `exitCode` / `signal` come from the "close" event;
 *  - spawn failed (synchronously or via the "error" event): both are `null`
 *    and `stderr` ends with `spawn error: <message>`;
 *  - still running after `probeMs`: the child is sent SIGTERM and the result
 *    has `exitCode: null`, `signal: "PROBE_TIMEOUT"`.
 *
 * @param {string} name2 - Server name, echoed back as `name` in the result.
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} probeMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<object>} The probe summary described above.
 */
function probeOneServer(name2, command, args, probeMs) {
  return new Promise((resolve2) => {
    const startMs = Date.now();
    let stdout = "";
    let stderr = "";
    let settled = false;
    let timer = null;
    const finish = (exitCode, signal) => {
      if (settled) return;
      settled = true;
      // Cancel the pending timeout so a finished probe does not keep the
      // event loop alive for the rest of probeMs.
      if (timer !== null) {
        clearTimeout(timer);
        timer = null;
      }
      resolve2({
        name: name2,
        command,
        args,
        exitCode,
        signal,
        stdout: stdout.slice(-2e3),
        stderr: stderr.slice(-2e3),
        durationMs: Date.now() - startMs
      });
    };
    let child;
    try {
      child = (0, import_child_process.spawn)(command, args, {
        stdio: ["pipe", "pipe", "pipe"],
        env: process.env
      });
    } catch (err) {
      // spawn() throws synchronously for invalid arguments (e.g. an empty
      // command). Report it like an asynchronous spawn failure instead of
      // rejecting, so a diagnostic probe cannot abort the caller.
      stderr += `\nspawn error: ${err.message}`;
      finish(null, null);
      return;
    }
    child.stdout.on("data", (chunk) => {
      stdout += chunk.toString();
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk.toString();
    });
    child.on("error", (err) => {
      stderr += `\nspawn error: ${err.message}`;
      finish(null, null);
    });
    child.on("close", (code2, sig) => {
      finish(code2, sig);
    });
    timer = setTimeout(() => {
      if (!settled) {
        child.kill("SIGTERM");
        finish(null, "PROBE_TIMEOUT");
      }
    }, probeMs);
  });
}
6479
6547
 
6480
6548
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6481
6549
  var import_promises5 = require("fs/promises");
@@ -6730,6 +6798,29 @@ async function executeWithClaudeCode(skills, scenario, options) {
6730
6798
  const allMessages = [];
6731
6799
  if (options.mcps && options.mcps.length > 0) {
6732
6800
  await writeMcpToFilesystem(options.cwd, options.mcps);
6801
+ const probeResults = await probeMcpServers(options.mcps);
6802
+ if (options.traceContext) {
6803
+ emitTraceEvent(
6804
+ {
6805
+ evalRunId: options.traceContext.evalRunId,
6806
+ scenarioId: options.traceContext.scenarioId,
6807
+ scenarioName: options.traceContext.scenarioName,
6808
+ targetId: options.traceContext.targetId,
6809
+ targetName: options.traceContext.targetName,
6810
+ stepNumber: 0,
6811
+ type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
6812
+ outputPreview: JSON.stringify({
6813
+ event: "mcp-probe",
6814
+ results: probeResults
6815
+ }).slice(0, 2e3),
6816
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6817
+ isComplete: false
6818
+ },
6819
+ options.traceContext.tracePushUrl,
6820
+ options.traceContext.routeHeader,
6821
+ options.traceContext.authToken
6822
+ );
6823
+ }
6733
6824
  }
6734
6825
  if (options.subAgents && options.subAgents.length > 0) {
6735
6826
  await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
@@ -6990,6 +7081,41 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
6990
7081
  })
6991
7082
  );
6992
7083
  }
7084
+ const sdkMsg = message;
7085
+ if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
7086
+ const initData = sdkMsg;
7087
+ const mcpInfo = {
7088
+ mcp_servers: initData.mcp_servers,
7089
+ tools: initData.tools,
7090
+ cwd: options.cwd
7091
+ };
7092
+ console.error(
7093
+ "[MCP-DIAG] Init message MCP status:",
7094
+ JSON.stringify(mcpInfo, null, 2)
7095
+ );
7096
+ if (traceContext) {
7097
+ emitTraceEvent(
7098
+ {
7099
+ evalRunId: traceContext.evalRunId,
7100
+ scenarioId: traceContext.scenarioId,
7101
+ scenarioName: traceContext.scenarioName,
7102
+ targetId: traceContext.targetId,
7103
+ targetName: traceContext.targetName,
7104
+ stepNumber: traceStepNumber,
7105
+ type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
7106
+ outputPreview: JSON.stringify({
7107
+ event: "mcp-init-status",
7108
+ ...mcpInfo
7109
+ }).slice(0, 2e3),
7110
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7111
+ isComplete: false
7112
+ },
7113
+ traceContext.tracePushUrl,
7114
+ traceContext.routeHeader,
7115
+ traceContext.authToken
7116
+ );
7117
+ }
7118
+ }
6993
7119
  if (traceContext) {
6994
7120
  traceStepNumber++;
6995
7121
  const traceEvent = createTraceEventFromAnyMessage(