npm - @wix/evalforge-evaluator - Versions diffs - 0.62.0 → 0.64.0 - Mend

@wix/evalforge-evaluator 0.62.0 → 0.64.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/build/index.js +126 -0
package/build/index.js.map +3 -3
package/build/index.mjs +126 -0
package/build/index.mjs.map +3 -3
package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +19 -0
package/package.json +2 -2

package/build/index.js CHANGED Viewed

@@ -6451,6 +6451,7 @@ var import_crypto = require("crypto");
 // src/run-scenario/agents/claude-code/write-mcp.ts
 var import_promises4 = require("fs/promises");
+var import_child_process = require("child_process");
 var import_path6 = require("path");
 var import_evalforge_types2 = require("@wix/evalforge-types");
 async function writeMcpToFilesystem(cwd, mcps) {
@@ -6476,6 +6477,73 @@ async function writeMcpToFilesystem(cwd, mcps) {
   await (0, import_promises4.writeFile)(filePath, content, "utf8");
   console.log(`[MCP] Written to ${filePath}`);
 }
+async function probeMcpServers(mcps, probeMs = 5e3) {
+  const results = [];
+  for (const mcp of mcps) {
+    const config = mcp.config;
+    for (const [name2, value] of Object.entries(config)) {
+      if (typeof value !== "object" || value === null) continue;
+      const cfg = value;
+      const command = cfg.command;
+      const args = cfg.args;
+      if (typeof command !== "string" || !Array.isArray(args)) continue;
+      const result = await probeOneServer(
+        name2,
+        command,
+        args,
+        probeMs
+      );
+      results.push(result);
+    }
+  }
+  return results;
+}
+function probeOneServer(name2, command, args, probeMs) {
+  return new Promise((resolve2) => {
+    const startMs = Date.now();
+    let stdout = "";
+    let stderr = "";
+    let settled = false;
+    const finish = (exitCode, signal) => {
+      if (settled) return;
+      settled = true;
+      resolve2({
+        name: name2,
+        command,
+        args,
+        exitCode,
+        signal,
+        stdout: stdout.slice(-2e3),
+        stderr: stderr.slice(-2e3),
+        durationMs: Date.now() - startMs
+      });
+    };
+    const child = (0, import_child_process.spawn)(command, args, {
+      stdio: ["pipe", "pipe", "pipe"],
+      env: process.env
+    });
+    child.stdout.on("data", (chunk) => {
+      stdout += chunk.toString();
+    });
+    child.stderr.on("data", (chunk) => {
+      stderr += chunk.toString();
+    });
+    child.on("error", (err) => {
+      stderr += `
+spawn error: ${err.message}`;
+      finish(null, null);
+    });
+    child.on("close", (code2, sig) => {
+      finish(code2, sig);
+    });
+    setTimeout(() => {
+      if (!settled) {
+        child.kill("SIGTERM");
+        finish(null, "PROBE_TIMEOUT");
+      }
+    }, probeMs);
+  });
+}
 // src/run-scenario/agents/claude-code/write-sub-agents.ts
 var import_promises5 = require("fs/promises");
@@ -6730,6 +6798,29 @@ async function executeWithClaudeCode(skills, scenario, options) {
   const allMessages = [];
   if (options.mcps && options.mcps.length > 0) {
     await writeMcpToFilesystem(options.cwd, options.mcps);
+    const probeResults = await probeMcpServers(options.mcps);
+    if (options.traceContext) {
+      emitTraceEvent(
+        {
+          evalRunId: options.traceContext.evalRunId,
+          scenarioId: options.traceContext.scenarioId,
+          scenarioName: options.traceContext.scenarioName,
+          targetId: options.traceContext.targetId,
+          targetName: options.traceContext.targetName,
+          stepNumber: 0,
+          type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
+          outputPreview: JSON.stringify({
+            event: "mcp-probe",
+            results: probeResults
+          }).slice(0, 2e3),
+          timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+          isComplete: false
+        },
+        options.traceContext.tracePushUrl,
+        options.traceContext.routeHeader,
+        options.traceContext.authToken
+      );
+    }
   }
   if (options.subAgents && options.subAgents.length > 0) {
     await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
@@ -6990,6 +7081,41 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
             })
           );
         }
+        const sdkMsg = message;
+        if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
+          const initData = sdkMsg;
+          const mcpInfo = {
+            mcp_servers: initData.mcp_servers,
+            tools: initData.tools,
+            cwd: options.cwd
+          };
+          console.error(
+            "[MCP-DIAG] Init message MCP status:",
+            JSON.stringify(mcpInfo, null, 2)
+          );
+          if (traceContext) {
+            emitTraceEvent(
+              {
+                evalRunId: traceContext.evalRunId,
+                scenarioId: traceContext.scenarioId,
+                scenarioName: traceContext.scenarioName,
+                targetId: traceContext.targetId,
+                targetName: traceContext.targetName,
+                stepNumber: traceStepNumber,
+                type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
+                outputPreview: JSON.stringify({
+                  event: "mcp-init-status",
+                  ...mcpInfo
+                }).slice(0, 2e3),
+                timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+                isComplete: false
+              },
+              traceContext.tracePushUrl,
+              traceContext.routeHeader,
+              traceContext.authToken
+            );
+          }
+        }
         if (traceContext) {
           traceStepNumber++;
           const traceEvent = createTraceEventFromAnyMessage(