@wix/evalforge-evaluator 0.63.0 → 0.65.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6451,6 +6451,7 @@ var import_crypto = require("crypto");
6451
6451
 
6452
6452
  // src/run-scenario/agents/claude-code/write-mcp.ts
6453
6453
  var import_promises4 = require("fs/promises");
6454
+ var import_child_process = require("child_process");
6454
6455
  var import_path6 = require("path");
6455
6456
  var import_evalforge_types2 = require("@wix/evalforge-types");
6456
6457
  async function writeMcpToFilesystem(cwd, mcps) {
@@ -6476,6 +6477,137 @@ async function writeMcpToFilesystem(cwd, mcps) {
6476
6477
  await (0, import_promises4.writeFile)(filePath, content, "utf8");
6477
6478
  console.log(`[MCP] Written to ${filePath}`);
6478
6479
  }
6480
/**
 * Probes every MCP server entry that declares a launch command.
 *
 * Each element of `mcps` is expected to carry a `config` object whose keys are
 * server names and whose values are objects with a string `command` and an
 * optional `args` array. Entries that do not match that shape (non-object
 * values, non-string commands, non-array args) are skipped. A missing `args`
 * is treated as an empty argument list so command-only entries are still
 * probed.
 *
 * Servers are probed one after another, so the total time is bounded by
 * (number of probed servers) * `probeMs`.
 *
 * @param {Array<{config?: object}>} mcps - MCP definitions to inspect.
 * @param {number} [probeMs=5000] - Per-server probe timeout in milliseconds.
 * @returns {Promise<Array<object>>} One result per probed server, in
 *   iteration order, as produced by `probeOneServer`.
 */
async function probeMcpServers(mcps, probeMs = 5e3) {
  const results = [];
  for (const mcp of mcps ?? []) {
    const config = mcp?.config;
    // A diagnostic probe must not abort the run because one definition has
    // no (or a malformed) config object.
    if (typeof config !== "object" || config === null) continue;
    for (const [name2, value] of Object.entries(config)) {
      if (typeof value !== "object" || value === null) continue;
      // `args` defaults only when it is undefined; an explicit non-array
      // value (including null) still causes the entry to be skipped.
      const { command, args = [] } = value;
      if (typeof command !== "string" || !Array.isArray(args)) continue;
      results.push(await probeOneServer(name2, command, args, probeMs));
    }
  }
  return results;
}
6501
/**
 * Spawns a single server command and reports how it behaved within
 * `probeMs` milliseconds.
 *
 * The returned promise always resolves (never rejects):
 *  - when the child closes, with its exit code / signal;
 *  - when spawning fails (asynchronously via the "error" event or
 *    synchronously via a thrown exception), with `exitCode: null`,
 *    `signal: null` and a "spawn error: ..." line appended to `stderr`;
 *  - when the child is still running after `probeMs`, it is sent SIGTERM and
 *    the result carries `signal: "PROBE_TIMEOUT"`.
 *
 * Only the last 2000 characters of stdout and stderr are kept.
 *
 * @param {string} name2 - Server name, echoed back in the result.
 * @param {string} command - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} probeMs - Maximum time to wait before giving up.
 * @returns {Promise<{name: string, command: string, args: string[],
 *   exitCode: (number|null), signal: (string|null), stdout: string,
 *   stderr: string, durationMs: number}>}
 */
function probeOneServer(name2, command, args, probeMs) {
  const maxCaptureChars = 2e3;
  return new Promise((resolve2) => {
    const startMs = Date.now();
    let stdout = "";
    let stderr = "";
    let settled = false;
    let timer;
    const finish = (exitCode, signal) => {
      if (settled) return;
      settled = true;
      // Without this the pending timer keeps the event loop alive for up to
      // probeMs after a fast-exiting child has already been reported.
      clearTimeout(timer);
      resolve2({
        name: name2,
        command,
        args,
        exitCode,
        signal,
        stdout: stdout.slice(-maxCaptureChars),
        stderr: stderr.slice(-maxCaptureChars),
        durationMs: Date.now() - startMs
      });
    };
    let child;
    try {
      child = (0, import_child_process.spawn)(command, args, {
        stdio: ["pipe", "pipe", "pipe"],
        env: process.env
      });
    } catch (err) {
      // spawn() throws synchronously for invalid arguments (e.g. an empty
      // command); report it like an asynchronous spawn failure.
      stderr += `\nspawn error: ${err.message}`;
      finish(null, null);
      return;
    }
    // Trim while accumulating so a chatty child cannot grow the buffers
    // without bound; the final result is the same trailing window.
    child.stdout.on("data", (chunk) => {
      stdout = (stdout + chunk.toString()).slice(-maxCaptureChars);
    });
    child.stderr.on("data", (chunk) => {
      stderr = (stderr + chunk.toString()).slice(-maxCaptureChars);
    });
    child.on("error", (err) => {
      stderr += `\nspawn error: ${err.message}`;
      finish(null, null);
    });
    child.on("close", (code2, sig) => {
      finish(code2, sig);
    });
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      finish(null, "PROBE_TIMEOUT");
    }, probeMs);
  });
}
6547
/**
 * Tells whether an npm config key (or env var name) looks like it holds a
 * credential, e.g. `//host/:_authToken`, `_auth`, `_password`.
 */
function isSensitiveNpmKey(key) {
  return /_auth|token|password|secret/i.test(key);
}

/**
 * Masks credentials in npm configuration text: the value of any `key=value`
 * line whose key looks sensitive, and `user:password@` parts of URLs.
 */
function redactNpmSecrets(text) {
  return text
    .split("\n")
    .map((line) => {
      const eq = line.indexOf("=");
      if (eq === -1) return line;
      const key = line.slice(0, eq);
      return isSensitiveNpmKey(key) ? `${key}=[redacted]` : line;
    })
    .join("\n")
    .replace(/(\/\/)[^\s/@:]+:[^\s/@]+@/g, "$1[redacted]@");
}

/**
 * Collects npm configuration diagnostics for `cwd`.
 *
 * Reads `<cwd>/.npmrc` and `<HOME>/.npmrc` (a read failure is reported as a
 * "[not found: ...]" string rather than thrown), captures the output of
 * `npm config list` and `npm config get registry` (3 s timeout each), and
 * gathers every environment variable whose name starts with "npm_config"
 * (case-insensitive).
 *
 * Because the result is meant for logs and trace events, credential-looking
 * values (auth tokens, passwords, URL-embedded credentials) are replaced with
 * "[redacted]" before anything is returned.
 *
 * @param {string} cwd - Directory whose npm configuration is inspected.
 * @returns {Promise<{cwd: string, cwdNpmrc: string, homeNpmrc: string,
 *   npmConfigList: string, npmConfigGetRegistry: string,
 *   npmEnvVars: Object<string, (string|undefined)>, homedir: string}>}
 */
async function probeNpmConfig(cwd) {
  // NOTE(review): the "~" fallback is not expanded by path.join/readFile, so
  // with HOME unset the home .npmrc lookup resolves relative to the process
  // working directory; kept for backward compatibility of `homedir`.
  const homedir = process.env.HOME ?? "~";
  const describeReadError = (e) => `[not found: ${e.message}]`;
  const [cwdNpmrc, homeNpmrc, npmConfigList, npmConfigGetRegistry] = await Promise.all([
    (0, import_promises4.readFile)((0, import_path6.join)(cwd, ".npmrc"), "utf8").catch(describeReadError),
    (0, import_promises4.readFile)((0, import_path6.join)(homedir, ".npmrc"), "utf8").catch(describeReadError),
    runShellCapture("npm config list", cwd, 3e3),
    runShellCapture("npm config get registry", cwd, 3e3)
  ]);
  const npmEnvVars = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (!key.toLowerCase().startsWith("npm_config")) continue;
    if (typeof value !== "string") {
      npmEnvVars[key] = value;
    } else {
      npmEnvVars[key] = isSensitiveNpmKey(key) ? "[redacted]" : redactNpmSecrets(value);
    }
  }
  // Redact before truncating so a secret cut in half by the size cap is
  // still masked as a whole line.
  return {
    cwd,
    cwdNpmrc: redactNpmSecrets(cwdNpmrc).slice(0, 1e3),
    homeNpmrc: redactNpmSecrets(homeNpmrc).slice(0, 1e3),
    npmConfigList: redactNpmSecrets(npmConfigList).slice(0, 2e3),
    npmConfigGetRegistry: redactNpmSecrets(npmConfigGetRegistry).trim(),
    npmEnvVars,
    homedir
  };
}
6575
/**
 * Runs `cmd` through `sh -c` in `cwd` and resolves with everything the
 * command wrote to stdout and stderr (interleaved in arrival order).
 *
 * The returned promise always resolves (never rejects):
 *  - with the captured output once the child closes;
 *  - with "[error: <message>]" if the shell cannot be spawned, whether the
 *    failure is reported asynchronously or thrown synchronously by spawn();
 *  - with the output so far plus "\n[timed out]" if the command is still
 *    running after `timeoutMs`, in which case the child is sent SIGTERM.
 *
 * @param {string} cmd - Shell command line. It is passed to the shell as-is,
 *   so it must not be built from untrusted input.
 * @param {string} cwd - Working directory for the command.
 * @param {number} timeoutMs - Maximum time to wait, in milliseconds.
 * @returns {Promise<string>} Captured output or an error/timeout marker.
 */
function runShellCapture(cmd, cwd, timeoutMs) {
  return new Promise((resolve2) => {
    let output = "";
    let settled = false;
    let timer;
    const settle = (text) => {
      if (settled) return;
      settled = true;
      // Cancel the watchdog so a finished command does not keep the event
      // loop alive until timeoutMs elapses.
      clearTimeout(timer);
      resolve2(text);
    };
    let child;
    try {
      child = (0, import_child_process.spawn)("sh", ["-c", cmd], {
        stdio: ["pipe", "pipe", "pipe"],
        cwd,
        env: process.env
      });
    } catch (err) {
      // spawn() throws synchronously on invalid options (e.g. a non-path
      // cwd); surface it in the same format as the "error" event.
      settle(`[error: ${err.message}]`);
      return;
    }
    const append = (chunk) => {
      output += chunk.toString();
    };
    child.stdout.on("data", append);
    child.stderr.on("data", append);
    child.on("close", () => settle(output));
    child.on("error", (err) => settle(`[error: ${err.message}]`));
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle(output + "\n[timed out]");
    }, timeoutMs);
  });
}
6479
6611
 
6480
6612
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6481
6613
  var import_promises5 = require("fs/promises");
@@ -6730,6 +6862,56 @@ async function executeWithClaudeCode(skills, scenario, options) {
6730
6862
  const allMessages = [];
6731
6863
  if (options.mcps && options.mcps.length > 0) {
6732
6864
  await writeMcpToFilesystem(options.cwd, options.mcps);
6865
+ const probeResults = await probeMcpServers(options.mcps);
6866
+ if (options.traceContext) {
6867
+ emitTraceEvent(
6868
+ {
6869
+ evalRunId: options.traceContext.evalRunId,
6870
+ scenarioId: options.traceContext.scenarioId,
6871
+ scenarioName: options.traceContext.scenarioName,
6872
+ targetId: options.traceContext.targetId,
6873
+ targetName: options.traceContext.targetName,
6874
+ stepNumber: 0,
6875
+ type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
6876
+ outputPreview: JSON.stringify({
6877
+ event: "mcp-probe",
6878
+ results: probeResults
6879
+ }).slice(0, 2e3),
6880
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6881
+ isComplete: false
6882
+ },
6883
+ options.traceContext.tracePushUrl,
6884
+ options.traceContext.routeHeader,
6885
+ options.traceContext.authToken
6886
+ );
6887
+ }
6888
+ const npmDiag = await probeNpmConfig(options.cwd);
6889
+ console.log(
6890
+ "[DEBUG-d744ca] npm-config-diagnostic",
6891
+ JSON.stringify(npmDiag)
6892
+ );
6893
+ if (options.traceContext) {
6894
+ emitTraceEvent(
6895
+ {
6896
+ evalRunId: options.traceContext.evalRunId,
6897
+ scenarioId: options.traceContext.scenarioId,
6898
+ scenarioName: options.traceContext.scenarioName,
6899
+ targetId: options.traceContext.targetId,
6900
+ targetName: options.traceContext.targetName,
6901
+ stepNumber: 0,
6902
+ type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
6903
+ outputPreview: JSON.stringify({
6904
+ event: "npm-config-diagnostic",
6905
+ ...npmDiag
6906
+ }).slice(0, 4e3),
6907
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6908
+ isComplete: false
6909
+ },
6910
+ options.traceContext.tracePushUrl,
6911
+ options.traceContext.routeHeader,
6912
+ options.traceContext.authToken
6913
+ );
6914
+ }
6733
6915
  }
6734
6916
  if (options.subAgents && options.subAgents.length > 0) {
6735
6917
  await writeSubAgentsToFilesystem(options.cwd, options.subAgents);