@wix/evalforge-evaluator 0.68.0 → 0.69.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6451,7 +6451,6 @@ var import_crypto = require("crypto");
6451
6451
 
6452
6452
  // src/run-scenario/agents/claude-code/write-mcp.ts
6453
6453
  var import_promises4 = require("fs/promises");
6454
- var import_child_process = require("child_process");
6455
6454
  var import_path6 = require("path");
6456
6455
  var import_evalforge_types2 = require("@wix/evalforge-types");
6457
6456
  async function writeMcpToFilesystem(cwd, mcps) {
@@ -6477,6 +6476,11 @@ async function writeMcpToFilesystem(cwd, mcps) {
6477
6476
  await (0, import_promises4.writeFile)(filePath, content, "utf8");
6478
6477
  console.log(`[MCP] Written to ${filePath}`);
6479
6478
  }
6479
+
6480
+ // src/run-scenario/agents/claude-code/mcp-setup.ts
6481
+ var import_promises5 = require("fs/promises");
6482
+ var import_child_process = require("child_process");
6483
+ var import_path7 = require("path");
6480
6484
  async function probeMcpServers(mcps, probeMs = 5e3) {
6481
6485
  const results = [];
6482
6486
  for (const mcp of mcps) {
@@ -6544,10 +6548,125 @@ spawn error: ${err.message}`;
6544
6548
  }, probeMs);
6545
6549
  });
6546
6550
  }
6551
/**
 * Pre-installs, one at a time, every npx package referenced by the given MCP
 * configurations so that later `npx` launches do not have to download them.
 *
 * Packages that `isPackageGloballyInstalled` reports as present are recorded
 * as skipped; the rest are passed to `installPackageGlobally`.
 *
 * @param {Array<object>} mcps - MCP entries, forwarded to `extractNpxPackages`.
 * @param {number} [timeoutMs=120000] - Per-package install timeout.
 * @returns {Promise<Array<object>>} One record per package with `package`,
 *   `success`, `skipped`, `output` and `durationMs` fields.
 */
async function preInstallMcpPackages(mcps, timeoutMs = 12e4) {
  const outcomes = [];
  for (const pkg of extractNpxPackages(mcps)) {
    const startedAt = Date.now();

    if (await isPackageGloballyInstalled(pkg)) {
      const elapsedMs = Date.now() - startedAt;
      console.log(
        `[MCP] Package already installed globally, skipping: ${pkg} (${elapsedMs}ms)`
      );
      outcomes.push({
        package: pkg,
        success: true,
        skipped: true,
        output: "already installed",
        durationMs: elapsedMs
      });
      continue;
    }

    console.log(`[MCP] Pre-installing npx package globally: ${pkg}`);
    const installResult = await installPackageGlobally(pkg, timeoutMs);
    // Duration covers both the "already installed?" check and the install.
    const durationMs = Date.now() - startedAt;
    outcomes.push({ package: pkg, skipped: false, durationMs, ...installResult });

    const status = installResult.success ? "SUCCESS" : "FAILED";
    console.log(`[MCP] Install ${pkg}: ${status} (${durationMs}ms)`);
  }
  return outcomes;
}
6582
/**
 * Checks whether `pkg` is present in the global npm tree by running
 * `npm ls -g --depth=0 <pkg>` and inspecting the exit code.
 *
 * The returned promise never rejects: a spawn failure, a non-zero exit code
 * and a timeout all resolve to `false`.
 *
 * @param {string} pkg - Package name/spec to look up.
 * @param {number} [timeoutMs=5000] - How long to wait for `npm ls` before
 *   killing it and reporting `false`.
 * @returns {Promise<boolean>} `true` only when `npm ls` exits with code 0.
 */
function isPackageGloballyInstalled(pkg, timeoutMs = 5e3) {
  return new Promise((resolve2) => {
    let settled = false;
    let timer;
    // Single exit point: resolves once and always releases the timer so it
    // does not keep the event loop alive after the child has finished.
    const settle = (installed) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(installed);
    };

    let child;
    try {
      child = (0, import_child_process.spawn)("npm", ["ls", "-g", "--depth=0", pkg], {
        // Only the exit code matters; unread pipes could stall the child once
        // the OS pipe buffer fills up.
        stdio: "ignore",
        env: process.env
      });
    } catch {
      // spawn() throws synchronously on invalid arguments.
      settle(false);
      return;
    }

    child.on("close", (code2) => settle(code2 === 0));
    child.on("error", () => settle(false));
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle(false);
    }, timeoutMs);
  });
}
6610
/**
 * Collects the npm package specs that the given MCP configurations launch
 * through `npx`.
 *
 * Every object-valued entry of each `mcp.config` whose `command` is `"npx"`
 * and whose `args` is an array contributes at most one package:
 *   - the value of the first `--package=<spec>` argument, or
 *   - the first non-empty string argument that does not start with `-`.
 * Non-string and blank arguments are ignored, as are entries without a usable
 * `config`.
 *
 * @param {Array<{config?: object}>} mcps - MCP entries to scan.
 * @returns {string[]} De-duplicated package specs in first-seen order.
 */
function extractNpxPackages(mcps) {
  const PACKAGE_FLAG = "--package=";
  const packages = new Set();

  for (const mcp of mcps ?? []) {
    const config = mcp?.config;
    if (typeof config !== "object" || config === null) continue;

    for (const value of Object.values(config)) {
      if (typeof value !== "object" || value === null) continue;
      if (value.command !== "npx" || !Array.isArray(value.args)) continue;

      for (const arg of value.args) {
        // Args come from user-supplied config; guard against numbers, nulls
        // and blank strings instead of throwing or "installing" nothing.
        if (typeof arg !== "string" || arg.trim() === "") continue;

        if (arg.startsWith(PACKAGE_FLAG)) {
          // `npx --package=<spec> <cmd>`: the spec is the package to install,
          // the following positional is only the binary name.
          const spec = arg.slice(PACKAGE_FLAG.length);
          if (spec.trim() === "") continue;
          packages.add(spec);
          break;
        }
        if (arg.startsWith("-")) continue;

        packages.add(arg);
        break;
      }
    }
  }
  return [...packages];
}
6628
/**
 * Runs `npm install -g <pkg>` and reports the outcome.
 *
 * npm is spawned directly with `pkg` as a single argv entry (no `sh -c`), so
 * shell metacharacters in a package spec taken from MCP configuration cannot
 * be interpreted as commands, and the timeout kill signal goes to the npm
 * process itself rather than to a wrapping shell.
 *
 * The returned promise never rejects; failures are reported through
 * `success: false`.
 *
 * @param {string} pkg - Package spec to install globally.
 * @param {number} [timeoutMs=120000] - Time allowed before the install is
 *   killed with SIGTERM and reported as failed.
 * @returns {Promise<{success: boolean, output: string}>} `output` holds at
 *   most the last 1000 characters of combined stdout/stderr (plus a
 *   "[install timed out]" marker on timeout), or a "spawn error: ..." message.
 */
function installPackageGlobally(pkg, timeoutMs = 12e4) {
  const MAX_OUTPUT_CHARS = 1e3;
  return new Promise((resolve2) => {
    let output = "";
    let settled = false;
    let timer;
    // Single exit point: resolves once and always releases the timer so a
    // fast install does not keep the event loop alive for the full timeout.
    const settle = (result) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(result);
    };
    // Only the tail is ever reported, so keep just that much in memory.
    const append = (chunk) => {
      output = (output + chunk.toString()).slice(-MAX_OUTPUT_CHARS);
    };

    let child;
    try {
      child = (0, import_child_process.spawn)("npm", ["install", "-g", pkg], {
        // stdin is never written to; leaving it as an open pipe could make a
        // prompt wait forever.
        stdio: ["ignore", "pipe", "pipe"],
        env: process.env
      });
    } catch (err) {
      // spawn() throws synchronously on invalid arguments.
      settle({ success: false, output: `spawn error: ${err.message}` });
      return;
    }

    child.stdout.on("data", append);
    child.stderr.on("data", append);
    child.on("close", (code2) => {
      settle({ success: code2 === 0, output: output.slice(-MAX_OUTPUT_CHARS) });
    });
    child.on("error", (err) => {
      settle({ success: false, output: `spawn error: ${err.message}` });
    });
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle({
        success: false,
        output: output.slice(-MAX_OUTPUT_CHARS) + "\n[install timed out]"
      });
    }, timeoutMs);
  });
}
6547
6666
  async function probeNpmConfig(cwd) {
6548
6667
  const homedir = process.env.HOME ?? "~";
6549
6668
  const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
6550
- (0, import_promises4.readFile)((0, import_path6.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
6669
+ (0, import_promises5.readFile)((0, import_path7.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
6551
6670
  (e) => `[not found: ${e.message}]`
6552
6671
  ),
6553
6672
  runShellCapture(
@@ -6630,8 +6749,8 @@ function runShellCapture(cmd, cwd, timeoutMs) {
6630
6749
  }
6631
6750
 
6632
6751
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6633
- var import_promises5 = require("fs/promises");
6634
- var import_path7 = require("path");
6752
+ var import_promises6 = require("fs/promises");
6753
+ var import_path8 = require("path");
6635
6754
  var AGENTS_DIR = ".claude/agents";
6636
6755
  function toAgentFilename(name2, index, nameCount) {
6637
6756
  const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -6641,13 +6760,13 @@ function toAgentFilename(name2, index, nameCount) {
6641
6760
  }
6642
6761
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
6643
6762
  if (subAgents.length === 0) return;
6644
- const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
6645
- await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
6763
+ const agentsDir = (0, import_path8.join)(cwd, AGENTS_DIR);
6764
+ await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
6646
6765
  const nameCount = /* @__PURE__ */ new Map();
6647
6766
  for (const [i, agent] of subAgents.entries()) {
6648
6767
  const filename = toAgentFilename(agent.name, i, nameCount);
6649
- const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
6650
- await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
6768
+ const filePath = (0, import_path8.join)(agentsDir, `${filename}.md`);
6769
+ await (0, import_promises6.writeFile)(filePath, agent.subAgentMd, "utf8");
6651
6770
  }
6652
6771
  console.log(`[SubAgents] Written to ${agentsDir}`);
6653
6772
  }
@@ -6882,6 +7001,29 @@ async function executeWithClaudeCode(skills, scenario, options) {
6882
7001
  const allMessages = [];
6883
7002
  if (options.mcps && options.mcps.length > 0) {
6884
7003
  await writeMcpToFilesystem(options.cwd, options.mcps);
7004
+ const installResults = await preInstallMcpPackages(options.mcps);
7005
+ if (installResults.length > 0 && options.traceContext) {
7006
+ emitTraceEvent(
7007
+ {
7008
+ evalRunId: options.traceContext.evalRunId,
7009
+ scenarioId: options.traceContext.scenarioId,
7010
+ scenarioName: options.traceContext.scenarioName,
7011
+ targetId: options.traceContext.targetId,
7012
+ targetName: options.traceContext.targetName,
7013
+ stepNumber: 0,
7014
+ type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
7015
+ outputPreview: JSON.stringify({
7016
+ event: "mcp-pre-install",
7017
+ results: installResults
7018
+ }).slice(0, 2e3),
7019
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7020
+ isComplete: false
7021
+ },
7022
+ options.traceContext.tracePushUrl,
7023
+ options.traceContext.routeHeader,
7024
+ options.traceContext.authToken
7025
+ );
7026
+ }
6885
7027
  const probeResults = await probeMcpServers(options.mcps);
6886
7028
  if (options.traceContext) {
6887
7029
  emitTraceEvent(
@@ -7752,7 +7894,7 @@ defaultRegistry.register(claudeCodeAdapter);
7752
7894
 
7753
7895
  // src/run-scenario/file-diff.ts
7754
7896
  var import_fs6 = require("fs");
7755
- var import_path8 = require("path");
7897
+ var import_path9 = require("path");
7756
7898
 
7757
7899
  // ../../node_modules/diff/lib/index.mjs
7758
7900
  function Diff() {
@@ -7928,7 +8070,7 @@ Diff.prototype = {
7928
8070
  tokenize: function tokenize(value) {
7929
8071
  return Array.from(value);
7930
8072
  },
7931
- join: function join5(chars) {
8073
+ join: function join6(chars) {
7932
8074
  return chars.join("");
7933
8075
  },
7934
8076
  postProcess: function postProcess(changeObjects) {
@@ -8368,8 +8510,8 @@ function snapshotDirectory(dir, baseDir) {
8368
8510
  }
8369
8511
  const entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
8370
8512
  for (const entry of entries) {
8371
- const fullPath = (0, import_path8.join)(dir, entry.name);
8372
- const relativePath = (0, import_path8.relative)(base, fullPath);
8513
+ const fullPath = (0, import_path9.join)(dir, entry.name);
8514
+ const relativePath = (0, import_path9.relative)(base, fullPath);
8373
8515
  if (shouldIgnore(entry.name)) {
8374
8516
  continue;
8375
8517
  }