@wix/evalforge-evaluator 0.71.0 → 0.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6477,165 +6477,9 @@ async function writeMcpToFilesystem(cwd, mcps) {
6477
6477
  console.log(`[MCP] Written to ${filePath}`);
6478
6478
  }
6479
6479
 
6480
- // src/run-scenario/agents/claude-code/mcp-setup.ts
6480
+ // src/run-scenario/agents/claude-code/write-sub-agents.ts
6481
6481
  var import_promises5 = require("fs/promises");
6482
- var import_child_process = require("child_process");
6483
6482
  var import_path7 = require("path");
6484
- async function probeMcpServers(mcps, probeMs = 5e3) {
6485
- const results = [];
6486
- for (const mcp of mcps) {
6487
- const config = mcp.config;
6488
- for (const [name2, value] of Object.entries(config)) {
6489
- if (typeof value !== "object" || value === null) continue;
6490
- const cfg = value;
6491
- const command = cfg.command;
6492
- const args = cfg.args;
6493
- if (typeof command !== "string" || !Array.isArray(args)) continue;
6494
- const result = await probeOneServer(
6495
- name2,
6496
- command,
6497
- args,
6498
- probeMs
6499
- );
6500
- results.push(result);
6501
- }
6502
- }
6503
- return results;
6504
- }
6505
- function probeOneServer(name2, command, args, probeMs) {
6506
- return new Promise((resolve2) => {
6507
- const startMs = Date.now();
6508
- let stdout = "";
6509
- let stderr = "";
6510
- let settled = false;
6511
- const finish = (exitCode, signal) => {
6512
- if (settled) return;
6513
- settled = true;
6514
- resolve2({
6515
- name: name2,
6516
- command,
6517
- args,
6518
- exitCode,
6519
- signal,
6520
- stdout: stdout.slice(-2e3),
6521
- stderr: stderr.slice(-2e3),
6522
- durationMs: Date.now() - startMs
6523
- });
6524
- };
6525
- const child = (0, import_child_process.spawn)(command, args, {
6526
- stdio: ["pipe", "pipe", "pipe"],
6527
- env: process.env
6528
- });
6529
- child.stdout.on("data", (chunk) => {
6530
- stdout += chunk.toString();
6531
- });
6532
- child.stderr.on("data", (chunk) => {
6533
- stderr += chunk.toString();
6534
- });
6535
- child.on("error", (err) => {
6536
- stderr += `
6537
- spawn error: ${err.message}`;
6538
- finish(null, null);
6539
- });
6540
- child.on("close", (code2, sig) => {
6541
- finish(code2, sig);
6542
- });
6543
- setTimeout(() => {
6544
- if (!settled) {
6545
- child.kill("SIGTERM");
6546
- finish(null, "PROBE_TIMEOUT");
6547
- }
6548
- }, probeMs);
6549
- });
6550
- }
6551
- async function probeNpmConfig(cwd) {
6552
- const homedir = process.env.HOME ?? "~";
6553
- const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
6554
- (0, import_promises5.readFile)((0, import_path7.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
6555
- (e) => `[not found: ${e.message}]`
6556
- ),
6557
- runShellCapture(
6558
- "npm install --dry-run @wix/mcp 2>&1 | tail -15",
6559
- cwd,
6560
- 15e3
6561
- ),
6562
- runShellCapture(
6563
- 'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
6564
- cwd,
6565
- 5e3
6566
- )
6567
- ]);
6568
- const wixAuthParsed = (() => {
6569
- try {
6570
- const parsed = JSON.parse(wixAuthFile);
6571
- return JSON.stringify({
6572
- hasToken: typeof parsed.token === "string",
6573
- tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
6574
- hasAccountId: typeof parsed.accountId === "string",
6575
- hasUserInfo: typeof parsed.userInfo === "object"
6576
- });
6577
- } catch {
6578
- return wixAuthFile.slice(0, 500);
6579
- }
6580
- })();
6581
- const npmEnvVars = {};
6582
- for (const [key, value] of Object.entries(process.env)) {
6583
- const lk = key.toLowerCase();
6584
- if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
6585
- npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
6586
- }
6587
- }
6588
- return {
6589
- cwd,
6590
- processCwd: process.cwd(),
6591
- wixAuthFile: wixAuthParsed,
6592
- wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
6593
- npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
6594
- mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
6595
- npmEnvVars,
6596
- homedir
6597
- };
6598
- }
6599
- function runShellCapture(cmd, cwd, timeoutMs) {
6600
- return new Promise((resolve2) => {
6601
- let output = "";
6602
- let settled = false;
6603
- const child = (0, import_child_process.spawn)("sh", ["-c", cmd], {
6604
- stdio: ["pipe", "pipe", "pipe"],
6605
- cwd,
6606
- env: process.env
6607
- });
6608
- child.stdout.on("data", (chunk) => {
6609
- output += chunk.toString();
6610
- });
6611
- child.stderr.on("data", (chunk) => {
6612
- output += chunk.toString();
6613
- });
6614
- child.on("close", () => {
6615
- if (!settled) {
6616
- settled = true;
6617
- resolve2(output);
6618
- }
6619
- });
6620
- child.on("error", (err) => {
6621
- if (!settled) {
6622
- settled = true;
6623
- resolve2(`[error: ${err.message}]`);
6624
- }
6625
- });
6626
- setTimeout(() => {
6627
- if (!settled) {
6628
- settled = true;
6629
- child.kill("SIGTERM");
6630
- resolve2(output + "\n[timed out]");
6631
- }
6632
- }, timeoutMs);
6633
- });
6634
- }
6635
-
6636
- // src/run-scenario/agents/claude-code/write-sub-agents.ts
6637
- var import_promises6 = require("fs/promises");
6638
- var import_path8 = require("path");
6639
6483
  var AGENTS_DIR = ".claude/agents";
6640
6484
  function toAgentFilename(name2, index, nameCount) {
6641
6485
  const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -6645,13 +6489,13 @@ function toAgentFilename(name2, index, nameCount) {
6645
6489
  }
6646
6490
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
6647
6491
  if (subAgents.length === 0) return;
6648
- const agentsDir = (0, import_path8.join)(cwd, AGENTS_DIR);
6649
- await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
6492
+ const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
6493
+ await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
6650
6494
  const nameCount = /* @__PURE__ */ new Map();
6651
6495
  for (const [i, agent] of subAgents.entries()) {
6652
6496
  const filename = toAgentFilename(agent.name, i, nameCount);
6653
- const filePath = (0, import_path8.join)(agentsDir, `${filename}.md`);
6654
- await (0, import_promises6.writeFile)(filePath, agent.subAgentMd, "utf8");
6497
+ const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
6498
+ await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
6655
6499
  }
6656
6500
  console.log(`[SubAgents] Written to ${agentsDir}`);
6657
6501
  }
@@ -6886,56 +6730,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
6886
6730
  const allMessages = [];
6887
6731
  if (options.mcps && options.mcps.length > 0) {
6888
6732
  await writeMcpToFilesystem(options.cwd, options.mcps);
6889
- const probeResults = await probeMcpServers(options.mcps);
6890
- if (options.traceContext) {
6891
- emitTraceEvent(
6892
- {
6893
- evalRunId: options.traceContext.evalRunId,
6894
- scenarioId: options.traceContext.scenarioId,
6895
- scenarioName: options.traceContext.scenarioName,
6896
- targetId: options.traceContext.targetId,
6897
- targetName: options.traceContext.targetName,
6898
- stepNumber: 0,
6899
- type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
6900
- outputPreview: JSON.stringify({
6901
- event: "mcp-probe",
6902
- results: probeResults
6903
- }).slice(0, 2e3),
6904
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6905
- isComplete: false
6906
- },
6907
- options.traceContext.tracePushUrl,
6908
- options.traceContext.routeHeader,
6909
- options.traceContext.authToken
6910
- );
6911
- }
6912
- const npmDiag = await probeNpmConfig(options.cwd);
6913
- console.log(
6914
- "[DEBUG-d744ca] npm-config-diagnostic",
6915
- JSON.stringify(npmDiag)
6916
- );
6917
- if (options.traceContext) {
6918
- emitTraceEvent(
6919
- {
6920
- evalRunId: options.traceContext.evalRunId,
6921
- scenarioId: options.traceContext.scenarioId,
6922
- scenarioName: options.traceContext.scenarioName,
6923
- targetId: options.traceContext.targetId,
6924
- targetName: options.traceContext.targetName,
6925
- stepNumber: 0,
6926
- type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
6927
- outputPreview: JSON.stringify({
6928
- event: "npm-config-diagnostic",
6929
- ...npmDiag
6930
- }).slice(0, 8e3),
6931
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6932
- isComplete: false
6933
- },
6934
- options.traceContext.tracePushUrl,
6935
- options.traceContext.routeHeader,
6936
- options.traceContext.authToken
6937
- );
6938
- }
6939
6733
  }
6940
6734
  if (options.subAgents && options.subAgents.length > 0) {
6941
6735
  await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
@@ -7196,41 +6990,6 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
7196
6990
  })
7197
6991
  );
7198
6992
  }
7199
- const sdkMsg = message;
7200
- if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
7201
- const initData = sdkMsg;
7202
- const mcpInfo = {
7203
- mcp_servers: initData.mcp_servers,
7204
- tools: initData.tools,
7205
- cwd: options.cwd
7206
- };
7207
- console.error(
7208
- "[MCP-DIAG] Init message MCP status:",
7209
- JSON.stringify(mcpInfo, null, 2)
7210
- );
7211
- if (traceContext) {
7212
- emitTraceEvent(
7213
- {
7214
- evalRunId: traceContext.evalRunId,
7215
- scenarioId: traceContext.scenarioId,
7216
- scenarioName: traceContext.scenarioName,
7217
- targetId: traceContext.targetId,
7218
- targetName: traceContext.targetName,
7219
- stepNumber: traceStepNumber,
7220
- type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
7221
- outputPreview: JSON.stringify({
7222
- event: "mcp-init-status",
7223
- ...mcpInfo
7224
- }).slice(0, 2e3),
7225
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7226
- isComplete: false
7227
- },
7228
- traceContext.tracePushUrl,
7229
- traceContext.routeHeader,
7230
- traceContext.authToken
7231
- );
7232
- }
7233
- }
7234
6993
  if (traceContext) {
7235
6994
  traceStepNumber++;
7236
6995
  const traceEvent = createTraceEventFromAnyMessage(
@@ -7756,7 +7515,7 @@ defaultRegistry.register(claudeCodeAdapter);
7756
7515
 
7757
7516
  // src/run-scenario/file-diff.ts
7758
7517
  var import_fs6 = require("fs");
7759
- var import_path9 = require("path");
7518
+ var import_path8 = require("path");
7760
7519
 
7761
7520
  // ../../node_modules/diff/lib/index.mjs
7762
7521
  function Diff() {
@@ -7932,7 +7691,7 @@ Diff.prototype = {
7932
7691
  tokenize: function tokenize(value) {
7933
7692
  return Array.from(value);
7934
7693
  },
7935
- join: function join6(chars) {
7694
+ join: function join5(chars) {
7936
7695
  return chars.join("");
7937
7696
  },
7938
7697
  postProcess: function postProcess(changeObjects) {
@@ -8372,8 +8131,8 @@ function snapshotDirectory(dir, baseDir) {
8372
8131
  }
8373
8132
  const entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
8374
8133
  for (const entry of entries) {
8375
- const fullPath = (0, import_path9.join)(dir, entry.name);
8376
- const relativePath = (0, import_path9.relative)(base, fullPath);
8134
+ const fullPath = (0, import_path8.join)(dir, entry.name);
8135
+ const relativePath = (0, import_path8.relative)(base, fullPath);
8377
8136
  if (shouldIgnore(entry.name)) {
8378
8137
  continue;
8379
8138
  }