@wix/evalforge-evaluator 0.71.0 → 0.72.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -6464,165 +6464,9 @@ async function writeMcpToFilesystem(cwd, mcps) {
6464
6464
  console.log(`[MCP] Written to ${filePath}`);
6465
6465
  }
6466
6466
 
6467
- // src/run-scenario/agents/claude-code/mcp-setup.ts
6468
- import { readFile } from "fs/promises";
6469
- import { spawn } from "child_process";
6470
- import { join as join4 } from "path";
6471
- async function probeMcpServers(mcps, probeMs = 5e3) {
6472
- const results = [];
6473
- for (const mcp of mcps) {
6474
- const config = mcp.config;
6475
- for (const [name2, value] of Object.entries(config)) {
6476
- if (typeof value !== "object" || value === null) continue;
6477
- const cfg = value;
6478
- const command = cfg.command;
6479
- const args = cfg.args;
6480
- if (typeof command !== "string" || !Array.isArray(args)) continue;
6481
- const result = await probeOneServer(
6482
- name2,
6483
- command,
6484
- args,
6485
- probeMs
6486
- );
6487
- results.push(result);
6488
- }
6489
- }
6490
- return results;
6491
- }
6492
- function probeOneServer(name2, command, args, probeMs) {
6493
- return new Promise((resolve2) => {
6494
- const startMs = Date.now();
6495
- let stdout = "";
6496
- let stderr = "";
6497
- let settled = false;
6498
- const finish = (exitCode, signal) => {
6499
- if (settled) return;
6500
- settled = true;
6501
- resolve2({
6502
- name: name2,
6503
- command,
6504
- args,
6505
- exitCode,
6506
- signal,
6507
- stdout: stdout.slice(-2e3),
6508
- stderr: stderr.slice(-2e3),
6509
- durationMs: Date.now() - startMs
6510
- });
6511
- };
6512
- const child = spawn(command, args, {
6513
- stdio: ["pipe", "pipe", "pipe"],
6514
- env: process.env
6515
- });
6516
- child.stdout.on("data", (chunk) => {
6517
- stdout += chunk.toString();
6518
- });
6519
- child.stderr.on("data", (chunk) => {
6520
- stderr += chunk.toString();
6521
- });
6522
- child.on("error", (err) => {
6523
- stderr += `
6524
- spawn error: ${err.message}`;
6525
- finish(null, null);
6526
- });
6527
- child.on("close", (code2, sig) => {
6528
- finish(code2, sig);
6529
- });
6530
- setTimeout(() => {
6531
- if (!settled) {
6532
- child.kill("SIGTERM");
6533
- finish(null, "PROBE_TIMEOUT");
6534
- }
6535
- }, probeMs);
6536
- });
6537
- }
6538
- async function probeNpmConfig(cwd) {
6539
- const homedir = process.env.HOME ?? "~";
6540
- const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
6541
- readFile(join4(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
6542
- (e) => `[not found: ${e.message}]`
6543
- ),
6544
- runShellCapture(
6545
- "npm install --dry-run @wix/mcp 2>&1 | tail -15",
6546
- cwd,
6547
- 15e3
6548
- ),
6549
- runShellCapture(
6550
- 'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
6551
- cwd,
6552
- 5e3
6553
- )
6554
- ]);
6555
- const wixAuthParsed = (() => {
6556
- try {
6557
- const parsed = JSON.parse(wixAuthFile);
6558
- return JSON.stringify({
6559
- hasToken: typeof parsed.token === "string",
6560
- tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
6561
- hasAccountId: typeof parsed.accountId === "string",
6562
- hasUserInfo: typeof parsed.userInfo === "object"
6563
- });
6564
- } catch {
6565
- return wixAuthFile.slice(0, 500);
6566
- }
6567
- })();
6568
- const npmEnvVars = {};
6569
- for (const [key, value] of Object.entries(process.env)) {
6570
- const lk = key.toLowerCase();
6571
- if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
6572
- npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
6573
- }
6574
- }
6575
- return {
6576
- cwd,
6577
- processCwd: process.cwd(),
6578
- wixAuthFile: wixAuthParsed,
6579
- wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
6580
- npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
6581
- mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
6582
- npmEnvVars,
6583
- homedir
6584
- };
6585
- }
6586
- function runShellCapture(cmd, cwd, timeoutMs) {
6587
- return new Promise((resolve2) => {
6588
- let output = "";
6589
- let settled = false;
6590
- const child = spawn("sh", ["-c", cmd], {
6591
- stdio: ["pipe", "pipe", "pipe"],
6592
- cwd,
6593
- env: process.env
6594
- });
6595
- child.stdout.on("data", (chunk) => {
6596
- output += chunk.toString();
6597
- });
6598
- child.stderr.on("data", (chunk) => {
6599
- output += chunk.toString();
6600
- });
6601
- child.on("close", () => {
6602
- if (!settled) {
6603
- settled = true;
6604
- resolve2(output);
6605
- }
6606
- });
6607
- child.on("error", (err) => {
6608
- if (!settled) {
6609
- settled = true;
6610
- resolve2(`[error: ${err.message}]`);
6611
- }
6612
- });
6613
- setTimeout(() => {
6614
- if (!settled) {
6615
- settled = true;
6616
- child.kill("SIGTERM");
6617
- resolve2(output + "\n[timed out]");
6618
- }
6619
- }, timeoutMs);
6620
- });
6621
- }
6622
-
6623
6467
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6624
6468
  import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
6625
- import { join as join5 } from "path";
6469
+ import { join as join4 } from "path";
6626
6470
  var AGENTS_DIR = ".claude/agents";
6627
6471
  function toAgentFilename(name2, index, nameCount) {
6628
6472
  const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -6632,12 +6476,12 @@ function toAgentFilename(name2, index, nameCount) {
6632
6476
  }
6633
6477
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
6634
6478
  if (subAgents.length === 0) return;
6635
- const agentsDir = join5(cwd, AGENTS_DIR);
6479
+ const agentsDir = join4(cwd, AGENTS_DIR);
6636
6480
  await mkdir3(agentsDir, { recursive: true });
6637
6481
  const nameCount = /* @__PURE__ */ new Map();
6638
6482
  for (const [i, agent] of subAgents.entries()) {
6639
6483
  const filename = toAgentFilename(agent.name, i, nameCount);
6640
- const filePath = join5(agentsDir, `${filename}.md`);
6484
+ const filePath = join4(agentsDir, `${filename}.md`);
6641
6485
  await writeFile3(filePath, agent.subAgentMd, "utf8");
6642
6486
  }
6643
6487
  console.log(`[SubAgents] Written to ${agentsDir}`);
@@ -6873,56 +6717,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
6873
6717
  const allMessages = [];
6874
6718
  if (options.mcps && options.mcps.length > 0) {
6875
6719
  await writeMcpToFilesystem(options.cwd, options.mcps);
6876
- const probeResults = await probeMcpServers(options.mcps);
6877
- if (options.traceContext) {
6878
- emitTraceEvent(
6879
- {
6880
- evalRunId: options.traceContext.evalRunId,
6881
- scenarioId: options.traceContext.scenarioId,
6882
- scenarioName: options.traceContext.scenarioName,
6883
- targetId: options.traceContext.targetId,
6884
- targetName: options.traceContext.targetName,
6885
- stepNumber: 0,
6886
- type: LiveTraceEventType.DIAGNOSTIC,
6887
- outputPreview: JSON.stringify({
6888
- event: "mcp-probe",
6889
- results: probeResults
6890
- }).slice(0, 2e3),
6891
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6892
- isComplete: false
6893
- },
6894
- options.traceContext.tracePushUrl,
6895
- options.traceContext.routeHeader,
6896
- options.traceContext.authToken
6897
- );
6898
- }
6899
- const npmDiag = await probeNpmConfig(options.cwd);
6900
- console.log(
6901
- "[DEBUG-d744ca] npm-config-diagnostic",
6902
- JSON.stringify(npmDiag)
6903
- );
6904
- if (options.traceContext) {
6905
- emitTraceEvent(
6906
- {
6907
- evalRunId: options.traceContext.evalRunId,
6908
- scenarioId: options.traceContext.scenarioId,
6909
- scenarioName: options.traceContext.scenarioName,
6910
- targetId: options.traceContext.targetId,
6911
- targetName: options.traceContext.targetName,
6912
- stepNumber: 0,
6913
- type: LiveTraceEventType.DIAGNOSTIC,
6914
- outputPreview: JSON.stringify({
6915
- event: "npm-config-diagnostic",
6916
- ...npmDiag
6917
- }).slice(0, 8e3),
6918
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6919
- isComplete: false
6920
- },
6921
- options.traceContext.tracePushUrl,
6922
- options.traceContext.routeHeader,
6923
- options.traceContext.authToken
6924
- );
6925
- }
6926
6720
  }
6927
6721
  if (options.subAgents && options.subAgents.length > 0) {
6928
6722
  await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
@@ -7183,41 +6977,6 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
7183
6977
  })
7184
6978
  );
7185
6979
  }
7186
- const sdkMsg = message;
7187
- if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
7188
- const initData = sdkMsg;
7189
- const mcpInfo = {
7190
- mcp_servers: initData.mcp_servers,
7191
- tools: initData.tools,
7192
- cwd: options.cwd
7193
- };
7194
- console.error(
7195
- "[MCP-DIAG] Init message MCP status:",
7196
- JSON.stringify(mcpInfo, null, 2)
7197
- );
7198
- if (traceContext) {
7199
- emitTraceEvent(
7200
- {
7201
- evalRunId: traceContext.evalRunId,
7202
- scenarioId: traceContext.scenarioId,
7203
- scenarioName: traceContext.scenarioName,
7204
- targetId: traceContext.targetId,
7205
- targetName: traceContext.targetName,
7206
- stepNumber: traceStepNumber,
7207
- type: LiveTraceEventType.DIAGNOSTIC,
7208
- outputPreview: JSON.stringify({
7209
- event: "mcp-init-status",
7210
- ...mcpInfo
7211
- }).slice(0, 2e3),
7212
- timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7213
- isComplete: false
7214
- },
7215
- traceContext.tracePushUrl,
7216
- traceContext.routeHeader,
7217
- traceContext.authToken
7218
- );
7219
- }
7220
- }
7221
6980
  if (traceContext) {
7222
6981
  traceStepNumber++;
7223
6982
  const traceEvent = createTraceEventFromAnyMessage(
@@ -7743,7 +7502,7 @@ defaultRegistry.register(claudeCodeAdapter);
7743
7502
 
7744
7503
  // src/run-scenario/file-diff.ts
7745
7504
  import { readdirSync as readdirSync2, readFileSync, statSync, existsSync as existsSync2 } from "fs";
7746
- import { join as join7, relative } from "path";
7505
+ import { join as join6, relative } from "path";
7747
7506
 
7748
7507
  // ../../node_modules/diff/lib/index.mjs
7749
7508
  function Diff() {
@@ -7919,7 +7678,7 @@ Diff.prototype = {
7919
7678
  tokenize: function tokenize(value) {
7920
7679
  return Array.from(value);
7921
7680
  },
7922
- join: function join6(chars) {
7681
+ join: function join5(chars) {
7923
7682
  return chars.join("");
7924
7683
  },
7925
7684
  postProcess: function postProcess(changeObjects) {
@@ -8359,7 +8118,7 @@ function snapshotDirectory(dir, baseDir) {
8359
8118
  }
8360
8119
  const entries = readdirSync2(dir, { withFileTypes: true });
8361
8120
  for (const entry of entries) {
8362
- const fullPath = join7(dir, entry.name);
8121
+ const fullPath = join6(dir, entry.name);
8363
8122
  const relativePath = relative(base, fullPath);
8364
8123
  if (shouldIgnore(entry.name)) {
8365
8124
  continue;