@wix/evalforge-evaluator 0.71.0 → 0.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +9 -250
- package/build/index.js.map +4 -4
- package/build/index.mjs +6 -247
- package/build/index.mjs.map +4 -4
- package/package.json +2 -2
- package/build/types/run-scenario/agents/claude-code/mcp-setup.d.ts +0 -36
package/build/index.mjs
CHANGED
|
@@ -6464,165 +6464,9 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6464
6464
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6465
6465
|
}
|
|
6466
6466
|
|
|
6467
|
-
// src/run-scenario/agents/claude-code/mcp-setup.ts
|
|
6468
|
-
import { readFile } from "fs/promises";
|
|
6469
|
-
import { spawn } from "child_process";
|
|
6470
|
-
import { join as join4 } from "path";
|
|
6471
|
-
async function probeMcpServers(mcps, probeMs = 5e3) {
|
|
6472
|
-
const results = [];
|
|
6473
|
-
for (const mcp of mcps) {
|
|
6474
|
-
const config = mcp.config;
|
|
6475
|
-
for (const [name2, value] of Object.entries(config)) {
|
|
6476
|
-
if (typeof value !== "object" || value === null) continue;
|
|
6477
|
-
const cfg = value;
|
|
6478
|
-
const command = cfg.command;
|
|
6479
|
-
const args = cfg.args;
|
|
6480
|
-
if (typeof command !== "string" || !Array.isArray(args)) continue;
|
|
6481
|
-
const result = await probeOneServer(
|
|
6482
|
-
name2,
|
|
6483
|
-
command,
|
|
6484
|
-
args,
|
|
6485
|
-
probeMs
|
|
6486
|
-
);
|
|
6487
|
-
results.push(result);
|
|
6488
|
-
}
|
|
6489
|
-
}
|
|
6490
|
-
return results;
|
|
6491
|
-
}
|
|
6492
|
-
function probeOneServer(name2, command, args, probeMs) {
|
|
6493
|
-
return new Promise((resolve2) => {
|
|
6494
|
-
const startMs = Date.now();
|
|
6495
|
-
let stdout = "";
|
|
6496
|
-
let stderr = "";
|
|
6497
|
-
let settled = false;
|
|
6498
|
-
const finish = (exitCode, signal) => {
|
|
6499
|
-
if (settled) return;
|
|
6500
|
-
settled = true;
|
|
6501
|
-
resolve2({
|
|
6502
|
-
name: name2,
|
|
6503
|
-
command,
|
|
6504
|
-
args,
|
|
6505
|
-
exitCode,
|
|
6506
|
-
signal,
|
|
6507
|
-
stdout: stdout.slice(-2e3),
|
|
6508
|
-
stderr: stderr.slice(-2e3),
|
|
6509
|
-
durationMs: Date.now() - startMs
|
|
6510
|
-
});
|
|
6511
|
-
};
|
|
6512
|
-
const child = spawn(command, args, {
|
|
6513
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
6514
|
-
env: process.env
|
|
6515
|
-
});
|
|
6516
|
-
child.stdout.on("data", (chunk) => {
|
|
6517
|
-
stdout += chunk.toString();
|
|
6518
|
-
});
|
|
6519
|
-
child.stderr.on("data", (chunk) => {
|
|
6520
|
-
stderr += chunk.toString();
|
|
6521
|
-
});
|
|
6522
|
-
child.on("error", (err) => {
|
|
6523
|
-
stderr += `
|
|
6524
|
-
spawn error: ${err.message}`;
|
|
6525
|
-
finish(null, null);
|
|
6526
|
-
});
|
|
6527
|
-
child.on("close", (code2, sig) => {
|
|
6528
|
-
finish(code2, sig);
|
|
6529
|
-
});
|
|
6530
|
-
setTimeout(() => {
|
|
6531
|
-
if (!settled) {
|
|
6532
|
-
child.kill("SIGTERM");
|
|
6533
|
-
finish(null, "PROBE_TIMEOUT");
|
|
6534
|
-
}
|
|
6535
|
-
}, probeMs);
|
|
6536
|
-
});
|
|
6537
|
-
}
|
|
6538
|
-
async function probeNpmConfig(cwd) {
|
|
6539
|
-
const homedir = process.env.HOME ?? "~";
|
|
6540
|
-
const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
|
|
6541
|
-
readFile(join4(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
|
|
6542
|
-
(e) => `[not found: ${e.message}]`
|
|
6543
|
-
),
|
|
6544
|
-
runShellCapture(
|
|
6545
|
-
"npm install --dry-run @wix/mcp 2>&1 | tail -15",
|
|
6546
|
-
cwd,
|
|
6547
|
-
15e3
|
|
6548
|
-
),
|
|
6549
|
-
runShellCapture(
|
|
6550
|
-
'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
|
|
6551
|
-
cwd,
|
|
6552
|
-
5e3
|
|
6553
|
-
)
|
|
6554
|
-
]);
|
|
6555
|
-
const wixAuthParsed = (() => {
|
|
6556
|
-
try {
|
|
6557
|
-
const parsed = JSON.parse(wixAuthFile);
|
|
6558
|
-
return JSON.stringify({
|
|
6559
|
-
hasToken: typeof parsed.token === "string",
|
|
6560
|
-
tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
|
|
6561
|
-
hasAccountId: typeof parsed.accountId === "string",
|
|
6562
|
-
hasUserInfo: typeof parsed.userInfo === "object"
|
|
6563
|
-
});
|
|
6564
|
-
} catch {
|
|
6565
|
-
return wixAuthFile.slice(0, 500);
|
|
6566
|
-
}
|
|
6567
|
-
})();
|
|
6568
|
-
const npmEnvVars = {};
|
|
6569
|
-
for (const [key, value] of Object.entries(process.env)) {
|
|
6570
|
-
const lk = key.toLowerCase();
|
|
6571
|
-
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
|
|
6572
|
-
npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
|
|
6573
|
-
}
|
|
6574
|
-
}
|
|
6575
|
-
return {
|
|
6576
|
-
cwd,
|
|
6577
|
-
processCwd: process.cwd(),
|
|
6578
|
-
wixAuthFile: wixAuthParsed,
|
|
6579
|
-
wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
|
|
6580
|
-
npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
|
|
6581
|
-
mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
|
|
6582
|
-
npmEnvVars,
|
|
6583
|
-
homedir
|
|
6584
|
-
};
|
|
6585
|
-
}
|
|
6586
|
-
function runShellCapture(cmd, cwd, timeoutMs) {
|
|
6587
|
-
return new Promise((resolve2) => {
|
|
6588
|
-
let output = "";
|
|
6589
|
-
let settled = false;
|
|
6590
|
-
const child = spawn("sh", ["-c", cmd], {
|
|
6591
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
6592
|
-
cwd,
|
|
6593
|
-
env: process.env
|
|
6594
|
-
});
|
|
6595
|
-
child.stdout.on("data", (chunk) => {
|
|
6596
|
-
output += chunk.toString();
|
|
6597
|
-
});
|
|
6598
|
-
child.stderr.on("data", (chunk) => {
|
|
6599
|
-
output += chunk.toString();
|
|
6600
|
-
});
|
|
6601
|
-
child.on("close", () => {
|
|
6602
|
-
if (!settled) {
|
|
6603
|
-
settled = true;
|
|
6604
|
-
resolve2(output);
|
|
6605
|
-
}
|
|
6606
|
-
});
|
|
6607
|
-
child.on("error", (err) => {
|
|
6608
|
-
if (!settled) {
|
|
6609
|
-
settled = true;
|
|
6610
|
-
resolve2(`[error: ${err.message}]`);
|
|
6611
|
-
}
|
|
6612
|
-
});
|
|
6613
|
-
setTimeout(() => {
|
|
6614
|
-
if (!settled) {
|
|
6615
|
-
settled = true;
|
|
6616
|
-
child.kill("SIGTERM");
|
|
6617
|
-
resolve2(output + "\n[timed out]");
|
|
6618
|
-
}
|
|
6619
|
-
}, timeoutMs);
|
|
6620
|
-
});
|
|
6621
|
-
}
|
|
6622
|
-
|
|
6623
6467
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6624
6468
|
import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
|
|
6625
|
-
import { join as join5 } from "path";
|
|
6469
|
+
import { join as join4 } from "path";
|
|
6626
6470
|
var AGENTS_DIR = ".claude/agents";
|
|
6627
6471
|
function toAgentFilename(name2, index, nameCount) {
|
|
6628
6472
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6632,12 +6476,12 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6632
6476
|
}
|
|
6633
6477
|
async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
6634
6478
|
if (subAgents.length === 0) return;
|
|
6635
|
-
const agentsDir = join5(cwd, AGENTS_DIR);
|
|
6479
|
+
const agentsDir = join4(cwd, AGENTS_DIR);
|
|
6636
6480
|
await mkdir3(agentsDir, { recursive: true });
|
|
6637
6481
|
const nameCount = /* @__PURE__ */ new Map();
|
|
6638
6482
|
for (const [i, agent] of subAgents.entries()) {
|
|
6639
6483
|
const filename = toAgentFilename(agent.name, i, nameCount);
|
|
6640
|
-
const filePath = join5(agentsDir, `${filename}.md`);
|
|
6484
|
+
const filePath = join4(agentsDir, `${filename}.md`);
|
|
6641
6485
|
await writeFile3(filePath, agent.subAgentMd, "utf8");
|
|
6642
6486
|
}
|
|
6643
6487
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
@@ -6873,56 +6717,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6873
6717
|
const allMessages = [];
|
|
6874
6718
|
if (options.mcps && options.mcps.length > 0) {
|
|
6875
6719
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
6876
|
-
const probeResults = await probeMcpServers(options.mcps);
|
|
6877
|
-
if (options.traceContext) {
|
|
6878
|
-
emitTraceEvent(
|
|
6879
|
-
{
|
|
6880
|
-
evalRunId: options.traceContext.evalRunId,
|
|
6881
|
-
scenarioId: options.traceContext.scenarioId,
|
|
6882
|
-
scenarioName: options.traceContext.scenarioName,
|
|
6883
|
-
targetId: options.traceContext.targetId,
|
|
6884
|
-
targetName: options.traceContext.targetName,
|
|
6885
|
-
stepNumber: 0,
|
|
6886
|
-
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6887
|
-
outputPreview: JSON.stringify({
|
|
6888
|
-
event: "mcp-probe",
|
|
6889
|
-
results: probeResults
|
|
6890
|
-
}).slice(0, 2e3),
|
|
6891
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6892
|
-
isComplete: false
|
|
6893
|
-
},
|
|
6894
|
-
options.traceContext.tracePushUrl,
|
|
6895
|
-
options.traceContext.routeHeader,
|
|
6896
|
-
options.traceContext.authToken
|
|
6897
|
-
);
|
|
6898
|
-
}
|
|
6899
|
-
const npmDiag = await probeNpmConfig(options.cwd);
|
|
6900
|
-
console.log(
|
|
6901
|
-
"[DEBUG-d744ca] npm-config-diagnostic",
|
|
6902
|
-
JSON.stringify(npmDiag)
|
|
6903
|
-
);
|
|
6904
|
-
if (options.traceContext) {
|
|
6905
|
-
emitTraceEvent(
|
|
6906
|
-
{
|
|
6907
|
-
evalRunId: options.traceContext.evalRunId,
|
|
6908
|
-
scenarioId: options.traceContext.scenarioId,
|
|
6909
|
-
scenarioName: options.traceContext.scenarioName,
|
|
6910
|
-
targetId: options.traceContext.targetId,
|
|
6911
|
-
targetName: options.traceContext.targetName,
|
|
6912
|
-
stepNumber: 0,
|
|
6913
|
-
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6914
|
-
outputPreview: JSON.stringify({
|
|
6915
|
-
event: "npm-config-diagnostic",
|
|
6916
|
-
...npmDiag
|
|
6917
|
-
}).slice(0, 8e3),
|
|
6918
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6919
|
-
isComplete: false
|
|
6920
|
-
},
|
|
6921
|
-
options.traceContext.tracePushUrl,
|
|
6922
|
-
options.traceContext.routeHeader,
|
|
6923
|
-
options.traceContext.authToken
|
|
6924
|
-
);
|
|
6925
|
-
}
|
|
6926
6720
|
}
|
|
6927
6721
|
if (options.subAgents && options.subAgents.length > 0) {
|
|
6928
6722
|
await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
|
|
@@ -7183,41 +6977,6 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
|
|
|
7183
6977
|
})
|
|
7184
6978
|
);
|
|
7185
6979
|
}
|
|
7186
|
-
const sdkMsg = message;
|
|
7187
|
-
if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
|
|
7188
|
-
const initData = sdkMsg;
|
|
7189
|
-
const mcpInfo = {
|
|
7190
|
-
mcp_servers: initData.mcp_servers,
|
|
7191
|
-
tools: initData.tools,
|
|
7192
|
-
cwd: options.cwd
|
|
7193
|
-
};
|
|
7194
|
-
console.error(
|
|
7195
|
-
"[MCP-DIAG] Init message MCP status:",
|
|
7196
|
-
JSON.stringify(mcpInfo, null, 2)
|
|
7197
|
-
);
|
|
7198
|
-
if (traceContext) {
|
|
7199
|
-
emitTraceEvent(
|
|
7200
|
-
{
|
|
7201
|
-
evalRunId: traceContext.evalRunId,
|
|
7202
|
-
scenarioId: traceContext.scenarioId,
|
|
7203
|
-
scenarioName: traceContext.scenarioName,
|
|
7204
|
-
targetId: traceContext.targetId,
|
|
7205
|
-
targetName: traceContext.targetName,
|
|
7206
|
-
stepNumber: traceStepNumber,
|
|
7207
|
-
type: LiveTraceEventType.DIAGNOSTIC,
|
|
7208
|
-
outputPreview: JSON.stringify({
|
|
7209
|
-
event: "mcp-init-status",
|
|
7210
|
-
...mcpInfo
|
|
7211
|
-
}).slice(0, 2e3),
|
|
7212
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7213
|
-
isComplete: false
|
|
7214
|
-
},
|
|
7215
|
-
traceContext.tracePushUrl,
|
|
7216
|
-
traceContext.routeHeader,
|
|
7217
|
-
traceContext.authToken
|
|
7218
|
-
);
|
|
7219
|
-
}
|
|
7220
|
-
}
|
|
7221
6980
|
if (traceContext) {
|
|
7222
6981
|
traceStepNumber++;
|
|
7223
6982
|
const traceEvent = createTraceEventFromAnyMessage(
|
|
@@ -7743,7 +7502,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
7743
7502
|
|
|
7744
7503
|
// src/run-scenario/file-diff.ts
|
|
7745
7504
|
import { readdirSync as readdirSync2, readFileSync, statSync, existsSync as existsSync2 } from "fs";
|
|
7746
|
-
import { join as join7, relative } from "path";
|
|
7505
|
+
import { join as join6, relative } from "path";
|
|
7747
7506
|
|
|
7748
7507
|
// ../../node_modules/diff/lib/index.mjs
|
|
7749
7508
|
function Diff() {
|
|
@@ -7919,7 +7678,7 @@ Diff.prototype = {
|
|
|
7919
7678
|
tokenize: function tokenize(value) {
|
|
7920
7679
|
return Array.from(value);
|
|
7921
7680
|
},
|
|
7922
|
-
join: function join6(chars) {
|
|
7681
|
+
join: function join5(chars) {
|
|
7923
7682
|
return chars.join("");
|
|
7924
7683
|
},
|
|
7925
7684
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -8359,7 +8118,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
8359
8118
|
}
|
|
8360
8119
|
const entries = readdirSync2(dir, { withFileTypes: true });
|
|
8361
8120
|
for (const entry of entries) {
|
|
8362
|
-
const fullPath = join7(dir, entry.name);
|
|
8121
|
+
const fullPath = join6(dir, entry.name);
|
|
8363
8122
|
const relativePath = relative(base, fullPath);
|
|
8364
8123
|
if (shouldIgnore(entry.name)) {
|
|
8365
8124
|
continue;
|