@wix/evalforge-evaluator 0.71.0 → 0.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +9 -250
- package/build/index.js.map +4 -4
- package/build/index.mjs +6 -247
- package/build/index.mjs.map +4 -4
- package/package.json +4 -4
- package/build/types/run-scenario/agents/claude-code/mcp-setup.d.ts +0 -36
package/build/index.js
CHANGED
|
@@ -6477,165 +6477,9 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6477
6477
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6478
6478
|
}
|
|
6479
6479
|
|
|
6480
|
-
// src/run-scenario/agents/claude-code/mcp-setup.ts
|
|
6480
|
+
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6481
6481
|
var import_promises5 = require("fs/promises");
|
|
6482
|
-
var import_child_process = require("child_process");
|
|
6483
6482
|
var import_path7 = require("path");
|
|
6484
|
-
async function probeMcpServers(mcps, probeMs = 5e3) {
|
|
6485
|
-
const results = [];
|
|
6486
|
-
for (const mcp of mcps) {
|
|
6487
|
-
const config = mcp.config;
|
|
6488
|
-
for (const [name2, value] of Object.entries(config)) {
|
|
6489
|
-
if (typeof value !== "object" || value === null) continue;
|
|
6490
|
-
const cfg = value;
|
|
6491
|
-
const command = cfg.command;
|
|
6492
|
-
const args = cfg.args;
|
|
6493
|
-
if (typeof command !== "string" || !Array.isArray(args)) continue;
|
|
6494
|
-
const result = await probeOneServer(
|
|
6495
|
-
name2,
|
|
6496
|
-
command,
|
|
6497
|
-
args,
|
|
6498
|
-
probeMs
|
|
6499
|
-
);
|
|
6500
|
-
results.push(result);
|
|
6501
|
-
}
|
|
6502
|
-
}
|
|
6503
|
-
return results;
|
|
6504
|
-
}
|
|
6505
|
-
function probeOneServer(name2, command, args, probeMs) {
|
|
6506
|
-
return new Promise((resolve2) => {
|
|
6507
|
-
const startMs = Date.now();
|
|
6508
|
-
let stdout = "";
|
|
6509
|
-
let stderr = "";
|
|
6510
|
-
let settled = false;
|
|
6511
|
-
const finish = (exitCode, signal) => {
|
|
6512
|
-
if (settled) return;
|
|
6513
|
-
settled = true;
|
|
6514
|
-
resolve2({
|
|
6515
|
-
name: name2,
|
|
6516
|
-
command,
|
|
6517
|
-
args,
|
|
6518
|
-
exitCode,
|
|
6519
|
-
signal,
|
|
6520
|
-
stdout: stdout.slice(-2e3),
|
|
6521
|
-
stderr: stderr.slice(-2e3),
|
|
6522
|
-
durationMs: Date.now() - startMs
|
|
6523
|
-
});
|
|
6524
|
-
};
|
|
6525
|
-
const child = (0, import_child_process.spawn)(command, args, {
|
|
6526
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
6527
|
-
env: process.env
|
|
6528
|
-
});
|
|
6529
|
-
child.stdout.on("data", (chunk) => {
|
|
6530
|
-
stdout += chunk.toString();
|
|
6531
|
-
});
|
|
6532
|
-
child.stderr.on("data", (chunk) => {
|
|
6533
|
-
stderr += chunk.toString();
|
|
6534
|
-
});
|
|
6535
|
-
child.on("error", (err) => {
|
|
6536
|
-
stderr += `
|
|
6537
|
-
spawn error: ${err.message}`;
|
|
6538
|
-
finish(null, null);
|
|
6539
|
-
});
|
|
6540
|
-
child.on("close", (code2, sig) => {
|
|
6541
|
-
finish(code2, sig);
|
|
6542
|
-
});
|
|
6543
|
-
setTimeout(() => {
|
|
6544
|
-
if (!settled) {
|
|
6545
|
-
child.kill("SIGTERM");
|
|
6546
|
-
finish(null, "PROBE_TIMEOUT");
|
|
6547
|
-
}
|
|
6548
|
-
}, probeMs);
|
|
6549
|
-
});
|
|
6550
|
-
}
|
|
6551
|
-
async function probeNpmConfig(cwd) {
|
|
6552
|
-
const homedir = process.env.HOME ?? "~";
|
|
6553
|
-
const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
|
|
6554
|
-
(0, import_promises5.readFile)((0, import_path7.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
|
|
6555
|
-
(e) => `[not found: ${e.message}]`
|
|
6556
|
-
),
|
|
6557
|
-
runShellCapture(
|
|
6558
|
-
"npm install --dry-run @wix/mcp 2>&1 | tail -15",
|
|
6559
|
-
cwd,
|
|
6560
|
-
15e3
|
|
6561
|
-
),
|
|
6562
|
-
runShellCapture(
|
|
6563
|
-
'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
|
|
6564
|
-
cwd,
|
|
6565
|
-
5e3
|
|
6566
|
-
)
|
|
6567
|
-
]);
|
|
6568
|
-
const wixAuthParsed = (() => {
|
|
6569
|
-
try {
|
|
6570
|
-
const parsed = JSON.parse(wixAuthFile);
|
|
6571
|
-
return JSON.stringify({
|
|
6572
|
-
hasToken: typeof parsed.token === "string",
|
|
6573
|
-
tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
|
|
6574
|
-
hasAccountId: typeof parsed.accountId === "string",
|
|
6575
|
-
hasUserInfo: typeof parsed.userInfo === "object"
|
|
6576
|
-
});
|
|
6577
|
-
} catch {
|
|
6578
|
-
return wixAuthFile.slice(0, 500);
|
|
6579
|
-
}
|
|
6580
|
-
})();
|
|
6581
|
-
const npmEnvVars = {};
|
|
6582
|
-
for (const [key, value] of Object.entries(process.env)) {
|
|
6583
|
-
const lk = key.toLowerCase();
|
|
6584
|
-
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
|
|
6585
|
-
npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
|
|
6586
|
-
}
|
|
6587
|
-
}
|
|
6588
|
-
return {
|
|
6589
|
-
cwd,
|
|
6590
|
-
processCwd: process.cwd(),
|
|
6591
|
-
wixAuthFile: wixAuthParsed,
|
|
6592
|
-
wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
|
|
6593
|
-
npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
|
|
6594
|
-
mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
|
|
6595
|
-
npmEnvVars,
|
|
6596
|
-
homedir
|
|
6597
|
-
};
|
|
6598
|
-
}
|
|
6599
|
-
function runShellCapture(cmd, cwd, timeoutMs) {
|
|
6600
|
-
return new Promise((resolve2) => {
|
|
6601
|
-
let output = "";
|
|
6602
|
-
let settled = false;
|
|
6603
|
-
const child = (0, import_child_process.spawn)("sh", ["-c", cmd], {
|
|
6604
|
-
stdio: ["pipe", "pipe", "pipe"],
|
|
6605
|
-
cwd,
|
|
6606
|
-
env: process.env
|
|
6607
|
-
});
|
|
6608
|
-
child.stdout.on("data", (chunk) => {
|
|
6609
|
-
output += chunk.toString();
|
|
6610
|
-
});
|
|
6611
|
-
child.stderr.on("data", (chunk) => {
|
|
6612
|
-
output += chunk.toString();
|
|
6613
|
-
});
|
|
6614
|
-
child.on("close", () => {
|
|
6615
|
-
if (!settled) {
|
|
6616
|
-
settled = true;
|
|
6617
|
-
resolve2(output);
|
|
6618
|
-
}
|
|
6619
|
-
});
|
|
6620
|
-
child.on("error", (err) => {
|
|
6621
|
-
if (!settled) {
|
|
6622
|
-
settled = true;
|
|
6623
|
-
resolve2(`[error: ${err.message}]`);
|
|
6624
|
-
}
|
|
6625
|
-
});
|
|
6626
|
-
setTimeout(() => {
|
|
6627
|
-
if (!settled) {
|
|
6628
|
-
settled = true;
|
|
6629
|
-
child.kill("SIGTERM");
|
|
6630
|
-
resolve2(output + "\n[timed out]");
|
|
6631
|
-
}
|
|
6632
|
-
}, timeoutMs);
|
|
6633
|
-
});
|
|
6634
|
-
}
|
|
6635
|
-
|
|
6636
|
-
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6637
|
-
var import_promises6 = require("fs/promises");
|
|
6638
|
-
var import_path8 = require("path");
|
|
6639
6483
|
var AGENTS_DIR = ".claude/agents";
|
|
6640
6484
|
function toAgentFilename(name2, index, nameCount) {
|
|
6641
6485
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6645,13 +6489,13 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6645
6489
|
}
|
|
6646
6490
|
async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
6647
6491
|
if (subAgents.length === 0) return;
|
|
6648
|
-
const agentsDir = (0, import_path8.join)(cwd, AGENTS_DIR);
|
|
6649
|
-
await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
|
|
6492
|
+
const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
|
|
6493
|
+
await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
|
|
6650
6494
|
const nameCount = /* @__PURE__ */ new Map();
|
|
6651
6495
|
for (const [i, agent] of subAgents.entries()) {
|
|
6652
6496
|
const filename = toAgentFilename(agent.name, i, nameCount);
|
|
6653
|
-
const filePath = (0, import_path8.join)(agentsDir, `${filename}.md`);
|
|
6654
|
-
await (0, import_promises6.writeFile)(filePath, agent.subAgentMd, "utf8");
|
|
6497
|
+
const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
|
|
6498
|
+
await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
|
|
6655
6499
|
}
|
|
6656
6500
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
6657
6501
|
}
|
|
@@ -6886,56 +6730,6 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6886
6730
|
const allMessages = [];
|
|
6887
6731
|
if (options.mcps && options.mcps.length > 0) {
|
|
6888
6732
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
6889
|
-
const probeResults = await probeMcpServers(options.mcps);
|
|
6890
|
-
if (options.traceContext) {
|
|
6891
|
-
emitTraceEvent(
|
|
6892
|
-
{
|
|
6893
|
-
evalRunId: options.traceContext.evalRunId,
|
|
6894
|
-
scenarioId: options.traceContext.scenarioId,
|
|
6895
|
-
scenarioName: options.traceContext.scenarioName,
|
|
6896
|
-
targetId: options.traceContext.targetId,
|
|
6897
|
-
targetName: options.traceContext.targetName,
|
|
6898
|
-
stepNumber: 0,
|
|
6899
|
-
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
6900
|
-
outputPreview: JSON.stringify({
|
|
6901
|
-
event: "mcp-probe",
|
|
6902
|
-
results: probeResults
|
|
6903
|
-
}).slice(0, 2e3),
|
|
6904
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6905
|
-
isComplete: false
|
|
6906
|
-
},
|
|
6907
|
-
options.traceContext.tracePushUrl,
|
|
6908
|
-
options.traceContext.routeHeader,
|
|
6909
|
-
options.traceContext.authToken
|
|
6910
|
-
);
|
|
6911
|
-
}
|
|
6912
|
-
const npmDiag = await probeNpmConfig(options.cwd);
|
|
6913
|
-
console.log(
|
|
6914
|
-
"[DEBUG-d744ca] npm-config-diagnostic",
|
|
6915
|
-
JSON.stringify(npmDiag)
|
|
6916
|
-
);
|
|
6917
|
-
if (options.traceContext) {
|
|
6918
|
-
emitTraceEvent(
|
|
6919
|
-
{
|
|
6920
|
-
evalRunId: options.traceContext.evalRunId,
|
|
6921
|
-
scenarioId: options.traceContext.scenarioId,
|
|
6922
|
-
scenarioName: options.traceContext.scenarioName,
|
|
6923
|
-
targetId: options.traceContext.targetId,
|
|
6924
|
-
targetName: options.traceContext.targetName,
|
|
6925
|
-
stepNumber: 0,
|
|
6926
|
-
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
6927
|
-
outputPreview: JSON.stringify({
|
|
6928
|
-
event: "npm-config-diagnostic",
|
|
6929
|
-
...npmDiag
|
|
6930
|
-
}).slice(0, 8e3),
|
|
6931
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6932
|
-
isComplete: false
|
|
6933
|
-
},
|
|
6934
|
-
options.traceContext.tracePushUrl,
|
|
6935
|
-
options.traceContext.routeHeader,
|
|
6936
|
-
options.traceContext.authToken
|
|
6937
|
-
);
|
|
6938
|
-
}
|
|
6939
6733
|
}
|
|
6940
6734
|
if (options.subAgents && options.subAgents.length > 0) {
|
|
6941
6735
|
await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
|
|
@@ -7196,41 +6990,6 @@ IMPORTANT: This is an automated evaluation run. Follow these guidelines:
|
|
|
7196
6990
|
})
|
|
7197
6991
|
);
|
|
7198
6992
|
}
|
|
7199
|
-
const sdkMsg = message;
|
|
7200
|
-
if (sdkMsg.type === "system" && sdkMsg.subtype === "init") {
|
|
7201
|
-
const initData = sdkMsg;
|
|
7202
|
-
const mcpInfo = {
|
|
7203
|
-
mcp_servers: initData.mcp_servers,
|
|
7204
|
-
tools: initData.tools,
|
|
7205
|
-
cwd: options.cwd
|
|
7206
|
-
};
|
|
7207
|
-
console.error(
|
|
7208
|
-
"[MCP-DIAG] Init message MCP status:",
|
|
7209
|
-
JSON.stringify(mcpInfo, null, 2)
|
|
7210
|
-
);
|
|
7211
|
-
if (traceContext) {
|
|
7212
|
-
emitTraceEvent(
|
|
7213
|
-
{
|
|
7214
|
-
evalRunId: traceContext.evalRunId,
|
|
7215
|
-
scenarioId: traceContext.scenarioId,
|
|
7216
|
-
scenarioName: traceContext.scenarioName,
|
|
7217
|
-
targetId: traceContext.targetId,
|
|
7218
|
-
targetName: traceContext.targetName,
|
|
7219
|
-
stepNumber: traceStepNumber,
|
|
7220
|
-
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
7221
|
-
outputPreview: JSON.stringify({
|
|
7222
|
-
event: "mcp-init-status",
|
|
7223
|
-
...mcpInfo
|
|
7224
|
-
}).slice(0, 2e3),
|
|
7225
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7226
|
-
isComplete: false
|
|
7227
|
-
},
|
|
7228
|
-
traceContext.tracePushUrl,
|
|
7229
|
-
traceContext.routeHeader,
|
|
7230
|
-
traceContext.authToken
|
|
7231
|
-
);
|
|
7232
|
-
}
|
|
7233
|
-
}
|
|
7234
6993
|
if (traceContext) {
|
|
7235
6994
|
traceStepNumber++;
|
|
7236
6995
|
const traceEvent = createTraceEventFromAnyMessage(
|
|
@@ -7756,7 +7515,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
7756
7515
|
|
|
7757
7516
|
// src/run-scenario/file-diff.ts
|
|
7758
7517
|
var import_fs6 = require("fs");
|
|
7759
|
-
var import_path9 = require("path");
|
|
7518
|
+
var import_path8 = require("path");
|
|
7760
7519
|
|
|
7761
7520
|
// ../../node_modules/diff/lib/index.mjs
|
|
7762
7521
|
function Diff() {
|
|
@@ -7932,7 +7691,7 @@ Diff.prototype = {
|
|
|
7932
7691
|
tokenize: function tokenize(value) {
|
|
7933
7692
|
return Array.from(value);
|
|
7934
7693
|
},
|
|
7935
|
-
join: function
|
|
7694
|
+
join: function join5(chars) {
|
|
7936
7695
|
return chars.join("");
|
|
7937
7696
|
},
|
|
7938
7697
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -8372,8 +8131,8 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
8372
8131
|
}
|
|
8373
8132
|
const entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
|
|
8374
8133
|
for (const entry of entries) {
|
|
8375
|
-
const fullPath = (0, import_path9.join)(dir, entry.name);
|
|
8376
|
-
const relativePath = (0, import_path9.relative)(base, fullPath);
|
|
8134
|
+
const fullPath = (0, import_path8.join)(dir, entry.name);
|
|
8135
|
+
const relativePath = (0, import_path8.relative)(base, fullPath);
|
|
8377
8136
|
if (shouldIgnore(entry.name)) {
|
|
8378
8137
|
continue;
|
|
8379
8138
|
}
|