@wix/evalforge-evaluator 0.67.0 → 0.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +177 -42
- package/build/index.js.map +4 -4
- package/build/index.mjs +174 -39
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/claude-code/mcp-setup.d.ts +54 -0
- package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +0 -38
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6451,7 +6451,6 @@ var import_crypto = require("crypto");
|
|
|
6451
6451
|
|
|
6452
6452
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
6453
6453
|
var import_promises4 = require("fs/promises");
|
|
6454
|
-
var import_child_process = require("child_process");
|
|
6455
6454
|
var import_path6 = require("path");
|
|
6456
6455
|
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
6457
6456
|
async function writeMcpToFilesystem(cwd, mcps) {
|
|
@@ -6477,6 +6476,11 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6477
6476
|
await (0, import_promises4.writeFile)(filePath, content, "utf8");
|
|
6478
6477
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6479
6478
|
}
|
|
6479
|
+
|
|
6480
|
+
// src/run-scenario/agents/claude-code/mcp-setup.ts
|
|
6481
|
+
var import_promises5 = require("fs/promises");
|
|
6482
|
+
var import_child_process = require("child_process");
|
|
6483
|
+
var import_path7 = require("path");
|
|
6480
6484
|
async function probeMcpServers(mcps, probeMs = 5e3) {
|
|
6481
6485
|
const results = [];
|
|
6482
6486
|
for (const mcp of mcps) {
|
|
@@ -6544,56 +6548,165 @@ spawn error: ${err.message}`;
|
|
|
6544
6548
|
}, probeMs);
|
|
6545
6549
|
});
|
|
6546
6550
|
}
|
|
6551
|
+
/**
 * Ensures every package that the given MCP servers launch through `npx` is
 * installed globally, handling the packages one after another.
 *
 * For each package returned by `extractNpxPackages`, the global install is
 * skipped when `isPackageGloballyInstalled` reports it present; otherwise
 * `installPackageGlobally` is invoked with `timeoutMs`.
 *
 * @param mcps - MCP descriptors, forwarded to `extractNpxPackages`.
 * @param timeoutMs - Per-package install timeout in milliseconds (default 120000).
 * @returns One record per package: `{ package, success, skipped, output, durationMs }`.
 *   `durationMs` is measured from before the installed-check until the
 *   check (skipped case) or the install (non-skipped case) finished.
 */
async function preInstallMcpPackages(mcps, timeoutMs = 12e4) {
  const outcomes = [];
  for (const name of extractNpxPackages(mcps)) {
    const began = Date.now();
    if (await isPackageGloballyInstalled(name)) {
      const elapsed = Date.now() - began;
      console.log(
        `[MCP] Package already installed globally, skipping: ${name} (${elapsed}ms)`
      );
      outcomes.push({
        package: name,
        success: true,
        skipped: true,
        output: "already installed",
        durationMs: elapsed
      });
    } else {
      console.log(`[MCP] Pre-installing npx package globally: ${name}`);
      const installOutcome = await installPackageGlobally(name, timeoutMs);
      const elapsed = Date.now() - began;
      // Key order is kept (package, skipped, durationMs, then the install
      // result fields) because callers may serialize these records.
      outcomes.push({
        package: name,
        skipped: false,
        durationMs: elapsed,
        ...installOutcome
      });
      const verdict = installOutcome.success ? "SUCCESS" : "FAILED";
      console.log(`[MCP] Install ${name}: ${verdict} (${elapsed}ms)`);
    }
  }
  return outcomes;
}
|
|
6582
|
+
/**
 * Checks whether `pkg` is present in the global npm tree by running
 * `npm ls -g --depth=0 <pkg>` and inspecting its exit code.
 *
 * Never rejects: a spawn error or a check that takes longer than 5 seconds
 * resolves to `false`.
 *
 * @param {string} pkg - Package name passed to `npm ls` as a single argument.
 * @returns {Promise<boolean>} `true` only when npm exits with code 0.
 */
function isPackageGloballyInstalled(pkg) {
  return new Promise((resolve2) => {
    let settled = false;
    let timer;
    // Single exit point: resolves once and cancels the watchdog so a finished
    // check does not leave a pending 5s timer holding the event loop open.
    const settle = (installed) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(installed);
    };
    const child = (0, import_child_process.spawn)("npm", ["ls", "-g", "--depth=0", pkg], {
      // Only the exit code is used, so the child's output is discarded
      // instead of being piped into streams nobody reads.
      stdio: "ignore",
      env: process.env
    });
    child.on("close", (code2) => settle(code2 === 0));
    child.on("error", () => settle(false));
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle(false);
    }, 5e3);
  });
}
|
|
6610
|
+
/**
 * Collects the package names that MCP server definitions launch through `npx`.
 *
 * Each `mcp.config` is read as a map of server name -> server definition.
 * For every definition whose `command` is exactly "npx" and whose `args` is
 * an array, the first non-empty string argument that does not start with "-"
 * is taken as the package name.
 *
 * @param {Array<{config: object}>} mcps - MCP descriptors.
 * @returns {string[]} De-duplicated package names in first-seen order.
 */
function extractNpxPackages(mcps) {
  const packages = new Set();
  for (const mcp of mcps) {
    const config = mcp?.config;
    // Object.values would throw on null/undefined; a descriptor without a
    // config object simply contributes no packages.
    if (typeof config !== "object" || config === null) continue;
    for (const cfg of Object.values(config)) {
      if (typeof cfg !== "object" || cfg === null) continue;
      if (cfg.command !== "npx" || !Array.isArray(cfg.args)) continue;
      // Non-string args (e.g. a numeric port) are skipped rather than
      // crashing on `.startsWith`; an empty string can never name a package.
      const pkg = cfg.args.find(
        (arg) => typeof arg === "string" && arg !== "" && !arg.startsWith("-")
      );
      if (pkg !== undefined) packages.add(pkg);
    }
  }
  return [...packages];
}
|
|
6628
|
+
/**
 * Runs `npm install -g <pkg>` and reports whether it succeeded.
 *
 * Never rejects: spawn failures and timeouts resolve with `success: false`.
 *
 * @param {string} pkg - Package spec, passed to npm as one argv entry.
 * @param {number} timeoutMs - Milliseconds to wait before the install is
 *   sent SIGTERM and reported as failed.
 * @returns {Promise<{success: boolean, output: string}>} `output` is the last
 *   1000 characters of the collected stdout/stderr text; on timeout the
 *   marker "\n[install timed out]" is appended, and on spawn failure it is
 *   "spawn error: <message>".
 */
function installPackageGlobally(pkg, timeoutMs) {
  return new Promise((resolve2) => {
    let output = "";
    let settled = false;
    let timer;
    // Single exit point: resolves once and cancels the timeout so a finished
    // install does not leave a long-lived timer holding the event loop open.
    const settle = (result) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(result);
    };
    // npm is spawned directly with an argument vector rather than through
    // `sh -c` with string interpolation: `pkg` comes from MCP configuration,
    // and shell metacharacters in it must not be interpreted. It also means
    // SIGTERM on timeout reaches npm itself instead of a wrapping shell.
    const child = (0, import_child_process.spawn)("npm", ["install", "-g", pkg], {
      stdio: ["pipe", "pipe", "pipe"],
      env: process.env
    });
    const collect = (chunk) => {
      output += chunk.toString();
    };
    child.stdout.on("data", collect);
    child.stderr.on("data", collect);
    child.on("close", (code2) => {
      settle({ success: code2 === 0, output: output.slice(-1e3) });
    });
    child.on("error", (err) => {
      settle({ success: false, output: `spawn error: ${err.message}` });
    });
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle({
        success: false,
        output: output.slice(-1e3) + "\n[install timed out]"
      });
    }, timeoutMs);
  });
}
|
|
6547
6666
|
async function probeNpmConfig(cwd) {
|
|
6548
6667
|
const homedir = process.env.HOME ?? "~";
|
|
6549
|
-
const [
|
|
6550
|
-
|
|
6551
|
-
homeNpmrc,
|
|
6552
|
-
projectRootNpmrc,
|
|
6553
|
-
globalNpmrc,
|
|
6554
|
-
npmConfigListFull,
|
|
6555
|
-
npmConfigGetRegistry,
|
|
6556
|
-
npmViewWixMcp
|
|
6557
|
-
] = await Promise.all([
|
|
6558
|
-
(0, import_promises4.readFile)((0, import_path6.join)(cwd, ".npmrc"), "utf8").catch(
|
|
6559
|
-
(e) => `[not found: ${e.message}]`
|
|
6560
|
-
),
|
|
6561
|
-
(0, import_promises4.readFile)((0, import_path6.join)(homedir, ".npmrc"), "utf8").catch(
|
|
6562
|
-
(e) => `[not found: ${e.message}]`
|
|
6563
|
-
),
|
|
6564
|
-
(0, import_promises4.readFile)("/user-code/.npmrc", "utf8").catch(
|
|
6668
|
+
const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
|
|
6669
|
+
(0, import_promises5.readFile)((0, import_path7.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
|
|
6565
6670
|
(e) => `[not found: ${e.message}]`
|
|
6566
6671
|
),
|
|
6567
6672
|
runShellCapture(
|
|
6568
|
-
|
|
6673
|
+
"npm install --dry-run @wix/mcp 2>&1 | tail -15",
|
|
6569
6674
|
cwd,
|
|
6570
|
-
|
|
6675
|
+
15e3
|
|
6571
6676
|
),
|
|
6572
|
-
runShellCapture("npm config list -l 2>&1 | head -60", cwd, 5e3),
|
|
6573
|
-
runShellCapture("npm config get registry", cwd, 3e3),
|
|
6574
6677
|
runShellCapture(
|
|
6575
|
-
|
|
6678
|
+
'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
|
|
6576
6679
|
cwd,
|
|
6577
|
-
|
|
6680
|
+
5e3
|
|
6578
6681
|
)
|
|
6579
6682
|
]);
|
|
6683
|
+
const wixAuthParsed = (() => {
|
|
6684
|
+
try {
|
|
6685
|
+
const parsed = JSON.parse(wixAuthFile);
|
|
6686
|
+
return JSON.stringify({
|
|
6687
|
+
hasToken: typeof parsed.token === "string",
|
|
6688
|
+
tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
|
|
6689
|
+
hasAccountId: typeof parsed.accountId === "string",
|
|
6690
|
+
hasUserInfo: typeof parsed.userInfo === "object"
|
|
6691
|
+
});
|
|
6692
|
+
} catch {
|
|
6693
|
+
return wixAuthFile.slice(0, 500);
|
|
6694
|
+
}
|
|
6695
|
+
})();
|
|
6580
6696
|
const npmEnvVars = {};
|
|
6581
6697
|
for (const [key, value] of Object.entries(process.env)) {
|
|
6582
6698
|
const lk = key.toLowerCase();
|
|
6583
|
-
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry")) {
|
|
6699
|
+
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
|
|
6584
6700
|
npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
|
|
6585
6701
|
}
|
|
6586
6702
|
}
|
|
6587
6703
|
return {
|
|
6588
6704
|
cwd,
|
|
6589
6705
|
processCwd: process.cwd(),
|
|
6590
|
-
|
|
6591
|
-
|
|
6592
|
-
|
|
6593
|
-
|
|
6594
|
-
npmConfigListFull: npmConfigListFull.slice(0, 3e3),
|
|
6595
|
-
npmConfigGetRegistry: npmConfigGetRegistry.trim(),
|
|
6596
|
-
npmViewWixMcp: npmViewWixMcp.trim().slice(0, 500),
|
|
6706
|
+
wixAuthFile: wixAuthParsed,
|
|
6707
|
+
wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
|
|
6708
|
+
npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
|
|
6709
|
+
mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
|
|
6597
6710
|
npmEnvVars,
|
|
6598
6711
|
homedir
|
|
6599
6712
|
};
|
|
@@ -6636,8 +6749,8 @@ function runShellCapture(cmd, cwd, timeoutMs) {
|
|
|
6636
6749
|
}
|
|
6637
6750
|
|
|
6638
6751
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6639
|
-
var
|
|
6640
|
-
var
|
|
6752
|
+
var import_promises6 = require("fs/promises");
|
|
6753
|
+
var import_path8 = require("path");
|
|
6641
6754
|
var AGENTS_DIR = ".claude/agents";
|
|
6642
6755
|
function toAgentFilename(name2, index, nameCount) {
|
|
6643
6756
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6647,13 +6760,13 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6647
6760
|
}
|
|
6648
6761
|
async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
6649
6762
|
if (subAgents.length === 0) return;
|
|
6650
|
-
const agentsDir = (0,
|
|
6651
|
-
await (0,
|
|
6763
|
+
const agentsDir = (0, import_path8.join)(cwd, AGENTS_DIR);
|
|
6764
|
+
await (0, import_promises6.mkdir)(agentsDir, { recursive: true });
|
|
6652
6765
|
const nameCount = /* @__PURE__ */ new Map();
|
|
6653
6766
|
for (const [i, agent] of subAgents.entries()) {
|
|
6654
6767
|
const filename = toAgentFilename(agent.name, i, nameCount);
|
|
6655
|
-
const filePath = (0,
|
|
6656
|
-
await (0,
|
|
6768
|
+
const filePath = (0, import_path8.join)(agentsDir, `${filename}.md`);
|
|
6769
|
+
await (0, import_promises6.writeFile)(filePath, agent.subAgentMd, "utf8");
|
|
6657
6770
|
}
|
|
6658
6771
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
6659
6772
|
}
|
|
@@ -6887,8 +7000,30 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6887
7000
|
const startTime = /* @__PURE__ */ new Date();
|
|
6888
7001
|
const allMessages = [];
|
|
6889
7002
|
if (options.mcps && options.mcps.length > 0) {
|
|
6890
|
-
process.env.npm_config__auth = "";
|
|
6891
7003
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
7004
|
+
const installResults = await preInstallMcpPackages(options.mcps);
|
|
7005
|
+
if (installResults.length > 0 && options.traceContext) {
|
|
7006
|
+
emitTraceEvent(
|
|
7007
|
+
{
|
|
7008
|
+
evalRunId: options.traceContext.evalRunId,
|
|
7009
|
+
scenarioId: options.traceContext.scenarioId,
|
|
7010
|
+
scenarioName: options.traceContext.scenarioName,
|
|
7011
|
+
targetId: options.traceContext.targetId,
|
|
7012
|
+
targetName: options.traceContext.targetName,
|
|
7013
|
+
stepNumber: 0,
|
|
7014
|
+
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
7015
|
+
outputPreview: JSON.stringify({
|
|
7016
|
+
event: "mcp-pre-install",
|
|
7017
|
+
results: installResults
|
|
7018
|
+
}).slice(0, 2e3),
|
|
7019
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7020
|
+
isComplete: false
|
|
7021
|
+
},
|
|
7022
|
+
options.traceContext.tracePushUrl,
|
|
7023
|
+
options.traceContext.routeHeader,
|
|
7024
|
+
options.traceContext.authToken
|
|
7025
|
+
);
|
|
7026
|
+
}
|
|
6892
7027
|
const probeResults = await probeMcpServers(options.mcps);
|
|
6893
7028
|
if (options.traceContext) {
|
|
6894
7029
|
emitTraceEvent(
|
|
@@ -7759,7 +7894,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
7759
7894
|
|
|
7760
7895
|
// src/run-scenario/file-diff.ts
|
|
7761
7896
|
var import_fs6 = require("fs");
|
|
7762
|
-
var
|
|
7897
|
+
var import_path9 = require("path");
|
|
7763
7898
|
|
|
7764
7899
|
// ../../node_modules/diff/lib/index.mjs
|
|
7765
7900
|
function Diff() {
|
|
@@ -7935,7 +8070,7 @@ Diff.prototype = {
|
|
|
7935
8070
|
tokenize: function tokenize(value) {
|
|
7936
8071
|
return Array.from(value);
|
|
7937
8072
|
},
|
|
7938
|
-
join: function
|
|
8073
|
+
join: function join6(chars) {
|
|
7939
8074
|
return chars.join("");
|
|
7940
8075
|
},
|
|
7941
8076
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -8375,8 +8510,8 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
8375
8510
|
}
|
|
8376
8511
|
const entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
|
|
8377
8512
|
for (const entry of entries) {
|
|
8378
|
-
const fullPath = (0,
|
|
8379
|
-
const relativePath = (0,
|
|
8513
|
+
const fullPath = (0, import_path9.join)(dir, entry.name);
|
|
8514
|
+
const relativePath = (0, import_path9.relative)(base, fullPath);
|
|
8380
8515
|
if (shouldIgnore(entry.name)) {
|
|
8381
8516
|
continue;
|
|
8382
8517
|
}
|