@wix/evalforge-evaluator 0.67.0 → 0.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +177 -42
- package/build/index.js.map +4 -4
- package/build/index.mjs +174 -39
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/claude-code/mcp-setup.d.ts +54 -0
- package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +0 -38
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -6437,8 +6437,7 @@ async function writeSkillFiles(skillDir, files) {
|
|
|
6437
6437
|
import { randomUUID } from "crypto";
|
|
6438
6438
|
|
|
6439
6439
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
6440
|
-
import { writeFile as writeFile2, readFile } from "fs/promises";
|
|
6441
|
-
import { spawn } from "child_process";
|
|
6440
|
+
import { writeFile as writeFile2 } from "fs/promises";
|
|
6442
6441
|
import { join as join3 } from "path";
|
|
6443
6442
|
import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
|
|
6444
6443
|
async function writeMcpToFilesystem(cwd, mcps) {
|
|
@@ -6464,6 +6463,11 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6464
6463
|
await writeFile2(filePath, content, "utf8");
|
|
6465
6464
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6466
6465
|
}
|
|
6466
|
+
|
|
6467
|
+
// src/run-scenario/agents/claude-code/mcp-setup.ts
|
|
6468
|
+
import { readFile } from "fs/promises";
|
|
6469
|
+
import { spawn } from "child_process";
|
|
6470
|
+
import { join as join4 } from "path";
|
|
6467
6471
|
async function probeMcpServers(mcps, probeMs = 5e3) {
|
|
6468
6472
|
const results = [];
|
|
6469
6473
|
for (const mcp of mcps) {
|
|
@@ -6531,56 +6535,165 @@ spawn error: ${err.message}`;
|
|
|
6531
6535
|
}, probeMs);
|
|
6532
6536
|
});
|
|
6533
6537
|
}
|
|
6538
|
+
/**
 * Ensure every npx-launched package referenced by the given MCP configs is
 * installed globally before the servers are started.
 *
 * Packages are handled one after another: each is first checked with
 * `isPackageGloballyInstalled`; only the missing ones are passed to
 * `installPackageGlobally`.
 *
 * @param {Array<object>} mcps - MCP entries, forwarded to `extractNpxPackages`.
 * @param {number} [timeoutMs=120000] - Per-package install timeout in milliseconds.
 * @returns {Promise<Array<object>>} One record per package with `package`,
 *   `success`, `skipped`, `output` and `durationMs`; empty when no npx
 *   packages were found.
 */
async function preInstallMcpPackages(mcps, timeoutMs = 12e4) {
  const npxPackages = extractNpxPackages(mcps);
  const outcomes = [];
  for (const name of npxPackages) {
    // The timer covers the "already installed" check as well as the install.
    const began = Date.now();
    if (await isPackageGloballyInstalled(name)) {
      const elapsed = Date.now() - began;
      console.log(
        `[MCP] Package already installed globally, skipping: ${name} (${elapsed}ms)`
      );
      outcomes.push({
        package: name,
        success: true,
        skipped: true,
        output: "already installed",
        durationMs: elapsed
      });
    } else {
      console.log(`[MCP] Pre-installing npx package globally: ${name}`);
      const installed = await installPackageGlobally(name, timeoutMs);
      const elapsed = Date.now() - began;
      // Key order is kept stable because callers serialize these records.
      outcomes.push({
        package: name,
        skipped: false,
        durationMs: elapsed,
        ...installed
      });
      const verdict = installed.success ? "SUCCESS" : "FAILED";
      console.log(`[MCP] Install ${name}: ${verdict} (${elapsed}ms)`);
    }
  }
  return outcomes;
}
|
|
6569
|
+
/**
 * Check whether a package is present in the global npm tree by running
 * `npm ls -g --depth=0 <pkg>` and inspecting its exit code.
 *
 * The promise never rejects: spawn failures, a non-zero exit code, and a
 * check that takes longer than 5 seconds all resolve to `false`.
 *
 * @param {string} pkg - Package name as it would be passed to npm.
 * @returns {Promise<boolean>} `true` only when npm exits with code 0.
 */
function isPackageGloballyInstalled(pkg) {
  return new Promise((resolve2) => {
    // An empty name or one starting with "-" would be read by npm as
    // "list everything" or as an option, so it can never be a positive match.
    if (typeof pkg !== "string" || pkg.length === 0 || pkg.startsWith("-")) {
      resolve2(false);
      return;
    }
    let settled = false;
    let timer;
    const finish = (installed) => {
      if (settled) return;
      settled = true;
      // Clear the watchdog so it does not keep the event loop alive.
      clearTimeout(timer);
      resolve2(installed);
    };
    // Only the exit code is used, so the child's output is discarded instead
    // of being piped into buffers nobody reads.
    const child = spawn("npm", ["ls", "-g", "--depth=0", pkg], {
      stdio: "ignore",
      env: process.env
    });
    child.on("close", (code2) => finish(code2 === 0));
    child.on("error", () => finish(false));
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      finish(false);
    }, 5e3);
  });
}
|
|
6597
|
+
/**
 * Collect the package names that MCP server configs launch through `npx`.
 *
 * For every object-valued entry of each `mcp.config` whose `command` is
 * exactly "npx" and whose `args` is an array, the first argument that is a
 * non-empty string not starting with "-" is taken as the package name.
 *
 * @param {Array<{config?: object}>} mcps - MCP entries; entries without an
 *   object `config` are ignored.
 * @returns {string[]} De-duplicated package names in first-seen order.
 */
function extractNpxPackages(mcps) {
  const packages = new Set();
  for (const mcp of mcps) {
    const config = mcp?.config;
    // A missing or malformed config contributes nothing rather than throwing.
    if (typeof config !== "object" || config === null) continue;
    for (const cfg of Object.values(config)) {
      if (typeof cfg !== "object" || cfg === null) continue;
      if (cfg.command !== "npx" || !Array.isArray(cfg.args)) continue;
      // Flags such as "-y" come first; the first positional string is the package.
      const pkg = cfg.args.find(
        (arg) => typeof arg === "string" && arg.length > 0 && !arg.startsWith("-")
      );
      if (pkg !== undefined) packages.add(pkg);
    }
  }
  return [...packages];
}
|
|
6615
|
+
/**
 * Install a package globally with `npm install -g <pkg>` and capture its output.
 *
 * npm is spawned directly with an argument array rather than through a shell
 * command string, so characters in `pkg` are never interpreted by a shell.
 * The promise never rejects; every failure mode is reported in the result.
 *
 * @param {string} pkg - Package name or spec to install.
 * @param {number} timeoutMs - Milliseconds to wait before the install is
 *   sent SIGTERM and reported as failed.
 * @returns {Promise<{success: boolean, output: string}>} `success` is true
 *   only when npm exits with code 0. `output` is the last 1000 characters of
 *   combined stdout/stderr, a spawn error message, or a validation message.
 */
function installPackageGlobally(pkg, timeoutMs) {
  return new Promise((resolve2) => {
    // Refuse names that npm would read as an option or as "install nothing".
    if (typeof pkg !== "string" || pkg.length === 0 || pkg.startsWith("-")) {
      resolve2({
        success: false,
        output: `invalid package name: ${String(pkg)}`
      });
      return;
    }
    let output = "";
    let settled = false;
    let timer;
    const finish = (result) => {
      if (settled) return;
      settled = true;
      // Without this the pending timer keeps the process alive for up to timeoutMs.
      clearTimeout(timer);
      resolve2(result);
    };
    const collect = (chunk) => {
      output += chunk.toString();
    };
    // stdin is not used, so it is not opened; stdout and stderr are merged
    // into a single log.
    const child = spawn("npm", ["install", "-g", pkg], {
      stdio: ["ignore", "pipe", "pipe"],
      env: process.env
    });
    child.stdout.on("data", collect);
    child.stderr.on("data", collect);
    child.on("close", (code2) => {
      finish({ success: code2 === 0, output: output.slice(-1e3) });
    });
    child.on("error", (err) => {
      finish({ success: false, output: `spawn error: ${err.message}` });
    });
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      finish({
        success: false,
        output: `${output.slice(-1e3)}\n[install timed out]`
      });
    }, timeoutMs);
  });
}
|
|
6534
6653
|
async function probeNpmConfig(cwd) {
|
|
6535
6654
|
const homedir = process.env.HOME ?? "~";
|
|
6536
|
-
const [
|
|
6537
|
-
|
|
6538
|
-
homeNpmrc,
|
|
6539
|
-
projectRootNpmrc,
|
|
6540
|
-
globalNpmrc,
|
|
6541
|
-
npmConfigListFull,
|
|
6542
|
-
npmConfigGetRegistry,
|
|
6543
|
-
npmViewWixMcp
|
|
6544
|
-
] = await Promise.all([
|
|
6545
|
-
readFile(join3(cwd, ".npmrc"), "utf8").catch(
|
|
6546
|
-
(e) => `[not found: ${e.message}]`
|
|
6547
|
-
),
|
|
6548
|
-
readFile(join3(homedir, ".npmrc"), "utf8").catch(
|
|
6549
|
-
(e) => `[not found: ${e.message}]`
|
|
6550
|
-
),
|
|
6551
|
-
readFile("/user-code/.npmrc", "utf8").catch(
|
|
6655
|
+
const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
|
|
6656
|
+
readFile(join4(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
|
|
6552
6657
|
(e) => `[not found: ${e.message}]`
|
|
6553
6658
|
),
|
|
6554
6659
|
runShellCapture(
|
|
6555
|
-
|
|
6660
|
+
"npm install --dry-run @wix/mcp 2>&1 | tail -15",
|
|
6556
6661
|
cwd,
|
|
6557
|
-
|
|
6662
|
+
15e3
|
|
6558
6663
|
),
|
|
6559
|
-
runShellCapture("npm config list -l 2>&1 | head -60", cwd, 5e3),
|
|
6560
|
-
runShellCapture("npm config get registry", cwd, 3e3),
|
|
6561
6664
|
runShellCapture(
|
|
6562
|
-
|
|
6665
|
+
'ls -la $(npm root -g)/@wix/mcp/package.json 2>/dev/null || echo "[not globally installed]" && npm list -g @wix/mcp 2>&1 | tail -5',
|
|
6563
6666
|
cwd,
|
|
6564
|
-
|
|
6667
|
+
5e3
|
|
6565
6668
|
)
|
|
6566
6669
|
]);
|
|
6670
|
+
const wixAuthParsed = (() => {
|
|
6671
|
+
try {
|
|
6672
|
+
const parsed = JSON.parse(wixAuthFile);
|
|
6673
|
+
return JSON.stringify({
|
|
6674
|
+
hasToken: typeof parsed.token === "string",
|
|
6675
|
+
tokenLength: typeof parsed.token === "string" ? parsed.token.length : 0,
|
|
6676
|
+
hasAccountId: typeof parsed.accountId === "string",
|
|
6677
|
+
hasUserInfo: typeof parsed.userInfo === "object"
|
|
6678
|
+
});
|
|
6679
|
+
} catch {
|
|
6680
|
+
return wixAuthFile.slice(0, 500);
|
|
6681
|
+
}
|
|
6682
|
+
})();
|
|
6567
6683
|
const npmEnvVars = {};
|
|
6568
6684
|
for (const [key, value] of Object.entries(process.env)) {
|
|
6569
6685
|
const lk = key.toLowerCase();
|
|
6570
|
-
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry")) {
|
|
6686
|
+
if (lk.startsWith("npm_config") || lk.includes("token") || lk.includes("auth") || lk.includes("registry") || lk.startsWith("wix_")) {
|
|
6571
6687
|
npmEnvVars[key] = lk.includes("token") || lk.includes("auth") || lk.includes("secret") ? `[REDACTED, length=${(value ?? "").length}]` : value;
|
|
6572
6688
|
}
|
|
6573
6689
|
}
|
|
6574
6690
|
return {
|
|
6575
6691
|
cwd,
|
|
6576
6692
|
processCwd: process.cwd(),
|
|
6577
|
-
|
|
6578
|
-
|
|
6579
|
-
|
|
6580
|
-
|
|
6581
|
-
npmConfigListFull: npmConfigListFull.slice(0, 3e3),
|
|
6582
|
-
npmConfigGetRegistry: npmConfigGetRegistry.trim(),
|
|
6583
|
-
npmViewWixMcp: npmViewWixMcp.trim().slice(0, 500),
|
|
6693
|
+
wixAuthFile: wixAuthParsed,
|
|
6694
|
+
wixAuthFileExists: !wixAuthFile.startsWith("[not found"),
|
|
6695
|
+
npmInstallDryRun: npmInstallDryRun.trim().slice(0, 1500),
|
|
6696
|
+
mcpPackageInstalled: mcpPackageInstalled.trim().slice(0, 500),
|
|
6584
6697
|
npmEnvVars,
|
|
6585
6698
|
homedir
|
|
6586
6699
|
};
|
|
@@ -6624,7 +6737,7 @@ function runShellCapture(cmd, cwd, timeoutMs) {
|
|
|
6624
6737
|
|
|
6625
6738
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6626
6739
|
import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
|
|
6627
|
-
import { join as join4 } from "path";
|
|
6740
|
+
import { join as join5 } from "path";
|
|
6628
6741
|
var AGENTS_DIR = ".claude/agents";
|
|
6629
6742
|
function toAgentFilename(name2, index, nameCount) {
|
|
6630
6743
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6634,12 +6747,12 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6634
6747
|
}
|
|
6635
6748
|
async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
6636
6749
|
if (subAgents.length === 0) return;
|
|
6637
|
-
const agentsDir = join4(cwd, AGENTS_DIR);
|
|
6750
|
+
const agentsDir = join5(cwd, AGENTS_DIR);
|
|
6638
6751
|
await mkdir3(agentsDir, { recursive: true });
|
|
6639
6752
|
const nameCount = /* @__PURE__ */ new Map();
|
|
6640
6753
|
for (const [i, agent] of subAgents.entries()) {
|
|
6641
6754
|
const filename = toAgentFilename(agent.name, i, nameCount);
|
|
6642
|
-
const filePath = join4(agentsDir, `${filename}.md`);
|
|
6755
|
+
const filePath = join5(agentsDir, `${filename}.md`);
|
|
6643
6756
|
await writeFile3(filePath, agent.subAgentMd, "utf8");
|
|
6644
6757
|
}
|
|
6645
6758
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
@@ -6874,8 +6987,30 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6874
6987
|
const startTime = /* @__PURE__ */ new Date();
|
|
6875
6988
|
const allMessages = [];
|
|
6876
6989
|
if (options.mcps && options.mcps.length > 0) {
|
|
6877
|
-
process.env.npm_config__auth = "";
|
|
6878
6990
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
6991
|
+
const installResults = await preInstallMcpPackages(options.mcps);
|
|
6992
|
+
if (installResults.length > 0 && options.traceContext) {
|
|
6993
|
+
emitTraceEvent(
|
|
6994
|
+
{
|
|
6995
|
+
evalRunId: options.traceContext.evalRunId,
|
|
6996
|
+
scenarioId: options.traceContext.scenarioId,
|
|
6997
|
+
scenarioName: options.traceContext.scenarioName,
|
|
6998
|
+
targetId: options.traceContext.targetId,
|
|
6999
|
+
targetName: options.traceContext.targetName,
|
|
7000
|
+
stepNumber: 0,
|
|
7001
|
+
type: LiveTraceEventType.DIAGNOSTIC,
|
|
7002
|
+
outputPreview: JSON.stringify({
|
|
7003
|
+
event: "mcp-pre-install",
|
|
7004
|
+
results: installResults
|
|
7005
|
+
}).slice(0, 2e3),
|
|
7006
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7007
|
+
isComplete: false
|
|
7008
|
+
},
|
|
7009
|
+
options.traceContext.tracePushUrl,
|
|
7010
|
+
options.traceContext.routeHeader,
|
|
7011
|
+
options.traceContext.authToken
|
|
7012
|
+
);
|
|
7013
|
+
}
|
|
6879
7014
|
const probeResults = await probeMcpServers(options.mcps);
|
|
6880
7015
|
if (options.traceContext) {
|
|
6881
7016
|
emitTraceEvent(
|
|
@@ -7746,7 +7881,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
7746
7881
|
|
|
7747
7882
|
// src/run-scenario/file-diff.ts
|
|
7748
7883
|
import { readdirSync as readdirSync2, readFileSync, statSync, existsSync as existsSync2 } from "fs";
|
|
7749
|
-
import { join as join6, relative } from "path";
|
|
7884
|
+
import { join as join7, relative } from "path";
|
|
7750
7885
|
|
|
7751
7886
|
// ../../node_modules/diff/lib/index.mjs
|
|
7752
7887
|
function Diff() {
|
|
@@ -7922,7 +8057,7 @@ Diff.prototype = {
|
|
|
7922
8057
|
tokenize: function tokenize(value) {
|
|
7923
8058
|
return Array.from(value);
|
|
7924
8059
|
},
|
|
7925
|
-
join: function join5(chars) {
|
|
8060
|
+
join: function join6(chars) {
|
|
7926
8061
|
return chars.join("");
|
|
7927
8062
|
},
|
|
7928
8063
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -8362,7 +8497,7 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
8362
8497
|
}
|
|
8363
8498
|
const entries = readdirSync2(dir, { withFileTypes: true });
|
|
8364
8499
|
for (const entry of entries) {
|
|
8365
|
-
const fullPath = join6(dir, entry.name);
|
|
8500
|
+
const fullPath = join7(dir, entry.name);
|
|
8366
8501
|
const relativePath = relative(base, fullPath);
|
|
8367
8502
|
if (shouldIgnore(entry.name)) {
|
|
8368
8503
|
continue;
|