@wix/evalforge-evaluator 0.68.0 → 0.69.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +154 -12
- package/build/index.js.map +4 -4
- package/build/index.mjs +151 -9
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/claude-code/mcp-setup.d.ts +54 -0
- package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +0 -35
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6451,7 +6451,6 @@ var import_crypto = require("crypto");
|
|
|
6451
6451
|
|
|
6452
6452
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
6453
6453
|
var import_promises4 = require("fs/promises");
|
|
6454
|
-
var import_child_process = require("child_process");
|
|
6455
6454
|
var import_path6 = require("path");
|
|
6456
6455
|
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
6457
6456
|
async function writeMcpToFilesystem(cwd, mcps) {
|
|
@@ -6477,6 +6476,11 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6477
6476
|
await (0, import_promises4.writeFile)(filePath, content, "utf8");
|
|
6478
6477
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6479
6478
|
}
|
|
6479
|
+
|
|
6480
|
+
// src/run-scenario/agents/claude-code/mcp-setup.ts
|
|
6481
|
+
var import_promises5 = require("fs/promises");
|
|
6482
|
+
var import_child_process = require("child_process");
|
|
6483
|
+
var import_path7 = require("path");
|
|
6480
6484
|
async function probeMcpServers(mcps, probeMs = 5e3) {
|
|
6481
6485
|
const results = [];
|
|
6482
6486
|
for (const mcp of mcps) {
|
|
@@ -6544,10 +6548,125 @@ spawn error: ${err.message}`;
|
|
|
6544
6548
|
}, probeMs);
|
|
6545
6549
|
});
|
|
6546
6550
|
}
|
|
6551
|
+
/**
 * Pre-installs, globally and one at a time, every npx-launched package
 * referenced by the given MCP configs.
 *
 * Packages that `isPackageGloballyInstalled` reports as present are recorded
 * as skipped successes; all others are passed to `installPackageGlobally`.
 *
 * @param {Array<{config: object}>} mcps - MCP entries to scan for npx packages.
 * @param {number} [timeoutMs=120000] - Per-package install timeout in milliseconds.
 * @returns {Promise<Array<{package: string, success: boolean, skipped: boolean, output: string, durationMs: number}>>}
 *   One result per distinct package, in discovery order (empty when none found).
 */
async function preInstallMcpPackages(mcps, timeoutMs = 12e4) {
  const outcomes = [];
  for (const pkg of extractNpxPackages(mcps)) {
    const startedAt = Date.now();

    if (await isPackageGloballyInstalled(pkg)) {
      // Duration here only covers the "is it installed?" check.
      const elapsedMs = Date.now() - startedAt;
      console.log(
        `[MCP] Package already installed globally, skipping: ${pkg} (${elapsedMs}ms)`
      );
      outcomes.push({
        package: pkg,
        success: true,
        skipped: true,
        output: "already installed",
        durationMs: elapsedMs
      });
      continue;
    }

    console.log(`[MCP] Pre-installing npx package globally: ${pkg}`);
    const installResult = await installPackageGlobally(pkg, timeoutMs);
    // Duration covers both the installed-check and the install itself.
    const durationMs = Date.now() - startedAt;
    outcomes.push({ package: pkg, skipped: false, durationMs, ...installResult });
    console.log(
      `[MCP] Install ${pkg}: ${installResult.success ? "SUCCESS" : "FAILED"} (${durationMs}ms)`
    );
  }
  return outcomes;
}
|
|
6582
|
+
/**
 * Checks whether `pkg` is present in the global npm tree by running
 * `npm ls -g --depth=0 <pkg>`.
 *
 * Never rejects on process failure: a spawn error, a non-zero exit code, or
 * exceeding the 5 second budget all resolve to `false`.
 *
 * @param {string} pkg - Package spec to look up.
 * @returns {Promise<boolean>} `true` only when the npm process exits with code 0.
 */
function isPackageGloballyInstalled(pkg) {
  return new Promise((resolve2) => {
    let settled = false;
    let timer;
    // Single exit point: guarantees one resolution and always releases the
    // timer so it cannot keep the event loop alive after the child exits.
    const settle = (installed) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(installed);
    };
    const child = (0, import_child_process.spawn)("npm", ["ls", "-g", "--depth=0", pkg], {
      // Only the exit code is used; ignoring stdio avoids unread pipes that
      // could fill up and stall the child.
      stdio: "ignore",
      env: process.env
    });
    child.on("close", (code2) => settle(code2 === 0));
    child.on("error", () => settle(false));
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle(false);
    }, 5e3);
  });
}
|
|
6610
|
+
/**
 * Collects the distinct package specs launched through `npx` across all MCP
 * configs.
 *
 * Each MCP's `config` is scanned for object-valued entries whose `command` is
 * exactly `"npx"` and whose `args` is an array; the first non-empty string
 * argument that does not start with `-` is taken as the package.
 *
 * Entries with a missing/non-object `config`, and arguments that are not
 * strings, are skipped instead of throwing.
 *
 * @param {Array<{config?: object}>} mcps - MCP entries to scan.
 * @returns {string[]} De-duplicated package specs in first-seen order.
 */
function extractNpxPackages(mcps) {
  const packages = /* @__PURE__ */ new Set();
  for (const mcp of mcps) {
    const config = mcp?.config;
    if (typeof config !== "object" || config === null) continue;
    for (const cfg of Object.values(config)) {
      if (typeof cfg !== "object" || cfg === null) continue;
      if (cfg.command !== "npx" || !Array.isArray(cfg.args)) continue;
      // First positional (non-flag) argument is the package npx would run.
      const pkg = cfg.args.find(
        (arg) => typeof arg === "string" && arg !== "" && !arg.startsWith("-")
      );
      if (pkg !== void 0) packages.add(pkg);
    }
  }
  return [...packages];
}
|
|
6628
|
+
/**
 * Runs `npm install -g <pkg>` and reports the outcome without ever rejecting
 * on process failure.
 *
 * npm is spawned directly with an argument vector (no shell), so a package
 * spec taken from MCP configuration cannot inject shell commands, and the
 * timeout signal reaches npm itself rather than a wrapping `sh`.
 *
 * @param {string} pkg - Package spec to install globally.
 * @param {number} timeoutMs - Maximum time to wait before killing the install.
 * @returns {Promise<{success: boolean, output: string}>} `success` is true only
 *   for exit code 0; `output` holds the last 1000 characters of combined
 *   stdout/stderr (plus a marker line on timeout, or the spawn error message).
 */
function installPackageGlobally(pkg, timeoutMs) {
  return new Promise((resolve2) => {
    let output = "";
    let settled = false;
    let timer;
    // Single exit point: resolves once and always clears the timeout so a
    // finished install does not leave a long timer holding the process open.
    const settle = (result) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve2(result);
    };
    const append = (chunk) => {
      output += chunk.toString();
    };
    const child = (0, import_child_process.spawn)("npm", ["install", "-g", pkg], {
      // stdin is not needed; stdout and stderr are both captured into `output`.
      stdio: ["ignore", "pipe", "pipe"],
      env: process.env
    });
    child.stdout.on("data", append);
    child.stderr.on("data", append);
    child.on("close", (code2) => {
      settle({ success: code2 === 0, output: output.slice(-1e3) });
    });
    child.on("error", (err) => {
      settle({ success: false, output: `spawn error: ${err.message}` });
    });
    timer = setTimeout(() => {
      if (settled) return;
      child.kill("SIGTERM");
      settle({
        success: false,
        output: output.slice(-1e3) + "\n[install timed out]"
      });
    }, timeoutMs);
  });
}
|
|
6547
6666
|
async function probeNpmConfig(cwd) {
|
|
6548
6667
|
const homedir = process.env.HOME ?? "~";
|
|
6549
6668
|
const [wixAuthFile, npmInstallDryRun, mcpPackageInstalled] = await Promise.all([
|
|
6550
|
-
(0,
|
|
6669
|
+
(0, import_promises5.readFile)((0, import_path7.join)(homedir, ".wix", "auth", "api-key.json"), "utf8").catch(
|
|
6551
6670
|
(e) => `[not found: ${e.message}]`
|
|
6552
6671
|
),
|
|
6553
6672
|
runShellCapture(
|
|
@@ -6630,8 +6749,8 @@ function runShellCapture(cmd, cwd, timeoutMs) {
|
|
|
6630
6749
|
}
|
|
6631
6750
|
|
|
6632
6751
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6633
|
-
var
|
|
6634
|
-
var
|
|
6752
|
+
var import_promises6 = require("fs/promises");
|
|
6753
|
+
var import_path8 = require("path");
|
|
6635
6754
|
var AGENTS_DIR = ".claude/agents";
|
|
6636
6755
|
function toAgentFilename(name2, index, nameCount) {
|
|
6637
6756
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6641,13 +6760,13 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6641
6760
|
}
|
|
6642
6761
|
/**
 * Writes each sub-agent's markdown definition into `<cwd>/.claude/agents`
 * (the `AGENTS_DIR` constant), creating the directory if needed.
 *
 * Files are written sequentially in input order; the file name for each agent
 * comes from `toAgentFilename`, which receives a shared name-count map.
 * Does nothing when `subAgents` is empty.
 *
 * @param {string} cwd - Working directory under which the agents folder lives.
 * @param {Array<{name: string, subAgentMd: string}>} subAgents - Agents to write.
 * @returns {Promise<void>}
 */
async function writeSubAgentsToFilesystem(cwd, subAgents) {
  if (subAgents.length === 0) {
    return;
  }

  const agentsDir = (0, import_path8.join)(cwd, AGENTS_DIR);
  await (0, import_promises6.mkdir)(agentsDir, { recursive: true });

  const nameCount = /* @__PURE__ */ new Map();
  let index = 0;
  for (const agent of subAgents) {
    const stem = toAgentFilename(agent.name, index, nameCount);
    const target = (0, import_path8.join)(agentsDir, `${stem}.md`);
    await (0, import_promises6.writeFile)(target, agent.subAgentMd, "utf8");
    index += 1;
  }

  console.log(`[SubAgents] Written to ${agentsDir}`);
}
|
|
@@ -6882,6 +7001,29 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6882
7001
|
const allMessages = [];
|
|
6883
7002
|
if (options.mcps && options.mcps.length > 0) {
|
|
6884
7003
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
7004
|
+
const installResults = await preInstallMcpPackages(options.mcps);
|
|
7005
|
+
if (installResults.length > 0 && options.traceContext) {
|
|
7006
|
+
emitTraceEvent(
|
|
7007
|
+
{
|
|
7008
|
+
evalRunId: options.traceContext.evalRunId,
|
|
7009
|
+
scenarioId: options.traceContext.scenarioId,
|
|
7010
|
+
scenarioName: options.traceContext.scenarioName,
|
|
7011
|
+
targetId: options.traceContext.targetId,
|
|
7012
|
+
targetName: options.traceContext.targetName,
|
|
7013
|
+
stepNumber: 0,
|
|
7014
|
+
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
7015
|
+
outputPreview: JSON.stringify({
|
|
7016
|
+
event: "mcp-pre-install",
|
|
7017
|
+
results: installResults
|
|
7018
|
+
}).slice(0, 2e3),
|
|
7019
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7020
|
+
isComplete: false
|
|
7021
|
+
},
|
|
7022
|
+
options.traceContext.tracePushUrl,
|
|
7023
|
+
options.traceContext.routeHeader,
|
|
7024
|
+
options.traceContext.authToken
|
|
7025
|
+
);
|
|
7026
|
+
}
|
|
6885
7027
|
const probeResults = await probeMcpServers(options.mcps);
|
|
6886
7028
|
if (options.traceContext) {
|
|
6887
7029
|
emitTraceEvent(
|
|
@@ -7752,7 +7894,7 @@ defaultRegistry.register(claudeCodeAdapter);
|
|
|
7752
7894
|
|
|
7753
7895
|
// src/run-scenario/file-diff.ts
|
|
7754
7896
|
var import_fs6 = require("fs");
|
|
7755
|
-
var
|
|
7897
|
+
var import_path9 = require("path");
|
|
7756
7898
|
|
|
7757
7899
|
// ../../node_modules/diff/lib/index.mjs
|
|
7758
7900
|
function Diff() {
|
|
@@ -7928,7 +8070,7 @@ Diff.prototype = {
|
|
|
7928
8070
|
tokenize: function tokenize(value) {
|
|
7929
8071
|
return Array.from(value);
|
|
7930
8072
|
},
|
|
7931
|
-
join: function
|
|
8073
|
+
join: function join6(chars) {
|
|
7932
8074
|
return chars.join("");
|
|
7933
8075
|
},
|
|
7934
8076
|
postProcess: function postProcess(changeObjects) {
|
|
@@ -8368,8 +8510,8 @@ function snapshotDirectory(dir, baseDir) {
|
|
|
8368
8510
|
}
|
|
8369
8511
|
const entries = (0, import_fs6.readdirSync)(dir, { withFileTypes: true });
|
|
8370
8512
|
for (const entry of entries) {
|
|
8371
|
-
const fullPath = (0,
|
|
8372
|
-
const relativePath = (0,
|
|
8513
|
+
const fullPath = (0, import_path9.join)(dir, entry.name);
|
|
8514
|
+
const relativePath = (0, import_path9.relative)(base, fullPath);
|
|
8373
8515
|
if (shouldIgnore(entry.name)) {
|
|
8374
8516
|
continue;
|
|
8375
8517
|
}
|