@wix/evalforge-evaluator 0.63.0 → 0.65.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +182 -0
- package/build/index.js.map +3 -3
- package/build/index.mjs +183 -1
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +34 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6451,6 +6451,7 @@ var import_crypto = require("crypto");
|
|
|
6451
6451
|
|
|
6452
6452
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
6453
6453
|
var import_promises4 = require("fs/promises");
|
|
6454
|
+
var import_child_process = require("child_process");
|
|
6454
6455
|
var import_path6 = require("path");
|
|
6455
6456
|
var import_evalforge_types2 = require("@wix/evalforge-types");
|
|
6456
6457
|
async function writeMcpToFilesystem(cwd, mcps) {
|
|
@@ -6476,6 +6477,137 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6476
6477
|
await (0, import_promises4.writeFile)(filePath, content, "utf8");
|
|
6477
6478
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6478
6479
|
}
|
|
6480
|
+
/**
 * Briefly start every command-based MCP server described in `mcps` and
 * collect one probe result per server.
 *
 * Each element of `mcps` is read through its `config` property, whose entries
 * map a server name to a settings object. Only settings with a string
 * `command` are probed; entries that are not objects are ignored.
 *
 * Servers are probed one after another (not in parallel), so the total time
 * is bounded by `probeMs` multiplied by the number of probed servers.
 *
 * NOTE(review): only `command` and `args` are forwarded to the probe; any
 * per-server `env` in the settings is not applied — confirm whether the probe
 * should mirror it.
 *
 * @param {Array<{config?: object}>} mcps - MCP definitions to inspect.
 * @param {number} [probeMs=5000] - Per-server time budget in milliseconds.
 * @returns {Promise<Array<object>>} Results in discovery order.
 */
async function probeMcpServers(mcps, probeMs = 5e3) {
  const results = [];
  for (const mcp of mcps ?? []) {
    const config = mcp?.config;
    // A definition without a config object has nothing to probe; skipping it
    // keeps this best-effort diagnostic from throwing in Object.entries().
    if (typeof config !== "object" || config === null) continue;
    for (const [name2, value] of Object.entries(config)) {
      if (typeof value !== "object" || value === null) continue;
      const command = value.command;
      // A server may legitimately declare a command with no arguments.
      const args = value.args ?? [];
      if (typeof command !== "string" || !Array.isArray(args)) continue;
      results.push(await probeOneServer(name2, command, args, probeMs));
    }
  }
  return results;
}
|
|
6501
|
+
/**
 * Spawn a single server command, watch it for at most `probeMs` milliseconds
 * and report how it behaved.
 *
 * The returned promise never rejects. It resolves exactly once with:
 *   - `exitCode` / `signal` from the child's "close" event, or
 *   - `exitCode: null, signal: null` when the process could not be spawned
 *     (the reason is appended to `stderr` as "spawn error: ..."), or
 *   - `exitCode: null, signal: "PROBE_TIMEOUT"` when the child was still
 *     running after `probeMs`; it is then sent SIGTERM.
 *
 * @param {string} name2 - Server name, echoed back as `name`.
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {number} probeMs - Time budget in milliseconds.
 * @returns {Promise<{name: string, command: string, args: string[],
 *   exitCode: number|null, signal: string|null, stdout: string,
 *   stderr: string, durationMs: number}>} `stdout`/`stderr` hold at most the
 *   last 2000 characters captured.
 */
function probeOneServer(name2, command, args, probeMs) {
  return new Promise((resolve2) => {
    const TAIL_CHARS = 2e3;
    const startMs = Date.now();
    let stdout = "";
    let stderr = "";
    let settled = false;
    let timer;
    const finish = (exitCode, signal) => {
      if (settled) return;
      settled = true;
      // Without this the pending timer keeps the event loop alive for the
      // rest of the probe window even though the child is already gone.
      clearTimeout(timer);
      resolve2({
        name: name2,
        command,
        args,
        exitCode,
        signal,
        stdout: stdout.slice(-TAIL_CHARS),
        stderr: stderr.slice(-TAIL_CHARS),
        durationMs: Date.now() - startMs
      });
    };
    let child;
    try {
      child = (0, import_child_process.spawn)(command, args, {
        stdio: ["pipe", "pipe", "pipe"],
        env: process.env
      });
    } catch (err) {
      // spawn() validates its arguments synchronously (e.g. an empty command
      // string); report that like any other spawn failure instead of
      // rejecting.
      stderr += `
spawn error: ${err.message}`;
      finish(null, null);
      return;
    }
    // Only the tail is ever reported, so keep just the tail while buffering.
    child.stdout.on("data", (chunk) => {
      stdout = (stdout + chunk.toString()).slice(-TAIL_CHARS);
    });
    child.stderr.on("data", (chunk) => {
      stderr = (stderr + chunk.toString()).slice(-TAIL_CHARS);
    });
    child.on("error", (err) => {
      stderr += `
spawn error: ${err.message}`;
      finish(null, null);
    });
    child.on("close", (code2, sig) => {
      finish(code2, sig);
    });
    timer = setTimeout(() => {
      if (!settled) {
        child.kill("SIGTERM");
        finish(null, "PROBE_TIMEOUT");
      }
    }, probeMs);
  });
}
|
|
6547
|
+
/**
 * Gather a snapshot of the npm configuration visible from `cwd`, for
 * diagnostics.
 *
 * Reads `<cwd>/.npmrc` and `<home>/.npmrc`, runs `npm config list` and
 * `npm config get registry` in `cwd` (3 s budget each), and collects every
 * environment variable whose name starts with "npm_config"
 * (case-insensitive). A file that cannot be read is reported as the string
 * "[not found: <reason>]" rather than failing the whole probe.
 *
 * Because the result is meant to be logged, values of credential-looking
 * settings (key contains `_auth`, `token`, `password` or `secret`) are
 * replaced with "[redacted]" before the text is truncated.
 *
 * @param {string} cwd - Directory whose npm configuration is inspected.
 * @returns {Promise<{cwd: string, cwdNpmrc: string, homeNpmrc: string,
 *   npmConfigList: string, npmConfigGetRegistry: string,
 *   npmEnvVars: Object<string, string>, homedir: string}>}
 */
async function probeNpmConfig(cwd) {
  const SECRET_KEY = /_auth|token|password|secret/i;
  const REDACTED = "[redacted]";
  // Mask the value of every `key = value` line whose key looks sensitive.
  const redactLines = (text) => text.split("\n").map((line) => {
    const eq = line.indexOf("=");
    if (eq === -1) return line;
    const key = line.slice(0, eq);
    return SECRET_KEY.test(key) ? `${key}=${REDACTED}` : line;
  }).join("\n");
  // Read an .npmrc, redacting on success and describing the failure otherwise.
  const readNpmrc = (dir) => (0, import_promises4.readFile)((0, import_path6.join)(dir, ".npmrc"), "utf8").then(
    redactLines,
    (e) => `[not found: ${e.message}]`
  );
  // "~" is not expanded by the fs APIs; when HOME is unset the home lookup
  // resolves relative to the process cwd and normally yields "[not found: ...]".
  const homedir = process.env.HOME ?? "~";
  const [cwdNpmrc, homeNpmrc, npmConfigList, npmConfigGetRegistry] = await Promise.all([
    readNpmrc(cwd),
    readNpmrc(homedir),
    runShellCapture("npm config list", cwd, 3e3),
    runShellCapture("npm config get registry", cwd, 3e3)
  ]);
  const npmEnvVars = {};
  for (const [key, value] of Object.entries(process.env)) {
    if (key.toLowerCase().startsWith("npm_config")) {
      npmEnvVars[key] = SECRET_KEY.test(key) ? REDACTED : value;
    }
  }
  return {
    cwd,
    cwdNpmrc: cwdNpmrc.slice(0, 1e3),
    homeNpmrc: homeNpmrc.slice(0, 1e3),
    npmConfigList: redactLines(npmConfigList).slice(0, 2e3),
    npmConfigGetRegistry: npmConfigGetRegistry.trim(),
    npmEnvVars,
    homedir
  };
}
|
|
6575
|
+
/**
 * Run `cmd` through `sh -c` in `cwd` and resolve with everything it wrote to
 * stdout and stderr, merged into one string in arrival order.
 *
 * The promise never rejects:
 *   - if the process cannot be started it resolves with "[error: <message>]";
 *   - if it is still running after `timeoutMs` it is sent SIGTERM and the
 *     output captured so far is returned with "\n[timed out]" appended.
 *
 * `cmd` is interpreted by the shell, so it must come from trusted code, never
 * from external input.
 *
 * @param {string} cmd - Shell command line.
 * @param {string} cwd - Working directory for the command.
 * @param {number} timeoutMs - Time budget in milliseconds.
 * @returns {Promise<string>} Captured output or a bracketed failure note.
 */
function runShellCapture(cmd, cwd, timeoutMs) {
  return new Promise((resolve2) => {
    let output = "";
    let settled = false;
    let timer;
    const settle = (value) => {
      if (settled) return;
      settled = true;
      // Release the timer so it does not keep the event loop alive once the
      // command has already finished or failed.
      clearTimeout(timer);
      resolve2(value);
    };
    let child;
    try {
      child = (0, import_child_process.spawn)("sh", ["-c", cmd], {
        stdio: ["pipe", "pipe", "pipe"],
        cwd,
        env: process.env
      });
    } catch (err) {
      // spawn() can throw synchronously on invalid options (e.g. a
      // non-string cwd); report it the same way as an async spawn failure.
      settle(`[error: ${err.message}]`);
      return;
    }
    child.stdout.on("data", (chunk) => {
      output += chunk.toString();
    });
    child.stderr.on("data", (chunk) => {
      output += chunk.toString();
    });
    child.on("close", () => {
      settle(output);
    });
    child.on("error", (err) => {
      settle(`[error: ${err.message}]`);
    });
    timer = setTimeout(() => {
      if (!settled) {
        child.kill("SIGTERM");
        settle(output + "\n[timed out]");
      }
    }, timeoutMs);
  });
}
|
|
6479
6611
|
|
|
6480
6612
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6481
6613
|
var import_promises5 = require("fs/promises");
|
|
@@ -6730,6 +6862,56 @@ async function executeWithClaudeCode(skills, scenario, options) {
|
|
|
6730
6862
|
const allMessages = [];
|
|
6731
6863
|
if (options.mcps && options.mcps.length > 0) {
|
|
6732
6864
|
await writeMcpToFilesystem(options.cwd, options.mcps);
|
|
6865
|
+
const probeResults = await probeMcpServers(options.mcps);
|
|
6866
|
+
if (options.traceContext) {
|
|
6867
|
+
emitTraceEvent(
|
|
6868
|
+
{
|
|
6869
|
+
evalRunId: options.traceContext.evalRunId,
|
|
6870
|
+
scenarioId: options.traceContext.scenarioId,
|
|
6871
|
+
scenarioName: options.traceContext.scenarioName,
|
|
6872
|
+
targetId: options.traceContext.targetId,
|
|
6873
|
+
targetName: options.traceContext.targetName,
|
|
6874
|
+
stepNumber: 0,
|
|
6875
|
+
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
6876
|
+
outputPreview: JSON.stringify({
|
|
6877
|
+
event: "mcp-probe",
|
|
6878
|
+
results: probeResults
|
|
6879
|
+
}).slice(0, 2e3),
|
|
6880
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6881
|
+
isComplete: false
|
|
6882
|
+
},
|
|
6883
|
+
options.traceContext.tracePushUrl,
|
|
6884
|
+
options.traceContext.routeHeader,
|
|
6885
|
+
options.traceContext.authToken
|
|
6886
|
+
);
|
|
6887
|
+
}
|
|
6888
|
+
const npmDiag = await probeNpmConfig(options.cwd);
|
|
6889
|
+
console.log(
|
|
6890
|
+
"[DEBUG-d744ca] npm-config-diagnostic",
|
|
6891
|
+
JSON.stringify(npmDiag)
|
|
6892
|
+
);
|
|
6893
|
+
if (options.traceContext) {
|
|
6894
|
+
emitTraceEvent(
|
|
6895
|
+
{
|
|
6896
|
+
evalRunId: options.traceContext.evalRunId,
|
|
6897
|
+
scenarioId: options.traceContext.scenarioId,
|
|
6898
|
+
scenarioName: options.traceContext.scenarioName,
|
|
6899
|
+
targetId: options.traceContext.targetId,
|
|
6900
|
+
targetName: options.traceContext.targetName,
|
|
6901
|
+
stepNumber: 0,
|
|
6902
|
+
type: import_evalforge_types3.LiveTraceEventType.DIAGNOSTIC,
|
|
6903
|
+
outputPreview: JSON.stringify({
|
|
6904
|
+
event: "npm-config-diagnostic",
|
|
6905
|
+
...npmDiag
|
|
6906
|
+
}).slice(0, 4e3),
|
|
6907
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6908
|
+
isComplete: false
|
|
6909
|
+
},
|
|
6910
|
+
options.traceContext.tracePushUrl,
|
|
6911
|
+
options.traceContext.routeHeader,
|
|
6912
|
+
options.traceContext.authToken
|
|
6913
|
+
);
|
|
6914
|
+
}
|
|
6733
6915
|
}
|
|
6734
6916
|
if (options.subAgents && options.subAgents.length > 0) {
|
|
6735
6917
|
await writeSubAgentsToFilesystem(options.cwd, options.subAgents);
|