@wix/evalforge-evaluator 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +96 -38
- package/build/index.js.map +2 -2
- package/build/index.mjs +96 -38
- package/build/index.mjs.map +2 -2
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -7365,6 +7365,35 @@ async function testAiGatewayApiCall(config) {
|
|
|
7365
7365
|
async function testClaudeDirectExecution(config) {
|
|
7366
7366
|
const start = Date.now();
|
|
7367
7367
|
const details = {};
|
|
7368
|
+
const commandResults = [];
|
|
7369
|
+
// runAndLog: runs one shell command through execCommand, prints a
// [CLAUDE-DIAG] report for it via console.error, appends a truncated record
// to commandResults, and returns the execCommand result unchanged.
//   name2     - label used in the banner lines and in the stored record
//   command   - command string handed to execCommand
//   timeoutMs - second argument to execCommand (defaults to 3e4; presumably
//               a timeout in milliseconds, confirm against execCommand)
// The result is expected to expose exitCode, stdout and stderr, since all
// three are read below.
// NOTE(review): the command is logged verbatim up to 500 chars and stored up
// to 300 chars; callers in this function prepend env exports that embed the
// gateway header values, so confirm nothing sensitive reaches the logs.
const runAndLog = async (name2, command, timeoutMs = 3e4) => {
|
|
7370
|
+
console.error(`
|
|
7371
|
+
[CLAUDE-DIAG] ========== ${name2} ==========`);
|
|
7372
|
+
// Cap the logged command at 500 characters and mark the cut with an ellipsis.
const cmdPreview = command.length > 500 ? command.slice(0, 500) + "..." : command;
|
|
7373
|
+
console.error(`[CLAUDE-DIAG] Command: ${cmdPreview}`);
|
|
7374
|
+
const cmdStart = Date.now();
|
|
7375
|
+
const result = await execCommand(command, timeoutMs);
|
|
7376
|
+
// Wall-clock time spent awaiting execCommand for this command only.
const cmdDuration = Date.now() - cmdStart;
|
|
7377
|
+
console.error(`[CLAUDE-DIAG] Exit code: ${result.exitCode}`);
|
|
7378
|
+
console.error(`[CLAUDE-DIAG] Duration: ${cmdDuration}ms`);
|
|
7379
|
+
console.error(`[CLAUDE-DIAG] Stdout (${result.stdout.length} chars):`);
|
|
7380
|
+
// Full stdout is printed here; a placeholder is printed when it is falsy.
console.error(result.stdout || "(empty)");
|
|
7381
|
+
// The stderr section is emitted only when stderr is truthy.
if (result.stderr) {
|
|
7382
|
+
console.error(`[CLAUDE-DIAG] Stderr (${result.stderr.length} chars):`);
|
|
7383
|
+
console.error(result.stderr);
|
|
7384
|
+
}
|
|
7385
|
+
console.error(`[CLAUDE-DIAG] ========== END ${name2} ==========
|
|
7386
|
+
`);
|
|
7387
|
+
// Stored record is truncated: command to 300, stdout to 1500, stderr to 500 chars.
commandResults.push({
|
|
7388
|
+
name: name2,
|
|
7389
|
+
command: command.slice(0, 300),
|
|
7390
|
+
exitCode: result.exitCode,
|
|
7391
|
+
stdout: result.stdout.slice(0, 1500),
|
|
7392
|
+
stderr: result.stderr.slice(0, 500),
|
|
7393
|
+
durationMs: cmdDuration
|
|
7394
|
+
});
|
|
7395
|
+
// Callers receive the untruncated execCommand result.
return result;
|
|
7396
|
+
};
|
|
7368
7397
|
const npmRootResult = await execCommand("npm root -g");
|
|
7369
7398
|
const npmRoot = npmRootResult.stdout;
|
|
7370
7399
|
const claudePath = path9.join(
|
|
@@ -7378,6 +7407,7 @@ async function testClaudeDirectExecution(config) {
|
|
|
7378
7407
|
details.claudePath = claudePath;
|
|
7379
7408
|
details.claudeExists = fs11.existsSync(claudePath);
|
|
7380
7409
|
if (!details.claudeExists) {
|
|
7410
|
+
details.commandResults = commandResults;
|
|
7381
7411
|
return {
|
|
7382
7412
|
name: "claude-direct-execution",
|
|
7383
7413
|
passed: false,
|
|
@@ -7390,34 +7420,51 @@ async function testClaudeDirectExecution(config) {
|
|
|
7390
7420
|
const headers = config.aiGatewayHeaders;
|
|
7391
7421
|
details.gatewayUrl = gatewayUrl;
|
|
7392
7422
|
details.hasHeaders = !!headers;
|
|
7393
|
-
const
|
|
7394
|
-
const
|
|
7395
|
-
`ANTHROPIC_API_KEY=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
|
|
7396
|
-
`ANTHROPIC_AUTH_TOKEN=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
|
|
7397
|
-
`ANTHROPIC_BASE_URL
|
|
7398
|
-
`ANTHROPIC_CUSTOM_HEADERS="$
|
|
7399
|
-
|
|
7400
|
-
`
|
|
7423
|
+
const headerLinesEscaped = headers ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\\n") : "";
|
|
7424
|
+
const envExportParts = [
|
|
7425
|
+
`export ANTHROPIC_API_KEY="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
|
|
7426
|
+
`export ANTHROPIC_AUTH_TOKEN="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
|
|
7427
|
+
`export ANTHROPIC_BASE_URL="${gatewayUrl || ""}"`,
|
|
7428
|
+
`export ANTHROPIC_CUSTOM_HEADERS="$(printf '${headerLinesEscaped}')"`,
|
|
7429
|
+
// Use printf!
|
|
7430
|
+
`export HOME="${process.env.HOME || "/tmp"}"`,
|
|
7431
|
+
`export PATH="${process.env.PATH || ""}"`
|
|
7401
7432
|
];
|
|
7402
|
-
const envExports =
|
|
7403
|
-
|
|
7404
|
-
|
|
7405
|
-
|
|
7406
|
-
|
|
7407
|
-
|
|
7408
|
-
|
|
7409
|
-
|
|
7410
|
-
|
|
7411
|
-
|
|
7412
|
-
|
|
7413
|
-
|
|
7414
|
-
|
|
7415
|
-
|
|
7416
|
-
|
|
7417
|
-
|
|
7418
|
-
|
|
7419
|
-
|
|
7420
|
-
|
|
7433
|
+
const envExports = envExportParts.join(" && ");
|
|
7434
|
+
details.envExportsPreview = envExportParts.map(
|
|
7435
|
+
(e) => e.includes("SECRET") || e.includes("secret") ? e.replace(/:.+/, ":[REDACTED]") : e
|
|
7436
|
+
).join("\n");
|
|
7437
|
+
const versionResult = await runAndLog(
|
|
7438
|
+
"claude --version",
|
|
7439
|
+
`${envExports} && "${claudePath}" --version 2>&1`,
|
|
7440
|
+
15e3
|
|
7441
|
+
);
|
|
7442
|
+
const fullCmdResult = await runAndLog(
|
|
7443
|
+
"claude -p (full SDK style)",
|
|
7444
|
+
`${envExports} && "${claudePath}" -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`,
|
|
7445
|
+
45e3
|
|
7446
|
+
);
|
|
7447
|
+
const simpleCmdResult = await runAndLog(
|
|
7448
|
+
"claude -p (simple)",
|
|
7449
|
+
`${envExports} && "${claudePath}" -p "Hello" 2>&1`,
|
|
7450
|
+
45e3
|
|
7451
|
+
);
|
|
7452
|
+
const printFlagResult = await runAndLog(
|
|
7453
|
+
"claude --print (long flag)",
|
|
7454
|
+
`${envExports} && "${claudePath}" --print "Hi" 2>&1`,
|
|
7455
|
+
45e3
|
|
7456
|
+
);
|
|
7457
|
+
const positionalResult = await runAndLog(
|
|
7458
|
+
'claude "prompt" (positional)',
|
|
7459
|
+
`${envExports} && "${claudePath}" "Hello world" 2>&1`,
|
|
7460
|
+
45e3
|
|
7461
|
+
);
|
|
7462
|
+
await runAndLog("claude --help", `"${claudePath}" --help 2>&1`, 15e3);
|
|
7463
|
+
await runAndLog(
|
|
7464
|
+
"claude --version (no custom env)",
|
|
7465
|
+
`"${claudePath}" --version 2>&1`,
|
|
7466
|
+
15e3
|
|
7467
|
+
);
|
|
7421
7468
|
const homeDir = process.env.HOME || "/tmp";
|
|
7422
7469
|
const claudeConfigDir = path9.join(homeDir, ".claude");
|
|
7423
7470
|
details.claudeConfigDir = claudeConfigDir;
|
|
@@ -7426,24 +7473,32 @@ async function testClaudeDirectExecution(config) {
|
|
|
7426
7473
|
try {
|
|
7427
7474
|
const configContents = fs11.readdirSync(claudeConfigDir);
|
|
7428
7475
|
details.claudeConfigContents = configContents;
|
|
7476
|
+
for (const file of configContents) {
|
|
7477
|
+
if (file.includes("log") || file.includes("error")) {
|
|
7478
|
+
const logPath = path9.join(claudeConfigDir, file);
|
|
7479
|
+
const catCmd = `cat "${logPath}" 2>&1 | tail -50`;
|
|
7480
|
+
const logContent = await execCommand(catCmd);
|
|
7481
|
+
details[`claudeLogFile_${file}`] = logContent.stdout.slice(0, 1e3);
|
|
7482
|
+
}
|
|
7483
|
+
}
|
|
7429
7484
|
} catch (e) {
|
|
7430
7485
|
details.claudeConfigError = e instanceof Error ? e.message : String(e);
|
|
7431
7486
|
}
|
|
7432
7487
|
}
|
|
7433
|
-
|
|
7434
|
-
const
|
|
7435
|
-
const
|
|
7436
|
-
|
|
7437
|
-
|
|
7438
|
-
|
|
7439
|
-
|
|
7440
|
-
|
|
7441
|
-
|
|
7488
|
+
details.commandResults = commandResults;
|
|
7489
|
+
const anyPromptWorked = fullCmdResult.exitCode === 0 || simpleCmdResult.exitCode === 0 || printFlagResult.exitCode === 0 || positionalResult.exitCode === 0;
|
|
7490
|
+
const versionWorked = versionResult.exitCode === 0;
|
|
7491
|
+
const passed = anyPromptWorked;
|
|
7492
|
+
let errorMsg;
|
|
7493
|
+
if (!passed) {
|
|
7494
|
+
const failedCmds = commandResults.filter((r) => r.exitCode !== 0).map((r) => `${r.name}: exit=${r.exitCode}`).join(", ");
|
|
7495
|
+
errorMsg = `All Claude CLI commands failed. ${failedCmds}. Version works: ${versionWorked}`;
|
|
7496
|
+
}
|
|
7442
7497
|
return {
|
|
7443
7498
|
name: "claude-direct-execution",
|
|
7444
7499
|
passed,
|
|
7445
7500
|
details,
|
|
7446
|
-
error:
|
|
7501
|
+
error: errorMsg,
|
|
7447
7502
|
durationMs: Date.now() - start
|
|
7448
7503
|
};
|
|
7449
7504
|
}
|
|
@@ -7653,7 +7708,10 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7653
7708
|
await runTest("file-system-structure", testFileSystemStructure);
|
|
7654
7709
|
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7655
7710
|
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7656
|
-
await runTest(
|
|
7711
|
+
await runTest(
|
|
7712
|
+
"claude-direct-execution",
|
|
7713
|
+
() => testClaudeDirectExecution(config)
|
|
7714
|
+
);
|
|
7657
7715
|
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7658
7716
|
await runTest("sdk-import", testSdkImport);
|
|
7659
7717
|
await runTest("file-system-write", testFileSystemWrite);
|