@wix/evalforge-evaluator 0.18.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +87 -1
- package/build/index.js.map +2 -2
- package/build/index.mjs +87 -1
- package/build/index.mjs.map +2 -2
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6103,7 +6103,7 @@ var import_evalforge_types = require("@wix/evalforge-types");
|
|
|
6103
6103
|
var import_crypto = require("crypto");
|
|
6104
6104
|
var import_promises3 = require("fs/promises");
|
|
6105
6105
|
var import_path5 = require("path");
|
|
6106
|
-
var DEFAULT_MODEL = "claude-sonnet-
|
|
6106
|
+
// Model identifier string; presumably the fallback used when a run does not
// specify a model — the consumers are outside this excerpt, confirm there.
var DEFAULT_MODEL = "claude-3-5-sonnet-latest";
|
|
6107
6107
|
function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
6108
6108
|
console.log(`${import_evalforge_types.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
|
|
6109
6109
|
if (tracePushUrl) {
|
|
@@ -7362,6 +7362,91 @@ async function testAiGatewayApiCall(config) {
|
|
|
7362
7362
|
durationMs: Date.now() - start
|
|
7363
7363
|
};
|
|
7364
7364
|
}
|
|
7365
|
+
/**
 * Diagnostic: invoke the bundled `claude` CLI binary directly through the
 * shell, with the same ANTHROPIC_* environment that is built from the gateway
 * configuration, and record what happens.
 *
 * Steps performed (each result is stored on `details`):
 *   1. Resolve the binary under `<npm root -g>/@wix/evalforge-evaluator/node_modules/.bin/claude`.
 *   2. Run `claude --version`.
 *   3. Run `claude -p "Say hello" --output-format json --dangerously-skip-permissions`.
 *   4. List the contents of `$HOME/.claude` when it exists.
 *   5. Run `claude -p "hi" --verbose` (falls back to echoing VERBOSE_NOT_SUPPORTED).
 *
 * The test passes only when step 3 exits with code 0.
 *
 * @param {{ aiGatewayUrl?: string, aiGatewayHeaders?: Record<string, string> }} config
 *   Gateway settings; only these two properties are read.
 * @returns {Promise<{ name: string, passed: boolean, details: object, error?: string, durationMs: number }>}
 */
async function testClaudeDirectExecution(config) {
  const start = Date.now();
  const details = {};

  // POSIX single-quoting: nothing is interpreted inside '...'; an embedded
  // single quote is emitted as '\'' (close, escaped quote, reopen). This keeps
  // spaces, `$`, backticks and backslashes in env values from being evaluated
  // by the shell that runs the commands below.
  const shellQuote = (value) => `'${String(value).replace(/'/g, `'\\''`)}'`;

  const npmRootResult = await execCommand("npm root -g");
  // Trim defensively: command output normally ends with a newline, which would
  // otherwise become part of the joined path.
  const npmRoot = String(npmRootResult.stdout ?? "").trim();
  if (!npmRoot) {
    return {
      name: "claude-direct-execution",
      passed: false,
      details,
      error: `Could not determine global npm root (exit code ${npmRootResult.exitCode})`,
      durationMs: Date.now() - start
    };
  }

  const claudePath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin",
    "claude"
  );
  details.claudePath = claudePath;
  details.claudeExists = fs11.existsSync(claudePath);
  if (!details.claudeExists) {
    return {
      name: "claude-direct-execution",
      passed: false,
      details,
      error: `Claude binary not found at ${claudePath}`,
      durationMs: Date.now() - start
    };
  }

  const gatewayUrl = config.aiGatewayUrl;
  const headers = config.aiGatewayHeaders;
  details.gatewayUrl = gatewayUrl;
  // Only the presence of headers is recorded, never their values.
  details.hasHeaders = !!headers;

  // One "key:value" pair per line.
  const headerLines = headers
    ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\n")
    : "";

  // [name, value] pairs; the API key/token are fixed placeholders, real
  // authentication is expected to happen via the gateway headers.
  const envVars = [
    ["ANTHROPIC_API_KEY", "sk-ant-api03-placeholder-auth-via-gateway-000000000000"],
    ["ANTHROPIC_AUTH_TOKEN", "sk-ant-api03-placeholder-auth-via-gateway-000000000000"],
    ["ANTHROPIC_BASE_URL", gatewayUrl || ""],
    ["ANTHROPIC_CUSTOM_HEADERS", headerLines],
    ["HOME", process.env.HOME || "/tmp"],
    ["PATH", process.env.PATH || ""]
  ];
  const envExports = envVars
    .map(([name, value]) => `export ${name}=${shellQuote(value)}`)
    .join(" && ");
  const quotedClaudePath = shellQuote(claudePath);

  console.error("[DIAG] Testing claude --version with SDK environment...");
  const versionCmd = `${envExports} && ${quotedClaudePath} --version 2>&1`;
  const versionResult = await execCommand(versionCmd, 15e3);
  details.versionTest = {
    exitCode: versionResult.exitCode,
    stdout: versionResult.stdout.slice(0, 1e3),
    stderr: versionResult.stderr.slice(0, 1e3)
  };

  console.error("[DIAG] Testing claude with simple prompt (like SDK does)...");
  const promptCmd = `${envExports} && ${quotedClaudePath} -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`;
  const promptResult = await execCommand(promptCmd, 3e4);
  details.promptTest = {
    command: 'claude -p "Say hello" --output-format json --dangerously-skip-permissions',
    exitCode: promptResult.exitCode,
    stdout: promptResult.stdout.slice(0, 2e3),
    stderr: promptResult.stderr.slice(0, 1e3)
  };

  console.error("[DIAG] Checking for claude error logs or state...");
  const homeDir = process.env.HOME || "/tmp";
  const claudeConfigDir = path9.join(homeDir, ".claude");
  details.claudeConfigDir = claudeConfigDir;
  details.claudeConfigExists = fs11.existsSync(claudeConfigDir);
  if (details.claudeConfigExists) {
    try {
      details.claudeConfigContents = fs11.readdirSync(claudeConfigDir);
    } catch (e) {
      // Best-effort listing: record the failure instead of aborting the diagnostic.
      details.claudeConfigError = e instanceof Error ? e.message : String(e);
    }
  }

  console.error("[DIAG] Testing claude with potential debug flags...");
  const debugCmd = `${envExports} && ${quotedClaudePath} -p "hi" --verbose 2>&1 || echo "VERBOSE_NOT_SUPPORTED"`;
  const debugResult = await execCommand(debugCmd, 15e3);
  details.debugTest = {
    exitCode: debugResult.exitCode,
    stdout: debugResult.stdout.slice(0, 1500),
    stderr: debugResult.stderr.slice(0, 500)
  };

  // Only the simple-prompt run decides the outcome; the other runs are informational.
  const passed = promptResult.exitCode === 0;
  return {
    name: "claude-direct-execution",
    passed,
    details,
    error: passed ? void 0 : `Claude CLI failed with exit code ${promptResult.exitCode}. Output: ${promptResult.stdout.slice(0, 300)}`,
    durationMs: Date.now() - start
  };
}
|
|
7365
7450
|
async function testChildProcessSpawning() {
|
|
7366
7451
|
const start = Date.now();
|
|
7367
7452
|
const details = {};
|
|
@@ -7568,6 +7653,7 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7568
7653
|
await runTest("file-system-structure", testFileSystemStructure);
|
|
7569
7654
|
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7570
7655
|
await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
|
|
7656
|
+
await runTest("claude-direct-execution", () => testClaudeDirectExecution(config));
|
|
7571
7657
|
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7572
7658
|
await runTest("sdk-import", testSdkImport);
|
|
7573
7659
|
await runTest("file-system-write", testFileSystemWrite);
|