@wix/evalforge-evaluator 0.18.0 → 0.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6103,7 +6103,7 @@ var import_evalforge_types = require("@wix/evalforge-types");
6103
6103
  var import_crypto = require("crypto");
6104
6104
  var import_promises3 = require("fs/promises");
6105
6105
  var import_path5 = require("path");
6106
- var DEFAULT_MODEL = "claude-sonnet-4-20250514";
6106
+ var DEFAULT_MODEL = "claude-3-5-sonnet-latest";
6107
6107
  function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
6108
6108
  console.log(`${import_evalforge_types.TRACE_EVENT_PREFIX}${JSON.stringify(event)}`);
6109
6109
  if (tracePushUrl) {
@@ -7362,6 +7362,91 @@ async function testAiGatewayApiCall(config) {
7362
7362
  durationMs: Date.now() - start
7363
7363
  };
7364
7364
  }
7365
/**
 * Diagnostic test: runs the Claude CLI binary directly through the shell with
 * the AI-gateway environment variables exported in front of each command.
 *
 * Steps performed, in order:
 *   1. Resolve the global npm root (`npm root -g`) and derive the path of the
 *      `claude` binary under `@wix/evalforge-evaluator/node_modules/.bin`.
 *   2. Run `claude --version`.
 *   3. Run `claude -p "Say hello" --output-format json --dangerously-skip-permissions`.
 *   4. List the contents of `$HOME/.claude` when that directory exists.
 *   5. Run `claude -p "hi" --verbose`.
 *
 * Only step 3 decides the verdict: the test passes when that command exits
 * with code 0. Everything else is recorded in `details` for inspection.
 *
 * Relies on `execCommand`, `path9` and `fs11` from the enclosing file.
 * NOTE(review): `execCommand(cmd, timeoutMs)` is assumed to run `cmd` through
 * a POSIX shell and resolve to `{ stdout, stderr, exitCode }` - confirm
 * against its definition.
 *
 * @param {object} config - Diagnostics configuration.
 * @param {string} [config.aiGatewayUrl] - Exported as `ANTHROPIC_BASE_URL`.
 * @param {Object<string, string>} [config.aiGatewayHeaders] - Serialized as
 *   newline-separated `key:value` lines into `ANTHROPIC_CUSTOM_HEADERS`.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testClaudeDirectExecution(config) {
  const TEST_NAME = "claude-direct-execution";
  // Placeholder credential; the original code comments nothing about it, but
  // its text indicates that real authentication happens via the gateway.
  const PLACEHOLDER_KEY = "sk-ant-api03-placeholder-auth-via-gateway-000000000000";
  const start = Date.now();
  const details = {};

  // Coerce possibly-missing command output to a string before trimming/slicing.
  const asText = (value) => (value == null ? "" : String(value));
  const clip = (value, max) => asText(value).slice(0, max);
  // POSIX single-quote escaping: nothing is expanded inside '...', and an
  // embedded quote is written as '\'' (close, escaped quote, reopen).
  const shellQuote = (value) => `'${asText(value).replace(/'/g, "'\\''")}'`;

  const npmRootResult = await execCommand("npm root -g");
  // `npm root -g` terminates its output with a newline; without trimming, the
  // newline would become part of the joined path and the binary would never
  // be found.
  const npmRoot = asText(npmRootResult.stdout).trim();
  if (npmRoot === "") {
    details.npmRootExitCode = npmRootResult.exitCode;
    details.npmRootStderr = clip(npmRootResult.stderr, 1e3);
    return {
      name: TEST_NAME,
      passed: false,
      details,
      error: "Could not determine global npm root (`npm root -g` produced no output)",
      durationMs: Date.now() - start
    };
  }

  const claudePath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin",
    "claude"
  );
  details.claudePath = claudePath;
  details.claudeExists = fs11.existsSync(claudePath);
  if (!details.claudeExists) {
    return {
      name: TEST_NAME,
      passed: false,
      details,
      error: `Claude binary not found at ${claudePath}`,
      durationMs: Date.now() - start
    };
  }

  const gatewayUrl = config.aiGatewayUrl;
  const headers = config.aiGatewayHeaders;
  details.gatewayUrl = gatewayUrl;
  // Only the presence of headers is recorded, never their values.
  details.hasHeaders = !!headers;
  const headerLines = headers
    ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\n")
    : "";

  const homeDir = process.env.HOME || "/tmp";
  const envPairs = [
    ["ANTHROPIC_API_KEY", PLACEHOLDER_KEY],
    ["ANTHROPIC_AUTH_TOKEN", PLACEHOLDER_KEY],
    ["ANTHROPIC_BASE_URL", gatewayUrl || ""],
    ["ANTHROPIC_CUSTOM_HEADERS", headerLines],
    ["HOME", homeDir],
    ["PATH", process.env.PATH || ""]
  ];
  // Every value is single-quoted so that URLs containing `&`, paths containing
  // spaces, and header values containing `$`, backticks or quotes reach the
  // CLI verbatim instead of being interpreted by the shell.
  const envExports = envPairs
    .map(([key, value]) => `export ${key}=${shellQuote(value)}`)
    .join(" && ");
  const claudeBin = shellQuote(claudePath);

  console.error("[DIAG] Testing claude --version with SDK environment...");
  const versionCmd = `${envExports} && ${claudeBin} --version 2>&1`;
  const versionResult = await execCommand(versionCmd, 15e3);
  details.versionTest = {
    exitCode: versionResult.exitCode,
    stdout: clip(versionResult.stdout, 1e3),
    stderr: clip(versionResult.stderr, 1e3)
  };

  console.error("[DIAG] Testing claude with simple prompt (like SDK does)...");
  const promptCmd = `${envExports} && ${claudeBin} -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`;
  const promptResult = await execCommand(promptCmd, 3e4);
  details.promptTest = {
    command: 'claude -p "Say hello" --output-format json --dangerously-skip-permissions',
    exitCode: promptResult.exitCode,
    stdout: clip(promptResult.stdout, 2e3),
    stderr: clip(promptResult.stderr, 1e3)
  };

  console.error("[DIAG] Checking for claude error logs or state...");
  const claudeConfigDir = path9.join(homeDir, ".claude");
  details.claudeConfigDir = claudeConfigDir;
  details.claudeConfigExists = fs11.existsSync(claudeConfigDir);
  if (details.claudeConfigExists) {
    try {
      details.claudeConfigContents = fs11.readdirSync(claudeConfigDir);
    } catch (e) {
      // Best effort: a listing failure is recorded, not propagated.
      details.claudeConfigError = e instanceof Error ? e.message : String(e);
    }
  }

  console.error("[DIAG] Testing claude with potential debug flags...");
  // The `|| echo` fallback means this command's exit code is normally 0 even
  // when the CLI itself fails; the marker text in stdout signals that case.
  const debugCmd = `${envExports} && ${claudeBin} -p "hi" --verbose 2>&1 || echo "VERBOSE_NOT_SUPPORTED"`;
  const debugResult = await execCommand(debugCmd, 15e3);
  details.debugTest = {
    exitCode: debugResult.exitCode,
    stdout: clip(debugResult.stdout, 1500),
    stderr: clip(debugResult.stderr, 500)
  };

  const passed = promptResult.exitCode === 0;
  return {
    name: TEST_NAME,
    passed,
    details,
    error: passed ? void 0 : `Claude CLI failed with exit code ${promptResult.exitCode}. Output: ${clip(promptResult.stdout, 300)}`,
    durationMs: Date.now() - start
  };
}
7365
7450
  async function testChildProcessSpawning() {
7366
7451
  const start = Date.now();
7367
7452
  const details = {};
@@ -7568,6 +7653,7 @@ async function runDiagnostics(config, evalRunId2) {
7568
7653
  await runTest("file-system-structure", testFileSystemStructure);
7569
7654
  await runTest("network-connectivity", () => testNetworkConnectivity(config));
7570
7655
  await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
7656
+ await runTest("claude-direct-execution", () => testClaudeDirectExecution(config));
7571
7657
  await runTest("child-process-spawning", testChildProcessSpawning);
7572
7658
  await runTest("sdk-import", testSdkImport);
7573
7659
  await runTest("file-system-write", testFileSystemWrite);