@wix/evalforge-evaluator 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -7365,6 +7365,35 @@ async function testAiGatewayApiCall(config) {
7365
7365
  async function testClaudeDirectExecution(config) {
7366
7366
  const start = Date.now();
7367
7367
  const details = {};
7368
+ const commandResults = [];
7369
+ const runAndLog = async (name2, command, timeoutMs = 3e4) => {
7370
+ console.error(`
7371
+ [CLAUDE-DIAG] ========== ${name2} ==========`);
7372
+ const cmdPreview = command.length > 500 ? command.slice(0, 500) + "..." : command;
7373
+ console.error(`[CLAUDE-DIAG] Command: ${cmdPreview}`);
7374
+ const cmdStart = Date.now();
7375
+ const result = await execCommand(command, timeoutMs);
7376
+ const cmdDuration = Date.now() - cmdStart;
7377
+ console.error(`[CLAUDE-DIAG] Exit code: ${result.exitCode}`);
7378
+ console.error(`[CLAUDE-DIAG] Duration: ${cmdDuration}ms`);
7379
+ console.error(`[CLAUDE-DIAG] Stdout (${result.stdout.length} chars):`);
7380
+ console.error(result.stdout || "(empty)");
7381
+ if (result.stderr) {
7382
+ console.error(`[CLAUDE-DIAG] Stderr (${result.stderr.length} chars):`);
7383
+ console.error(result.stderr);
7384
+ }
7385
+ console.error(`[CLAUDE-DIAG] ========== END ${name2} ==========
7386
+ `);
7387
+ commandResults.push({
7388
+ name: name2,
7389
+ command: command.slice(0, 300),
7390
+ exitCode: result.exitCode,
7391
+ stdout: result.stdout.slice(0, 1500),
7392
+ stderr: result.stderr.slice(0, 500),
7393
+ durationMs: cmdDuration
7394
+ });
7395
+ return result;
7396
+ };
7368
7397
  const npmRootResult = await execCommand("npm root -g");
7369
7398
  const npmRoot = npmRootResult.stdout;
7370
7399
  const claudePath = path9.join(
@@ -7378,6 +7407,7 @@ async function testClaudeDirectExecution(config) {
7378
7407
  details.claudePath = claudePath;
7379
7408
  details.claudeExists = fs11.existsSync(claudePath);
7380
7409
  if (!details.claudeExists) {
7410
+ details.commandResults = commandResults;
7381
7411
  return {
7382
7412
  name: "claude-direct-execution",
7383
7413
  passed: false,
@@ -7390,34 +7420,51 @@ async function testClaudeDirectExecution(config) {
7390
7420
  const headers = config.aiGatewayHeaders;
7391
7421
  details.gatewayUrl = gatewayUrl;
7392
7422
  details.hasHeaders = !!headers;
7393
- const headerLines = headers ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\n") : "";
7394
- const envVars = [
7395
- `ANTHROPIC_API_KEY=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
7396
- `ANTHROPIC_AUTH_TOKEN=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
7397
- `ANTHROPIC_BASE_URL=${gatewayUrl || ""}`,
7398
- `ANTHROPIC_CUSTOM_HEADERS="${headerLines.replace(/"/g, '\\"')}"`,
7399
- `HOME=${process.env.HOME || "/tmp"}`,
7400
- `PATH=${process.env.PATH || ""}`
7423
+ const headerLinesEscaped = headers ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\\n") : "";
7424
+ const envExportParts = [
7425
+ `export ANTHROPIC_API_KEY="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
7426
+ `export ANTHROPIC_AUTH_TOKEN="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
7427
+ `export ANTHROPIC_BASE_URL="${gatewayUrl || ""}"`,
7428
+ `export ANTHROPIC_CUSTOM_HEADERS="$(printf '${headerLinesEscaped}')"`,
7429
+ // Use printf!
7430
+ `export HOME="${process.env.HOME || "/tmp"}"`,
7431
+ `export PATH="${process.env.PATH || ""}"`
7401
7432
  ];
7402
- const envExports = envVars.map((v) => `export ${v}`).join(" && ");
7403
- console.error("[DIAG] Testing claude --version with SDK environment...");
7404
- const versionCmd = `${envExports} && "${claudePath}" --version 2>&1`;
7405
- const versionResult = await execCommand(versionCmd, 15e3);
7406
- details.versionTest = {
7407
- exitCode: versionResult.exitCode,
7408
- stdout: versionResult.stdout.slice(0, 1e3),
7409
- stderr: versionResult.stderr.slice(0, 1e3)
7410
- };
7411
- console.error("[DIAG] Testing claude with simple prompt (like SDK does)...");
7412
- const promptCmd = `${envExports} && "${claudePath}" -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`;
7413
- const promptResult = await execCommand(promptCmd, 3e4);
7414
- details.promptTest = {
7415
- command: 'claude -p "Say hello" --output-format json --dangerously-skip-permissions',
7416
- exitCode: promptResult.exitCode,
7417
- stdout: promptResult.stdout.slice(0, 2e3),
7418
- stderr: promptResult.stderr.slice(0, 1e3)
7419
- };
7420
- console.error("[DIAG] Checking for claude error logs or state...");
7433
+ const envExports = envExportParts.join(" && ");
7434
+ details.envExportsPreview = envExportParts.map(
7435
+ (e) => e.includes("SECRET") || e.includes("secret") ? e.replace(/:.+/, ":[REDACTED]") : e
7436
+ ).join("\n");
7437
+ const versionResult = await runAndLog(
7438
+ "claude --version",
7439
+ `${envExports} && "${claudePath}" --version 2>&1`,
7440
+ 15e3
7441
+ );
7442
+ const fullCmdResult = await runAndLog(
7443
+ "claude -p (full SDK style)",
7444
+ `${envExports} && "${claudePath}" -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`,
7445
+ 45e3
7446
+ );
7447
+ const simpleCmdResult = await runAndLog(
7448
+ "claude -p (simple)",
7449
+ `${envExports} && "${claudePath}" -p "Hello" 2>&1`,
7450
+ 45e3
7451
+ );
7452
+ const printFlagResult = await runAndLog(
7453
+ "claude --print (long flag)",
7454
+ `${envExports} && "${claudePath}" --print "Hi" 2>&1`,
7455
+ 45e3
7456
+ );
7457
+ const positionalResult = await runAndLog(
7458
+ 'claude "prompt" (positional)',
7459
+ `${envExports} && "${claudePath}" "Hello world" 2>&1`,
7460
+ 45e3
7461
+ );
7462
+ await runAndLog("claude --help", `"${claudePath}" --help 2>&1`, 15e3);
7463
+ await runAndLog(
7464
+ "claude --version (no custom env)",
7465
+ `"${claudePath}" --version 2>&1`,
7466
+ 15e3
7467
+ );
7421
7468
  const homeDir = process.env.HOME || "/tmp";
7422
7469
  const claudeConfigDir = path9.join(homeDir, ".claude");
7423
7470
  details.claudeConfigDir = claudeConfigDir;
@@ -7426,24 +7473,32 @@ async function testClaudeDirectExecution(config) {
7426
7473
  try {
7427
7474
  const configContents = fs11.readdirSync(claudeConfigDir);
7428
7475
  details.claudeConfigContents = configContents;
7476
+ for (const file of configContents) {
7477
+ if (file.includes("log") || file.includes("error")) {
7478
+ const logPath = path9.join(claudeConfigDir, file);
7479
+ const catCmd = `cat "${logPath}" 2>&1 | tail -50`;
7480
+ const logContent = await execCommand(catCmd);
7481
+ details[`claudeLogFile_${file}`] = logContent.stdout.slice(0, 1e3);
7482
+ }
7483
+ }
7429
7484
  } catch (e) {
7430
7485
  details.claudeConfigError = e instanceof Error ? e.message : String(e);
7431
7486
  }
7432
7487
  }
7433
- console.error("[DIAG] Testing claude with potential debug flags...");
7434
- const debugCmd = `${envExports} && "${claudePath}" -p "hi" --verbose 2>&1 || echo "VERBOSE_NOT_SUPPORTED"`;
7435
- const debugResult = await execCommand(debugCmd, 15e3);
7436
- details.debugTest = {
7437
- exitCode: debugResult.exitCode,
7438
- stdout: debugResult.stdout.slice(0, 1500),
7439
- stderr: debugResult.stderr.slice(0, 500)
7440
- };
7441
- const passed = promptResult.exitCode === 0;
7488
+ details.commandResults = commandResults;
7489
+ const anyPromptWorked = fullCmdResult.exitCode === 0 || simpleCmdResult.exitCode === 0 || printFlagResult.exitCode === 0 || positionalResult.exitCode === 0;
7490
+ const versionWorked = versionResult.exitCode === 0;
7491
+ const passed = anyPromptWorked;
7492
+ let errorMsg;
7493
+ if (!passed) {
7494
+ const failedCmds = commandResults.filter((r) => r.exitCode !== 0).map((r) => `${r.name}: exit=${r.exitCode}`).join(", ");
7495
+ errorMsg = `All Claude CLI commands failed. ${failedCmds}. Version works: ${versionWorked}`;
7496
+ }
7442
7497
  return {
7443
7498
  name: "claude-direct-execution",
7444
7499
  passed,
7445
7500
  details,
7446
- error: passed ? void 0 : `Claude CLI failed with exit code ${promptResult.exitCode}. Output: ${promptResult.stdout.slice(0, 300)}`,
7501
+ error: errorMsg,
7447
7502
  durationMs: Date.now() - start
7448
7503
  };
7449
7504
  }
@@ -7653,7 +7708,10 @@ async function runDiagnostics(config, evalRunId2) {
7653
7708
  await runTest("file-system-structure", testFileSystemStructure);
7654
7709
  await runTest("network-connectivity", () => testNetworkConnectivity(config));
7655
7710
  await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
7656
- await runTest("claude-direct-execution", () => testClaudeDirectExecution(config));
7711
+ await runTest(
7712
+ "claude-direct-execution",
7713
+ () => testClaudeDirectExecution(config)
7714
+ );
7657
7715
  await runTest("child-process-spawning", testChildProcessSpawning);
7658
7716
  await runTest("sdk-import", testSdkImport);
7659
7717
  await runTest("file-system-write", testFileSystemWrite);