@wix/evalforge-evaluator 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6285,6 +6285,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
6285
6285
  })
6286
6286
  );
6287
6287
  let messageCount = 0;
6288
+ const canUseTool = async () => {
6289
+ return { behavior: "allow" };
6290
+ };
6288
6291
  const queryOptions = {
6289
6292
  env: sdkEnv,
6290
6293
  cwd: options.cwd,
@@ -6294,8 +6297,10 @@ async function executeWithClaudeCode(skill, scenario, options) {
6294
6297
  maxTurns,
6295
6298
  maxThinkingTokens: options.maxThinkingTokens,
6296
6299
  mcpServers: options.mcpServers,
6297
- permissionMode: "bypassPermissions",
6298
- allowDangerouslySkipPermissions: true
6300
+ // Use 'default' permission mode with custom canUseTool handler
6301
+ // instead of 'bypassPermissions', which fails when the process runs as root
6302
+ permissionMode: "default",
6303
+ canUseTool
6299
6304
  };
6300
6305
  if (options.temperature !== void 0) {
6301
6306
  queryOptions.temperature = options.temperature;
@@ -6314,8 +6319,8 @@ async function executeWithClaudeCode(skill, scenario, options) {
6314
6319
  console.log("[SDK-DEBUG] maxTokens:", queryOptions.maxTokens);
6315
6320
  console.log("[SDK-DEBUG] permissionMode:", queryOptions.permissionMode);
6316
6321
  console.log(
6317
- "[SDK-DEBUG] allowDangerouslySkipPermissions:",
6318
- queryOptions.allowDangerouslySkipPermissions
6322
+ "[SDK-DEBUG] canUseTool:",
6323
+ queryOptions.canUseTool ? "custom handler (auto-allow)" : "not set"
6319
6324
  );
6320
6325
  console.log("[SDK-DEBUG] settingSources:", queryOptions.settingSources);
6321
6326
  console.log("[SDK-DEBUG] allowedTools:", queryOptions.allowedTools);
@@ -7365,6 +7370,35 @@ async function testAiGatewayApiCall(config) {
7365
7370
  async function testClaudeDirectExecution(config) {
7366
7371
  const start = Date.now();
7367
7372
  const details = {};
7373
+ const commandResults = [];
7374
+ const runAndLog = async (name2, command, timeoutMs = 3e4) => {
7375
+ console.error(`
7376
+ [CLAUDE-DIAG] ========== ${name2} ==========`);
7377
+ const cmdPreview = command.length > 500 ? command.slice(0, 500) + "..." : command;
7378
+ console.error(`[CLAUDE-DIAG] Command: ${cmdPreview}`);
7379
+ const cmdStart = Date.now();
7380
+ const result = await execCommand(command, timeoutMs);
7381
+ const cmdDuration = Date.now() - cmdStart;
7382
+ console.error(`[CLAUDE-DIAG] Exit code: ${result.exitCode}`);
7383
+ console.error(`[CLAUDE-DIAG] Duration: ${cmdDuration}ms`);
7384
+ console.error(`[CLAUDE-DIAG] Stdout (${result.stdout.length} chars):`);
7385
+ console.error(result.stdout || "(empty)");
7386
+ if (result.stderr) {
7387
+ console.error(`[CLAUDE-DIAG] Stderr (${result.stderr.length} chars):`);
7388
+ console.error(result.stderr);
7389
+ }
7390
+ console.error(`[CLAUDE-DIAG] ========== END ${name2} ==========
7391
+ `);
7392
+ commandResults.push({
7393
+ name: name2,
7394
+ command: command.slice(0, 300),
7395
+ exitCode: result.exitCode,
7396
+ stdout: result.stdout.slice(0, 1500),
7397
+ stderr: result.stderr.slice(0, 500),
7398
+ durationMs: cmdDuration
7399
+ });
7400
+ return result;
7401
+ };
7368
7402
  const npmRootResult = await execCommand("npm root -g");
7369
7403
  const npmRoot = npmRootResult.stdout;
7370
7404
  const claudePath = path9.join(
@@ -7378,6 +7412,7 @@ async function testClaudeDirectExecution(config) {
7378
7412
  details.claudePath = claudePath;
7379
7413
  details.claudeExists = fs11.existsSync(claudePath);
7380
7414
  if (!details.claudeExists) {
7415
+ details.commandResults = commandResults;
7381
7416
  return {
7382
7417
  name: "claude-direct-execution",
7383
7418
  passed: false,
@@ -7390,34 +7425,51 @@ async function testClaudeDirectExecution(config) {
7390
7425
  const headers = config.aiGatewayHeaders;
7391
7426
  details.gatewayUrl = gatewayUrl;
7392
7427
  details.hasHeaders = !!headers;
7393
- const headerLines = headers ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\n") : "";
7394
- const envVars = [
7395
- `ANTHROPIC_API_KEY=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
7396
- `ANTHROPIC_AUTH_TOKEN=sk-ant-api03-placeholder-auth-via-gateway-000000000000`,
7397
- `ANTHROPIC_BASE_URL=${gatewayUrl || ""}`,
7398
- `ANTHROPIC_CUSTOM_HEADERS="${headerLines.replace(/"/g, '\\"')}"`,
7399
- `HOME=${process.env.HOME || "/tmp"}`,
7400
- `PATH=${process.env.PATH || ""}`
7428
+ const headerLinesEscaped = headers ? Object.entries(headers).map(([key, value]) => `${key}:${value}`).join("\\n") : "";
7429
+ const envExportParts = [
7430
+ `export ANTHROPIC_API_KEY="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
7431
+ `export ANTHROPIC_AUTH_TOKEN="sk-ant-api03-placeholder-auth-via-gateway-000000000000"`,
7432
+ `export ANTHROPIC_BASE_URL="${gatewayUrl || ""}"`,
7433
+ `export ANTHROPIC_CUSTOM_HEADERS="$(printf '${headerLinesEscaped}')"`,
7434
+ // Use printf so the literal "\n" separators in the joined header string expand to real newlines
7435
+ `export HOME="${process.env.HOME || "/tmp"}"`,
7436
+ `export PATH="${process.env.PATH || ""}"`
7401
7437
  ];
7402
- const envExports = envVars.map((v) => `export ${v}`).join(" && ");
7403
- console.error("[DIAG] Testing claude --version with SDK environment...");
7404
- const versionCmd = `${envExports} && "${claudePath}" --version 2>&1`;
7405
- const versionResult = await execCommand(versionCmd, 15e3);
7406
- details.versionTest = {
7407
- exitCode: versionResult.exitCode,
7408
- stdout: versionResult.stdout.slice(0, 1e3),
7409
- stderr: versionResult.stderr.slice(0, 1e3)
7410
- };
7411
- console.error("[DIAG] Testing claude with simple prompt (like SDK does)...");
7412
- const promptCmd = `${envExports} && "${claudePath}" -p "Say hello" --output-format json --dangerously-skip-permissions 2>&1`;
7413
- const promptResult = await execCommand(promptCmd, 3e4);
7414
- details.promptTest = {
7415
- command: 'claude -p "Say hello" --output-format json --dangerously-skip-permissions',
7416
- exitCode: promptResult.exitCode,
7417
- stdout: promptResult.stdout.slice(0, 2e3),
7418
- stderr: promptResult.stderr.slice(0, 1e3)
7419
- };
7420
- console.error("[DIAG] Checking for claude error logs or state...");
7438
+ const envExports = envExportParts.join(" && ");
7439
+ details.envExportsPreview = envExportParts.map(
7440
+ (e) => e.includes("SECRET") || e.includes("secret") ? e.replace(/:.+/, ":[REDACTED]") : e
7441
+ ).join("\n");
7442
+ const versionResult = await runAndLog(
7443
+ "claude --version",
7444
+ `${envExports} && "${claudePath}" --version 2>&1`,
7445
+ 15e3
7446
+ );
7447
+ const fullCmdResult = await runAndLog(
7448
+ "claude -p (with json output)",
7449
+ `${envExports} && "${claudePath}" -p "Say hello" --output-format json 2>&1`,
7450
+ 45e3
7451
+ );
7452
+ const simpleCmdResult = await runAndLog(
7453
+ "claude -p (simple)",
7454
+ `${envExports} && "${claudePath}" -p "Hello" 2>&1`,
7455
+ 45e3
7456
+ );
7457
+ const printFlagResult = await runAndLog(
7458
+ "claude --print (long flag)",
7459
+ `${envExports} && "${claudePath}" --print "Hi" 2>&1`,
7460
+ 45e3
7461
+ );
7462
+ const positionalResult = await runAndLog(
7463
+ 'claude "prompt" (positional)',
7464
+ `${envExports} && "${claudePath}" "Hello world" 2>&1`,
7465
+ 45e3
7466
+ );
7467
+ await runAndLog("claude --help", `"${claudePath}" --help 2>&1`, 15e3);
7468
+ await runAndLog(
7469
+ "claude --version (no custom env)",
7470
+ `"${claudePath}" --version 2>&1`,
7471
+ 15e3
7472
+ );
7421
7473
  const homeDir = process.env.HOME || "/tmp";
7422
7474
  const claudeConfigDir = path9.join(homeDir, ".claude");
7423
7475
  details.claudeConfigDir = claudeConfigDir;
@@ -7426,24 +7478,32 @@ async function testClaudeDirectExecution(config) {
7426
7478
  try {
7427
7479
  const configContents = fs11.readdirSync(claudeConfigDir);
7428
7480
  details.claudeConfigContents = configContents;
7481
+ for (const file of configContents) {
7482
+ if (file.includes("log") || file.includes("error")) {
7483
+ const logPath = path9.join(claudeConfigDir, file);
7484
+ const catCmd = `cat "${logPath}" 2>&1 | tail -50`;
7485
+ const logContent = await execCommand(catCmd);
7486
+ details[`claudeLogFile_${file}`] = logContent.stdout.slice(0, 1e3);
7487
+ }
7488
+ }
7429
7489
  } catch (e) {
7430
7490
  details.claudeConfigError = e instanceof Error ? e.message : String(e);
7431
7491
  }
7432
7492
  }
7433
- console.error("[DIAG] Testing claude with potential debug flags...");
7434
- const debugCmd = `${envExports} && "${claudePath}" -p "hi" --verbose 2>&1 || echo "VERBOSE_NOT_SUPPORTED"`;
7435
- const debugResult = await execCommand(debugCmd, 15e3);
7436
- details.debugTest = {
7437
- exitCode: debugResult.exitCode,
7438
- stdout: debugResult.stdout.slice(0, 1500),
7439
- stderr: debugResult.stderr.slice(0, 500)
7440
- };
7441
- const passed = promptResult.exitCode === 0;
7493
+ details.commandResults = commandResults;
7494
+ const anyPromptWorked = fullCmdResult.exitCode === 0 || simpleCmdResult.exitCode === 0 || printFlagResult.exitCode === 0 || positionalResult.exitCode === 0;
7495
+ const versionWorked = versionResult.exitCode === 0;
7496
+ const passed = anyPromptWorked;
7497
+ let errorMsg;
7498
+ if (!passed) {
7499
+ const failedCmds = commandResults.filter((r) => r.exitCode !== 0).map((r) => `${r.name}: exit=${r.exitCode}`).join(", ");
7500
+ errorMsg = `All Claude CLI commands failed. ${failedCmds}. Version works: ${versionWorked}`;
7501
+ }
7442
7502
  return {
7443
7503
  name: "claude-direct-execution",
7444
7504
  passed,
7445
7505
  details,
7446
- error: passed ? void 0 : `Claude CLI failed with exit code ${promptResult.exitCode}. Output: ${promptResult.stdout.slice(0, 300)}`,
7506
+ error: errorMsg,
7447
7507
  durationMs: Date.now() - start
7448
7508
  };
7449
7509
  }
@@ -7653,7 +7713,10 @@ async function runDiagnostics(config, evalRunId2) {
7653
7713
  await runTest("file-system-structure", testFileSystemStructure);
7654
7714
  await runTest("network-connectivity", () => testNetworkConnectivity(config));
7655
7715
  await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
7656
- await runTest("claude-direct-execution", () => testClaudeDirectExecution(config));
7716
+ await runTest(
7717
+ "claude-direct-execution",
7718
+ () => testClaudeDirectExecution(config)
7719
+ );
7657
7720
  await runTest("child-process-spawning", testChildProcessSpawning);
7658
7721
  await runTest("sdk-import", testSdkImport);
7659
7722
  await runTest("file-system-write", testFileSystemWrite);