npm - @wix/evalforge-evaluator - Versions diffs - 0.15.0 → 0.17.0 - Mend

@wix/evalforge-evaluator 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/build/index.js +461 -204
package/build/index.js.map +3 -3
package/build/index.mjs +461 -204
package/build/index.mjs.map +3 -3
package/build/types/diagnostics.d.ts +6 -10
package/package.json +2 -2

package/build/index.mjs CHANGED Viewed

@@ -6939,243 +6939,473 @@ import { spawn } from "child_process";
 import * as fs11 from "fs";
 import * as path9 from "path";
 import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
-async function execCommand(command, timeoutMs = 5e3) {
+async function execCommand(command, timeoutMs = 1e4) {
   return new Promise((resolve) => {
-    const proc2 = spawn("sh", ["-c", command], {
-      timeout: timeoutMs
-    });
-    let stdout = "";
-    let stderr = "";
-    proc2.stdout.on("data", (data) => {
-      stdout += data.toString();
-    });
-    proc2.stderr.on("data", (data) => {
-      stderr += data.toString();
-    });
-    proc2.on("close", (code2) => {
-      resolve({
-        stdout: stdout.trim(),
-        stderr: stderr.trim(),
-        exitCode: code2 ?? -1
+    try {
+      const proc2 = spawn("sh", ["-c", command], {
+        timeout: timeoutMs
       });
-    });
-    proc2.on("error", (err) => {
+      let stdout = "";
+      let stderr = "";
+      proc2.stdout.on("data", (data) => {
+        stdout += data.toString();
+      });
+      proc2.stderr.on("data", (data) => {
+        stderr += data.toString();
+      });
+      proc2.on("close", (code2) => {
+        resolve({
+          stdout: stdout.trim(),
+          stderr: stderr.trim(),
+          exitCode: code2 ?? -1
+        });
+      });
+      proc2.on("error", (err) => {
+        resolve({
+          stdout: "",
+          stderr: err.message,
+          exitCode: -1
+        });
+      });
+    } catch (err) {
       resolve({
         stdout: "",
-        stderr: err.message,
-        exitCode: -1
+        stderr: err instanceof Error ? err.message : String(err),
+        exitCode: -99
       });
-    });
+    }
   });
 }
-async function testEnvironmentVariables() {
+async function safeRunTest(testName, testFn) {
   const start = Date.now();
-  const envVars = [
+  try {
+    return await testFn();
+  } catch (err) {
+    const error = err instanceof Error ? err.message : String(err);
+    return {
+      name: testName,
+      passed: false,
+      details: {
+        testCrashed: true,
+        error,
+        stack: err instanceof Error ? err.stack : void 0
+      },
+      error: `Test crashed: ${error}`,
+      durationMs: Date.now() - start
+    };
+  }
+}
+async function testClaudeBinaryDiscovery() {
+  const start = Date.now();
+  const details = {};
+  const npmRootResult = await execCommand("npm root -g");
+  const npmBinResult = await execCommand("npm bin -g");
+  const npmRoot = npmRootResult.stdout;
+  const npmBin = npmBinResult.stdout;
+  details.npmRoot = npmRoot;
+  details.npmBin = npmBin;
+  const evaluatorBinPath = path9.join(
+    npmRoot,
+    "@wix",
+    "evalforge-evaluator",
+    "node_modules",
+    ".bin"
+  );
+  details.evaluatorBinPath = evaluatorBinPath;
+  const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
+  details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
+  details.lsBinExitCode = lsBinResult.exitCode;
+  const claudePath = path9.join(evaluatorBinPath, "claude");
+  let claudeExists = false;
+  try {
+    claudeExists = fs11.existsSync(claudePath);
+  } catch {
+    claudeExists = false;
+  }
+  details.claudePath = claudePath;
+  details.claudeExists = claudeExists;
+  if (claudeExists) {
+    const readlinkResult = await execCommand(
+      `readlink -f "${claudePath}" 2>&1`
+    );
+    details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
+    const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
+    details.claudeStat = statResult.stdout || statResult.stderr;
+    const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
+    details.claudeFileInfo = lsClaudeResult.stdout;
+  }
+  const whichResult = await execCommand("which claude 2>&1");
+  details.whichClaude = whichResult.stdout || "(not in PATH)";
+  details.whichExitCode = whichResult.exitCode;
+  const currentPath = process.env.PATH || "";
+  details.currentPATH = currentPath.split(":");
+  details.pathLength = currentPath.split(":").length;
+  const passed = claudeExists || whichResult.exitCode === 0;
+  return {
+    name: "claude-binary-discovery",
+    passed,
+    details,
+    error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
+    durationMs: Date.now() - start
+  };
+}
+async function testClaudeExecution() {
+  const start = Date.now();
+  const details = {};
+  const npmRootResult = await execCommand("npm root -g");
+  const npmRoot = npmRootResult.stdout;
+  const claudePath = path9.join(
+    npmRoot,
+    "@wix",
+    "evalforge-evaluator",
+    "node_modules",
+    ".bin",
+    "claude"
+  );
+  details.claudePath = claudePath;
+  const versionResult = await execCommand(
+    `"${claudePath}" --version 2>&1`,
+    15e3
+  );
+  details.versionCommand = {
+    command: `"${claudePath}" --version`,
+    stdout: versionResult.stdout,
+    stderr: versionResult.stderr,
+    exitCode: versionResult.exitCode
+  };
+  const helpResult = await execCommand(
+    `"${claudePath}" --help 2>&1 | head -50`,
+    15e3
+  );
+  details.helpCommand = {
+    command: `"${claudePath}" --help | head -50`,
+    stdout: helpResult.stdout.slice(0, 1500),
+    stderr: helpResult.stderr.slice(0, 500),
+    exitCode: helpResult.exitCode
+  };
+  const whichClaudeResult = await execCommand("which claude 2>&1");
+  if (whichClaudeResult.exitCode === 0) {
+    const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
+    details.pathVersionCommand = {
+      whichClaude: whichClaudeResult.stdout,
+      stdout: pathVersionResult.stdout,
+      stderr: pathVersionResult.stderr,
+      exitCode: pathVersionResult.exitCode
+    };
+  }
+  const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
+  return {
+    name: "claude-cli-execution",
+    passed,
+    details,
+    error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
+    durationMs: Date.now() - start
+  };
+}
+async function testEnvironmentDump() {
+  const start = Date.now();
+  const details = {};
+  const importantVars = [
     "PATH",
     "HOME",
     "USER",
     "SHELL",
     "NODE_ENV",
+    "PWD",
     "EVAL_SERVER_URL",
     "AI_GATEWAY_URL",
+    "TRACE_PUSH_URL",
+    "EVAL_AUTH_TOKEN",
     "ANTHROPIC_API_KEY",
     "ANTHROPIC_AUTH_TOKEN",
     "ANTHROPIC_BASE_URL",
     "ANTHROPIC_CUSTOM_HEADERS"
   ];
-  const details = {};
-  const missing = [];
-  for (const key of envVars) {
+  const capturedVars = {};
+  for (const key of importantVars) {
     const value = process.env[key];
     if (value) {
       if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
-        details[key] = `[SET - ${value.length} chars]`;
-      } else if (key === "PATH") {
-        details[key] = value.split(":");
+        capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
       } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
-        details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
+        capturedVars[key] = value.split("\n").map((h) => {
+          const [name2, val] = h.split(":");
+          return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
+        }).join(" | ");
+      } else if (key === "PATH") {
+        const parts = value.split(":");
+        capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
       } else {
-        details[key] = value;
+        capturedVars[key] = value;
       }
     } else {
-      missing.push(key);
+      capturedVars[key] = "(NOT SET)";
     }
   }
-  details.missingEnvVars = missing;
-  return {
-    name: "environment-variables",
-    passed: true,
-    // Info only, doesn't fail
-    details,
-    durationMs: Date.now() - start
-  };
-}
-async function testNodeEnvironment() {
-  const start = Date.now();
-  const details = {
-    nodeVersion: process.version,
+  details.importantVars = capturedVars;
+  const envResult = await execCommand("env | sort | head -50");
+  details.envCommandOutput = envResult.stdout;
+  details.envExitCode = envResult.exitCode;
+  details.nodeInfo = {
+    version: process.version,
     platform: process.platform,
     arch: process.arch,
-    cwd: process.cwd(),
     pid: process.pid,
-    uptime: process.uptime(),
-    memoryUsage: process.memoryUsage(),
+    cwd: process.cwd(),
     execPath: process.execPath
   };
   return {
-    name: "node-environment",
+    name: "environment-dump",
     passed: true,
+    // Info test, always passes
     details,
     durationMs: Date.now() - start
   };
 }
-async function testNpmGlobalDirectory() {
+async function testFileSystemStructure() {
   const start = Date.now();
+  const details = {};
   const npmRootResult = await execCommand("npm root -g");
-  const npmBinResult = await execCommand("npm bin -g");
   const npmRoot = npmRootResult.stdout;
-  const npmBin = npmBinResult.stdout;
-  const details = {
-    npmRootGlobal: npmRoot,
-    npmBinGlobal: npmBin,
-    npmRootExitCode: npmRootResult.exitCode,
-    npmBinExitCode: npmBinResult.exitCode
+  const lsCwdResult = await execCommand("ls -la");
+  details.currentDirectory = {
+    path: process.cwd(),
+    contents: lsCwdResult.stdout
   };
-  if (npmRoot) {
-    const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
-    const evaluatorExists = fs11.existsSync(evaluatorPath);
-    details.evaluatorInstalled = evaluatorExists;
-    if (evaluatorExists) {
-      try {
-        const files = fs11.readdirSync(evaluatorPath);
-        details.evaluatorFiles = files;
-      } catch {
-        details.evaluatorFiles = "Failed to list files";
-      }
-    }
-  }
-  if (npmRoot) {
-    const sdkPath = path9.join(
-      npmRoot,
-      "@wix",
-      "evalforge-evaluator",
-      "node_modules",
-      "@anthropic-ai",
-      "claude-agent-sdk"
-    );
-    const sdkExists = fs11.existsSync(sdkPath);
-    details.claudeAgentSdkInstalled = sdkExists;
-  }
-  const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
-  return {
-    name: "npm-global-directory",
-    passed,
-    details,
-    error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
-    durationMs: Date.now() - start
+  const lsNpmRootResult = await execCommand(
+    `ls -la "${npmRoot}" 2>&1 | head -30`
+  );
+  details.npmGlobalRoot = {
+    path: npmRoot,
+    contents: lsNpmRootResult.stdout
   };
-}
-async function testClaudeBinary() {
-  const start = Date.now();
-  const whichResult = await execCommand("which claude");
-  const versionResult = await execCommand("claude --version");
-  const npmBinResult = await execCommand("npm bin -g");
-  const npmBin = npmBinResult.stdout;
-  let claudeInNpmBin = false;
-  if (npmBin) {
-    const claudePath = path9.join(npmBin, "claude");
-    claudeInNpmBin = fs11.existsSync(claudePath);
-  }
-  const details = {
-    whichClaude: whichResult.stdout || "(not found)",
-    whichExitCode: whichResult.exitCode,
-    claudeVersion: versionResult.stdout || versionResult.stderr,
-    versionExitCode: versionResult.exitCode,
-    claudeInNpmGlobalBin: claudeInNpmBin,
-    npmGlobalBin: npmBin
+  const wixPath = path9.join(npmRoot, "@wix");
+  const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
+  details.wixPackages = {
+    path: wixPath,
+    contents: lsWixResult.stdout
+  };
+  const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
+  const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
+  details.evaluatorDir = {
+    path: evaluatorPath,
+    contents: lsEvaluatorResult.stdout
+  };
+  const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
+  const lsNodeModulesResult = await execCommand(
+    `ls "${nodeModulesPath}" 2>&1 | head -30`
+  );
+  details.evaluatorNodeModules = {
+    path: nodeModulesPath,
+    contents: lsNodeModulesResult.stdout
+  };
+  const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
+  const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
+  details.anthropicPackages = {
+    path: anthropicPath,
+    contents: lsAnthropicResult.stdout
+  };
+  const binPath = path9.join(nodeModulesPath, ".bin");
+  const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
+  details.binDirectory = {
+    path: binPath,
+    contents: lsBinResult.stdout
   };
-  const pathDirs = (process.env.PATH || "").split(":");
-  const claudeFoundIn = [];
-  for (const dir of pathDirs) {
-    const claudePath = path9.join(dir, "claude");
-    if (fs11.existsSync(claudePath)) {
-      claudeFoundIn.push(dir);
-    }
-  }
-  details.claudeFoundInPathDirs = claudeFoundIn;
-  const passed = whichResult.exitCode === 0 || claudeInNpmBin;
   return {
-    name: "claude-cli-binary",
-    passed,
+    name: "file-system-structure",
+    passed: true,
+    // Info test, always passes
     details,
-    error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
     durationMs: Date.now() - start
   };
 }
-async function testChildProcess() {
+async function testNetworkConnectivity(config) {
   const start = Date.now();
-  const echoResult = await execCommand('echo "diagnostic-test-success"');
-  const nodeResult = await execCommand('node -e "console.log(process.pid)"');
-  const details = {
-    echoResult: echoResult.stdout,
-    echoExitCode: echoResult.exitCode,
-    nodeResult: nodeResult.stdout,
-    nodeExitCode: nodeResult.exitCode
+  const details = {};
+  const dnsResult = await execCommand(
+    "nslookup manage.wix.com 2>&1 | head -10"
+  );
+  details.dnsLookup = {
+    command: "nslookup manage.wix.com",
+    output: dnsResult.stdout || dnsResult.stderr,
+    exitCode: dnsResult.exitCode
+  };
+  const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
+  details.pingTest = {
+    command: "ping -c 2 manage.wix.com",
+    output: pingResult.stdout || pingResult.stderr,
+    exitCode: pingResult.exitCode
+  };
+  const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
+  const curlGatewayResult = await execCommand(
+    `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
+  );
+  details.aiGatewayTest = {
+    url: gatewayUrl,
+    output: curlGatewayResult.stdout,
+    exitCode: curlGatewayResult.exitCode
+  };
+  const serverUrl = config.serverUrl;
+  const curlServerResult = await execCommand(
+    `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
+  );
+  details.backendServerTest = {
+    url: `${serverUrl}/health`,
+    output: curlServerResult.stdout,
+    exitCode: curlServerResult.exitCode
+  };
+  const httpsResult = await execCommand(
+    'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
+  );
+  details.httpsBaseline = {
+    command: "curl https://www.google.com",
+    output: httpsResult.stdout,
+    exitCode: httpsResult.exitCode
   };
-  const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
+  const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
+  const gatewayReachable = curlGatewayResult.exitCode === 0;
   return {
-    name: "child-process-spawning",
-    passed,
+    name: "network-connectivity",
+    passed: networkWorks && gatewayReachable,
     details,
-    error: passed ? void 0 : "Failed to spawn child process",
+    error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
     durationMs: Date.now() - start
   };
 }
-async function testNetworkConnectivity(config) {
+async function testAiGatewayApiCall(config) {
   const start = Date.now();
-  const aiGatewayUrl = config.aiGatewayUrl;
-  if (!aiGatewayUrl) {
+  const details = {};
+  const gatewayUrl = config.aiGatewayUrl;
+  const headers = config.aiGatewayHeaders;
+  details.gatewayUrl = gatewayUrl;
+  details.hasHeaders = !!headers;
+  details.headerKeys = headers ? Object.keys(headers) : [];
+  if (!gatewayUrl) {
     return {
-      name: "network-connectivity",
+      name: "ai-gateway-api-call",
       passed: false,
-      details: { error: "No AI_GATEWAY_URL configured" },
+      details,
       error: "No AI_GATEWAY_URL configured",
       durationMs: Date.now() - start
     };
   }
-  const curlResult = await execCommand(
-    `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
+  if (!headers) {
+    return {
+      name: "ai-gateway-api-call",
+      passed: false,
+      details,
+      error: "No AI_GATEWAY_HEADERS configured",
+      durationMs: Date.now() - start
+    };
+  }
+  const headerFlags = Object.entries(headers).map(([k, v]) => `-H "${k}: ${v}"`).join(" ");
+  const requestBody = JSON.stringify({
+    model: "claude-3-5-sonnet-latest",
+    max_tokens: 10,
+    messages: [{ role: "user", content: "Say hi" }]
+  }).replace(/"/g, '\\"');
+  const messagesUrl = `${gatewayUrl}/v1/messages`;
+  const curlCmd = `curl -s --max-time 15 ${headerFlags} -H "Content-Type: application/json" -H "anthropic-version: 2023-06-01" -d "${requestBody}" "${messagesUrl}" 2>&1`;
+  const redactedCmd = curlCmd.replace(/app-secret:[^"]+/g, "app-secret:[REDACTED]").replace(
+    /-H "x-wix-ai-gateway-app-secret: [^"]+"/g,
+    '-H "x-wix-ai-gateway-app-secret: [REDACTED]"'
   );
-  const serverUrl = config.serverUrl;
-  const serverResult = await execCommand(
-    `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
+  details.curlCommand = redactedCmd;
+  console.error("[DIAG] Making actual API call to AI Gateway...");
+  console.error("[DIAG] URL:", messagesUrl);
+  const result = await execCommand(curlCmd, 2e4);
+  details.responseRaw = result.stdout.slice(0, 1500);
+  details.exitCode = result.exitCode;
+  let responseJson = null;
+  try {
+    responseJson = JSON.parse(result.stdout);
+    details.responseParsed = true;
+  } catch {
+    details.responseParsed = false;
+    details.parseError = "Response is not valid JSON";
+  }
+  const isError = result.stdout.includes('"type":"error"') || result.stdout.includes('"error":{') || result.stdout.includes("authentication_error") || result.stdout.includes("permission_error");
+  const isSuccess = result.stdout.includes('"type":"message"') || result.stdout.includes('"content":');
+  details.isError = isError;
+  details.isSuccess = isSuccess;
+  if (isError && responseJson && typeof responseJson === "object") {
+    const errorObj = responseJson;
+    if (errorObj.error && typeof errorObj.error === "object") {
+      const error = errorObj.error;
+      details.errorType = error.type;
+      details.errorMessage = error.message;
+    }
+  }
+  const passed = result.exitCode === 0 && isSuccess && !isError;
+  return {
+    name: "ai-gateway-api-call",
+    passed,
+    details,
+    error: passed ? void 0 : isError ? `API returned error: ${details.errorType || "unknown"} - ${details.errorMessage || result.stdout.slice(0, 200)}` : `API call failed: exit=${result.exitCode}, response=${result.stdout.slice(0, 200)}`,
+    durationMs: Date.now() - start
+  };
+}
+async function testChildProcessSpawning() {
+  const start = Date.now();
+  const details = {};
+  const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
+  details.echoTest = {
+    command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
+    output: echoResult.stdout,
+    exitCode: echoResult.exitCode,
+    passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
+  };
+  const nodeResult = await execCommand(
+    'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
   );
-  const details = {
-    aiGatewayUrl,
-    aiGatewayHttpCode: curlResult.stdout,
-    aiGatewayExitCode: curlResult.exitCode,
-    serverUrl,
-    serverHttpCode: serverResult.stdout,
-    serverExitCode: serverResult.exitCode
+  details.nodeTest = {
+    command: 'node -e "console.log(JSON.stringify({...}))"',
+    output: nodeResult.stdout,
+    exitCode: nodeResult.exitCode
+  };
+  const shellResult = await execCommand(
+    'echo "PID: $$"; pwd; whoami; date; uname -a'
+  );
+  details.shellTest = {
+    command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
+    output: shellResult.stdout,
+    exitCode: shellResult.exitCode
+  };
+  const stderrResult = await execCommand(
+    `node -e "console.error('stderr test')"`
+  );
+  details.stderrTest = {
+    command: `node -e "console.error('stderr test')"`,
+    stderr: stderrResult.stderr,
+    exitCode: stderrResult.exitCode
   };
-  const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
+  const exitCodeResult = await execCommand("exit 42");
+  details.exitCodeTest = {
+    command: "exit 42",
+    exitCode: exitCodeResult.exitCode,
+    passed: exitCodeResult.exitCode === 42
+  };
+  const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
   return {
-    name: "network-connectivity",
-    passed: gatewayReachable,
+    name: "child-process-spawning",
+    passed,
     details,
-    error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
+    error: passed ? void 0 : "Echo test failed",
     durationMs: Date.now() - start
   };
 }
 async function testSdkImport() {
   const start = Date.now();
+  const details = {};
   try {
     const sdk = await import("@anthropic-ai/claude-agent-sdk");
-    const details = {
-      sdkImported: true,
-      hasQuery: typeof sdk.query === "function",
-      exportedKeys: Object.keys(sdk)
-    };
+    details.sdkImported = true;
+    details.exportedKeys = Object.keys(sdk);
+    details.hasQuery = typeof sdk.query === "function";
+    if (typeof sdk.query === "function") {
+      details.queryFunctionExists = true;
+      details.queryFunctionType = typeof sdk.query;
+    }
     return {
       name: "sdk-import",
       passed: true,
@@ -7189,36 +7419,37 @@ async function testSdkImport() {
       passed: false,
       details: {
         sdkImported: false,
-        error
+        error,
+        stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
       },
-      error: `Failed to import Claude Agent SDK: ${error}`,
+      error: `Failed to import SDK: ${error}`,
       durationMs: Date.now() - start
     };
   }
 }
-async function testFileSystemAccess() {
+async function testFileSystemWrite() {
   const start = Date.now();
-  const testDir = "/tmp/evalforge-diagnostics";
-  const testFile = path9.join(testDir, "test.txt");
+  const details = {};
+  const testDir = "/tmp/evalforge-diagnostics-test";
+  const testFile = path9.join(testDir, "test-file.txt");
+  const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
   try {
     if (!fs11.existsSync(testDir)) {
       fs11.mkdirSync(testDir, { recursive: true });
     }
-    fs11.writeFileSync(testFile, "diagnostic-test");
-    const content = fs11.readFileSync(testFile, "utf8");
+    details.directoryCreated = true;
+    fs11.writeFileSync(testFile, testContent);
+    details.fileWritten = true;
+    const readContent = fs11.readFileSync(testFile, "utf8");
+    details.fileRead = true;
+    details.contentMatches = readContent === testContent;
+    const lsResult = await execCommand(`ls -la "${testDir}"`);
+    details.directoryContents = lsResult.stdout;
     fs11.unlinkSync(testFile);
     fs11.rmdirSync(testDir);
-    const details = {
-      canCreateDirectory: true,
-      canWriteFile: true,
-      canReadFile: content === "diagnostic-test",
-      testDir,
-      cwd: process.cwd(),
-      cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
-      // First 20 files
-    };
+    details.cleanedUp = true;
     return {
-      name: "file-system-access",
+      name: "file-system-write",
       passed: true,
       details,
       durationMs: Date.now() - start
@@ -7226,32 +7457,40 @@ async function testFileSystemAccess() {
   } catch (err) {
     const error = err instanceof Error ? err.message : String(err);
     return {
-      name: "file-system-access",
+      name: "file-system-write",
       passed: false,
       details: {
+        ...details,
         error,
         testDir,
-        cwd: process.cwd()
+        testFile
       },
-      error: `File system access failed: ${error}`,
+      error: `File system write failed: ${error}`,
       durationMs: Date.now() - start
     };
   }
 }
 function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
+  const truncatedResult = "summary" in result ? result : {
+    ...result,
+    details: JSON.parse(
+      JSON.stringify(
+        result.details,
+        (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
+      )
+    )
+  };
   const event = {
     evalRunId: evalRunId2,
     scenarioId: "diagnostics",
     scenarioName: "Environment Diagnostics",
     targetId: "system",
-    targetName: "System",
+    targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
     stepNumber: 0,
     type: LiveTraceEventType2.DIAGNOSTIC,
-    outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
-    // Limit size
+    outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
     timestamp: (/* @__PURE__ */ new Date()).toISOString(),
     isComplete: "summary" in result
-    // Complete if it's the full report
   };
   console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
   if (tracePushUrl) {
@@ -7269,18 +7508,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
       headers,
       body: JSON.stringify([event])
     }).catch((err) => {
-      console.error("[DIAGNOSTICS] Failed to push trace event:", err);
+      console.error(
+        "[DIAGNOSTICS] Failed to push trace event to backend:",
+        err
+      );
     });
   }
 }
 async function runDiagnostics(config, evalRunId2) {
   const startedAt = (/* @__PURE__ */ new Date()).toISOString();
   const startTime = Date.now();
-  console.error("[DIAGNOSTICS] Starting environment diagnostics...");
+  console.error("");
+  console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
+  console.error("\u2551           EVALFORGE ENVIRONMENT DIAGNOSTICS              \u2551");
+  console.error("\u2551   (Results sent to backend via trace events)             \u2551");
+  console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
+  console.error("");
   const tests = [];
-  const runTest = async (testFn) => {
-    const result = await testFn();
+  const runTest = async (testName, testFn) => {
+    console.error(`[DIAG] Running: ${testName}...`);
+    const result = await safeRunTest(testName, testFn);
     tests.push(result);
+    const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
+    console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
+    console.error("[DIAG] Details:");
+    console.error(JSON.stringify(result.details, null, 2));
+    console.error("");
+    if (!result.passed && result.error) {
+      console.error(`[DIAG] ERROR: ${result.error}`);
+    }
     emitDiagnosticTraceEvent(
       evalRunId2,
       result,
@@ -7288,22 +7544,16 @@ async function runDiagnostics(config, evalRunId2) {
       config.routeHeader,
       config.authToken
     );
-    const status = result.passed ? "\u2713" : "\u2717";
-    console.error(
-      `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
-    );
-    if (!result.passed && result.error) {
-      console.error(`[DIAGNOSTICS]   Error: ${result.error}`);
-    }
   };
-  await runTest(testEnvironmentVariables);
-  await runTest(testNodeEnvironment);
-  await runTest(testNpmGlobalDirectory);
-  await runTest(testClaudeBinary);
-  await runTest(testChildProcess);
-  await runTest(() => testNetworkConnectivity(config));
-  await runTest(testSdkImport);
-  await runTest(testFileSystemAccess);
+  await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
+  await runTest("claude-cli-execution", testClaudeExecution);
+  await runTest("environment-dump", testEnvironmentDump);
+  await runTest("file-system-structure", testFileSystemStructure);
+  await runTest("network-connectivity", () => testNetworkConnectivity(config));
+  await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
+  await runTest("child-process-spawning", testChildProcessSpawning);
+  await runTest("sdk-import", testSdkImport);
+  await runTest("file-system-write", testFileSystemWrite);
   const completedAt = (/* @__PURE__ */ new Date()).toISOString();
   const totalDurationMs = Date.now() - startTime;
   const report = {
@@ -7324,9 +7574,16 @@ async function runDiagnostics(config, evalRunId2) {
     config.routeHeader,
     config.authToken
   );
+  console.error("");
+  console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
   console.error(
-    `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
+    `\u2551  DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
+      60
+    ) + "\u2551"
   );
+  console.error(`\u2551  Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
+  console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
+  console.error("");
   return report;
 }