@wix/evalforge-evaluator 0.15.0 → 0.17.0

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -6939,243 +6939,473 @@ import { spawn } from "child_process";
6939
6939
  import * as fs11 from "fs";
6940
6940
  import * as path9 from "path";
6941
6941
  import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
6942
- async function execCommand(command, timeoutMs = 5e3) {
6942
+ async function execCommand(command, timeoutMs = 1e4) {
6943
6943
  return new Promise((resolve) => {
6944
- const proc2 = spawn("sh", ["-c", command], {
6945
- timeout: timeoutMs
6946
- });
6947
- let stdout = "";
6948
- let stderr = "";
6949
- proc2.stdout.on("data", (data) => {
6950
- stdout += data.toString();
6951
- });
6952
- proc2.stderr.on("data", (data) => {
6953
- stderr += data.toString();
6954
- });
6955
- proc2.on("close", (code2) => {
6956
- resolve({
6957
- stdout: stdout.trim(),
6958
- stderr: stderr.trim(),
6959
- exitCode: code2 ?? -1
6944
+ try {
6945
+ const proc2 = spawn("sh", ["-c", command], {
6946
+ timeout: timeoutMs
6960
6947
  });
6961
- });
6962
- proc2.on("error", (err) => {
6948
+ let stdout = "";
6949
+ let stderr = "";
6950
+ proc2.stdout.on("data", (data) => {
6951
+ stdout += data.toString();
6952
+ });
6953
+ proc2.stderr.on("data", (data) => {
6954
+ stderr += data.toString();
6955
+ });
6956
+ proc2.on("close", (code2) => {
6957
+ resolve({
6958
+ stdout: stdout.trim(),
6959
+ stderr: stderr.trim(),
6960
+ exitCode: code2 ?? -1
6961
+ });
6962
+ });
6963
+ proc2.on("error", (err) => {
6964
+ resolve({
6965
+ stdout: "",
6966
+ stderr: err.message,
6967
+ exitCode: -1
6968
+ });
6969
+ });
6970
+ } catch (err) {
6963
6971
  resolve({
6964
6972
  stdout: "",
6965
- stderr: err.message,
6966
- exitCode: -1
6973
+ stderr: err instanceof Error ? err.message : String(err),
6974
+ exitCode: -99
6967
6975
  });
6968
- });
6976
+ }
6969
6977
  });
6970
6978
  }
6971
- async function testEnvironmentVariables() {
6979
+ async function safeRunTest(testName, testFn) {
6972
6980
  const start = Date.now();
6973
- const envVars = [
6981
+ try {
6982
+ return await testFn();
6983
+ } catch (err) {
6984
+ const error = err instanceof Error ? err.message : String(err);
6985
+ return {
6986
+ name: testName,
6987
+ passed: false,
6988
+ details: {
6989
+ testCrashed: true,
6990
+ error,
6991
+ stack: err instanceof Error ? err.stack : void 0
6992
+ },
6993
+ error: `Test crashed: ${error}`,
6994
+ durationMs: Date.now() - start
6995
+ };
6996
+ }
6997
+ }
6998
+ async function testClaudeBinaryDiscovery() {
6999
+ const start = Date.now();
7000
+ const details = {};
7001
+ const npmRootResult = await execCommand("npm root -g");
7002
+ const npmBinResult = await execCommand("npm bin -g");
7003
+ const npmRoot = npmRootResult.stdout;
7004
+ const npmBin = npmBinResult.stdout;
7005
+ details.npmRoot = npmRoot;
7006
+ details.npmBin = npmBin;
7007
+ const evaluatorBinPath = path9.join(
7008
+ npmRoot,
7009
+ "@wix",
7010
+ "evalforge-evaluator",
7011
+ "node_modules",
7012
+ ".bin"
7013
+ );
7014
+ details.evaluatorBinPath = evaluatorBinPath;
7015
+ const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
7016
+ details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
7017
+ details.lsBinExitCode = lsBinResult.exitCode;
7018
+ const claudePath = path9.join(evaluatorBinPath, "claude");
7019
+ let claudeExists = false;
7020
+ try {
7021
+ claudeExists = fs11.existsSync(claudePath);
7022
+ } catch {
7023
+ claudeExists = false;
7024
+ }
7025
+ details.claudePath = claudePath;
7026
+ details.claudeExists = claudeExists;
7027
+ if (claudeExists) {
7028
+ const readlinkResult = await execCommand(
7029
+ `readlink -f "${claudePath}" 2>&1`
7030
+ );
7031
+ details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
7032
+ const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
7033
+ details.claudeStat = statResult.stdout || statResult.stderr;
7034
+ const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
7035
+ details.claudeFileInfo = lsClaudeResult.stdout;
7036
+ }
7037
+ const whichResult = await execCommand("which claude 2>&1");
7038
+ details.whichClaude = whichResult.stdout || "(not in PATH)";
7039
+ details.whichExitCode = whichResult.exitCode;
7040
+ const currentPath = process.env.PATH || "";
7041
+ details.currentPATH = currentPath.split(":");
7042
+ details.pathLength = currentPath.split(":").length;
7043
+ const passed = claudeExists || whichResult.exitCode === 0;
7044
+ return {
7045
+ name: "claude-binary-discovery",
7046
+ passed,
7047
+ details,
7048
+ error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
7049
+ durationMs: Date.now() - start
7050
+ };
7051
+ }
7052
+ async function testClaudeExecution() {
7053
+ const start = Date.now();
7054
+ const details = {};
7055
+ const npmRootResult = await execCommand("npm root -g");
7056
+ const npmRoot = npmRootResult.stdout;
7057
+ const claudePath = path9.join(
7058
+ npmRoot,
7059
+ "@wix",
7060
+ "evalforge-evaluator",
7061
+ "node_modules",
7062
+ ".bin",
7063
+ "claude"
7064
+ );
7065
+ details.claudePath = claudePath;
7066
+ const versionResult = await execCommand(
7067
+ `"${claudePath}" --version 2>&1`,
7068
+ 15e3
7069
+ );
7070
+ details.versionCommand = {
7071
+ command: `"${claudePath}" --version`,
7072
+ stdout: versionResult.stdout,
7073
+ stderr: versionResult.stderr,
7074
+ exitCode: versionResult.exitCode
7075
+ };
7076
+ const helpResult = await execCommand(
7077
+ `"${claudePath}" --help 2>&1 | head -50`,
7078
+ 15e3
7079
+ );
7080
+ details.helpCommand = {
7081
+ command: `"${claudePath}" --help | head -50`,
7082
+ stdout: helpResult.stdout.slice(0, 1500),
7083
+ stderr: helpResult.stderr.slice(0, 500),
7084
+ exitCode: helpResult.exitCode
7085
+ };
7086
+ const whichClaudeResult = await execCommand("which claude 2>&1");
7087
+ if (whichClaudeResult.exitCode === 0) {
7088
+ const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
7089
+ details.pathVersionCommand = {
7090
+ whichClaude: whichClaudeResult.stdout,
7091
+ stdout: pathVersionResult.stdout,
7092
+ stderr: pathVersionResult.stderr,
7093
+ exitCode: pathVersionResult.exitCode
7094
+ };
7095
+ }
7096
+ const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
7097
+ return {
7098
+ name: "claude-cli-execution",
7099
+ passed,
7100
+ details,
7101
+ error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
7102
+ durationMs: Date.now() - start
7103
+ };
7104
+ }
7105
+ async function testEnvironmentDump() {
7106
+ const start = Date.now();
7107
+ const details = {};
7108
+ const importantVars = [
6974
7109
  "PATH",
6975
7110
  "HOME",
6976
7111
  "USER",
6977
7112
  "SHELL",
6978
7113
  "NODE_ENV",
7114
+ "PWD",
6979
7115
  "EVAL_SERVER_URL",
6980
7116
  "AI_GATEWAY_URL",
7117
+ "TRACE_PUSH_URL",
7118
+ "EVAL_AUTH_TOKEN",
6981
7119
  "ANTHROPIC_API_KEY",
6982
7120
  "ANTHROPIC_AUTH_TOKEN",
6983
7121
  "ANTHROPIC_BASE_URL",
6984
7122
  "ANTHROPIC_CUSTOM_HEADERS"
6985
7123
  ];
6986
- const details = {};
6987
- const missing = [];
6988
- for (const key of envVars) {
7124
+ const capturedVars = {};
7125
+ for (const key of importantVars) {
6989
7126
  const value = process.env[key];
6990
7127
  if (value) {
6991
7128
  if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
6992
- details[key] = `[SET - ${value.length} chars]`;
6993
- } else if (key === "PATH") {
6994
- details[key] = value.split(":");
7129
+ capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
6995
7130
  } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
6996
- details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
7131
+ capturedVars[key] = value.split("\n").map((h) => {
7132
+ const [name2, val] = h.split(":");
7133
+ return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
7134
+ }).join(" | ");
7135
+ } else if (key === "PATH") {
7136
+ const parts = value.split(":");
7137
+ capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
6997
7138
  } else {
6998
- details[key] = value;
7139
+ capturedVars[key] = value;
6999
7140
  }
7000
7141
  } else {
7001
- missing.push(key);
7142
+ capturedVars[key] = "(NOT SET)";
7002
7143
  }
7003
7144
  }
7004
- details.missingEnvVars = missing;
7005
- return {
7006
- name: "environment-variables",
7007
- passed: true,
7008
- // Info only, doesn't fail
7009
- details,
7010
- durationMs: Date.now() - start
7011
- };
7012
- }
7013
- async function testNodeEnvironment() {
7014
- const start = Date.now();
7015
- const details = {
7016
- nodeVersion: process.version,
7145
+ details.importantVars = capturedVars;
7146
+ const envResult = await execCommand("env | sort | head -50");
7147
+ details.envCommandOutput = envResult.stdout;
7148
+ details.envExitCode = envResult.exitCode;
7149
+ details.nodeInfo = {
7150
+ version: process.version,
7017
7151
  platform: process.platform,
7018
7152
  arch: process.arch,
7019
- cwd: process.cwd(),
7020
7153
  pid: process.pid,
7021
- uptime: process.uptime(),
7022
- memoryUsage: process.memoryUsage(),
7154
+ cwd: process.cwd(),
7023
7155
  execPath: process.execPath
7024
7156
  };
7025
7157
  return {
7026
- name: "node-environment",
7158
+ name: "environment-dump",
7027
7159
  passed: true,
7160
+ // Info test, always passes
7028
7161
  details,
7029
7162
  durationMs: Date.now() - start
7030
7163
  };
7031
7164
  }
7032
- async function testNpmGlobalDirectory() {
7165
+ async function testFileSystemStructure() {
7033
7166
  const start = Date.now();
7167
+ const details = {};
7034
7168
  const npmRootResult = await execCommand("npm root -g");
7035
- const npmBinResult = await execCommand("npm bin -g");
7036
7169
  const npmRoot = npmRootResult.stdout;
7037
- const npmBin = npmBinResult.stdout;
7038
- const details = {
7039
- npmRootGlobal: npmRoot,
7040
- npmBinGlobal: npmBin,
7041
- npmRootExitCode: npmRootResult.exitCode,
7042
- npmBinExitCode: npmBinResult.exitCode
7170
+ const lsCwdResult = await execCommand("ls -la");
7171
+ details.currentDirectory = {
7172
+ path: process.cwd(),
7173
+ contents: lsCwdResult.stdout
7043
7174
  };
7044
- if (npmRoot) {
7045
- const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7046
- const evaluatorExists = fs11.existsSync(evaluatorPath);
7047
- details.evaluatorInstalled = evaluatorExists;
7048
- if (evaluatorExists) {
7049
- try {
7050
- const files = fs11.readdirSync(evaluatorPath);
7051
- details.evaluatorFiles = files;
7052
- } catch {
7053
- details.evaluatorFiles = "Failed to list files";
7054
- }
7055
- }
7056
- }
7057
- if (npmRoot) {
7058
- const sdkPath = path9.join(
7059
- npmRoot,
7060
- "@wix",
7061
- "evalforge-evaluator",
7062
- "node_modules",
7063
- "@anthropic-ai",
7064
- "claude-agent-sdk"
7065
- );
7066
- const sdkExists = fs11.existsSync(sdkPath);
7067
- details.claudeAgentSdkInstalled = sdkExists;
7068
- }
7069
- const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
7070
- return {
7071
- name: "npm-global-directory",
7072
- passed,
7073
- details,
7074
- error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
7075
- durationMs: Date.now() - start
7175
+ const lsNpmRootResult = await execCommand(
7176
+ `ls -la "${npmRoot}" 2>&1 | head -30`
7177
+ );
7178
+ details.npmGlobalRoot = {
7179
+ path: npmRoot,
7180
+ contents: lsNpmRootResult.stdout
7076
7181
  };
7077
- }
7078
- async function testClaudeBinary() {
7079
- const start = Date.now();
7080
- const whichResult = await execCommand("which claude");
7081
- const versionResult = await execCommand("claude --version");
7082
- const npmBinResult = await execCommand("npm bin -g");
7083
- const npmBin = npmBinResult.stdout;
7084
- let claudeInNpmBin = false;
7085
- if (npmBin) {
7086
- const claudePath = path9.join(npmBin, "claude");
7087
- claudeInNpmBin = fs11.existsSync(claudePath);
7088
- }
7089
- const details = {
7090
- whichClaude: whichResult.stdout || "(not found)",
7091
- whichExitCode: whichResult.exitCode,
7092
- claudeVersion: versionResult.stdout || versionResult.stderr,
7093
- versionExitCode: versionResult.exitCode,
7094
- claudeInNpmGlobalBin: claudeInNpmBin,
7095
- npmGlobalBin: npmBin
7182
+ const wixPath = path9.join(npmRoot, "@wix");
7183
+ const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
7184
+ details.wixPackages = {
7185
+ path: wixPath,
7186
+ contents: lsWixResult.stdout
7187
+ };
7188
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7189
+ const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
7190
+ details.evaluatorDir = {
7191
+ path: evaluatorPath,
7192
+ contents: lsEvaluatorResult.stdout
7193
+ };
7194
+ const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
7195
+ const lsNodeModulesResult = await execCommand(
7196
+ `ls "${nodeModulesPath}" 2>&1 | head -30`
7197
+ );
7198
+ details.evaluatorNodeModules = {
7199
+ path: nodeModulesPath,
7200
+ contents: lsNodeModulesResult.stdout
7201
+ };
7202
+ const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
7203
+ const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
7204
+ details.anthropicPackages = {
7205
+ path: anthropicPath,
7206
+ contents: lsAnthropicResult.stdout
7207
+ };
7208
+ const binPath = path9.join(nodeModulesPath, ".bin");
7209
+ const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
7210
+ details.binDirectory = {
7211
+ path: binPath,
7212
+ contents: lsBinResult.stdout
7096
7213
  };
7097
- const pathDirs = (process.env.PATH || "").split(":");
7098
- const claudeFoundIn = [];
7099
- for (const dir of pathDirs) {
7100
- const claudePath = path9.join(dir, "claude");
7101
- if (fs11.existsSync(claudePath)) {
7102
- claudeFoundIn.push(dir);
7103
- }
7104
- }
7105
- details.claudeFoundInPathDirs = claudeFoundIn;
7106
- const passed = whichResult.exitCode === 0 || claudeInNpmBin;
7107
7214
  return {
7108
- name: "claude-cli-binary",
7109
- passed,
7215
+ name: "file-system-structure",
7216
+ passed: true,
7217
+ // Info test, always passes
7110
7218
  details,
7111
- error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
7112
7219
  durationMs: Date.now() - start
7113
7220
  };
7114
7221
  }
7115
- async function testChildProcess() {
7222
+ async function testNetworkConnectivity(config) {
7116
7223
  const start = Date.now();
7117
- const echoResult = await execCommand('echo "diagnostic-test-success"');
7118
- const nodeResult = await execCommand('node -e "console.log(process.pid)"');
7119
- const details = {
7120
- echoResult: echoResult.stdout,
7121
- echoExitCode: echoResult.exitCode,
7122
- nodeResult: nodeResult.stdout,
7123
- nodeExitCode: nodeResult.exitCode
7224
+ const details = {};
7225
+ const dnsResult = await execCommand(
7226
+ "nslookup manage.wix.com 2>&1 | head -10"
7227
+ );
7228
+ details.dnsLookup = {
7229
+ command: "nslookup manage.wix.com",
7230
+ output: dnsResult.stdout || dnsResult.stderr,
7231
+ exitCode: dnsResult.exitCode
7232
+ };
7233
+ const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
7234
+ details.pingTest = {
7235
+ command: "ping -c 2 manage.wix.com",
7236
+ output: pingResult.stdout || pingResult.stderr,
7237
+ exitCode: pingResult.exitCode
7238
+ };
7239
+ const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
7240
+ const curlGatewayResult = await execCommand(
7241
+ `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
7242
+ );
7243
+ details.aiGatewayTest = {
7244
+ url: gatewayUrl,
7245
+ output: curlGatewayResult.stdout,
7246
+ exitCode: curlGatewayResult.exitCode
7247
+ };
7248
+ const serverUrl = config.serverUrl;
7249
+ const curlServerResult = await execCommand(
7250
+ `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
7251
+ );
7252
+ details.backendServerTest = {
7253
+ url: `${serverUrl}/health`,
7254
+ output: curlServerResult.stdout,
7255
+ exitCode: curlServerResult.exitCode
7256
+ };
7257
+ const httpsResult = await execCommand(
7258
+ 'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
7259
+ );
7260
+ details.httpsBaseline = {
7261
+ command: "curl https://www.google.com",
7262
+ output: httpsResult.stdout,
7263
+ exitCode: httpsResult.exitCode
7124
7264
  };
7125
- const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
7265
+ const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
7266
+ const gatewayReachable = curlGatewayResult.exitCode === 0;
7126
7267
  return {
7127
- name: "child-process-spawning",
7128
- passed,
7268
+ name: "network-connectivity",
7269
+ passed: networkWorks && gatewayReachable,
7129
7270
  details,
7130
- error: passed ? void 0 : "Failed to spawn child process",
7271
+ error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
7131
7272
  durationMs: Date.now() - start
7132
7273
  };
7133
7274
  }
7134
- async function testNetworkConnectivity(config) {
7275
+ async function testAiGatewayApiCall(config) {
7135
7276
  const start = Date.now();
7136
- const aiGatewayUrl = config.aiGatewayUrl;
7137
- if (!aiGatewayUrl) {
7277
+ const details = {};
7278
+ const gatewayUrl = config.aiGatewayUrl;
7279
+ const headers = config.aiGatewayHeaders;
7280
+ details.gatewayUrl = gatewayUrl;
7281
+ details.hasHeaders = !!headers;
7282
+ details.headerKeys = headers ? Object.keys(headers) : [];
7283
+ if (!gatewayUrl) {
7138
7284
  return {
7139
- name: "network-connectivity",
7285
+ name: "ai-gateway-api-call",
7140
7286
  passed: false,
7141
- details: { error: "No AI_GATEWAY_URL configured" },
7287
+ details,
7142
7288
  error: "No AI_GATEWAY_URL configured",
7143
7289
  durationMs: Date.now() - start
7144
7290
  };
7145
7291
  }
7146
- const curlResult = await execCommand(
7147
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
7292
+ if (!headers) {
7293
+ return {
7294
+ name: "ai-gateway-api-call",
7295
+ passed: false,
7296
+ details,
7297
+ error: "No AI_GATEWAY_HEADERS configured",
7298
+ durationMs: Date.now() - start
7299
+ };
7300
+ }
7301
+ const headerFlags = Object.entries(headers).map(([k, v]) => `-H "${k}: ${v}"`).join(" ");
7302
+ const requestBody = JSON.stringify({
7303
+ model: "claude-3-5-sonnet-latest",
7304
+ max_tokens: 10,
7305
+ messages: [{ role: "user", content: "Say hi" }]
7306
+ }).replace(/"/g, '\\"');
7307
+ const messagesUrl = `${gatewayUrl}/v1/messages`;
7308
+ const curlCmd = `curl -s --max-time 15 ${headerFlags} -H "Content-Type: application/json" -H "anthropic-version: 2023-06-01" -d "${requestBody}" "${messagesUrl}" 2>&1`;
7309
+ const redactedCmd = curlCmd.replace(/app-secret:[^"]+/g, "app-secret:[REDACTED]").replace(
7310
+ /-H "x-wix-ai-gateway-app-secret: [^"]+"/g,
7311
+ '-H "x-wix-ai-gateway-app-secret: [REDACTED]"'
7148
7312
  );
7149
- const serverUrl = config.serverUrl;
7150
- const serverResult = await execCommand(
7151
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
7313
+ details.curlCommand = redactedCmd;
7314
+ console.error("[DIAG] Making actual API call to AI Gateway...");
7315
+ console.error("[DIAG] URL:", messagesUrl);
7316
+ const result = await execCommand(curlCmd, 2e4);
7317
+ details.responseRaw = result.stdout.slice(0, 1500);
7318
+ details.exitCode = result.exitCode;
7319
+ let responseJson = null;
7320
+ try {
7321
+ responseJson = JSON.parse(result.stdout);
7322
+ details.responseParsed = true;
7323
+ } catch {
7324
+ details.responseParsed = false;
7325
+ details.parseError = "Response is not valid JSON";
7326
+ }
7327
+ const isError = result.stdout.includes('"type":"error"') || result.stdout.includes('"error":{') || result.stdout.includes("authentication_error") || result.stdout.includes("permission_error");
7328
+ const isSuccess = result.stdout.includes('"type":"message"') || result.stdout.includes('"content":');
7329
+ details.isError = isError;
7330
+ details.isSuccess = isSuccess;
7331
+ if (isError && responseJson && typeof responseJson === "object") {
7332
+ const errorObj = responseJson;
7333
+ if (errorObj.error && typeof errorObj.error === "object") {
7334
+ const error = errorObj.error;
7335
+ details.errorType = error.type;
7336
+ details.errorMessage = error.message;
7337
+ }
7338
+ }
7339
+ const passed = result.exitCode === 0 && isSuccess && !isError;
7340
+ return {
7341
+ name: "ai-gateway-api-call",
7342
+ passed,
7343
+ details,
7344
+ error: passed ? void 0 : isError ? `API returned error: ${details.errorType || "unknown"} - ${details.errorMessage || result.stdout.slice(0, 200)}` : `API call failed: exit=${result.exitCode}, response=${result.stdout.slice(0, 200)}`,
7345
+ durationMs: Date.now() - start
7346
+ };
7347
+ }
7348
+ async function testChildProcessSpawning() {
7349
+ const start = Date.now();
7350
+ const details = {};
7351
+ const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
7352
+ details.echoTest = {
7353
+ command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
7354
+ output: echoResult.stdout,
7355
+ exitCode: echoResult.exitCode,
7356
+ passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
7357
+ };
7358
+ const nodeResult = await execCommand(
7359
+ 'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
7152
7360
  );
7153
- const details = {
7154
- aiGatewayUrl,
7155
- aiGatewayHttpCode: curlResult.stdout,
7156
- aiGatewayExitCode: curlResult.exitCode,
7157
- serverUrl,
7158
- serverHttpCode: serverResult.stdout,
7159
- serverExitCode: serverResult.exitCode
7361
+ details.nodeTest = {
7362
+ command: 'node -e "console.log(JSON.stringify({...}))"',
7363
+ output: nodeResult.stdout,
7364
+ exitCode: nodeResult.exitCode
7365
+ };
7366
+ const shellResult = await execCommand(
7367
+ 'echo "PID: $$"; pwd; whoami; date; uname -a'
7368
+ );
7369
+ details.shellTest = {
7370
+ command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
7371
+ output: shellResult.stdout,
7372
+ exitCode: shellResult.exitCode
7373
+ };
7374
+ const stderrResult = await execCommand(
7375
+ `node -e "console.error('stderr test')"`
7376
+ );
7377
+ details.stderrTest = {
7378
+ command: `node -e "console.error('stderr test')"`,
7379
+ stderr: stderrResult.stderr,
7380
+ exitCode: stderrResult.exitCode
7160
7381
  };
7161
- const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
7382
+ const exitCodeResult = await execCommand("exit 42");
7383
+ details.exitCodeTest = {
7384
+ command: "exit 42",
7385
+ exitCode: exitCodeResult.exitCode,
7386
+ passed: exitCodeResult.exitCode === 42
7387
+ };
7388
+ const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
7162
7389
  return {
7163
- name: "network-connectivity",
7164
- passed: gatewayReachable,
7390
+ name: "child-process-spawning",
7391
+ passed,
7165
7392
  details,
7166
- error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
7393
+ error: passed ? void 0 : "Echo test failed",
7167
7394
  durationMs: Date.now() - start
7168
7395
  };
7169
7396
  }
7170
7397
  async function testSdkImport() {
7171
7398
  const start = Date.now();
7399
+ const details = {};
7172
7400
  try {
7173
7401
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
7174
- const details = {
7175
- sdkImported: true,
7176
- hasQuery: typeof sdk.query === "function",
7177
- exportedKeys: Object.keys(sdk)
7178
- };
7402
+ details.sdkImported = true;
7403
+ details.exportedKeys = Object.keys(sdk);
7404
+ details.hasQuery = typeof sdk.query === "function";
7405
+ if (typeof sdk.query === "function") {
7406
+ details.queryFunctionExists = true;
7407
+ details.queryFunctionType = typeof sdk.query;
7408
+ }
7179
7409
  return {
7180
7410
  name: "sdk-import",
7181
7411
  passed: true,
@@ -7189,36 +7419,37 @@ async function testSdkImport() {
7189
7419
  passed: false,
7190
7420
  details: {
7191
7421
  sdkImported: false,
7192
- error
7422
+ error,
7423
+ stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
7193
7424
  },
7194
- error: `Failed to import Claude Agent SDK: ${error}`,
7425
+ error: `Failed to import SDK: ${error}`,
7195
7426
  durationMs: Date.now() - start
7196
7427
  };
7197
7428
  }
7198
7429
  }
7199
- async function testFileSystemAccess() {
7430
+ async function testFileSystemWrite() {
7200
7431
  const start = Date.now();
7201
- const testDir = "/tmp/evalforge-diagnostics";
7202
- const testFile = path9.join(testDir, "test.txt");
7432
+ const details = {};
7433
+ const testDir = "/tmp/evalforge-diagnostics-test";
7434
+ const testFile = path9.join(testDir, "test-file.txt");
7435
+ const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
7203
7436
  try {
7204
7437
  if (!fs11.existsSync(testDir)) {
7205
7438
  fs11.mkdirSync(testDir, { recursive: true });
7206
7439
  }
7207
- fs11.writeFileSync(testFile, "diagnostic-test");
7208
- const content = fs11.readFileSync(testFile, "utf8");
7440
+ details.directoryCreated = true;
7441
+ fs11.writeFileSync(testFile, testContent);
7442
+ details.fileWritten = true;
7443
+ const readContent = fs11.readFileSync(testFile, "utf8");
7444
+ details.fileRead = true;
7445
+ details.contentMatches = readContent === testContent;
7446
+ const lsResult = await execCommand(`ls -la "${testDir}"`);
7447
+ details.directoryContents = lsResult.stdout;
7209
7448
  fs11.unlinkSync(testFile);
7210
7449
  fs11.rmdirSync(testDir);
7211
- const details = {
7212
- canCreateDirectory: true,
7213
- canWriteFile: true,
7214
- canReadFile: content === "diagnostic-test",
7215
- testDir,
7216
- cwd: process.cwd(),
7217
- cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
7218
- // First 20 files
7219
- };
7450
+ details.cleanedUp = true;
7220
7451
  return {
7221
- name: "file-system-access",
7452
+ name: "file-system-write",
7222
7453
  passed: true,
7223
7454
  details,
7224
7455
  durationMs: Date.now() - start
@@ -7226,32 +7457,40 @@ async function testFileSystemAccess() {
7226
7457
  } catch (err) {
7227
7458
  const error = err instanceof Error ? err.message : String(err);
7228
7459
  return {
7229
- name: "file-system-access",
7460
+ name: "file-system-write",
7230
7461
  passed: false,
7231
7462
  details: {
7463
+ ...details,
7232
7464
  error,
7233
7465
  testDir,
7234
- cwd: process.cwd()
7466
+ testFile
7235
7467
  },
7236
- error: `File system access failed: ${error}`,
7468
+ error: `File system write failed: ${error}`,
7237
7469
  durationMs: Date.now() - start
7238
7470
  };
7239
7471
  }
7240
7472
  }
7241
7473
  function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
7474
+ const truncatedResult = "summary" in result ? result : {
7475
+ ...result,
7476
+ details: JSON.parse(
7477
+ JSON.stringify(
7478
+ result.details,
7479
+ (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
7480
+ )
7481
+ )
7482
+ };
7242
7483
  const event = {
7243
7484
  evalRunId: evalRunId2,
7244
7485
  scenarioId: "diagnostics",
7245
7486
  scenarioName: "Environment Diagnostics",
7246
7487
  targetId: "system",
7247
- targetName: "System",
7488
+ targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
7248
7489
  stepNumber: 0,
7249
7490
  type: LiveTraceEventType2.DIAGNOSTIC,
7250
- outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
7251
- // Limit size
7491
+ outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
7252
7492
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7253
7493
  isComplete: "summary" in result
7254
- // Complete if it's the full report
7255
7494
  };
7256
7495
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
7257
7496
  if (tracePushUrl) {
@@ -7269,18 +7508,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
7269
7508
  headers,
7270
7509
  body: JSON.stringify([event])
7271
7510
  }).catch((err) => {
7272
- console.error("[DIAGNOSTICS] Failed to push trace event:", err);
7511
+ console.error(
7512
+ "[DIAGNOSTICS] Failed to push trace event to backend:",
7513
+ err
7514
+ );
7273
7515
  });
7274
7516
  }
7275
7517
  }
7276
7518
  async function runDiagnostics(config, evalRunId2) {
7277
7519
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
7278
7520
  const startTime = Date.now();
7279
- console.error("[DIAGNOSTICS] Starting environment diagnostics...");
7521
+ console.error("");
7522
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7523
+ console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
7524
+ console.error("\u2551 (Results sent to backend via trace events) \u2551");
7525
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7526
+ console.error("");
7280
7527
  const tests = [];
7281
- const runTest = async (testFn) => {
7282
- const result = await testFn();
7528
+ const runTest = async (testName, testFn) => {
7529
+ console.error(`[DIAG] Running: ${testName}...`);
7530
+ const result = await safeRunTest(testName, testFn);
7283
7531
  tests.push(result);
7532
+ const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
7533
+ console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
7534
+ console.error("[DIAG] Details:");
7535
+ console.error(JSON.stringify(result.details, null, 2));
7536
+ console.error("");
7537
+ if (!result.passed && result.error) {
7538
+ console.error(`[DIAG] ERROR: ${result.error}`);
7539
+ }
7284
7540
  emitDiagnosticTraceEvent(
7285
7541
  evalRunId2,
7286
7542
  result,
@@ -7288,22 +7544,16 @@ async function runDiagnostics(config, evalRunId2) {
7288
7544
  config.routeHeader,
7289
7545
  config.authToken
7290
7546
  );
7291
- const status = result.passed ? "\u2713" : "\u2717";
7292
- console.error(
7293
- `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
7294
- );
7295
- if (!result.passed && result.error) {
7296
- console.error(`[DIAGNOSTICS] Error: ${result.error}`);
7297
- }
7298
7547
  };
7299
- await runTest(testEnvironmentVariables);
7300
- await runTest(testNodeEnvironment);
7301
- await runTest(testNpmGlobalDirectory);
7302
- await runTest(testClaudeBinary);
7303
- await runTest(testChildProcess);
7304
- await runTest(() => testNetworkConnectivity(config));
7305
- await runTest(testSdkImport);
7306
- await runTest(testFileSystemAccess);
7548
+ await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
7549
+ await runTest("claude-cli-execution", testClaudeExecution);
7550
+ await runTest("environment-dump", testEnvironmentDump);
7551
+ await runTest("file-system-structure", testFileSystemStructure);
7552
+ await runTest("network-connectivity", () => testNetworkConnectivity(config));
7553
+ await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
7554
+ await runTest("child-process-spawning", testChildProcessSpawning);
7555
+ await runTest("sdk-import", testSdkImport);
7556
+ await runTest("file-system-write", testFileSystemWrite);
7307
7557
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
7308
7558
  const totalDurationMs = Date.now() - startTime;
7309
7559
  const report = {
@@ -7324,9 +7574,16 @@ async function runDiagnostics(config, evalRunId2) {
7324
7574
  config.routeHeader,
7325
7575
  config.authToken
7326
7576
  );
7577
+ console.error("");
7578
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7327
7579
  console.error(
7328
- `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
7580
+ `\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
7581
+ 60
7582
+ ) + "\u2551"
7329
7583
  );
7584
+ console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
7585
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7586
+ console.error("");
7330
7587
  return report;
7331
7588
  }
7332
7589