@wix/evalforge-evaluator 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -6939,243 +6939,400 @@ import { spawn } from "child_process";
6939
6939
  import * as fs11 from "fs";
6940
6940
  import * as path9 from "path";
6941
6941
  import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
6942
- async function execCommand(command, timeoutMs = 5e3) {
6942
+ async function execCommand(command, timeoutMs = 1e4) {
6943
6943
  return new Promise((resolve) => {
6944
- const proc2 = spawn("sh", ["-c", command], {
6945
- timeout: timeoutMs
6946
- });
6947
- let stdout = "";
6948
- let stderr = "";
6949
- proc2.stdout.on("data", (data) => {
6950
- stdout += data.toString();
6951
- });
6952
- proc2.stderr.on("data", (data) => {
6953
- stderr += data.toString();
6954
- });
6955
- proc2.on("close", (code2) => {
6956
- resolve({
6957
- stdout: stdout.trim(),
6958
- stderr: stderr.trim(),
6959
- exitCode: code2 ?? -1
6944
+ try {
6945
+ const proc2 = spawn("sh", ["-c", command], {
6946
+ timeout: timeoutMs
6960
6947
  });
6961
- });
6962
- proc2.on("error", (err) => {
6948
+ let stdout = "";
6949
+ let stderr = "";
6950
+ proc2.stdout.on("data", (data) => {
6951
+ stdout += data.toString();
6952
+ });
6953
+ proc2.stderr.on("data", (data) => {
6954
+ stderr += data.toString();
6955
+ });
6956
+ proc2.on("close", (code2) => {
6957
+ resolve({
6958
+ stdout: stdout.trim(),
6959
+ stderr: stderr.trim(),
6960
+ exitCode: code2 ?? -1
6961
+ });
6962
+ });
6963
+ proc2.on("error", (err) => {
6964
+ resolve({
6965
+ stdout: "",
6966
+ stderr: err.message,
6967
+ exitCode: -1
6968
+ });
6969
+ });
6970
+ } catch (err) {
6963
6971
  resolve({
6964
6972
  stdout: "",
6965
- stderr: err.message,
6966
- exitCode: -1
6973
+ stderr: err instanceof Error ? err.message : String(err),
6974
+ exitCode: -99
6967
6975
  });
6968
- });
6976
+ }
6969
6977
  });
6970
6978
  }
6971
- async function testEnvironmentVariables() {
6979
+ async function safeRunTest(testName, testFn) {
6980
+ const start = Date.now();
6981
+ try {
6982
+ return await testFn();
6983
+ } catch (err) {
6984
+ const error = err instanceof Error ? err.message : String(err);
6985
+ return {
6986
+ name: testName,
6987
+ passed: false,
6988
+ details: {
6989
+ testCrashed: true,
6990
+ error,
6991
+ stack: err instanceof Error ? err.stack : void 0
6992
+ },
6993
+ error: `Test crashed: ${error}`,
6994
+ durationMs: Date.now() - start
6995
+ };
6996
+ }
6997
+ }
6998
+ async function testClaudeBinaryDiscovery() {
6972
6999
  const start = Date.now();
6973
- const envVars = [
7000
+ const details = {};
7001
+ const npmRootResult = await execCommand("npm root -g");
7002
+ const npmBinResult = await execCommand("npm bin -g");
7003
+ const npmRoot = npmRootResult.stdout;
7004
+ const npmBin = npmBinResult.stdout;
7005
+ details.npmRoot = npmRoot;
7006
+ details.npmBin = npmBin;
7007
+ const evaluatorBinPath = path9.join(
7008
+ npmRoot,
7009
+ "@wix",
7010
+ "evalforge-evaluator",
7011
+ "node_modules",
7012
+ ".bin"
7013
+ );
7014
+ details.evaluatorBinPath = evaluatorBinPath;
7015
+ const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
7016
+ details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
7017
+ details.lsBinExitCode = lsBinResult.exitCode;
7018
+ const claudePath = path9.join(evaluatorBinPath, "claude");
7019
+ let claudeExists = false;
7020
+ try {
7021
+ claudeExists = fs11.existsSync(claudePath);
7022
+ } catch {
7023
+ claudeExists = false;
7024
+ }
7025
+ details.claudePath = claudePath;
7026
+ details.claudeExists = claudeExists;
7027
+ if (claudeExists) {
7028
+ const readlinkResult = await execCommand(
7029
+ `readlink -f "${claudePath}" 2>&1`
7030
+ );
7031
+ details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
7032
+ const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
7033
+ details.claudeStat = statResult.stdout || statResult.stderr;
7034
+ const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
7035
+ details.claudeFileInfo = lsClaudeResult.stdout;
7036
+ }
7037
+ const whichResult = await execCommand("which claude 2>&1");
7038
+ details.whichClaude = whichResult.stdout || "(not in PATH)";
7039
+ details.whichExitCode = whichResult.exitCode;
7040
+ const currentPath = process.env.PATH || "";
7041
+ details.currentPATH = currentPath.split(":");
7042
+ details.pathLength = currentPath.split(":").length;
7043
+ const passed = claudeExists || whichResult.exitCode === 0;
7044
+ return {
7045
+ name: "claude-binary-discovery",
7046
+ passed,
7047
+ details,
7048
+ error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
7049
+ durationMs: Date.now() - start
7050
+ };
7051
+ }
7052
+ async function testClaudeExecution() {
7053
+ const start = Date.now();
7054
+ const details = {};
7055
+ const npmRootResult = await execCommand("npm root -g");
7056
+ const npmRoot = npmRootResult.stdout;
7057
+ const claudePath = path9.join(
7058
+ npmRoot,
7059
+ "@wix",
7060
+ "evalforge-evaluator",
7061
+ "node_modules",
7062
+ ".bin",
7063
+ "claude"
7064
+ );
7065
+ details.claudePath = claudePath;
7066
+ const versionResult = await execCommand(
7067
+ `"${claudePath}" --version 2>&1`,
7068
+ 15e3
7069
+ );
7070
+ details.versionCommand = {
7071
+ command: `"${claudePath}" --version`,
7072
+ stdout: versionResult.stdout,
7073
+ stderr: versionResult.stderr,
7074
+ exitCode: versionResult.exitCode
7075
+ };
7076
+ const helpResult = await execCommand(
7077
+ `"${claudePath}" --help 2>&1 | head -50`,
7078
+ 15e3
7079
+ );
7080
+ details.helpCommand = {
7081
+ command: `"${claudePath}" --help | head -50`,
7082
+ stdout: helpResult.stdout.slice(0, 1500),
7083
+ stderr: helpResult.stderr.slice(0, 500),
7084
+ exitCode: helpResult.exitCode
7085
+ };
7086
+ const whichClaudeResult = await execCommand("which claude 2>&1");
7087
+ if (whichClaudeResult.exitCode === 0) {
7088
+ const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
7089
+ details.pathVersionCommand = {
7090
+ whichClaude: whichClaudeResult.stdout,
7091
+ stdout: pathVersionResult.stdout,
7092
+ stderr: pathVersionResult.stderr,
7093
+ exitCode: pathVersionResult.exitCode
7094
+ };
7095
+ }
7096
+ const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
7097
+ return {
7098
+ name: "claude-cli-execution",
7099
+ passed,
7100
+ details,
7101
+ error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
7102
+ durationMs: Date.now() - start
7103
+ };
7104
+ }
7105
+ async function testEnvironmentDump() {
7106
+ const start = Date.now();
7107
+ const details = {};
7108
+ const importantVars = [
6974
7109
  "PATH",
6975
7110
  "HOME",
6976
7111
  "USER",
6977
7112
  "SHELL",
6978
7113
  "NODE_ENV",
7114
+ "PWD",
6979
7115
  "EVAL_SERVER_URL",
6980
7116
  "AI_GATEWAY_URL",
7117
+ "TRACE_PUSH_URL",
7118
+ "EVAL_AUTH_TOKEN",
6981
7119
  "ANTHROPIC_API_KEY",
6982
7120
  "ANTHROPIC_AUTH_TOKEN",
6983
7121
  "ANTHROPIC_BASE_URL",
6984
7122
  "ANTHROPIC_CUSTOM_HEADERS"
6985
7123
  ];
6986
- const details = {};
6987
- const missing = [];
6988
- for (const key of envVars) {
7124
+ const capturedVars = {};
7125
+ for (const key of importantVars) {
6989
7126
  const value = process.env[key];
6990
7127
  if (value) {
6991
7128
  if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
6992
- details[key] = `[SET - ${value.length} chars]`;
6993
- } else if (key === "PATH") {
6994
- details[key] = value.split(":");
7129
+ capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
6995
7130
  } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
6996
- details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
7131
+ capturedVars[key] = value.split("\n").map((h) => {
7132
+ const [name2, val] = h.split(":");
7133
+ return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
7134
+ }).join(" | ");
7135
+ } else if (key === "PATH") {
7136
+ const parts = value.split(":");
7137
+ capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
6997
7138
  } else {
6998
- details[key] = value;
7139
+ capturedVars[key] = value;
6999
7140
  }
7000
7141
  } else {
7001
- missing.push(key);
7142
+ capturedVars[key] = "(NOT SET)";
7002
7143
  }
7003
7144
  }
7004
- details.missingEnvVars = missing;
7005
- return {
7006
- name: "environment-variables",
7007
- passed: true,
7008
- // Info only, doesn't fail
7009
- details,
7010
- durationMs: Date.now() - start
7011
- };
7012
- }
7013
- async function testNodeEnvironment() {
7014
- const start = Date.now();
7015
- const details = {
7016
- nodeVersion: process.version,
7145
+ details.importantVars = capturedVars;
7146
+ const envResult = await execCommand("env | sort | head -50");
7147
+ details.envCommandOutput = envResult.stdout;
7148
+ details.envExitCode = envResult.exitCode;
7149
+ details.nodeInfo = {
7150
+ version: process.version,
7017
7151
  platform: process.platform,
7018
7152
  arch: process.arch,
7019
- cwd: process.cwd(),
7020
7153
  pid: process.pid,
7021
- uptime: process.uptime(),
7022
- memoryUsage: process.memoryUsage(),
7154
+ cwd: process.cwd(),
7023
7155
  execPath: process.execPath
7024
7156
  };
7025
7157
  return {
7026
- name: "node-environment",
7158
+ name: "environment-dump",
7027
7159
  passed: true,
7160
+ // Info test, always passes
7028
7161
  details,
7029
7162
  durationMs: Date.now() - start
7030
7163
  };
7031
7164
  }
7032
- async function testNpmGlobalDirectory() {
7165
+ async function testFileSystemStructure() {
7033
7166
  const start = Date.now();
7167
+ const details = {};
7034
7168
  const npmRootResult = await execCommand("npm root -g");
7035
- const npmBinResult = await execCommand("npm bin -g");
7036
7169
  const npmRoot = npmRootResult.stdout;
7037
- const npmBin = npmBinResult.stdout;
7038
- const details = {
7039
- npmRootGlobal: npmRoot,
7040
- npmBinGlobal: npmBin,
7041
- npmRootExitCode: npmRootResult.exitCode,
7042
- npmBinExitCode: npmBinResult.exitCode
7170
+ const lsCwdResult = await execCommand("ls -la");
7171
+ details.currentDirectory = {
7172
+ path: process.cwd(),
7173
+ contents: lsCwdResult.stdout
7043
7174
  };
7044
- if (npmRoot) {
7045
- const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7046
- const evaluatorExists = fs11.existsSync(evaluatorPath);
7047
- details.evaluatorInstalled = evaluatorExists;
7048
- if (evaluatorExists) {
7049
- try {
7050
- const files = fs11.readdirSync(evaluatorPath);
7051
- details.evaluatorFiles = files;
7052
- } catch {
7053
- details.evaluatorFiles = "Failed to list files";
7054
- }
7055
- }
7056
- }
7057
- if (npmRoot) {
7058
- const sdkPath = path9.join(
7059
- npmRoot,
7060
- "@wix",
7061
- "evalforge-evaluator",
7062
- "node_modules",
7063
- "@anthropic-ai",
7064
- "claude-agent-sdk"
7065
- );
7066
- const sdkExists = fs11.existsSync(sdkPath);
7067
- details.claudeAgentSdkInstalled = sdkExists;
7068
- }
7069
- const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
7070
- return {
7071
- name: "npm-global-directory",
7072
- passed,
7073
- details,
7074
- error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
7075
- durationMs: Date.now() - start
7175
+ const lsNpmRootResult = await execCommand(
7176
+ `ls -la "${npmRoot}" 2>&1 | head -30`
7177
+ );
7178
+ details.npmGlobalRoot = {
7179
+ path: npmRoot,
7180
+ contents: lsNpmRootResult.stdout
7076
7181
  };
7077
- }
7078
- async function testClaudeBinary() {
7079
- const start = Date.now();
7080
- const whichResult = await execCommand("which claude");
7081
- const versionResult = await execCommand("claude --version");
7082
- const npmBinResult = await execCommand("npm bin -g");
7083
- const npmBin = npmBinResult.stdout;
7084
- let claudeInNpmBin = false;
7085
- if (npmBin) {
7086
- const claudePath = path9.join(npmBin, "claude");
7087
- claudeInNpmBin = fs11.existsSync(claudePath);
7088
- }
7089
- const details = {
7090
- whichClaude: whichResult.stdout || "(not found)",
7091
- whichExitCode: whichResult.exitCode,
7092
- claudeVersion: versionResult.stdout || versionResult.stderr,
7093
- versionExitCode: versionResult.exitCode,
7094
- claudeInNpmGlobalBin: claudeInNpmBin,
7095
- npmGlobalBin: npmBin
7182
+ const wixPath = path9.join(npmRoot, "@wix");
7183
+ const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
7184
+ details.wixPackages = {
7185
+ path: wixPath,
7186
+ contents: lsWixResult.stdout
7187
+ };
7188
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7189
+ const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
7190
+ details.evaluatorDir = {
7191
+ path: evaluatorPath,
7192
+ contents: lsEvaluatorResult.stdout
7193
+ };
7194
+ const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
7195
+ const lsNodeModulesResult = await execCommand(
7196
+ `ls "${nodeModulesPath}" 2>&1 | head -30`
7197
+ );
7198
+ details.evaluatorNodeModules = {
7199
+ path: nodeModulesPath,
7200
+ contents: lsNodeModulesResult.stdout
7201
+ };
7202
+ const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
7203
+ const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
7204
+ details.anthropicPackages = {
7205
+ path: anthropicPath,
7206
+ contents: lsAnthropicResult.stdout
7207
+ };
7208
+ const binPath = path9.join(nodeModulesPath, ".bin");
7209
+ const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
7210
+ details.binDirectory = {
7211
+ path: binPath,
7212
+ contents: lsBinResult.stdout
7096
7213
  };
7097
- const pathDirs = (process.env.PATH || "").split(":");
7098
- const claudeFoundIn = [];
7099
- for (const dir of pathDirs) {
7100
- const claudePath = path9.join(dir, "claude");
7101
- if (fs11.existsSync(claudePath)) {
7102
- claudeFoundIn.push(dir);
7103
- }
7104
- }
7105
- details.claudeFoundInPathDirs = claudeFoundIn;
7106
- const passed = whichResult.exitCode === 0 || claudeInNpmBin;
7107
7214
  return {
7108
- name: "claude-cli-binary",
7109
- passed,
7215
+ name: "file-system-structure",
7216
+ passed: true,
7217
+ // Info test, always passes
7110
7218
  details,
7111
- error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
7112
7219
  durationMs: Date.now() - start
7113
7220
  };
7114
7221
  }
7115
- async function testChildProcess() {
7222
+ async function testNetworkConnectivity(config) {
7116
7223
  const start = Date.now();
7117
- const echoResult = await execCommand('echo "diagnostic-test-success"');
7118
- const nodeResult = await execCommand('node -e "console.log(process.pid)"');
7119
- const details = {
7120
- echoResult: echoResult.stdout,
7121
- echoExitCode: echoResult.exitCode,
7122
- nodeResult: nodeResult.stdout,
7123
- nodeExitCode: nodeResult.exitCode
7224
+ const details = {};
7225
+ const dnsResult = await execCommand(
7226
+ "nslookup manage.wix.com 2>&1 | head -10"
7227
+ );
7228
+ details.dnsLookup = {
7229
+ command: "nslookup manage.wix.com",
7230
+ output: dnsResult.stdout || dnsResult.stderr,
7231
+ exitCode: dnsResult.exitCode
7124
7232
  };
7125
- const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
7233
+ const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
7234
+ details.pingTest = {
7235
+ command: "ping -c 2 manage.wix.com",
7236
+ output: pingResult.stdout || pingResult.stderr,
7237
+ exitCode: pingResult.exitCode
7238
+ };
7239
+ const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
7240
+ const curlGatewayResult = await execCommand(
7241
+ `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
7242
+ );
7243
+ details.aiGatewayTest = {
7244
+ url: gatewayUrl,
7245
+ output: curlGatewayResult.stdout,
7246
+ exitCode: curlGatewayResult.exitCode
7247
+ };
7248
+ const serverUrl = config.serverUrl;
7249
+ const curlServerResult = await execCommand(
7250
+ `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
7251
+ );
7252
+ details.backendServerTest = {
7253
+ url: `${serverUrl}/health`,
7254
+ output: curlServerResult.stdout,
7255
+ exitCode: curlServerResult.exitCode
7256
+ };
7257
+ const httpsResult = await execCommand(
7258
+ 'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
7259
+ );
7260
+ details.httpsBaseline = {
7261
+ command: "curl https://www.google.com",
7262
+ output: httpsResult.stdout,
7263
+ exitCode: httpsResult.exitCode
7264
+ };
7265
+ const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
7266
+ const gatewayReachable = curlGatewayResult.exitCode === 0;
7126
7267
  return {
7127
- name: "child-process-spawning",
7128
- passed,
7268
+ name: "network-connectivity",
7269
+ passed: networkWorks && gatewayReachable,
7129
7270
  details,
7130
- error: passed ? void 0 : "Failed to spawn child process",
7271
+ error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
7131
7272
  durationMs: Date.now() - start
7132
7273
  };
7133
7274
  }
7134
- async function testNetworkConnectivity(config) {
7275
+ async function testChildProcessSpawning() {
7135
7276
  const start = Date.now();
7136
- const aiGatewayUrl = config.aiGatewayUrl;
7137
- if (!aiGatewayUrl) {
7138
- return {
7139
- name: "network-connectivity",
7140
- passed: false,
7141
- details: { error: "No AI_GATEWAY_URL configured" },
7142
- error: "No AI_GATEWAY_URL configured",
7143
- durationMs: Date.now() - start
7144
- };
7145
- }
7146
- const curlResult = await execCommand(
7147
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
7277
+ const details = {};
7278
+ const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
7279
+ details.echoTest = {
7280
+ command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
7281
+ output: echoResult.stdout,
7282
+ exitCode: echoResult.exitCode,
7283
+ passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
7284
+ };
7285
+ const nodeResult = await execCommand(
7286
+ 'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
7148
7287
  );
7149
- const serverUrl = config.serverUrl;
7150
- const serverResult = await execCommand(
7151
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
7288
+ details.nodeTest = {
7289
+ command: 'node -e "console.log(JSON.stringify({...}))"',
7290
+ output: nodeResult.stdout,
7291
+ exitCode: nodeResult.exitCode
7292
+ };
7293
+ const shellResult = await execCommand(
7294
+ 'echo "PID: $$"; pwd; whoami; date; uname -a'
7152
7295
  );
7153
- const details = {
7154
- aiGatewayUrl,
7155
- aiGatewayHttpCode: curlResult.stdout,
7156
- aiGatewayExitCode: curlResult.exitCode,
7157
- serverUrl,
7158
- serverHttpCode: serverResult.stdout,
7159
- serverExitCode: serverResult.exitCode
7296
+ details.shellTest = {
7297
+ command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
7298
+ output: shellResult.stdout,
7299
+ exitCode: shellResult.exitCode
7160
7300
  };
7161
- const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
7301
+ const stderrResult = await execCommand(
7302
+ `node -e "console.error('stderr test')"`
7303
+ );
7304
+ details.stderrTest = {
7305
+ command: `node -e "console.error('stderr test')"`,
7306
+ stderr: stderrResult.stderr,
7307
+ exitCode: stderrResult.exitCode
7308
+ };
7309
+ const exitCodeResult = await execCommand("exit 42");
7310
+ details.exitCodeTest = {
7311
+ command: "exit 42",
7312
+ exitCode: exitCodeResult.exitCode,
7313
+ passed: exitCodeResult.exitCode === 42
7314
+ };
7315
+ const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
7162
7316
  return {
7163
- name: "network-connectivity",
7164
- passed: gatewayReachable,
7317
+ name: "child-process-spawning",
7318
+ passed,
7165
7319
  details,
7166
- error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
7320
+ error: passed ? void 0 : "Echo test failed",
7167
7321
  durationMs: Date.now() - start
7168
7322
  };
7169
7323
  }
7170
7324
  async function testSdkImport() {
7171
7325
  const start = Date.now();
7326
+ const details = {};
7172
7327
  try {
7173
7328
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
7174
- const details = {
7175
- sdkImported: true,
7176
- hasQuery: typeof sdk.query === "function",
7177
- exportedKeys: Object.keys(sdk)
7178
- };
7329
+ details.sdkImported = true;
7330
+ details.exportedKeys = Object.keys(sdk);
7331
+ details.hasQuery = typeof sdk.query === "function";
7332
+ if (typeof sdk.query === "function") {
7333
+ details.queryFunctionExists = true;
7334
+ details.queryFunctionType = typeof sdk.query;
7335
+ }
7179
7336
  return {
7180
7337
  name: "sdk-import",
7181
7338
  passed: true,
@@ -7189,36 +7346,37 @@ async function testSdkImport() {
7189
7346
  passed: false,
7190
7347
  details: {
7191
7348
  sdkImported: false,
7192
- error
7349
+ error,
7350
+ stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
7193
7351
  },
7194
- error: `Failed to import Claude Agent SDK: ${error}`,
7352
+ error: `Failed to import SDK: ${error}`,
7195
7353
  durationMs: Date.now() - start
7196
7354
  };
7197
7355
  }
7198
7356
  }
7199
- async function testFileSystemAccess() {
7357
+ async function testFileSystemWrite() {
7200
7358
  const start = Date.now();
7201
- const testDir = "/tmp/evalforge-diagnostics";
7202
- const testFile = path9.join(testDir, "test.txt");
7359
+ const details = {};
7360
+ const testDir = "/tmp/evalforge-diagnostics-test";
7361
+ const testFile = path9.join(testDir, "test-file.txt");
7362
+ const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
7203
7363
  try {
7204
7364
  if (!fs11.existsSync(testDir)) {
7205
7365
  fs11.mkdirSync(testDir, { recursive: true });
7206
7366
  }
7207
- fs11.writeFileSync(testFile, "diagnostic-test");
7208
- const content = fs11.readFileSync(testFile, "utf8");
7367
+ details.directoryCreated = true;
7368
+ fs11.writeFileSync(testFile, testContent);
7369
+ details.fileWritten = true;
7370
+ const readContent = fs11.readFileSync(testFile, "utf8");
7371
+ details.fileRead = true;
7372
+ details.contentMatches = readContent === testContent;
7373
+ const lsResult = await execCommand(`ls -la "${testDir}"`);
7374
+ details.directoryContents = lsResult.stdout;
7209
7375
  fs11.unlinkSync(testFile);
7210
7376
  fs11.rmdirSync(testDir);
7211
- const details = {
7212
- canCreateDirectory: true,
7213
- canWriteFile: true,
7214
- canReadFile: content === "diagnostic-test",
7215
- testDir,
7216
- cwd: process.cwd(),
7217
- cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
7218
- // First 20 files
7219
- };
7377
+ details.cleanedUp = true;
7220
7378
  return {
7221
- name: "file-system-access",
7379
+ name: "file-system-write",
7222
7380
  passed: true,
7223
7381
  details,
7224
7382
  durationMs: Date.now() - start
@@ -7226,32 +7384,40 @@ async function testFileSystemAccess() {
7226
7384
  } catch (err) {
7227
7385
  const error = err instanceof Error ? err.message : String(err);
7228
7386
  return {
7229
- name: "file-system-access",
7387
+ name: "file-system-write",
7230
7388
  passed: false,
7231
7389
  details: {
7390
+ ...details,
7232
7391
  error,
7233
7392
  testDir,
7234
- cwd: process.cwd()
7393
+ testFile
7235
7394
  },
7236
- error: `File system access failed: ${error}`,
7395
+ error: `File system write failed: ${error}`,
7237
7396
  durationMs: Date.now() - start
7238
7397
  };
7239
7398
  }
7240
7399
  }
7241
7400
  function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
7401
+ const truncatedResult = "summary" in result ? result : {
7402
+ ...result,
7403
+ details: JSON.parse(
7404
+ JSON.stringify(
7405
+ result.details,
7406
+ (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
7407
+ )
7408
+ )
7409
+ };
7242
7410
  const event = {
7243
7411
  evalRunId: evalRunId2,
7244
7412
  scenarioId: "diagnostics",
7245
7413
  scenarioName: "Environment Diagnostics",
7246
7414
  targetId: "system",
7247
- targetName: "System",
7415
+ targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
7248
7416
  stepNumber: 0,
7249
7417
  type: LiveTraceEventType2.DIAGNOSTIC,
7250
- outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
7251
- // Limit size
7418
+ outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
7252
7419
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7253
7420
  isComplete: "summary" in result
7254
- // Complete if it's the full report
7255
7421
  };
7256
7422
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
7257
7423
  if (tracePushUrl) {
@@ -7269,18 +7435,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
7269
7435
  headers,
7270
7436
  body: JSON.stringify([event])
7271
7437
  }).catch((err) => {
7272
- console.error("[DIAGNOSTICS] Failed to push trace event:", err);
7438
+ console.error(
7439
+ "[DIAGNOSTICS] Failed to push trace event to backend:",
7440
+ err
7441
+ );
7273
7442
  });
7274
7443
  }
7275
7444
  }
7276
7445
  async function runDiagnostics(config, evalRunId2) {
7277
7446
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
7278
7447
  const startTime = Date.now();
7279
- console.error("[DIAGNOSTICS] Starting environment diagnostics...");
7448
+ console.error("");
7449
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7450
+ console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
7451
+ console.error("\u2551 (Results sent to backend via trace events) \u2551");
7452
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7453
+ console.error("");
7280
7454
  const tests = [];
7281
- const runTest = async (testFn) => {
7282
- const result = await testFn();
7455
+ const runTest = async (testName, testFn) => {
7456
+ console.error(`[DIAG] Running: ${testName}...`);
7457
+ const result = await safeRunTest(testName, testFn);
7283
7458
  tests.push(result);
7459
+ const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
7460
+ console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
7461
+ console.error("[DIAG] Details:");
7462
+ console.error(JSON.stringify(result.details, null, 2));
7463
+ console.error("");
7464
+ if (!result.passed && result.error) {
7465
+ console.error(`[DIAG] ERROR: ${result.error}`);
7466
+ }
7284
7467
  emitDiagnosticTraceEvent(
7285
7468
  evalRunId2,
7286
7469
  result,
@@ -7288,22 +7471,15 @@ async function runDiagnostics(config, evalRunId2) {
7288
7471
  config.routeHeader,
7289
7472
  config.authToken
7290
7473
  );
7291
- const status = result.passed ? "\u2713" : "\u2717";
7292
- console.error(
7293
- `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
7294
- );
7295
- if (!result.passed && result.error) {
7296
- console.error(`[DIAGNOSTICS] Error: ${result.error}`);
7297
- }
7298
7474
  };
7299
- await runTest(testEnvironmentVariables);
7300
- await runTest(testNodeEnvironment);
7301
- await runTest(testNpmGlobalDirectory);
7302
- await runTest(testClaudeBinary);
7303
- await runTest(testChildProcess);
7304
- await runTest(() => testNetworkConnectivity(config));
7305
- await runTest(testSdkImport);
7306
- await runTest(testFileSystemAccess);
7475
+ await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
7476
+ await runTest("claude-cli-execution", testClaudeExecution);
7477
+ await runTest("environment-dump", testEnvironmentDump);
7478
+ await runTest("file-system-structure", testFileSystemStructure);
7479
+ await runTest("network-connectivity", () => testNetworkConnectivity(config));
7480
+ await runTest("child-process-spawning", testChildProcessSpawning);
7481
+ await runTest("sdk-import", testSdkImport);
7482
+ await runTest("file-system-write", testFileSystemWrite);
7307
7483
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
7308
7484
  const totalDurationMs = Date.now() - startTime;
7309
7485
  const report = {
@@ -7324,9 +7500,16 @@ async function runDiagnostics(config, evalRunId2) {
7324
7500
  config.routeHeader,
7325
7501
  config.authToken
7326
7502
  );
7503
+ console.error("");
7504
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7327
7505
  console.error(
7328
- `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
7506
+ `\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
7507
+ 60
7508
+ ) + "\u2551"
7329
7509
  );
7510
+ console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
7511
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7512
+ console.error("");
7330
7513
  return report;
7331
7514
  }
7332
7515