@wix/evalforge-evaluator 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6956,243 +6956,400 @@ var import_child_process = require("child_process");
6956
6956
  var fs11 = __toESM(require("fs"));
6957
6957
  var path9 = __toESM(require("path"));
6958
6958
  var import_evalforge_types4 = require("@wix/evalforge-types");
6959
- async function execCommand(command, timeoutMs = 5e3) {
6959
+ async function execCommand(command, timeoutMs = 1e4) {
6960
6960
  return new Promise((resolve) => {
6961
- const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
6962
- timeout: timeoutMs
6963
- });
6964
- let stdout = "";
6965
- let stderr = "";
6966
- proc2.stdout.on("data", (data) => {
6967
- stdout += data.toString();
6968
- });
6969
- proc2.stderr.on("data", (data) => {
6970
- stderr += data.toString();
6971
- });
6972
- proc2.on("close", (code2) => {
6973
- resolve({
6974
- stdout: stdout.trim(),
6975
- stderr: stderr.trim(),
6976
- exitCode: code2 ?? -1
6961
+ try {
6962
+ const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
6963
+ timeout: timeoutMs
6977
6964
  });
6978
- });
6979
- proc2.on("error", (err) => {
6965
+ let stdout = "";
6966
+ let stderr = "";
6967
+ proc2.stdout.on("data", (data) => {
6968
+ stdout += data.toString();
6969
+ });
6970
+ proc2.stderr.on("data", (data) => {
6971
+ stderr += data.toString();
6972
+ });
6973
+ proc2.on("close", (code2) => {
6974
+ resolve({
6975
+ stdout: stdout.trim(),
6976
+ stderr: stderr.trim(),
6977
+ exitCode: code2 ?? -1
6978
+ });
6979
+ });
6980
+ proc2.on("error", (err) => {
6981
+ resolve({
6982
+ stdout: "",
6983
+ stderr: err.message,
6984
+ exitCode: -1
6985
+ });
6986
+ });
6987
+ } catch (err) {
6980
6988
  resolve({
6981
6989
  stdout: "",
6982
- stderr: err.message,
6983
- exitCode: -1
6990
+ stderr: err instanceof Error ? err.message : String(err),
6991
+ exitCode: -99
6984
6992
  });
6985
- });
6993
+ }
6986
6994
  });
6987
6995
  }
6988
- async function testEnvironmentVariables() {
6996
+ async function safeRunTest(testName, testFn) {
6997
+ const start = Date.now();
6998
+ try {
6999
+ return await testFn();
7000
+ } catch (err) {
7001
+ const error = err instanceof Error ? err.message : String(err);
7002
+ return {
7003
+ name: testName,
7004
+ passed: false,
7005
+ details: {
7006
+ testCrashed: true,
7007
+ error,
7008
+ stack: err instanceof Error ? err.stack : void 0
7009
+ },
7010
+ error: `Test crashed: ${error}`,
7011
+ durationMs: Date.now() - start
7012
+ };
7013
+ }
7014
+ }
7015
+ async function testClaudeBinaryDiscovery() {
6989
7016
  const start = Date.now();
6990
- const envVars = [
7017
+ const details = {};
7018
+ const npmRootResult = await execCommand("npm root -g");
7019
+ const npmBinResult = await execCommand("npm bin -g");
7020
+ const npmRoot = npmRootResult.stdout;
7021
+ const npmBin = npmBinResult.stdout;
7022
+ details.npmRoot = npmRoot;
7023
+ details.npmBin = npmBin;
7024
+ const evaluatorBinPath = path9.join(
7025
+ npmRoot,
7026
+ "@wix",
7027
+ "evalforge-evaluator",
7028
+ "node_modules",
7029
+ ".bin"
7030
+ );
7031
+ details.evaluatorBinPath = evaluatorBinPath;
7032
+ const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
7033
+ details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
7034
+ details.lsBinExitCode = lsBinResult.exitCode;
7035
+ const claudePath = path9.join(evaluatorBinPath, "claude");
7036
+ let claudeExists = false;
7037
+ try {
7038
+ claudeExists = fs11.existsSync(claudePath);
7039
+ } catch {
7040
+ claudeExists = false;
7041
+ }
7042
+ details.claudePath = claudePath;
7043
+ details.claudeExists = claudeExists;
7044
+ if (claudeExists) {
7045
+ const readlinkResult = await execCommand(
7046
+ `readlink -f "${claudePath}" 2>&1`
7047
+ );
7048
+ details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
7049
+ const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
7050
+ details.claudeStat = statResult.stdout || statResult.stderr;
7051
+ const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
7052
+ details.claudeFileInfo = lsClaudeResult.stdout;
7053
+ }
7054
+ const whichResult = await execCommand("which claude 2>&1");
7055
+ details.whichClaude = whichResult.stdout || "(not in PATH)";
7056
+ details.whichExitCode = whichResult.exitCode;
7057
+ const currentPath = process.env.PATH || "";
7058
+ details.currentPATH = currentPath.split(":");
7059
+ details.pathLength = currentPath.split(":").length;
7060
+ const passed = claudeExists || whichResult.exitCode === 0;
7061
+ return {
7062
+ name: "claude-binary-discovery",
7063
+ passed,
7064
+ details,
7065
+ error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
7066
+ durationMs: Date.now() - start
7067
+ };
7068
+ }
7069
+ async function testClaudeExecution() {
7070
+ const start = Date.now();
7071
+ const details = {};
7072
+ const npmRootResult = await execCommand("npm root -g");
7073
+ const npmRoot = npmRootResult.stdout;
7074
+ const claudePath = path9.join(
7075
+ npmRoot,
7076
+ "@wix",
7077
+ "evalforge-evaluator",
7078
+ "node_modules",
7079
+ ".bin",
7080
+ "claude"
7081
+ );
7082
+ details.claudePath = claudePath;
7083
+ const versionResult = await execCommand(
7084
+ `"${claudePath}" --version 2>&1`,
7085
+ 15e3
7086
+ );
7087
+ details.versionCommand = {
7088
+ command: `"${claudePath}" --version`,
7089
+ stdout: versionResult.stdout,
7090
+ stderr: versionResult.stderr,
7091
+ exitCode: versionResult.exitCode
7092
+ };
7093
+ const helpResult = await execCommand(
7094
+ `"${claudePath}" --help 2>&1 | head -50`,
7095
+ 15e3
7096
+ );
7097
+ details.helpCommand = {
7098
+ command: `"${claudePath}" --help | head -50`,
7099
+ stdout: helpResult.stdout.slice(0, 1500),
7100
+ stderr: helpResult.stderr.slice(0, 500),
7101
+ exitCode: helpResult.exitCode
7102
+ };
7103
+ const whichClaudeResult = await execCommand("which claude 2>&1");
7104
+ if (whichClaudeResult.exitCode === 0) {
7105
+ const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
7106
+ details.pathVersionCommand = {
7107
+ whichClaude: whichClaudeResult.stdout,
7108
+ stdout: pathVersionResult.stdout,
7109
+ stderr: pathVersionResult.stderr,
7110
+ exitCode: pathVersionResult.exitCode
7111
+ };
7112
+ }
7113
+ const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
7114
+ return {
7115
+ name: "claude-cli-execution",
7116
+ passed,
7117
+ details,
7118
+ error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
7119
+ durationMs: Date.now() - start
7120
+ };
7121
+ }
7122
+ async function testEnvironmentDump() {
7123
+ const start = Date.now();
7124
+ const details = {};
7125
+ const importantVars = [
6991
7126
  "PATH",
6992
7127
  "HOME",
6993
7128
  "USER",
6994
7129
  "SHELL",
6995
7130
  "NODE_ENV",
7131
+ "PWD",
6996
7132
  "EVAL_SERVER_URL",
6997
7133
  "AI_GATEWAY_URL",
7134
+ "TRACE_PUSH_URL",
7135
+ "EVAL_AUTH_TOKEN",
6998
7136
  "ANTHROPIC_API_KEY",
6999
7137
  "ANTHROPIC_AUTH_TOKEN",
7000
7138
  "ANTHROPIC_BASE_URL",
7001
7139
  "ANTHROPIC_CUSTOM_HEADERS"
7002
7140
  ];
7003
- const details = {};
7004
- const missing = [];
7005
- for (const key of envVars) {
7141
+ const capturedVars = {};
7142
+ for (const key of importantVars) {
7006
7143
  const value = process.env[key];
7007
7144
  if (value) {
7008
7145
  if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
7009
- details[key] = `[SET - ${value.length} chars]`;
7010
- } else if (key === "PATH") {
7011
- details[key] = value.split(":");
7146
+ capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
7012
7147
  } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
7013
- details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
7148
+ capturedVars[key] = value.split("\n").map((h) => {
7149
+ const [name2, val] = h.split(":");
7150
+ return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
7151
+ }).join(" | ");
7152
+ } else if (key === "PATH") {
7153
+ const parts = value.split(":");
7154
+ capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
7014
7155
  } else {
7015
- details[key] = value;
7156
+ capturedVars[key] = value;
7016
7157
  }
7017
7158
  } else {
7018
- missing.push(key);
7159
+ capturedVars[key] = "(NOT SET)";
7019
7160
  }
7020
7161
  }
7021
- details.missingEnvVars = missing;
7022
- return {
7023
- name: "environment-variables",
7024
- passed: true,
7025
- // Info only, doesn't fail
7026
- details,
7027
- durationMs: Date.now() - start
7028
- };
7029
- }
7030
- async function testNodeEnvironment() {
7031
- const start = Date.now();
7032
- const details = {
7033
- nodeVersion: process.version,
7162
+ details.importantVars = capturedVars;
7163
+ const envResult = await execCommand("env | sort | head -50");
7164
+ details.envCommandOutput = envResult.stdout;
7165
+ details.envExitCode = envResult.exitCode;
7166
+ details.nodeInfo = {
7167
+ version: process.version,
7034
7168
  platform: process.platform,
7035
7169
  arch: process.arch,
7036
- cwd: process.cwd(),
7037
7170
  pid: process.pid,
7038
- uptime: process.uptime(),
7039
- memoryUsage: process.memoryUsage(),
7171
+ cwd: process.cwd(),
7040
7172
  execPath: process.execPath
7041
7173
  };
7042
7174
  return {
7043
- name: "node-environment",
7175
+ name: "environment-dump",
7044
7176
  passed: true,
7177
+ // Info test, always passes
7045
7178
  details,
7046
7179
  durationMs: Date.now() - start
7047
7180
  };
7048
7181
  }
7049
- async function testNpmGlobalDirectory() {
7182
+ async function testFileSystemStructure() {
7050
7183
  const start = Date.now();
7184
+ const details = {};
7051
7185
  const npmRootResult = await execCommand("npm root -g");
7052
- const npmBinResult = await execCommand("npm bin -g");
7053
7186
  const npmRoot = npmRootResult.stdout;
7054
- const npmBin = npmBinResult.stdout;
7055
- const details = {
7056
- npmRootGlobal: npmRoot,
7057
- npmBinGlobal: npmBin,
7058
- npmRootExitCode: npmRootResult.exitCode,
7059
- npmBinExitCode: npmBinResult.exitCode
7187
+ const lsCwdResult = await execCommand("ls -la");
7188
+ details.currentDirectory = {
7189
+ path: process.cwd(),
7190
+ contents: lsCwdResult.stdout
7060
7191
  };
7061
- if (npmRoot) {
7062
- const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7063
- const evaluatorExists = fs11.existsSync(evaluatorPath);
7064
- details.evaluatorInstalled = evaluatorExists;
7065
- if (evaluatorExists) {
7066
- try {
7067
- const files = fs11.readdirSync(evaluatorPath);
7068
- details.evaluatorFiles = files;
7069
- } catch {
7070
- details.evaluatorFiles = "Failed to list files";
7071
- }
7072
- }
7073
- }
7074
- if (npmRoot) {
7075
- const sdkPath = path9.join(
7076
- npmRoot,
7077
- "@wix",
7078
- "evalforge-evaluator",
7079
- "node_modules",
7080
- "@anthropic-ai",
7081
- "claude-agent-sdk"
7082
- );
7083
- const sdkExists = fs11.existsSync(sdkPath);
7084
- details.claudeAgentSdkInstalled = sdkExists;
7085
- }
7086
- const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
7087
- return {
7088
- name: "npm-global-directory",
7089
- passed,
7090
- details,
7091
- error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
7092
- durationMs: Date.now() - start
7192
+ const lsNpmRootResult = await execCommand(
7193
+ `ls -la "${npmRoot}" 2>&1 | head -30`
7194
+ );
7195
+ details.npmGlobalRoot = {
7196
+ path: npmRoot,
7197
+ contents: lsNpmRootResult.stdout
7093
7198
  };
7094
- }
7095
- async function testClaudeBinary() {
7096
- const start = Date.now();
7097
- const whichResult = await execCommand("which claude");
7098
- const versionResult = await execCommand("claude --version");
7099
- const npmBinResult = await execCommand("npm bin -g");
7100
- const npmBin = npmBinResult.stdout;
7101
- let claudeInNpmBin = false;
7102
- if (npmBin) {
7103
- const claudePath = path9.join(npmBin, "claude");
7104
- claudeInNpmBin = fs11.existsSync(claudePath);
7105
- }
7106
- const details = {
7107
- whichClaude: whichResult.stdout || "(not found)",
7108
- whichExitCode: whichResult.exitCode,
7109
- claudeVersion: versionResult.stdout || versionResult.stderr,
7110
- versionExitCode: versionResult.exitCode,
7111
- claudeInNpmGlobalBin: claudeInNpmBin,
7112
- npmGlobalBin: npmBin
7199
+ const wixPath = path9.join(npmRoot, "@wix");
7200
+ const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
7201
+ details.wixPackages = {
7202
+ path: wixPath,
7203
+ contents: lsWixResult.stdout
7204
+ };
7205
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7206
+ const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
7207
+ details.evaluatorDir = {
7208
+ path: evaluatorPath,
7209
+ contents: lsEvaluatorResult.stdout
7210
+ };
7211
+ const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
7212
+ const lsNodeModulesResult = await execCommand(
7213
+ `ls "${nodeModulesPath}" 2>&1 | head -30`
7214
+ );
7215
+ details.evaluatorNodeModules = {
7216
+ path: nodeModulesPath,
7217
+ contents: lsNodeModulesResult.stdout
7218
+ };
7219
+ const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
7220
+ const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
7221
+ details.anthropicPackages = {
7222
+ path: anthropicPath,
7223
+ contents: lsAnthropicResult.stdout
7224
+ };
7225
+ const binPath = path9.join(nodeModulesPath, ".bin");
7226
+ const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
7227
+ details.binDirectory = {
7228
+ path: binPath,
7229
+ contents: lsBinResult.stdout
7113
7230
  };
7114
- const pathDirs = (process.env.PATH || "").split(":");
7115
- const claudeFoundIn = [];
7116
- for (const dir of pathDirs) {
7117
- const claudePath = path9.join(dir, "claude");
7118
- if (fs11.existsSync(claudePath)) {
7119
- claudeFoundIn.push(dir);
7120
- }
7121
- }
7122
- details.claudeFoundInPathDirs = claudeFoundIn;
7123
- const passed = whichResult.exitCode === 0 || claudeInNpmBin;
7124
7231
  return {
7125
- name: "claude-cli-binary",
7126
- passed,
7232
+ name: "file-system-structure",
7233
+ passed: true,
7234
+ // Info test, always passes
7127
7235
  details,
7128
- error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
7129
7236
  durationMs: Date.now() - start
7130
7237
  };
7131
7238
  }
7132
- async function testChildProcess() {
7239
+ async function testNetworkConnectivity(config) {
7133
7240
  const start = Date.now();
7134
- const echoResult = await execCommand('echo "diagnostic-test-success"');
7135
- const nodeResult = await execCommand('node -e "console.log(process.pid)"');
7136
- const details = {
7137
- echoResult: echoResult.stdout,
7138
- echoExitCode: echoResult.exitCode,
7139
- nodeResult: nodeResult.stdout,
7140
- nodeExitCode: nodeResult.exitCode
7241
+ const details = {};
7242
+ const dnsResult = await execCommand(
7243
+ "nslookup manage.wix.com 2>&1 | head -10"
7244
+ );
7245
+ details.dnsLookup = {
7246
+ command: "nslookup manage.wix.com",
7247
+ output: dnsResult.stdout || dnsResult.stderr,
7248
+ exitCode: dnsResult.exitCode
7141
7249
  };
7142
- const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
7250
+ const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
7251
+ details.pingTest = {
7252
+ command: "ping -c 2 manage.wix.com",
7253
+ output: pingResult.stdout || pingResult.stderr,
7254
+ exitCode: pingResult.exitCode
7255
+ };
7256
+ const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
7257
+ const curlGatewayResult = await execCommand(
7258
+ `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
7259
+ );
7260
+ details.aiGatewayTest = {
7261
+ url: gatewayUrl,
7262
+ output: curlGatewayResult.stdout,
7263
+ exitCode: curlGatewayResult.exitCode
7264
+ };
7265
+ const serverUrl = config.serverUrl;
7266
+ const curlServerResult = await execCommand(
7267
+ `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
7268
+ );
7269
+ details.backendServerTest = {
7270
+ url: `${serverUrl}/health`,
7271
+ output: curlServerResult.stdout,
7272
+ exitCode: curlServerResult.exitCode
7273
+ };
7274
+ const httpsResult = await execCommand(
7275
+ 'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
7276
+ );
7277
+ details.httpsBaseline = {
7278
+ command: "curl https://www.google.com",
7279
+ output: httpsResult.stdout,
7280
+ exitCode: httpsResult.exitCode
7281
+ };
7282
+ const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
7283
+ const gatewayReachable = curlGatewayResult.exitCode === 0;
7143
7284
  return {
7144
- name: "child-process-spawning",
7145
- passed,
7285
+ name: "network-connectivity",
7286
+ passed: networkWorks && gatewayReachable,
7146
7287
  details,
7147
- error: passed ? void 0 : "Failed to spawn child process",
7288
+ error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
7148
7289
  durationMs: Date.now() - start
7149
7290
  };
7150
7291
  }
7151
- async function testNetworkConnectivity(config) {
7292
+ async function testChildProcessSpawning() {
7152
7293
  const start = Date.now();
7153
- const aiGatewayUrl = config.aiGatewayUrl;
7154
- if (!aiGatewayUrl) {
7155
- return {
7156
- name: "network-connectivity",
7157
- passed: false,
7158
- details: { error: "No AI_GATEWAY_URL configured" },
7159
- error: "No AI_GATEWAY_URL configured",
7160
- durationMs: Date.now() - start
7161
- };
7162
- }
7163
- const curlResult = await execCommand(
7164
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
7294
+ const details = {};
7295
+ const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
7296
+ details.echoTest = {
7297
+ command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
7298
+ output: echoResult.stdout,
7299
+ exitCode: echoResult.exitCode,
7300
+ passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
7301
+ };
7302
+ const nodeResult = await execCommand(
7303
+ 'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
7165
7304
  );
7166
- const serverUrl = config.serverUrl;
7167
- const serverResult = await execCommand(
7168
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
7305
+ details.nodeTest = {
7306
+ command: 'node -e "console.log(JSON.stringify({...}))"',
7307
+ output: nodeResult.stdout,
7308
+ exitCode: nodeResult.exitCode
7309
+ };
7310
+ const shellResult = await execCommand(
7311
+ 'echo "PID: $$"; pwd; whoami; date; uname -a'
7169
7312
  );
7170
- const details = {
7171
- aiGatewayUrl,
7172
- aiGatewayHttpCode: curlResult.stdout,
7173
- aiGatewayExitCode: curlResult.exitCode,
7174
- serverUrl,
7175
- serverHttpCode: serverResult.stdout,
7176
- serverExitCode: serverResult.exitCode
7313
+ details.shellTest = {
7314
+ command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
7315
+ output: shellResult.stdout,
7316
+ exitCode: shellResult.exitCode
7177
7317
  };
7178
- const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
7318
+ const stderrResult = await execCommand(
7319
+ `node -e "console.error('stderr test')"`
7320
+ );
7321
+ details.stderrTest = {
7322
+ command: `node -e "console.error('stderr test')"`,
7323
+ stderr: stderrResult.stderr,
7324
+ exitCode: stderrResult.exitCode
7325
+ };
7326
+ const exitCodeResult = await execCommand("exit 42");
7327
+ details.exitCodeTest = {
7328
+ command: "exit 42",
7329
+ exitCode: exitCodeResult.exitCode,
7330
+ passed: exitCodeResult.exitCode === 42
7331
+ };
7332
+ const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
7179
7333
  return {
7180
- name: "network-connectivity",
7181
- passed: gatewayReachable,
7334
+ name: "child-process-spawning",
7335
+ passed,
7182
7336
  details,
7183
- error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
7337
+ error: passed ? void 0 : "Echo test failed",
7184
7338
  durationMs: Date.now() - start
7185
7339
  };
7186
7340
  }
7187
7341
  async function testSdkImport() {
7188
7342
  const start = Date.now();
7343
+ const details = {};
7189
7344
  try {
7190
7345
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
7191
- const details = {
7192
- sdkImported: true,
7193
- hasQuery: typeof sdk.query === "function",
7194
- exportedKeys: Object.keys(sdk)
7195
- };
7346
+ details.sdkImported = true;
7347
+ details.exportedKeys = Object.keys(sdk);
7348
+ details.hasQuery = typeof sdk.query === "function";
7349
+ if (typeof sdk.query === "function") {
7350
+ details.queryFunctionExists = true;
7351
+ details.queryFunctionType = typeof sdk.query;
7352
+ }
7196
7353
  return {
7197
7354
  name: "sdk-import",
7198
7355
  passed: true,
@@ -7206,36 +7363,37 @@ async function testSdkImport() {
7206
7363
  passed: false,
7207
7364
  details: {
7208
7365
  sdkImported: false,
7209
- error
7366
+ error,
7367
+ stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
7210
7368
  },
7211
- error: `Failed to import Claude Agent SDK: ${error}`,
7369
+ error: `Failed to import SDK: ${error}`,
7212
7370
  durationMs: Date.now() - start
7213
7371
  };
7214
7372
  }
7215
7373
  }
7216
- async function testFileSystemAccess() {
7374
+ async function testFileSystemWrite() {
7217
7375
  const start = Date.now();
7218
- const testDir = "/tmp/evalforge-diagnostics";
7219
- const testFile = path9.join(testDir, "test.txt");
7376
+ const details = {};
7377
+ const testDir = "/tmp/evalforge-diagnostics-test";
7378
+ const testFile = path9.join(testDir, "test-file.txt");
7379
+ const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
7220
7380
  try {
7221
7381
  if (!fs11.existsSync(testDir)) {
7222
7382
  fs11.mkdirSync(testDir, { recursive: true });
7223
7383
  }
7224
- fs11.writeFileSync(testFile, "diagnostic-test");
7225
- const content = fs11.readFileSync(testFile, "utf8");
7384
+ details.directoryCreated = true;
7385
+ fs11.writeFileSync(testFile, testContent);
7386
+ details.fileWritten = true;
7387
+ const readContent = fs11.readFileSync(testFile, "utf8");
7388
+ details.fileRead = true;
7389
+ details.contentMatches = readContent === testContent;
7390
+ const lsResult = await execCommand(`ls -la "${testDir}"`);
7391
+ details.directoryContents = lsResult.stdout;
7226
7392
  fs11.unlinkSync(testFile);
7227
7393
  fs11.rmdirSync(testDir);
7228
- const details = {
7229
- canCreateDirectory: true,
7230
- canWriteFile: true,
7231
- canReadFile: content === "diagnostic-test",
7232
- testDir,
7233
- cwd: process.cwd(),
7234
- cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
7235
- // First 20 files
7236
- };
7394
+ details.cleanedUp = true;
7237
7395
  return {
7238
- name: "file-system-access",
7396
+ name: "file-system-write",
7239
7397
  passed: true,
7240
7398
  details,
7241
7399
  durationMs: Date.now() - start
@@ -7243,32 +7401,40 @@ async function testFileSystemAccess() {
7243
7401
  } catch (err) {
7244
7402
  const error = err instanceof Error ? err.message : String(err);
7245
7403
  return {
7246
- name: "file-system-access",
7404
+ name: "file-system-write",
7247
7405
  passed: false,
7248
7406
  details: {
7407
+ ...details,
7249
7408
  error,
7250
7409
  testDir,
7251
- cwd: process.cwd()
7410
+ testFile
7252
7411
  },
7253
- error: `File system access failed: ${error}`,
7412
+ error: `File system write failed: ${error}`,
7254
7413
  durationMs: Date.now() - start
7255
7414
  };
7256
7415
  }
7257
7416
  }
7258
7417
  function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
7418
+ const truncatedResult = "summary" in result ? result : {
7419
+ ...result,
7420
+ details: JSON.parse(
7421
+ JSON.stringify(
7422
+ result.details,
7423
+ (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
7424
+ )
7425
+ )
7426
+ };
7259
7427
  const event = {
7260
7428
  evalRunId: evalRunId2,
7261
7429
  scenarioId: "diagnostics",
7262
7430
  scenarioName: "Environment Diagnostics",
7263
7431
  targetId: "system",
7264
- targetName: "System",
7432
+ targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
7265
7433
  stepNumber: 0,
7266
7434
  type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
7267
- outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
7268
- // Limit size
7435
+ outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
7269
7436
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7270
7437
  isComplete: "summary" in result
7271
- // Complete if it's the full report
7272
7438
  };
7273
7439
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
7274
7440
  if (tracePushUrl) {
@@ -7286,18 +7452,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
7286
7452
  headers,
7287
7453
  body: JSON.stringify([event])
7288
7454
  }).catch((err) => {
7289
- console.error("[DIAGNOSTICS] Failed to push trace event:", err);
7455
+ console.error(
7456
+ "[DIAGNOSTICS] Failed to push trace event to backend:",
7457
+ err
7458
+ );
7290
7459
  });
7291
7460
  }
7292
7461
  }
7293
7462
  async function runDiagnostics(config, evalRunId2) {
7294
7463
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
7295
7464
  const startTime = Date.now();
7296
- console.error("[DIAGNOSTICS] Starting environment diagnostics...");
7465
+ console.error("");
7466
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7467
+ console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
7468
+ console.error("\u2551 (Results sent to backend via trace events) \u2551");
7469
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7470
+ console.error("");
7297
7471
  const tests = [];
7298
- const runTest = async (testFn) => {
7299
- const result = await testFn();
7472
+ const runTest = async (testName, testFn) => {
7473
+ console.error(`[DIAG] Running: ${testName}...`);
7474
+ const result = await safeRunTest(testName, testFn);
7300
7475
  tests.push(result);
7476
+ const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
7477
+ console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
7478
+ console.error("[DIAG] Details:");
7479
+ console.error(JSON.stringify(result.details, null, 2));
7480
+ console.error("");
7481
+ if (!result.passed && result.error) {
7482
+ console.error(`[DIAG] ERROR: ${result.error}`);
7483
+ }
7301
7484
  emitDiagnosticTraceEvent(
7302
7485
  evalRunId2,
7303
7486
  result,
@@ -7305,22 +7488,15 @@ async function runDiagnostics(config, evalRunId2) {
7305
7488
  config.routeHeader,
7306
7489
  config.authToken
7307
7490
  );
7308
- const status = result.passed ? "\u2713" : "\u2717";
7309
- console.error(
7310
- `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
7311
- );
7312
- if (!result.passed && result.error) {
7313
- console.error(`[DIAGNOSTICS] Error: ${result.error}`);
7314
- }
7315
7491
  };
7316
- await runTest(testEnvironmentVariables);
7317
- await runTest(testNodeEnvironment);
7318
- await runTest(testNpmGlobalDirectory);
7319
- await runTest(testClaudeBinary);
7320
- await runTest(testChildProcess);
7321
- await runTest(() => testNetworkConnectivity(config));
7322
- await runTest(testSdkImport);
7323
- await runTest(testFileSystemAccess);
7492
+ await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
7493
+ await runTest("claude-cli-execution", testClaudeExecution);
7494
+ await runTest("environment-dump", testEnvironmentDump);
7495
+ await runTest("file-system-structure", testFileSystemStructure);
7496
+ await runTest("network-connectivity", () => testNetworkConnectivity(config));
7497
+ await runTest("child-process-spawning", testChildProcessSpawning);
7498
+ await runTest("sdk-import", testSdkImport);
7499
+ await runTest("file-system-write", testFileSystemWrite);
7324
7500
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
7325
7501
  const totalDurationMs = Date.now() - startTime;
7326
7502
  const report = {
@@ -7341,9 +7517,16 @@ async function runDiagnostics(config, evalRunId2) {
7341
7517
  config.routeHeader,
7342
7518
  config.authToken
7343
7519
  );
7520
+ console.error("");
7521
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7344
7522
  console.error(
7345
- `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
7523
+ `\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
7524
+ 60
7525
+ ) + "\u2551"
7346
7526
  );
7527
+ console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
7528
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7529
+ console.error("");
7347
7530
  return report;
7348
7531
  }
7349
7532