@wix/evalforge-evaluator 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6956,243 +6956,473 @@ var import_child_process = require("child_process");
6956
6956
  var fs11 = __toESM(require("fs"));
6957
6957
  var path9 = __toESM(require("path"));
6958
6958
  var import_evalforge_types4 = require("@wix/evalforge-types");
6959
- async function execCommand(command, timeoutMs = 5e3) {
6959
+ async function execCommand(command, timeoutMs = 1e4) {
6960
6960
  return new Promise((resolve) => {
6961
- const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
6962
- timeout: timeoutMs
6963
- });
6964
- let stdout = "";
6965
- let stderr = "";
6966
- proc2.stdout.on("data", (data) => {
6967
- stdout += data.toString();
6968
- });
6969
- proc2.stderr.on("data", (data) => {
6970
- stderr += data.toString();
6971
- });
6972
- proc2.on("close", (code2) => {
6973
- resolve({
6974
- stdout: stdout.trim(),
6975
- stderr: stderr.trim(),
6976
- exitCode: code2 ?? -1
6961
+ try {
6962
+ const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
6963
+ timeout: timeoutMs
6977
6964
  });
6978
- });
6979
- proc2.on("error", (err) => {
6965
+ let stdout = "";
6966
+ let stderr = "";
6967
+ proc2.stdout.on("data", (data) => {
6968
+ stdout += data.toString();
6969
+ });
6970
+ proc2.stderr.on("data", (data) => {
6971
+ stderr += data.toString();
6972
+ });
6973
+ proc2.on("close", (code2) => {
6974
+ resolve({
6975
+ stdout: stdout.trim(),
6976
+ stderr: stderr.trim(),
6977
+ exitCode: code2 ?? -1
6978
+ });
6979
+ });
6980
+ proc2.on("error", (err) => {
6981
+ resolve({
6982
+ stdout: "",
6983
+ stderr: err.message,
6984
+ exitCode: -1
6985
+ });
6986
+ });
6987
+ } catch (err) {
6980
6988
  resolve({
6981
6989
  stdout: "",
6982
- stderr: err.message,
6983
- exitCode: -1
6990
+ stderr: err instanceof Error ? err.message : String(err),
6991
+ exitCode: -99
6984
6992
  });
6985
- });
6993
+ }
6986
6994
  });
6987
6995
  }
6988
- async function testEnvironmentVariables() {
6996
+ async function safeRunTest(testName, testFn) {
6989
6997
  const start = Date.now();
6990
- const envVars = [
6998
+ try {
6999
+ return await testFn();
7000
+ } catch (err) {
7001
+ const error = err instanceof Error ? err.message : String(err);
7002
+ return {
7003
+ name: testName,
7004
+ passed: false,
7005
+ details: {
7006
+ testCrashed: true,
7007
+ error,
7008
+ stack: err instanceof Error ? err.stack : void 0
7009
+ },
7010
+ error: `Test crashed: ${error}`,
7011
+ durationMs: Date.now() - start
7012
+ };
7013
+ }
7014
+ }
7015
+ async function testClaudeBinaryDiscovery() {
7016
+ const start = Date.now();
7017
+ const details = {};
7018
+ const npmRootResult = await execCommand("npm root -g");
7019
+ const npmBinResult = await execCommand("npm bin -g");
7020
+ const npmRoot = npmRootResult.stdout;
7021
+ const npmBin = npmBinResult.stdout;
7022
+ details.npmRoot = npmRoot;
7023
+ details.npmBin = npmBin;
7024
+ const evaluatorBinPath = path9.join(
7025
+ npmRoot,
7026
+ "@wix",
7027
+ "evalforge-evaluator",
7028
+ "node_modules",
7029
+ ".bin"
7030
+ );
7031
+ details.evaluatorBinPath = evaluatorBinPath;
7032
+ const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
7033
+ details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
7034
+ details.lsBinExitCode = lsBinResult.exitCode;
7035
+ const claudePath = path9.join(evaluatorBinPath, "claude");
7036
+ let claudeExists = false;
7037
+ try {
7038
+ claudeExists = fs11.existsSync(claudePath);
7039
+ } catch {
7040
+ claudeExists = false;
7041
+ }
7042
+ details.claudePath = claudePath;
7043
+ details.claudeExists = claudeExists;
7044
+ if (claudeExists) {
7045
+ const readlinkResult = await execCommand(
7046
+ `readlink -f "${claudePath}" 2>&1`
7047
+ );
7048
+ details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
7049
+ const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
7050
+ details.claudeStat = statResult.stdout || statResult.stderr;
7051
+ const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
7052
+ details.claudeFileInfo = lsClaudeResult.stdout;
7053
+ }
7054
+ const whichResult = await execCommand("which claude 2>&1");
7055
+ details.whichClaude = whichResult.stdout || "(not in PATH)";
7056
+ details.whichExitCode = whichResult.exitCode;
7057
+ const currentPath = process.env.PATH || "";
7058
+ details.currentPATH = currentPath.split(":");
7059
+ details.pathLength = currentPath.split(":").length;
7060
+ const passed = claudeExists || whichResult.exitCode === 0;
7061
+ return {
7062
+ name: "claude-binary-discovery",
7063
+ passed,
7064
+ details,
7065
+ error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
7066
+ durationMs: Date.now() - start
7067
+ };
7068
+ }
7069
+ async function testClaudeExecution() {
7070
+ const start = Date.now();
7071
+ const details = {};
7072
+ const npmRootResult = await execCommand("npm root -g");
7073
+ const npmRoot = npmRootResult.stdout;
7074
+ const claudePath = path9.join(
7075
+ npmRoot,
7076
+ "@wix",
7077
+ "evalforge-evaluator",
7078
+ "node_modules",
7079
+ ".bin",
7080
+ "claude"
7081
+ );
7082
+ details.claudePath = claudePath;
7083
+ const versionResult = await execCommand(
7084
+ `"${claudePath}" --version 2>&1`,
7085
+ 15e3
7086
+ );
7087
+ details.versionCommand = {
7088
+ command: `"${claudePath}" --version`,
7089
+ stdout: versionResult.stdout,
7090
+ stderr: versionResult.stderr,
7091
+ exitCode: versionResult.exitCode
7092
+ };
7093
+ const helpResult = await execCommand(
7094
+ `"${claudePath}" --help 2>&1 | head -50`,
7095
+ 15e3
7096
+ );
7097
+ details.helpCommand = {
7098
+ command: `"${claudePath}" --help | head -50`,
7099
+ stdout: helpResult.stdout.slice(0, 1500),
7100
+ stderr: helpResult.stderr.slice(0, 500),
7101
+ exitCode: helpResult.exitCode
7102
+ };
7103
+ const whichClaudeResult = await execCommand("which claude 2>&1");
7104
+ if (whichClaudeResult.exitCode === 0) {
7105
+ const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
7106
+ details.pathVersionCommand = {
7107
+ whichClaude: whichClaudeResult.stdout,
7108
+ stdout: pathVersionResult.stdout,
7109
+ stderr: pathVersionResult.stderr,
7110
+ exitCode: pathVersionResult.exitCode
7111
+ };
7112
+ }
7113
+ const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
7114
+ return {
7115
+ name: "claude-cli-execution",
7116
+ passed,
7117
+ details,
7118
+ error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
7119
+ durationMs: Date.now() - start
7120
+ };
7121
+ }
7122
+ async function testEnvironmentDump() {
7123
+ const start = Date.now();
7124
+ const details = {};
7125
+ const importantVars = [
6991
7126
  "PATH",
6992
7127
  "HOME",
6993
7128
  "USER",
6994
7129
  "SHELL",
6995
7130
  "NODE_ENV",
7131
+ "PWD",
6996
7132
  "EVAL_SERVER_URL",
6997
7133
  "AI_GATEWAY_URL",
7134
+ "TRACE_PUSH_URL",
7135
+ "EVAL_AUTH_TOKEN",
6998
7136
  "ANTHROPIC_API_KEY",
6999
7137
  "ANTHROPIC_AUTH_TOKEN",
7000
7138
  "ANTHROPIC_BASE_URL",
7001
7139
  "ANTHROPIC_CUSTOM_HEADERS"
7002
7140
  ];
7003
- const details = {};
7004
- const missing = [];
7005
- for (const key of envVars) {
7141
+ const capturedVars = {};
7142
+ for (const key of importantVars) {
7006
7143
  const value = process.env[key];
7007
7144
  if (value) {
7008
7145
  if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
7009
- details[key] = `[SET - ${value.length} chars]`;
7010
- } else if (key === "PATH") {
7011
- details[key] = value.split(":");
7146
+ capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
7012
7147
  } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
7013
- details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
7148
+ capturedVars[key] = value.split("\n").map((h) => {
7149
+ const [name2, val] = h.split(":");
7150
+ return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
7151
+ }).join(" | ");
7152
+ } else if (key === "PATH") {
7153
+ const parts = value.split(":");
7154
+ capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
7014
7155
  } else {
7015
- details[key] = value;
7156
+ capturedVars[key] = value;
7016
7157
  }
7017
7158
  } else {
7018
- missing.push(key);
7159
+ capturedVars[key] = "(NOT SET)";
7019
7160
  }
7020
7161
  }
7021
- details.missingEnvVars = missing;
7022
- return {
7023
- name: "environment-variables",
7024
- passed: true,
7025
- // Info only, doesn't fail
7026
- details,
7027
- durationMs: Date.now() - start
7028
- };
7029
- }
7030
- async function testNodeEnvironment() {
7031
- const start = Date.now();
7032
- const details = {
7033
- nodeVersion: process.version,
7162
+ details.importantVars = capturedVars;
7163
+ const envResult = await execCommand("env | sort | head -50");
7164
+ details.envCommandOutput = envResult.stdout;
7165
+ details.envExitCode = envResult.exitCode;
7166
+ details.nodeInfo = {
7167
+ version: process.version,
7034
7168
  platform: process.platform,
7035
7169
  arch: process.arch,
7036
- cwd: process.cwd(),
7037
7170
  pid: process.pid,
7038
- uptime: process.uptime(),
7039
- memoryUsage: process.memoryUsage(),
7171
+ cwd: process.cwd(),
7040
7172
  execPath: process.execPath
7041
7173
  };
7042
7174
  return {
7043
- name: "node-environment",
7175
+ name: "environment-dump",
7044
7176
  passed: true,
7177
+ // Info test, always passes
7045
7178
  details,
7046
7179
  durationMs: Date.now() - start
7047
7180
  };
7048
7181
  }
7049
- async function testNpmGlobalDirectory() {
7182
+ async function testFileSystemStructure() {
7050
7183
  const start = Date.now();
7184
+ const details = {};
7051
7185
  const npmRootResult = await execCommand("npm root -g");
7052
- const npmBinResult = await execCommand("npm bin -g");
7053
7186
  const npmRoot = npmRootResult.stdout;
7054
- const npmBin = npmBinResult.stdout;
7055
- const details = {
7056
- npmRootGlobal: npmRoot,
7057
- npmBinGlobal: npmBin,
7058
- npmRootExitCode: npmRootResult.exitCode,
7059
- npmBinExitCode: npmBinResult.exitCode
7187
+ const lsCwdResult = await execCommand("ls -la");
7188
+ details.currentDirectory = {
7189
+ path: process.cwd(),
7190
+ contents: lsCwdResult.stdout
7060
7191
  };
7061
- if (npmRoot) {
7062
- const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7063
- const evaluatorExists = fs11.existsSync(evaluatorPath);
7064
- details.evaluatorInstalled = evaluatorExists;
7065
- if (evaluatorExists) {
7066
- try {
7067
- const files = fs11.readdirSync(evaluatorPath);
7068
- details.evaluatorFiles = files;
7069
- } catch {
7070
- details.evaluatorFiles = "Failed to list files";
7071
- }
7072
- }
7073
- }
7074
- if (npmRoot) {
7075
- const sdkPath = path9.join(
7076
- npmRoot,
7077
- "@wix",
7078
- "evalforge-evaluator",
7079
- "node_modules",
7080
- "@anthropic-ai",
7081
- "claude-agent-sdk"
7082
- );
7083
- const sdkExists = fs11.existsSync(sdkPath);
7084
- details.claudeAgentSdkInstalled = sdkExists;
7085
- }
7086
- const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
7087
- return {
7088
- name: "npm-global-directory",
7089
- passed,
7090
- details,
7091
- error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
7092
- durationMs: Date.now() - start
7192
+ const lsNpmRootResult = await execCommand(
7193
+ `ls -la "${npmRoot}" 2>&1 | head -30`
7194
+ );
7195
+ details.npmGlobalRoot = {
7196
+ path: npmRoot,
7197
+ contents: lsNpmRootResult.stdout
7093
7198
  };
7094
- }
7095
- async function testClaudeBinary() {
7096
- const start = Date.now();
7097
- const whichResult = await execCommand("which claude");
7098
- const versionResult = await execCommand("claude --version");
7099
- const npmBinResult = await execCommand("npm bin -g");
7100
- const npmBin = npmBinResult.stdout;
7101
- let claudeInNpmBin = false;
7102
- if (npmBin) {
7103
- const claudePath = path9.join(npmBin, "claude");
7104
- claudeInNpmBin = fs11.existsSync(claudePath);
7105
- }
7106
- const details = {
7107
- whichClaude: whichResult.stdout || "(not found)",
7108
- whichExitCode: whichResult.exitCode,
7109
- claudeVersion: versionResult.stdout || versionResult.stderr,
7110
- versionExitCode: versionResult.exitCode,
7111
- claudeInNpmGlobalBin: claudeInNpmBin,
7112
- npmGlobalBin: npmBin
7199
+ const wixPath = path9.join(npmRoot, "@wix");
7200
+ const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
7201
+ details.wixPackages = {
7202
+ path: wixPath,
7203
+ contents: lsWixResult.stdout
7204
+ };
7205
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7206
+ const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
7207
+ details.evaluatorDir = {
7208
+ path: evaluatorPath,
7209
+ contents: lsEvaluatorResult.stdout
7210
+ };
7211
+ const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
7212
+ const lsNodeModulesResult = await execCommand(
7213
+ `ls "${nodeModulesPath}" 2>&1 | head -30`
7214
+ );
7215
+ details.evaluatorNodeModules = {
7216
+ path: nodeModulesPath,
7217
+ contents: lsNodeModulesResult.stdout
7218
+ };
7219
+ const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
7220
+ const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
7221
+ details.anthropicPackages = {
7222
+ path: anthropicPath,
7223
+ contents: lsAnthropicResult.stdout
7224
+ };
7225
+ const binPath = path9.join(nodeModulesPath, ".bin");
7226
+ const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
7227
+ details.binDirectory = {
7228
+ path: binPath,
7229
+ contents: lsBinResult.stdout
7113
7230
  };
7114
- const pathDirs = (process.env.PATH || "").split(":");
7115
- const claudeFoundIn = [];
7116
- for (const dir of pathDirs) {
7117
- const claudePath = path9.join(dir, "claude");
7118
- if (fs11.existsSync(claudePath)) {
7119
- claudeFoundIn.push(dir);
7120
- }
7121
- }
7122
- details.claudeFoundInPathDirs = claudeFoundIn;
7123
- const passed = whichResult.exitCode === 0 || claudeInNpmBin;
7124
7231
  return {
7125
- name: "claude-cli-binary",
7126
- passed,
7232
+ name: "file-system-structure",
7233
+ passed: true,
7234
+ // Info test, always passes
7127
7235
  details,
7128
- error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
7129
7236
  durationMs: Date.now() - start
7130
7237
  };
7131
7238
  }
7132
- async function testChildProcess() {
7239
+ async function testNetworkConnectivity(config) {
7133
7240
  const start = Date.now();
7134
- const echoResult = await execCommand('echo "diagnostic-test-success"');
7135
- const nodeResult = await execCommand('node -e "console.log(process.pid)"');
7136
- const details = {
7137
- echoResult: echoResult.stdout,
7138
- echoExitCode: echoResult.exitCode,
7139
- nodeResult: nodeResult.stdout,
7140
- nodeExitCode: nodeResult.exitCode
7241
+ const details = {};
7242
+ const dnsResult = await execCommand(
7243
+ "nslookup manage.wix.com 2>&1 | head -10"
7244
+ );
7245
+ details.dnsLookup = {
7246
+ command: "nslookup manage.wix.com",
7247
+ output: dnsResult.stdout || dnsResult.stderr,
7248
+ exitCode: dnsResult.exitCode
7249
+ };
7250
+ const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
7251
+ details.pingTest = {
7252
+ command: "ping -c 2 manage.wix.com",
7253
+ output: pingResult.stdout || pingResult.stderr,
7254
+ exitCode: pingResult.exitCode
7255
+ };
7256
+ const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
7257
+ const curlGatewayResult = await execCommand(
7258
+ `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
7259
+ );
7260
+ details.aiGatewayTest = {
7261
+ url: gatewayUrl,
7262
+ output: curlGatewayResult.stdout,
7263
+ exitCode: curlGatewayResult.exitCode
7264
+ };
7265
+ const serverUrl = config.serverUrl;
7266
+ const curlServerResult = await execCommand(
7267
+ `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
7268
+ );
7269
+ details.backendServerTest = {
7270
+ url: `${serverUrl}/health`,
7271
+ output: curlServerResult.stdout,
7272
+ exitCode: curlServerResult.exitCode
7273
+ };
7274
+ const httpsResult = await execCommand(
7275
+ 'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
7276
+ );
7277
+ details.httpsBaseline = {
7278
+ command: "curl https://www.google.com",
7279
+ output: httpsResult.stdout,
7280
+ exitCode: httpsResult.exitCode
7141
7281
  };
7142
- const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
7282
+ const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
7283
+ const gatewayReachable = curlGatewayResult.exitCode === 0;
7143
7284
  return {
7144
- name: "child-process-spawning",
7145
- passed,
7285
+ name: "network-connectivity",
7286
+ passed: networkWorks && gatewayReachable,
7146
7287
  details,
7147
- error: passed ? void 0 : "Failed to spawn child process",
7288
+ error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
7148
7289
  durationMs: Date.now() - start
7149
7290
  };
7150
7291
  }
7151
- async function testNetworkConnectivity(config) {
7292
+ async function testAiGatewayApiCall(config) {
7152
7293
  const start = Date.now();
7153
- const aiGatewayUrl = config.aiGatewayUrl;
7154
- if (!aiGatewayUrl) {
7294
+ const details = {};
7295
+ const gatewayUrl = config.aiGatewayUrl;
7296
+ const headers = config.aiGatewayHeaders;
7297
+ details.gatewayUrl = gatewayUrl;
7298
+ details.hasHeaders = !!headers;
7299
+ details.headerKeys = headers ? Object.keys(headers) : [];
7300
+ if (!gatewayUrl) {
7155
7301
  return {
7156
- name: "network-connectivity",
7302
+ name: "ai-gateway-api-call",
7157
7303
  passed: false,
7158
- details: { error: "No AI_GATEWAY_URL configured" },
7304
+ details,
7159
7305
  error: "No AI_GATEWAY_URL configured",
7160
7306
  durationMs: Date.now() - start
7161
7307
  };
7162
7308
  }
7163
- const curlResult = await execCommand(
7164
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
7309
+ if (!headers) {
7310
+ return {
7311
+ name: "ai-gateway-api-call",
7312
+ passed: false,
7313
+ details,
7314
+ error: "No AI_GATEWAY_HEADERS configured",
7315
+ durationMs: Date.now() - start
7316
+ };
7317
+ }
7318
+ const headerFlags = Object.entries(headers).map(([k, v]) => `-H "${k}: ${v}"`).join(" ");
7319
+ const requestBody = JSON.stringify({
7320
+ model: "claude-3-5-sonnet-latest",
7321
+ max_tokens: 10,
7322
+ messages: [{ role: "user", content: "Say hi" }]
7323
+ }).replace(/"/g, '\\"');
7324
+ const messagesUrl = `${gatewayUrl}/v1/messages`;
7325
+ const curlCmd = `curl -s --max-time 15 ${headerFlags} -H "Content-Type: application/json" -H "anthropic-version: 2023-06-01" -d "${requestBody}" "${messagesUrl}" 2>&1`;
7326
+ const redactedCmd = curlCmd.replace(/app-secret:[^"]+/g, "app-secret:[REDACTED]").replace(
7327
+ /-H "x-wix-ai-gateway-app-secret: [^"]+"/g,
7328
+ '-H "x-wix-ai-gateway-app-secret: [REDACTED]"'
7165
7329
  );
7166
- const serverUrl = config.serverUrl;
7167
- const serverResult = await execCommand(
7168
- `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
7330
+ details.curlCommand = redactedCmd;
7331
+ console.error("[DIAG] Making actual API call to AI Gateway...");
7332
+ console.error("[DIAG] URL:", messagesUrl);
7333
+ const result = await execCommand(curlCmd, 2e4);
7334
+ details.responseRaw = result.stdout.slice(0, 1500);
7335
+ details.exitCode = result.exitCode;
7336
+ let responseJson = null;
7337
+ try {
7338
+ responseJson = JSON.parse(result.stdout);
7339
+ details.responseParsed = true;
7340
+ } catch {
7341
+ details.responseParsed = false;
7342
+ details.parseError = "Response is not valid JSON";
7343
+ }
7344
+ const isError = result.stdout.includes('"type":"error"') || result.stdout.includes('"error":{') || result.stdout.includes("authentication_error") || result.stdout.includes("permission_error");
7345
+ const isSuccess = result.stdout.includes('"type":"message"') || result.stdout.includes('"content":');
7346
+ details.isError = isError;
7347
+ details.isSuccess = isSuccess;
7348
+ if (isError && responseJson && typeof responseJson === "object") {
7349
+ const errorObj = responseJson;
7350
+ if (errorObj.error && typeof errorObj.error === "object") {
7351
+ const error = errorObj.error;
7352
+ details.errorType = error.type;
7353
+ details.errorMessage = error.message;
7354
+ }
7355
+ }
7356
+ const passed = result.exitCode === 0 && isSuccess && !isError;
7357
+ return {
7358
+ name: "ai-gateway-api-call",
7359
+ passed,
7360
+ details,
7361
+ error: passed ? void 0 : isError ? `API returned error: ${details.errorType || "unknown"} - ${details.errorMessage || result.stdout.slice(0, 200)}` : `API call failed: exit=${result.exitCode}, response=${result.stdout.slice(0, 200)}`,
7362
+ durationMs: Date.now() - start
7363
+ };
7364
+ }
7365
+ async function testChildProcessSpawning() {
7366
+ const start = Date.now();
7367
+ const details = {};
7368
+ const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
7369
+ details.echoTest = {
7370
+ command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
7371
+ output: echoResult.stdout,
7372
+ exitCode: echoResult.exitCode,
7373
+ passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
7374
+ };
7375
+ const nodeResult = await execCommand(
7376
+ 'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
7169
7377
  );
7170
- const details = {
7171
- aiGatewayUrl,
7172
- aiGatewayHttpCode: curlResult.stdout,
7173
- aiGatewayExitCode: curlResult.exitCode,
7174
- serverUrl,
7175
- serverHttpCode: serverResult.stdout,
7176
- serverExitCode: serverResult.exitCode
7378
+ details.nodeTest = {
7379
+ command: 'node -e "console.log(JSON.stringify({...}))"',
7380
+ output: nodeResult.stdout,
7381
+ exitCode: nodeResult.exitCode
7382
+ };
7383
+ const shellResult = await execCommand(
7384
+ 'echo "PID: $$"; pwd; whoami; date; uname -a'
7385
+ );
7386
+ details.shellTest = {
7387
+ command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
7388
+ output: shellResult.stdout,
7389
+ exitCode: shellResult.exitCode
7390
+ };
7391
+ const stderrResult = await execCommand(
7392
+ `node -e "console.error('stderr test')"`
7393
+ );
7394
+ details.stderrTest = {
7395
+ command: `node -e "console.error('stderr test')"`,
7396
+ stderr: stderrResult.stderr,
7397
+ exitCode: stderrResult.exitCode
7177
7398
  };
7178
- const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
7399
+ const exitCodeResult = await execCommand("exit 42");
7400
+ details.exitCodeTest = {
7401
+ command: "exit 42",
7402
+ exitCode: exitCodeResult.exitCode,
7403
+ passed: exitCodeResult.exitCode === 42
7404
+ };
7405
+ const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
7179
7406
  return {
7180
- name: "network-connectivity",
7181
- passed: gatewayReachable,
7407
+ name: "child-process-spawning",
7408
+ passed,
7182
7409
  details,
7183
- error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
7410
+ error: passed ? void 0 : "Echo test failed",
7184
7411
  durationMs: Date.now() - start
7185
7412
  };
7186
7413
  }
7187
7414
  async function testSdkImport() {
7188
7415
  const start = Date.now();
7416
+ const details = {};
7189
7417
  try {
7190
7418
  const sdk = await import("@anthropic-ai/claude-agent-sdk");
7191
- const details = {
7192
- sdkImported: true,
7193
- hasQuery: typeof sdk.query === "function",
7194
- exportedKeys: Object.keys(sdk)
7195
- };
7419
+ details.sdkImported = true;
7420
+ details.exportedKeys = Object.keys(sdk);
7421
+ details.hasQuery = typeof sdk.query === "function";
7422
+ if (typeof sdk.query === "function") {
7423
+ details.queryFunctionExists = true;
7424
+ details.queryFunctionType = typeof sdk.query;
7425
+ }
7196
7426
  return {
7197
7427
  name: "sdk-import",
7198
7428
  passed: true,
@@ -7206,36 +7436,37 @@ async function testSdkImport() {
7206
7436
  passed: false,
7207
7437
  details: {
7208
7438
  sdkImported: false,
7209
- error
7439
+ error,
7440
+ stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
7210
7441
  },
7211
- error: `Failed to import Claude Agent SDK: ${error}`,
7442
+ error: `Failed to import SDK: ${error}`,
7212
7443
  durationMs: Date.now() - start
7213
7444
  };
7214
7445
  }
7215
7446
  }
7216
- async function testFileSystemAccess() {
7447
+ async function testFileSystemWrite() {
7217
7448
  const start = Date.now();
7218
- const testDir = "/tmp/evalforge-diagnostics";
7219
- const testFile = path9.join(testDir, "test.txt");
7449
+ const details = {};
7450
+ const testDir = "/tmp/evalforge-diagnostics-test";
7451
+ const testFile = path9.join(testDir, "test-file.txt");
7452
+ const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
7220
7453
  try {
7221
7454
  if (!fs11.existsSync(testDir)) {
7222
7455
  fs11.mkdirSync(testDir, { recursive: true });
7223
7456
  }
7224
- fs11.writeFileSync(testFile, "diagnostic-test");
7225
- const content = fs11.readFileSync(testFile, "utf8");
7457
+ details.directoryCreated = true;
7458
+ fs11.writeFileSync(testFile, testContent);
7459
+ details.fileWritten = true;
7460
+ const readContent = fs11.readFileSync(testFile, "utf8");
7461
+ details.fileRead = true;
7462
+ details.contentMatches = readContent === testContent;
7463
+ const lsResult = await execCommand(`ls -la "${testDir}"`);
7464
+ details.directoryContents = lsResult.stdout;
7226
7465
  fs11.unlinkSync(testFile);
7227
7466
  fs11.rmdirSync(testDir);
7228
- const details = {
7229
- canCreateDirectory: true,
7230
- canWriteFile: true,
7231
- canReadFile: content === "diagnostic-test",
7232
- testDir,
7233
- cwd: process.cwd(),
7234
- cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
7235
- // First 20 files
7236
- };
7467
+ details.cleanedUp = true;
7237
7468
  return {
7238
- name: "file-system-access",
7469
+ name: "file-system-write",
7239
7470
  passed: true,
7240
7471
  details,
7241
7472
  durationMs: Date.now() - start
@@ -7243,32 +7474,40 @@ async function testFileSystemAccess() {
7243
7474
  } catch (err) {
7244
7475
  const error = err instanceof Error ? err.message : String(err);
7245
7476
  return {
7246
- name: "file-system-access",
7477
+ name: "file-system-write",
7247
7478
  passed: false,
7248
7479
  details: {
7480
+ ...details,
7249
7481
  error,
7250
7482
  testDir,
7251
- cwd: process.cwd()
7483
+ testFile
7252
7484
  },
7253
- error: `File system access failed: ${error}`,
7485
+ error: `File system write failed: ${error}`,
7254
7486
  durationMs: Date.now() - start
7255
7487
  };
7256
7488
  }
7257
7489
  }
7258
7490
  function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
7491
+ const truncatedResult = "summary" in result ? result : {
7492
+ ...result,
7493
+ details: JSON.parse(
7494
+ JSON.stringify(
7495
+ result.details,
7496
+ (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
7497
+ )
7498
+ )
7499
+ };
7259
7500
  const event = {
7260
7501
  evalRunId: evalRunId2,
7261
7502
  scenarioId: "diagnostics",
7262
7503
  scenarioName: "Environment Diagnostics",
7263
7504
  targetId: "system",
7264
- targetName: "System",
7505
+ targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
7265
7506
  stepNumber: 0,
7266
7507
  type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
7267
- outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
7268
- // Limit size
7508
+ outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
7269
7509
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
7270
7510
  isComplete: "summary" in result
7271
- // Complete if it's the full report
7272
7511
  };
7273
7512
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
7274
7513
  if (tracePushUrl) {
@@ -7286,18 +7525,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
7286
7525
  headers,
7287
7526
  body: JSON.stringify([event])
7288
7527
  }).catch((err) => {
7289
- console.error("[DIAGNOSTICS] Failed to push trace event:", err);
7528
+ console.error(
7529
+ "[DIAGNOSTICS] Failed to push trace event to backend:",
7530
+ err
7531
+ );
7290
7532
  });
7291
7533
  }
7292
7534
  }
7293
7535
  async function runDiagnostics(config, evalRunId2) {
7294
7536
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
7295
7537
  const startTime = Date.now();
7296
- console.error("[DIAGNOSTICS] Starting environment diagnostics...");
7538
+ console.error("");
7539
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7540
+ console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
7541
+ console.error("\u2551 (Results sent to backend via trace events) \u2551");
7542
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7543
+ console.error("");
7297
7544
  const tests = [];
7298
- const runTest = async (testFn) => {
7299
- const result = await testFn();
7545
+ const runTest = async (testName, testFn) => {
7546
+ console.error(`[DIAG] Running: ${testName}...`);
7547
+ const result = await safeRunTest(testName, testFn);
7300
7548
  tests.push(result);
7549
+ const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
7550
+ console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
7551
+ console.error("[DIAG] Details:");
7552
+ console.error(JSON.stringify(result.details, null, 2));
7553
+ console.error("");
7554
+ if (!result.passed && result.error) {
7555
+ console.error(`[DIAG] ERROR: ${result.error}`);
7556
+ }
7301
7557
  emitDiagnosticTraceEvent(
7302
7558
  evalRunId2,
7303
7559
  result,
@@ -7305,22 +7561,16 @@ async function runDiagnostics(config, evalRunId2) {
7305
7561
  config.routeHeader,
7306
7562
  config.authToken
7307
7563
  );
7308
- const status = result.passed ? "\u2713" : "\u2717";
7309
- console.error(
7310
- `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
7311
- );
7312
- if (!result.passed && result.error) {
7313
- console.error(`[DIAGNOSTICS] Error: ${result.error}`);
7314
- }
7315
7564
  };
7316
- await runTest(testEnvironmentVariables);
7317
- await runTest(testNodeEnvironment);
7318
- await runTest(testNpmGlobalDirectory);
7319
- await runTest(testClaudeBinary);
7320
- await runTest(testChildProcess);
7321
- await runTest(() => testNetworkConnectivity(config));
7322
- await runTest(testSdkImport);
7323
- await runTest(testFileSystemAccess);
7565
+ await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
7566
+ await runTest("claude-cli-execution", testClaudeExecution);
7567
+ await runTest("environment-dump", testEnvironmentDump);
7568
+ await runTest("file-system-structure", testFileSystemStructure);
7569
+ await runTest("network-connectivity", () => testNetworkConnectivity(config));
7570
+ await runTest("ai-gateway-api-call", () => testAiGatewayApiCall(config));
7571
+ await runTest("child-process-spawning", testChildProcessSpawning);
7572
+ await runTest("sdk-import", testSdkImport);
7573
+ await runTest("file-system-write", testFileSystemWrite);
7324
7574
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
7325
7575
  const totalDurationMs = Date.now() - startTime;
7326
7576
  const report = {
@@ -7341,9 +7591,16 @@ async function runDiagnostics(config, evalRunId2) {
7341
7591
  config.routeHeader,
7342
7592
  config.authToken
7343
7593
  );
7594
+ console.error("");
7595
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7344
7596
  console.error(
7345
- `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
7597
+ `\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
7598
+ 60
7599
+ ) + "\u2551"
7346
7600
  );
7601
+ console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
7602
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7603
+ console.error("");
7347
7604
  return report;
7348
7605
  }
7349
7606