@wix/evalforge-evaluator 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +393 -210
- package/build/index.js.map +3 -3
- package/build/index.mjs +393 -210
- package/build/index.mjs.map +3 -3
- package/build/types/diagnostics.d.ts +6 -10
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -6939,243 +6939,400 @@ import { spawn } from "child_process";
|
|
|
6939
6939
|
import * as fs11 from "fs";
|
|
6940
6940
|
import * as path9 from "path";
|
|
6941
6941
|
import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
|
|
6942
|
-
async function execCommand(command, timeoutMs =
|
|
6942
|
+
async function execCommand(command, timeoutMs = 1e4) {
|
|
6943
6943
|
return new Promise((resolve) => {
|
|
6944
|
-
|
|
6945
|
-
|
|
6946
|
-
|
|
6947
|
-
let stdout = "";
|
|
6948
|
-
let stderr = "";
|
|
6949
|
-
proc2.stdout.on("data", (data) => {
|
|
6950
|
-
stdout += data.toString();
|
|
6951
|
-
});
|
|
6952
|
-
proc2.stderr.on("data", (data) => {
|
|
6953
|
-
stderr += data.toString();
|
|
6954
|
-
});
|
|
6955
|
-
proc2.on("close", (code2) => {
|
|
6956
|
-
resolve({
|
|
6957
|
-
stdout: stdout.trim(),
|
|
6958
|
-
stderr: stderr.trim(),
|
|
6959
|
-
exitCode: code2 ?? -1
|
|
6944
|
+
try {
|
|
6945
|
+
const proc2 = spawn("sh", ["-c", command], {
|
|
6946
|
+
timeout: timeoutMs
|
|
6960
6947
|
});
|
|
6961
|
-
|
|
6962
|
-
|
|
6948
|
+
let stdout = "";
|
|
6949
|
+
let stderr = "";
|
|
6950
|
+
proc2.stdout.on("data", (data) => {
|
|
6951
|
+
stdout += data.toString();
|
|
6952
|
+
});
|
|
6953
|
+
proc2.stderr.on("data", (data) => {
|
|
6954
|
+
stderr += data.toString();
|
|
6955
|
+
});
|
|
6956
|
+
proc2.on("close", (code2) => {
|
|
6957
|
+
resolve({
|
|
6958
|
+
stdout: stdout.trim(),
|
|
6959
|
+
stderr: stderr.trim(),
|
|
6960
|
+
exitCode: code2 ?? -1
|
|
6961
|
+
});
|
|
6962
|
+
});
|
|
6963
|
+
proc2.on("error", (err) => {
|
|
6964
|
+
resolve({
|
|
6965
|
+
stdout: "",
|
|
6966
|
+
stderr: err.message,
|
|
6967
|
+
exitCode: -1
|
|
6968
|
+
});
|
|
6969
|
+
});
|
|
6970
|
+
} catch (err) {
|
|
6963
6971
|
resolve({
|
|
6964
6972
|
stdout: "",
|
|
6965
|
-
stderr: err.message,
|
|
6966
|
-
exitCode: -
|
|
6973
|
+
stderr: err instanceof Error ? err.message : String(err),
|
|
6974
|
+
exitCode: -99
|
|
6967
6975
|
});
|
|
6968
|
-
}
|
|
6976
|
+
}
|
|
6969
6977
|
});
|
|
6970
6978
|
}
|
|
6971
|
-
async function
|
|
6979
|
+
async function safeRunTest(testName, testFn) {
|
|
6980
|
+
const start = Date.now();
|
|
6981
|
+
try {
|
|
6982
|
+
return await testFn();
|
|
6983
|
+
} catch (err) {
|
|
6984
|
+
const error = err instanceof Error ? err.message : String(err);
|
|
6985
|
+
return {
|
|
6986
|
+
name: testName,
|
|
6987
|
+
passed: false,
|
|
6988
|
+
details: {
|
|
6989
|
+
testCrashed: true,
|
|
6990
|
+
error,
|
|
6991
|
+
stack: err instanceof Error ? err.stack : void 0
|
|
6992
|
+
},
|
|
6993
|
+
error: `Test crashed: ${error}`,
|
|
6994
|
+
durationMs: Date.now() - start
|
|
6995
|
+
};
|
|
6996
|
+
}
|
|
6997
|
+
}
|
|
6998
|
+
async function testClaudeBinaryDiscovery() {
|
|
6972
6999
|
const start = Date.now();
|
|
6973
|
-
const
|
|
7000
|
+
const details = {};
|
|
7001
|
+
const npmRootResult = await execCommand("npm root -g");
|
|
7002
|
+
const npmBinResult = await execCommand("npm bin -g");
|
|
7003
|
+
const npmRoot = npmRootResult.stdout;
|
|
7004
|
+
const npmBin = npmBinResult.stdout;
|
|
7005
|
+
details.npmRoot = npmRoot;
|
|
7006
|
+
details.npmBin = npmBin;
|
|
7007
|
+
const evaluatorBinPath = path9.join(
|
|
7008
|
+
npmRoot,
|
|
7009
|
+
"@wix",
|
|
7010
|
+
"evalforge-evaluator",
|
|
7011
|
+
"node_modules",
|
|
7012
|
+
".bin"
|
|
7013
|
+
);
|
|
7014
|
+
details.evaluatorBinPath = evaluatorBinPath;
|
|
7015
|
+
const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
|
|
7016
|
+
details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
|
|
7017
|
+
details.lsBinExitCode = lsBinResult.exitCode;
|
|
7018
|
+
const claudePath = path9.join(evaluatorBinPath, "claude");
|
|
7019
|
+
let claudeExists = false;
|
|
7020
|
+
try {
|
|
7021
|
+
claudeExists = fs11.existsSync(claudePath);
|
|
7022
|
+
} catch {
|
|
7023
|
+
claudeExists = false;
|
|
7024
|
+
}
|
|
7025
|
+
details.claudePath = claudePath;
|
|
7026
|
+
details.claudeExists = claudeExists;
|
|
7027
|
+
if (claudeExists) {
|
|
7028
|
+
const readlinkResult = await execCommand(
|
|
7029
|
+
`readlink -f "${claudePath}" 2>&1`
|
|
7030
|
+
);
|
|
7031
|
+
details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
|
|
7032
|
+
const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
|
|
7033
|
+
details.claudeStat = statResult.stdout || statResult.stderr;
|
|
7034
|
+
const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
|
|
7035
|
+
details.claudeFileInfo = lsClaudeResult.stdout;
|
|
7036
|
+
}
|
|
7037
|
+
const whichResult = await execCommand("which claude 2>&1");
|
|
7038
|
+
details.whichClaude = whichResult.stdout || "(not in PATH)";
|
|
7039
|
+
details.whichExitCode = whichResult.exitCode;
|
|
7040
|
+
const currentPath = process.env.PATH || "";
|
|
7041
|
+
details.currentPATH = currentPath.split(":");
|
|
7042
|
+
details.pathLength = currentPath.split(":").length;
|
|
7043
|
+
const passed = claudeExists || whichResult.exitCode === 0;
|
|
7044
|
+
return {
|
|
7045
|
+
name: "claude-binary-discovery",
|
|
7046
|
+
passed,
|
|
7047
|
+
details,
|
|
7048
|
+
error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
|
|
7049
|
+
durationMs: Date.now() - start
|
|
7050
|
+
};
|
|
7051
|
+
}
|
|
7052
|
+
async function testClaudeExecution() {
|
|
7053
|
+
const start = Date.now();
|
|
7054
|
+
const details = {};
|
|
7055
|
+
const npmRootResult = await execCommand("npm root -g");
|
|
7056
|
+
const npmRoot = npmRootResult.stdout;
|
|
7057
|
+
const claudePath = path9.join(
|
|
7058
|
+
npmRoot,
|
|
7059
|
+
"@wix",
|
|
7060
|
+
"evalforge-evaluator",
|
|
7061
|
+
"node_modules",
|
|
7062
|
+
".bin",
|
|
7063
|
+
"claude"
|
|
7064
|
+
);
|
|
7065
|
+
details.claudePath = claudePath;
|
|
7066
|
+
const versionResult = await execCommand(
|
|
7067
|
+
`"${claudePath}" --version 2>&1`,
|
|
7068
|
+
15e3
|
|
7069
|
+
);
|
|
7070
|
+
details.versionCommand = {
|
|
7071
|
+
command: `"${claudePath}" --version`,
|
|
7072
|
+
stdout: versionResult.stdout,
|
|
7073
|
+
stderr: versionResult.stderr,
|
|
7074
|
+
exitCode: versionResult.exitCode
|
|
7075
|
+
};
|
|
7076
|
+
const helpResult = await execCommand(
|
|
7077
|
+
`"${claudePath}" --help 2>&1 | head -50`,
|
|
7078
|
+
15e3
|
|
7079
|
+
);
|
|
7080
|
+
details.helpCommand = {
|
|
7081
|
+
command: `"${claudePath}" --help | head -50`,
|
|
7082
|
+
stdout: helpResult.stdout.slice(0, 1500),
|
|
7083
|
+
stderr: helpResult.stderr.slice(0, 500),
|
|
7084
|
+
exitCode: helpResult.exitCode
|
|
7085
|
+
};
|
|
7086
|
+
const whichClaudeResult = await execCommand("which claude 2>&1");
|
|
7087
|
+
if (whichClaudeResult.exitCode === 0) {
|
|
7088
|
+
const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
|
|
7089
|
+
details.pathVersionCommand = {
|
|
7090
|
+
whichClaude: whichClaudeResult.stdout,
|
|
7091
|
+
stdout: pathVersionResult.stdout,
|
|
7092
|
+
stderr: pathVersionResult.stderr,
|
|
7093
|
+
exitCode: pathVersionResult.exitCode
|
|
7094
|
+
};
|
|
7095
|
+
}
|
|
7096
|
+
const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
|
|
7097
|
+
return {
|
|
7098
|
+
name: "claude-cli-execution",
|
|
7099
|
+
passed,
|
|
7100
|
+
details,
|
|
7101
|
+
error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
|
|
7102
|
+
durationMs: Date.now() - start
|
|
7103
|
+
};
|
|
7104
|
+
}
|
|
7105
|
+
async function testEnvironmentDump() {
|
|
7106
|
+
const start = Date.now();
|
|
7107
|
+
const details = {};
|
|
7108
|
+
const importantVars = [
|
|
6974
7109
|
"PATH",
|
|
6975
7110
|
"HOME",
|
|
6976
7111
|
"USER",
|
|
6977
7112
|
"SHELL",
|
|
6978
7113
|
"NODE_ENV",
|
|
7114
|
+
"PWD",
|
|
6979
7115
|
"EVAL_SERVER_URL",
|
|
6980
7116
|
"AI_GATEWAY_URL",
|
|
7117
|
+
"TRACE_PUSH_URL",
|
|
7118
|
+
"EVAL_AUTH_TOKEN",
|
|
6981
7119
|
"ANTHROPIC_API_KEY",
|
|
6982
7120
|
"ANTHROPIC_AUTH_TOKEN",
|
|
6983
7121
|
"ANTHROPIC_BASE_URL",
|
|
6984
7122
|
"ANTHROPIC_CUSTOM_HEADERS"
|
|
6985
7123
|
];
|
|
6986
|
-
const
|
|
6987
|
-
const
|
|
6988
|
-
for (const key of envVars) {
|
|
7124
|
+
const capturedVars = {};
|
|
7125
|
+
for (const key of importantVars) {
|
|
6989
7126
|
const value = process.env[key];
|
|
6990
7127
|
if (value) {
|
|
6991
7128
|
if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
|
|
6992
|
-
|
|
6993
|
-
} else if (key === "PATH") {
|
|
6994
|
-
details[key] = value.split(":");
|
|
7129
|
+
capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
|
|
6995
7130
|
} else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
|
|
6996
|
-
|
|
7131
|
+
capturedVars[key] = value.split("\n").map((h) => {
|
|
7132
|
+
const [name2, val] = h.split(":");
|
|
7133
|
+
return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
|
|
7134
|
+
}).join(" | ");
|
|
7135
|
+
} else if (key === "PATH") {
|
|
7136
|
+
const parts = value.split(":");
|
|
7137
|
+
capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
|
|
6997
7138
|
} else {
|
|
6998
|
-
|
|
7139
|
+
capturedVars[key] = value;
|
|
6999
7140
|
}
|
|
7000
7141
|
} else {
|
|
7001
|
-
|
|
7142
|
+
capturedVars[key] = "(NOT SET)";
|
|
7002
7143
|
}
|
|
7003
7144
|
}
|
|
7004
|
-
details.
|
|
7005
|
-
|
|
7006
|
-
|
|
7007
|
-
|
|
7008
|
-
|
|
7009
|
-
|
|
7010
|
-
durationMs: Date.now() - start
|
|
7011
|
-
};
|
|
7012
|
-
}
|
|
7013
|
-
async function testNodeEnvironment() {
|
|
7014
|
-
const start = Date.now();
|
|
7015
|
-
const details = {
|
|
7016
|
-
nodeVersion: process.version,
|
|
7145
|
+
details.importantVars = capturedVars;
|
|
7146
|
+
const envResult = await execCommand("env | sort | head -50");
|
|
7147
|
+
details.envCommandOutput = envResult.stdout;
|
|
7148
|
+
details.envExitCode = envResult.exitCode;
|
|
7149
|
+
details.nodeInfo = {
|
|
7150
|
+
version: process.version,
|
|
7017
7151
|
platform: process.platform,
|
|
7018
7152
|
arch: process.arch,
|
|
7019
|
-
cwd: process.cwd(),
|
|
7020
7153
|
pid: process.pid,
|
|
7021
|
-
|
|
7022
|
-
memoryUsage: process.memoryUsage(),
|
|
7154
|
+
cwd: process.cwd(),
|
|
7023
7155
|
execPath: process.execPath
|
|
7024
7156
|
};
|
|
7025
7157
|
return {
|
|
7026
|
-
name: "
|
|
7158
|
+
name: "environment-dump",
|
|
7027
7159
|
passed: true,
|
|
7160
|
+
// Info test, always passes
|
|
7028
7161
|
details,
|
|
7029
7162
|
durationMs: Date.now() - start
|
|
7030
7163
|
};
|
|
7031
7164
|
}
|
|
7032
|
-
async function
|
|
7165
|
+
async function testFileSystemStructure() {
|
|
7033
7166
|
const start = Date.now();
|
|
7167
|
+
const details = {};
|
|
7034
7168
|
const npmRootResult = await execCommand("npm root -g");
|
|
7035
|
-
const npmBinResult = await execCommand("npm bin -g");
|
|
7036
7169
|
const npmRoot = npmRootResult.stdout;
|
|
7037
|
-
const
|
|
7038
|
-
|
|
7039
|
-
|
|
7040
|
-
|
|
7041
|
-
npmRootExitCode: npmRootResult.exitCode,
|
|
7042
|
-
npmBinExitCode: npmBinResult.exitCode
|
|
7170
|
+
const lsCwdResult = await execCommand("ls -la");
|
|
7171
|
+
details.currentDirectory = {
|
|
7172
|
+
path: process.cwd(),
|
|
7173
|
+
contents: lsCwdResult.stdout
|
|
7043
7174
|
};
|
|
7044
|
-
|
|
7045
|
-
|
|
7046
|
-
|
|
7047
|
-
|
|
7048
|
-
|
|
7049
|
-
|
|
7050
|
-
const files = fs11.readdirSync(evaluatorPath);
|
|
7051
|
-
details.evaluatorFiles = files;
|
|
7052
|
-
} catch {
|
|
7053
|
-
details.evaluatorFiles = "Failed to list files";
|
|
7054
|
-
}
|
|
7055
|
-
}
|
|
7056
|
-
}
|
|
7057
|
-
if (npmRoot) {
|
|
7058
|
-
const sdkPath = path9.join(
|
|
7059
|
-
npmRoot,
|
|
7060
|
-
"@wix",
|
|
7061
|
-
"evalforge-evaluator",
|
|
7062
|
-
"node_modules",
|
|
7063
|
-
"@anthropic-ai",
|
|
7064
|
-
"claude-agent-sdk"
|
|
7065
|
-
);
|
|
7066
|
-
const sdkExists = fs11.existsSync(sdkPath);
|
|
7067
|
-
details.claudeAgentSdkInstalled = sdkExists;
|
|
7068
|
-
}
|
|
7069
|
-
const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
|
|
7070
|
-
return {
|
|
7071
|
-
name: "npm-global-directory",
|
|
7072
|
-
passed,
|
|
7073
|
-
details,
|
|
7074
|
-
error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
|
|
7075
|
-
durationMs: Date.now() - start
|
|
7175
|
+
const lsNpmRootResult = await execCommand(
|
|
7176
|
+
`ls -la "${npmRoot}" 2>&1 | head -30`
|
|
7177
|
+
);
|
|
7178
|
+
details.npmGlobalRoot = {
|
|
7179
|
+
path: npmRoot,
|
|
7180
|
+
contents: lsNpmRootResult.stdout
|
|
7076
7181
|
};
|
|
7077
|
-
|
|
7078
|
-
|
|
7079
|
-
|
|
7080
|
-
|
|
7081
|
-
|
|
7082
|
-
|
|
7083
|
-
const
|
|
7084
|
-
|
|
7085
|
-
|
|
7086
|
-
|
|
7087
|
-
|
|
7088
|
-
}
|
|
7089
|
-
const
|
|
7090
|
-
|
|
7091
|
-
|
|
7092
|
-
|
|
7093
|
-
|
|
7094
|
-
|
|
7095
|
-
|
|
7182
|
+
const wixPath = path9.join(npmRoot, "@wix");
|
|
7183
|
+
const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
|
|
7184
|
+
details.wixPackages = {
|
|
7185
|
+
path: wixPath,
|
|
7186
|
+
contents: lsWixResult.stdout
|
|
7187
|
+
};
|
|
7188
|
+
const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
|
|
7189
|
+
const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
|
|
7190
|
+
details.evaluatorDir = {
|
|
7191
|
+
path: evaluatorPath,
|
|
7192
|
+
contents: lsEvaluatorResult.stdout
|
|
7193
|
+
};
|
|
7194
|
+
const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
|
|
7195
|
+
const lsNodeModulesResult = await execCommand(
|
|
7196
|
+
`ls "${nodeModulesPath}" 2>&1 | head -30`
|
|
7197
|
+
);
|
|
7198
|
+
details.evaluatorNodeModules = {
|
|
7199
|
+
path: nodeModulesPath,
|
|
7200
|
+
contents: lsNodeModulesResult.stdout
|
|
7201
|
+
};
|
|
7202
|
+
const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
|
|
7203
|
+
const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
|
|
7204
|
+
details.anthropicPackages = {
|
|
7205
|
+
path: anthropicPath,
|
|
7206
|
+
contents: lsAnthropicResult.stdout
|
|
7207
|
+
};
|
|
7208
|
+
const binPath = path9.join(nodeModulesPath, ".bin");
|
|
7209
|
+
const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
|
|
7210
|
+
details.binDirectory = {
|
|
7211
|
+
path: binPath,
|
|
7212
|
+
contents: lsBinResult.stdout
|
|
7096
7213
|
};
|
|
7097
|
-
const pathDirs = (process.env.PATH || "").split(":");
|
|
7098
|
-
const claudeFoundIn = [];
|
|
7099
|
-
for (const dir of pathDirs) {
|
|
7100
|
-
const claudePath = path9.join(dir, "claude");
|
|
7101
|
-
if (fs11.existsSync(claudePath)) {
|
|
7102
|
-
claudeFoundIn.push(dir);
|
|
7103
|
-
}
|
|
7104
|
-
}
|
|
7105
|
-
details.claudeFoundInPathDirs = claudeFoundIn;
|
|
7106
|
-
const passed = whichResult.exitCode === 0 || claudeInNpmBin;
|
|
7107
7214
|
return {
|
|
7108
|
-
name: "
|
|
7109
|
-
passed,
|
|
7215
|
+
name: "file-system-structure",
|
|
7216
|
+
passed: true,
|
|
7217
|
+
// Info test, always passes
|
|
7110
7218
|
details,
|
|
7111
|
-
error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
|
|
7112
7219
|
durationMs: Date.now() - start
|
|
7113
7220
|
};
|
|
7114
7221
|
}
|
|
7115
|
-
async function
|
|
7222
|
+
async function testNetworkConnectivity(config) {
|
|
7116
7223
|
const start = Date.now();
|
|
7117
|
-
const
|
|
7118
|
-
const
|
|
7119
|
-
|
|
7120
|
-
|
|
7121
|
-
|
|
7122
|
-
|
|
7123
|
-
|
|
7224
|
+
const details = {};
|
|
7225
|
+
const dnsResult = await execCommand(
|
|
7226
|
+
"nslookup manage.wix.com 2>&1 | head -10"
|
|
7227
|
+
);
|
|
7228
|
+
details.dnsLookup = {
|
|
7229
|
+
command: "nslookup manage.wix.com",
|
|
7230
|
+
output: dnsResult.stdout || dnsResult.stderr,
|
|
7231
|
+
exitCode: dnsResult.exitCode
|
|
7124
7232
|
};
|
|
7125
|
-
const
|
|
7233
|
+
const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
|
|
7234
|
+
details.pingTest = {
|
|
7235
|
+
command: "ping -c 2 manage.wix.com",
|
|
7236
|
+
output: pingResult.stdout || pingResult.stderr,
|
|
7237
|
+
exitCode: pingResult.exitCode
|
|
7238
|
+
};
|
|
7239
|
+
const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
|
|
7240
|
+
const curlGatewayResult = await execCommand(
|
|
7241
|
+
`curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
|
|
7242
|
+
);
|
|
7243
|
+
details.aiGatewayTest = {
|
|
7244
|
+
url: gatewayUrl,
|
|
7245
|
+
output: curlGatewayResult.stdout,
|
|
7246
|
+
exitCode: curlGatewayResult.exitCode
|
|
7247
|
+
};
|
|
7248
|
+
const serverUrl = config.serverUrl;
|
|
7249
|
+
const curlServerResult = await execCommand(
|
|
7250
|
+
`curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
|
|
7251
|
+
);
|
|
7252
|
+
details.backendServerTest = {
|
|
7253
|
+
url: `${serverUrl}/health`,
|
|
7254
|
+
output: curlServerResult.stdout,
|
|
7255
|
+
exitCode: curlServerResult.exitCode
|
|
7256
|
+
};
|
|
7257
|
+
const httpsResult = await execCommand(
|
|
7258
|
+
'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
|
|
7259
|
+
);
|
|
7260
|
+
details.httpsBaseline = {
|
|
7261
|
+
command: "curl https://www.google.com",
|
|
7262
|
+
output: httpsResult.stdout,
|
|
7263
|
+
exitCode: httpsResult.exitCode
|
|
7264
|
+
};
|
|
7265
|
+
const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
|
|
7266
|
+
const gatewayReachable = curlGatewayResult.exitCode === 0;
|
|
7126
7267
|
return {
|
|
7127
|
-
name: "
|
|
7128
|
-
passed,
|
|
7268
|
+
name: "network-connectivity",
|
|
7269
|
+
passed: networkWorks && gatewayReachable,
|
|
7129
7270
|
details,
|
|
7130
|
-
error:
|
|
7271
|
+
error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
|
|
7131
7272
|
durationMs: Date.now() - start
|
|
7132
7273
|
};
|
|
7133
7274
|
}
|
|
7134
|
-
async function
|
|
7275
|
+
async function testChildProcessSpawning() {
|
|
7135
7276
|
const start = Date.now();
|
|
7136
|
-
const
|
|
7137
|
-
|
|
7138
|
-
|
|
7139
|
-
|
|
7140
|
-
|
|
7141
|
-
|
|
7142
|
-
|
|
7143
|
-
|
|
7144
|
-
|
|
7145
|
-
|
|
7146
|
-
const curlResult = await execCommand(
|
|
7147
|
-
`curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
|
|
7277
|
+
const details = {};
|
|
7278
|
+
const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
|
|
7279
|
+
details.echoTest = {
|
|
7280
|
+
command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
|
|
7281
|
+
output: echoResult.stdout,
|
|
7282
|
+
exitCode: echoResult.exitCode,
|
|
7283
|
+
passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
|
|
7284
|
+
};
|
|
7285
|
+
const nodeResult = await execCommand(
|
|
7286
|
+
'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
|
|
7148
7287
|
);
|
|
7149
|
-
|
|
7150
|
-
|
|
7151
|
-
|
|
7288
|
+
details.nodeTest = {
|
|
7289
|
+
command: 'node -e "console.log(JSON.stringify({...}))"',
|
|
7290
|
+
output: nodeResult.stdout,
|
|
7291
|
+
exitCode: nodeResult.exitCode
|
|
7292
|
+
};
|
|
7293
|
+
const shellResult = await execCommand(
|
|
7294
|
+
'echo "PID: $$"; pwd; whoami; date; uname -a'
|
|
7152
7295
|
);
|
|
7153
|
-
|
|
7154
|
-
|
|
7155
|
-
|
|
7156
|
-
|
|
7157
|
-
serverUrl,
|
|
7158
|
-
serverHttpCode: serverResult.stdout,
|
|
7159
|
-
serverExitCode: serverResult.exitCode
|
|
7296
|
+
details.shellTest = {
|
|
7297
|
+
command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
|
|
7298
|
+
output: shellResult.stdout,
|
|
7299
|
+
exitCode: shellResult.exitCode
|
|
7160
7300
|
};
|
|
7161
|
-
const
|
|
7301
|
+
const stderrResult = await execCommand(
|
|
7302
|
+
`node -e "console.error('stderr test')"`
|
|
7303
|
+
);
|
|
7304
|
+
details.stderrTest = {
|
|
7305
|
+
command: `node -e "console.error('stderr test')"`,
|
|
7306
|
+
stderr: stderrResult.stderr,
|
|
7307
|
+
exitCode: stderrResult.exitCode
|
|
7308
|
+
};
|
|
7309
|
+
const exitCodeResult = await execCommand("exit 42");
|
|
7310
|
+
details.exitCodeTest = {
|
|
7311
|
+
command: "exit 42",
|
|
7312
|
+
exitCode: exitCodeResult.exitCode,
|
|
7313
|
+
passed: exitCodeResult.exitCode === 42
|
|
7314
|
+
};
|
|
7315
|
+
const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
|
|
7162
7316
|
return {
|
|
7163
|
-
name: "
|
|
7164
|
-
passed
|
|
7317
|
+
name: "child-process-spawning",
|
|
7318
|
+
passed,
|
|
7165
7319
|
details,
|
|
7166
|
-
error:
|
|
7320
|
+
error: passed ? void 0 : "Echo test failed",
|
|
7167
7321
|
durationMs: Date.now() - start
|
|
7168
7322
|
};
|
|
7169
7323
|
}
|
|
7170
7324
|
async function testSdkImport() {
|
|
7171
7325
|
const start = Date.now();
|
|
7326
|
+
const details = {};
|
|
7172
7327
|
try {
|
|
7173
7328
|
const sdk = await import("@anthropic-ai/claude-agent-sdk");
|
|
7174
|
-
|
|
7175
|
-
|
|
7176
|
-
|
|
7177
|
-
|
|
7178
|
-
|
|
7329
|
+
details.sdkImported = true;
|
|
7330
|
+
details.exportedKeys = Object.keys(sdk);
|
|
7331
|
+
details.hasQuery = typeof sdk.query === "function";
|
|
7332
|
+
if (typeof sdk.query === "function") {
|
|
7333
|
+
details.queryFunctionExists = true;
|
|
7334
|
+
details.queryFunctionType = typeof sdk.query;
|
|
7335
|
+
}
|
|
7179
7336
|
return {
|
|
7180
7337
|
name: "sdk-import",
|
|
7181
7338
|
passed: true,
|
|
@@ -7189,36 +7346,37 @@ async function testSdkImport() {
|
|
|
7189
7346
|
passed: false,
|
|
7190
7347
|
details: {
|
|
7191
7348
|
sdkImported: false,
|
|
7192
|
-
error
|
|
7349
|
+
error,
|
|
7350
|
+
stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
|
|
7193
7351
|
},
|
|
7194
|
-
error: `Failed to import
|
|
7352
|
+
error: `Failed to import SDK: ${error}`,
|
|
7195
7353
|
durationMs: Date.now() - start
|
|
7196
7354
|
};
|
|
7197
7355
|
}
|
|
7198
7356
|
}
|
|
7199
|
-
async function
|
|
7357
|
+
async function testFileSystemWrite() {
|
|
7200
7358
|
const start = Date.now();
|
|
7201
|
-
const
|
|
7202
|
-
const
|
|
7359
|
+
const details = {};
|
|
7360
|
+
const testDir = "/tmp/evalforge-diagnostics-test";
|
|
7361
|
+
const testFile = path9.join(testDir, "test-file.txt");
|
|
7362
|
+
const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
|
|
7203
7363
|
try {
|
|
7204
7364
|
if (!fs11.existsSync(testDir)) {
|
|
7205
7365
|
fs11.mkdirSync(testDir, { recursive: true });
|
|
7206
7366
|
}
|
|
7207
|
-
|
|
7208
|
-
|
|
7367
|
+
details.directoryCreated = true;
|
|
7368
|
+
fs11.writeFileSync(testFile, testContent);
|
|
7369
|
+
details.fileWritten = true;
|
|
7370
|
+
const readContent = fs11.readFileSync(testFile, "utf8");
|
|
7371
|
+
details.fileRead = true;
|
|
7372
|
+
details.contentMatches = readContent === testContent;
|
|
7373
|
+
const lsResult = await execCommand(`ls -la "${testDir}"`);
|
|
7374
|
+
details.directoryContents = lsResult.stdout;
|
|
7209
7375
|
fs11.unlinkSync(testFile);
|
|
7210
7376
|
fs11.rmdirSync(testDir);
|
|
7211
|
-
|
|
7212
|
-
canCreateDirectory: true,
|
|
7213
|
-
canWriteFile: true,
|
|
7214
|
-
canReadFile: content === "diagnostic-test",
|
|
7215
|
-
testDir,
|
|
7216
|
-
cwd: process.cwd(),
|
|
7217
|
-
cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
|
|
7218
|
-
// First 20 files
|
|
7219
|
-
};
|
|
7377
|
+
details.cleanedUp = true;
|
|
7220
7378
|
return {
|
|
7221
|
-
name: "file-system-
|
|
7379
|
+
name: "file-system-write",
|
|
7222
7380
|
passed: true,
|
|
7223
7381
|
details,
|
|
7224
7382
|
durationMs: Date.now() - start
|
|
@@ -7226,32 +7384,40 @@ async function testFileSystemAccess() {
|
|
|
7226
7384
|
} catch (err) {
|
|
7227
7385
|
const error = err instanceof Error ? err.message : String(err);
|
|
7228
7386
|
return {
|
|
7229
|
-
name: "file-system-
|
|
7387
|
+
name: "file-system-write",
|
|
7230
7388
|
passed: false,
|
|
7231
7389
|
details: {
|
|
7390
|
+
...details,
|
|
7232
7391
|
error,
|
|
7233
7392
|
testDir,
|
|
7234
|
-
|
|
7393
|
+
testFile
|
|
7235
7394
|
},
|
|
7236
|
-
error: `File system
|
|
7395
|
+
error: `File system write failed: ${error}`,
|
|
7237
7396
|
durationMs: Date.now() - start
|
|
7238
7397
|
};
|
|
7239
7398
|
}
|
|
7240
7399
|
}
|
|
7241
7400
|
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
|
|
7401
|
+
const truncatedResult = "summary" in result ? result : {
|
|
7402
|
+
...result,
|
|
7403
|
+
details: JSON.parse(
|
|
7404
|
+
JSON.stringify(
|
|
7405
|
+
result.details,
|
|
7406
|
+
(_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
|
|
7407
|
+
)
|
|
7408
|
+
)
|
|
7409
|
+
};
|
|
7242
7410
|
const event = {
|
|
7243
7411
|
evalRunId: evalRunId2,
|
|
7244
7412
|
scenarioId: "diagnostics",
|
|
7245
7413
|
scenarioName: "Environment Diagnostics",
|
|
7246
7414
|
targetId: "system",
|
|
7247
|
-
targetName: "
|
|
7415
|
+
targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
|
|
7248
7416
|
stepNumber: 0,
|
|
7249
7417
|
type: LiveTraceEventType2.DIAGNOSTIC,
|
|
7250
|
-
outputPreview: JSON.stringify(
|
|
7251
|
-
// Limit size
|
|
7418
|
+
outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
|
|
7252
7419
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7253
7420
|
isComplete: "summary" in result
|
|
7254
|
-
// Complete if it's the full report
|
|
7255
7421
|
};
|
|
7256
7422
|
console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
|
|
7257
7423
|
if (tracePushUrl) {
|
|
@@ -7269,18 +7435,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
|
|
|
7269
7435
|
headers,
|
|
7270
7436
|
body: JSON.stringify([event])
|
|
7271
7437
|
}).catch((err) => {
|
|
7272
|
-
console.error(
|
|
7438
|
+
console.error(
|
|
7439
|
+
"[DIAGNOSTICS] Failed to push trace event to backend:",
|
|
7440
|
+
err
|
|
7441
|
+
);
|
|
7273
7442
|
});
|
|
7274
7443
|
}
|
|
7275
7444
|
}
|
|
7276
7445
|
async function runDiagnostics(config, evalRunId2) {
|
|
7277
7446
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7278
7447
|
const startTime = Date.now();
|
|
7279
|
-
console.error("
|
|
7448
|
+
console.error("");
|
|
7449
|
+
console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
7450
|
+
console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
|
|
7451
|
+
console.error("\u2551 (Results sent to backend via trace events) \u2551");
|
|
7452
|
+
console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
7453
|
+
console.error("");
|
|
7280
7454
|
const tests = [];
|
|
7281
|
-
const runTest = async (testFn) => {
|
|
7282
|
-
|
|
7455
|
+
const runTest = async (testName, testFn) => {
|
|
7456
|
+
console.error(`[DIAG] Running: ${testName}...`);
|
|
7457
|
+
const result = await safeRunTest(testName, testFn);
|
|
7283
7458
|
tests.push(result);
|
|
7459
|
+
const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
|
|
7460
|
+
console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
|
|
7461
|
+
console.error("[DIAG] Details:");
|
|
7462
|
+
console.error(JSON.stringify(result.details, null, 2));
|
|
7463
|
+
console.error("");
|
|
7464
|
+
if (!result.passed && result.error) {
|
|
7465
|
+
console.error(`[DIAG] ERROR: ${result.error}`);
|
|
7466
|
+
}
|
|
7284
7467
|
emitDiagnosticTraceEvent(
|
|
7285
7468
|
evalRunId2,
|
|
7286
7469
|
result,
|
|
@@ -7288,22 +7471,15 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7288
7471
|
config.routeHeader,
|
|
7289
7472
|
config.authToken
|
|
7290
7473
|
);
|
|
7291
|
-
const status = result.passed ? "\u2713" : "\u2717";
|
|
7292
|
-
console.error(
|
|
7293
|
-
`[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
|
|
7294
|
-
);
|
|
7295
|
-
if (!result.passed && result.error) {
|
|
7296
|
-
console.error(`[DIAGNOSTICS] Error: ${result.error}`);
|
|
7297
|
-
}
|
|
7298
7474
|
};
|
|
7299
|
-
await runTest(
|
|
7300
|
-
await runTest(
|
|
7301
|
-
await runTest(
|
|
7302
|
-
await runTest(
|
|
7303
|
-
await runTest(
|
|
7304
|
-
await runTest(
|
|
7305
|
-
await runTest(testSdkImport);
|
|
7306
|
-
await runTest(
|
|
7475
|
+
await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
|
|
7476
|
+
await runTest("claude-cli-execution", testClaudeExecution);
|
|
7477
|
+
await runTest("environment-dump", testEnvironmentDump);
|
|
7478
|
+
await runTest("file-system-structure", testFileSystemStructure);
|
|
7479
|
+
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7480
|
+
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7481
|
+
await runTest("sdk-import", testSdkImport);
|
|
7482
|
+
await runTest("file-system-write", testFileSystemWrite);
|
|
7307
7483
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7308
7484
|
const totalDurationMs = Date.now() - startTime;
|
|
7309
7485
|
const report = {
|
|
@@ -7324,9 +7500,16 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7324
7500
|
config.routeHeader,
|
|
7325
7501
|
config.authToken
|
|
7326
7502
|
);
|
|
7503
|
+
console.error("");
|
|
7504
|
+
console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
7327
7505
|
console.error(
|
|
7328
|
-
|
|
7506
|
+
`\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
|
|
7507
|
+
60
|
|
7508
|
+
) + "\u2551"
|
|
7329
7509
|
);
|
|
7510
|
+
console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
|
|
7511
|
+
console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
7512
|
+
console.error("");
|
|
7330
7513
|
return report;
|
|
7331
7514
|
}
|
|
7332
7515
|
|