@wix/evalforge-evaluator 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +393 -210
- package/build/index.js.map +3 -3
- package/build/index.mjs +393 -210
- package/build/index.mjs.map +3 -3
- package/build/types/diagnostics.d.ts +6 -10
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6956,243 +6956,400 @@ var import_child_process = require("child_process");
|
|
|
6956
6956
|
var fs11 = __toESM(require("fs"));
|
|
6957
6957
|
var path9 = __toESM(require("path"));
|
|
6958
6958
|
var import_evalforge_types4 = require("@wix/evalforge-types");
|
|
6959
|
-
async function execCommand(command, timeoutMs =
|
|
6959
|
+
async function execCommand(command, timeoutMs = 1e4) {
|
|
6960
6960
|
return new Promise((resolve) => {
|
|
6961
|
-
|
|
6962
|
-
|
|
6963
|
-
|
|
6964
|
-
let stdout = "";
|
|
6965
|
-
let stderr = "";
|
|
6966
|
-
proc2.stdout.on("data", (data) => {
|
|
6967
|
-
stdout += data.toString();
|
|
6968
|
-
});
|
|
6969
|
-
proc2.stderr.on("data", (data) => {
|
|
6970
|
-
stderr += data.toString();
|
|
6971
|
-
});
|
|
6972
|
-
proc2.on("close", (code2) => {
|
|
6973
|
-
resolve({
|
|
6974
|
-
stdout: stdout.trim(),
|
|
6975
|
-
stderr: stderr.trim(),
|
|
6976
|
-
exitCode: code2 ?? -1
|
|
6961
|
+
try {
|
|
6962
|
+
const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
|
|
6963
|
+
timeout: timeoutMs
|
|
6977
6964
|
});
|
|
6978
|
-
|
|
6979
|
-
|
|
6965
|
+
let stdout = "";
|
|
6966
|
+
let stderr = "";
|
|
6967
|
+
proc2.stdout.on("data", (data) => {
|
|
6968
|
+
stdout += data.toString();
|
|
6969
|
+
});
|
|
6970
|
+
proc2.stderr.on("data", (data) => {
|
|
6971
|
+
stderr += data.toString();
|
|
6972
|
+
});
|
|
6973
|
+
proc2.on("close", (code2) => {
|
|
6974
|
+
resolve({
|
|
6975
|
+
stdout: stdout.trim(),
|
|
6976
|
+
stderr: stderr.trim(),
|
|
6977
|
+
exitCode: code2 ?? -1
|
|
6978
|
+
});
|
|
6979
|
+
});
|
|
6980
|
+
proc2.on("error", (err) => {
|
|
6981
|
+
resolve({
|
|
6982
|
+
stdout: "",
|
|
6983
|
+
stderr: err.message,
|
|
6984
|
+
exitCode: -1
|
|
6985
|
+
});
|
|
6986
|
+
});
|
|
6987
|
+
} catch (err) {
|
|
6980
6988
|
resolve({
|
|
6981
6989
|
stdout: "",
|
|
6982
|
-
stderr: err.message,
|
|
6983
|
-
exitCode: -
|
|
6990
|
+
stderr: err instanceof Error ? err.message : String(err),
|
|
6991
|
+
exitCode: -99
|
|
6984
6992
|
});
|
|
6985
|
-
}
|
|
6993
|
+
}
|
|
6986
6994
|
});
|
|
6987
6995
|
}
|
|
6988
|
-
async function
|
|
6996
|
+
async function safeRunTest(testName, testFn) {
|
|
6997
|
+
const start = Date.now();
|
|
6998
|
+
try {
|
|
6999
|
+
return await testFn();
|
|
7000
|
+
} catch (err) {
|
|
7001
|
+
const error = err instanceof Error ? err.message : String(err);
|
|
7002
|
+
return {
|
|
7003
|
+
name: testName,
|
|
7004
|
+
passed: false,
|
|
7005
|
+
details: {
|
|
7006
|
+
testCrashed: true,
|
|
7007
|
+
error,
|
|
7008
|
+
stack: err instanceof Error ? err.stack : void 0
|
|
7009
|
+
},
|
|
7010
|
+
error: `Test crashed: ${error}`,
|
|
7011
|
+
durationMs: Date.now() - start
|
|
7012
|
+
};
|
|
7013
|
+
}
|
|
7014
|
+
}
|
|
7015
|
+
async function testClaudeBinaryDiscovery() {
|
|
6989
7016
|
const start = Date.now();
|
|
6990
|
-
const
|
|
7017
|
+
const details = {};
|
|
7018
|
+
const npmRootResult = await execCommand("npm root -g");
|
|
7019
|
+
const npmBinResult = await execCommand("npm bin -g");
|
|
7020
|
+
const npmRoot = npmRootResult.stdout;
|
|
7021
|
+
const npmBin = npmBinResult.stdout;
|
|
7022
|
+
details.npmRoot = npmRoot;
|
|
7023
|
+
details.npmBin = npmBin;
|
|
7024
|
+
const evaluatorBinPath = path9.join(
|
|
7025
|
+
npmRoot,
|
|
7026
|
+
"@wix",
|
|
7027
|
+
"evalforge-evaluator",
|
|
7028
|
+
"node_modules",
|
|
7029
|
+
".bin"
|
|
7030
|
+
);
|
|
7031
|
+
details.evaluatorBinPath = evaluatorBinPath;
|
|
7032
|
+
const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
|
|
7033
|
+
details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
|
|
7034
|
+
details.lsBinExitCode = lsBinResult.exitCode;
|
|
7035
|
+
const claudePath = path9.join(evaluatorBinPath, "claude");
|
|
7036
|
+
let claudeExists = false;
|
|
7037
|
+
try {
|
|
7038
|
+
claudeExists = fs11.existsSync(claudePath);
|
|
7039
|
+
} catch {
|
|
7040
|
+
claudeExists = false;
|
|
7041
|
+
}
|
|
7042
|
+
details.claudePath = claudePath;
|
|
7043
|
+
details.claudeExists = claudeExists;
|
|
7044
|
+
if (claudeExists) {
|
|
7045
|
+
const readlinkResult = await execCommand(
|
|
7046
|
+
`readlink -f "${claudePath}" 2>&1`
|
|
7047
|
+
);
|
|
7048
|
+
details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
|
|
7049
|
+
const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
|
|
7050
|
+
details.claudeStat = statResult.stdout || statResult.stderr;
|
|
7051
|
+
const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
|
|
7052
|
+
details.claudeFileInfo = lsClaudeResult.stdout;
|
|
7053
|
+
}
|
|
7054
|
+
const whichResult = await execCommand("which claude 2>&1");
|
|
7055
|
+
details.whichClaude = whichResult.stdout || "(not in PATH)";
|
|
7056
|
+
details.whichExitCode = whichResult.exitCode;
|
|
7057
|
+
const currentPath = process.env.PATH || "";
|
|
7058
|
+
details.currentPATH = currentPath.split(":");
|
|
7059
|
+
details.pathLength = currentPath.split(":").length;
|
|
7060
|
+
const passed = claudeExists || whichResult.exitCode === 0;
|
|
7061
|
+
return {
|
|
7062
|
+
name: "claude-binary-discovery",
|
|
7063
|
+
passed,
|
|
7064
|
+
details,
|
|
7065
|
+
error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
|
|
7066
|
+
durationMs: Date.now() - start
|
|
7067
|
+
};
|
|
7068
|
+
}
|
|
7069
|
+
async function testClaudeExecution() {
|
|
7070
|
+
const start = Date.now();
|
|
7071
|
+
const details = {};
|
|
7072
|
+
const npmRootResult = await execCommand("npm root -g");
|
|
7073
|
+
const npmRoot = npmRootResult.stdout;
|
|
7074
|
+
const claudePath = path9.join(
|
|
7075
|
+
npmRoot,
|
|
7076
|
+
"@wix",
|
|
7077
|
+
"evalforge-evaluator",
|
|
7078
|
+
"node_modules",
|
|
7079
|
+
".bin",
|
|
7080
|
+
"claude"
|
|
7081
|
+
);
|
|
7082
|
+
details.claudePath = claudePath;
|
|
7083
|
+
const versionResult = await execCommand(
|
|
7084
|
+
`"${claudePath}" --version 2>&1`,
|
|
7085
|
+
15e3
|
|
7086
|
+
);
|
|
7087
|
+
details.versionCommand = {
|
|
7088
|
+
command: `"${claudePath}" --version`,
|
|
7089
|
+
stdout: versionResult.stdout,
|
|
7090
|
+
stderr: versionResult.stderr,
|
|
7091
|
+
exitCode: versionResult.exitCode
|
|
7092
|
+
};
|
|
7093
|
+
const helpResult = await execCommand(
|
|
7094
|
+
`"${claudePath}" --help 2>&1 | head -50`,
|
|
7095
|
+
15e3
|
|
7096
|
+
);
|
|
7097
|
+
details.helpCommand = {
|
|
7098
|
+
command: `"${claudePath}" --help | head -50`,
|
|
7099
|
+
stdout: helpResult.stdout.slice(0, 1500),
|
|
7100
|
+
stderr: helpResult.stderr.slice(0, 500),
|
|
7101
|
+
exitCode: helpResult.exitCode
|
|
7102
|
+
};
|
|
7103
|
+
const whichClaudeResult = await execCommand("which claude 2>&1");
|
|
7104
|
+
if (whichClaudeResult.exitCode === 0) {
|
|
7105
|
+
const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
|
|
7106
|
+
details.pathVersionCommand = {
|
|
7107
|
+
whichClaude: whichClaudeResult.stdout,
|
|
7108
|
+
stdout: pathVersionResult.stdout,
|
|
7109
|
+
stderr: pathVersionResult.stderr,
|
|
7110
|
+
exitCode: pathVersionResult.exitCode
|
|
7111
|
+
};
|
|
7112
|
+
}
|
|
7113
|
+
const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
|
|
7114
|
+
return {
|
|
7115
|
+
name: "claude-cli-execution",
|
|
7116
|
+
passed,
|
|
7117
|
+
details,
|
|
7118
|
+
error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
|
|
7119
|
+
durationMs: Date.now() - start
|
|
7120
|
+
};
|
|
7121
|
+
}
|
|
7122
|
+
async function testEnvironmentDump() {
|
|
7123
|
+
const start = Date.now();
|
|
7124
|
+
const details = {};
|
|
7125
|
+
const importantVars = [
|
|
6991
7126
|
"PATH",
|
|
6992
7127
|
"HOME",
|
|
6993
7128
|
"USER",
|
|
6994
7129
|
"SHELL",
|
|
6995
7130
|
"NODE_ENV",
|
|
7131
|
+
"PWD",
|
|
6996
7132
|
"EVAL_SERVER_URL",
|
|
6997
7133
|
"AI_GATEWAY_URL",
|
|
7134
|
+
"TRACE_PUSH_URL",
|
|
7135
|
+
"EVAL_AUTH_TOKEN",
|
|
6998
7136
|
"ANTHROPIC_API_KEY",
|
|
6999
7137
|
"ANTHROPIC_AUTH_TOKEN",
|
|
7000
7138
|
"ANTHROPIC_BASE_URL",
|
|
7001
7139
|
"ANTHROPIC_CUSTOM_HEADERS"
|
|
7002
7140
|
];
|
|
7003
|
-
const
|
|
7004
|
-
const
|
|
7005
|
-
for (const key of envVars) {
|
|
7141
|
+
const capturedVars = {};
|
|
7142
|
+
for (const key of importantVars) {
|
|
7006
7143
|
const value = process.env[key];
|
|
7007
7144
|
if (value) {
|
|
7008
7145
|
if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
|
|
7009
|
-
|
|
7010
|
-
} else if (key === "PATH") {
|
|
7011
|
-
details[key] = value.split(":");
|
|
7146
|
+
capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
|
|
7012
7147
|
} else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
|
|
7013
|
-
|
|
7148
|
+
capturedVars[key] = value.split("\n").map((h) => {
|
|
7149
|
+
const [name2, val] = h.split(":");
|
|
7150
|
+
return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
|
|
7151
|
+
}).join(" | ");
|
|
7152
|
+
} else if (key === "PATH") {
|
|
7153
|
+
const parts = value.split(":");
|
|
7154
|
+
capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
|
|
7014
7155
|
} else {
|
|
7015
|
-
|
|
7156
|
+
capturedVars[key] = value;
|
|
7016
7157
|
}
|
|
7017
7158
|
} else {
|
|
7018
|
-
|
|
7159
|
+
capturedVars[key] = "(NOT SET)";
|
|
7019
7160
|
}
|
|
7020
7161
|
}
|
|
7021
|
-
details.
|
|
7022
|
-
|
|
7023
|
-
|
|
7024
|
-
|
|
7025
|
-
|
|
7026
|
-
|
|
7027
|
-
durationMs: Date.now() - start
|
|
7028
|
-
};
|
|
7029
|
-
}
|
|
7030
|
-
async function testNodeEnvironment() {
|
|
7031
|
-
const start = Date.now();
|
|
7032
|
-
const details = {
|
|
7033
|
-
nodeVersion: process.version,
|
|
7162
|
+
details.importantVars = capturedVars;
|
|
7163
|
+
const envResult = await execCommand("env | sort | head -50");
|
|
7164
|
+
details.envCommandOutput = envResult.stdout;
|
|
7165
|
+
details.envExitCode = envResult.exitCode;
|
|
7166
|
+
details.nodeInfo = {
|
|
7167
|
+
version: process.version,
|
|
7034
7168
|
platform: process.platform,
|
|
7035
7169
|
arch: process.arch,
|
|
7036
|
-
cwd: process.cwd(),
|
|
7037
7170
|
pid: process.pid,
|
|
7038
|
-
|
|
7039
|
-
memoryUsage: process.memoryUsage(),
|
|
7171
|
+
cwd: process.cwd(),
|
|
7040
7172
|
execPath: process.execPath
|
|
7041
7173
|
};
|
|
7042
7174
|
return {
|
|
7043
|
-
name: "
|
|
7175
|
+
name: "environment-dump",
|
|
7044
7176
|
passed: true,
|
|
7177
|
+
// Info test, always passes
|
|
7045
7178
|
details,
|
|
7046
7179
|
durationMs: Date.now() - start
|
|
7047
7180
|
};
|
|
7048
7181
|
}
|
|
7049
|
-
async function
|
|
7182
|
+
async function testFileSystemStructure() {
|
|
7050
7183
|
const start = Date.now();
|
|
7184
|
+
const details = {};
|
|
7051
7185
|
const npmRootResult = await execCommand("npm root -g");
|
|
7052
|
-
const npmBinResult = await execCommand("npm bin -g");
|
|
7053
7186
|
const npmRoot = npmRootResult.stdout;
|
|
7054
|
-
const
|
|
7055
|
-
|
|
7056
|
-
|
|
7057
|
-
|
|
7058
|
-
npmRootExitCode: npmRootResult.exitCode,
|
|
7059
|
-
npmBinExitCode: npmBinResult.exitCode
|
|
7187
|
+
const lsCwdResult = await execCommand("ls -la");
|
|
7188
|
+
details.currentDirectory = {
|
|
7189
|
+
path: process.cwd(),
|
|
7190
|
+
contents: lsCwdResult.stdout
|
|
7060
7191
|
};
|
|
7061
|
-
|
|
7062
|
-
|
|
7063
|
-
|
|
7064
|
-
|
|
7065
|
-
|
|
7066
|
-
|
|
7067
|
-
const files = fs11.readdirSync(evaluatorPath);
|
|
7068
|
-
details.evaluatorFiles = files;
|
|
7069
|
-
} catch {
|
|
7070
|
-
details.evaluatorFiles = "Failed to list files";
|
|
7071
|
-
}
|
|
7072
|
-
}
|
|
7073
|
-
}
|
|
7074
|
-
if (npmRoot) {
|
|
7075
|
-
const sdkPath = path9.join(
|
|
7076
|
-
npmRoot,
|
|
7077
|
-
"@wix",
|
|
7078
|
-
"evalforge-evaluator",
|
|
7079
|
-
"node_modules",
|
|
7080
|
-
"@anthropic-ai",
|
|
7081
|
-
"claude-agent-sdk"
|
|
7082
|
-
);
|
|
7083
|
-
const sdkExists = fs11.existsSync(sdkPath);
|
|
7084
|
-
details.claudeAgentSdkInstalled = sdkExists;
|
|
7085
|
-
}
|
|
7086
|
-
const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
|
|
7087
|
-
return {
|
|
7088
|
-
name: "npm-global-directory",
|
|
7089
|
-
passed,
|
|
7090
|
-
details,
|
|
7091
|
-
error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
|
|
7092
|
-
durationMs: Date.now() - start
|
|
7192
|
+
const lsNpmRootResult = await execCommand(
|
|
7193
|
+
`ls -la "${npmRoot}" 2>&1 | head -30`
|
|
7194
|
+
);
|
|
7195
|
+
details.npmGlobalRoot = {
|
|
7196
|
+
path: npmRoot,
|
|
7197
|
+
contents: lsNpmRootResult.stdout
|
|
7093
7198
|
};
|
|
7094
|
-
|
|
7095
|
-
|
|
7096
|
-
|
|
7097
|
-
|
|
7098
|
-
|
|
7099
|
-
|
|
7100
|
-
const
|
|
7101
|
-
|
|
7102
|
-
|
|
7103
|
-
|
|
7104
|
-
|
|
7105
|
-
}
|
|
7106
|
-
const
|
|
7107
|
-
|
|
7108
|
-
|
|
7109
|
-
|
|
7110
|
-
|
|
7111
|
-
|
|
7112
|
-
|
|
7199
|
+
const wixPath = path9.join(npmRoot, "@wix");
|
|
7200
|
+
const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
|
|
7201
|
+
details.wixPackages = {
|
|
7202
|
+
path: wixPath,
|
|
7203
|
+
contents: lsWixResult.stdout
|
|
7204
|
+
};
|
|
7205
|
+
const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
|
|
7206
|
+
const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
|
|
7207
|
+
details.evaluatorDir = {
|
|
7208
|
+
path: evaluatorPath,
|
|
7209
|
+
contents: lsEvaluatorResult.stdout
|
|
7210
|
+
};
|
|
7211
|
+
const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
|
|
7212
|
+
const lsNodeModulesResult = await execCommand(
|
|
7213
|
+
`ls "${nodeModulesPath}" 2>&1 | head -30`
|
|
7214
|
+
);
|
|
7215
|
+
details.evaluatorNodeModules = {
|
|
7216
|
+
path: nodeModulesPath,
|
|
7217
|
+
contents: lsNodeModulesResult.stdout
|
|
7218
|
+
};
|
|
7219
|
+
const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
|
|
7220
|
+
const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
|
|
7221
|
+
details.anthropicPackages = {
|
|
7222
|
+
path: anthropicPath,
|
|
7223
|
+
contents: lsAnthropicResult.stdout
|
|
7224
|
+
};
|
|
7225
|
+
const binPath = path9.join(nodeModulesPath, ".bin");
|
|
7226
|
+
const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
|
|
7227
|
+
details.binDirectory = {
|
|
7228
|
+
path: binPath,
|
|
7229
|
+
contents: lsBinResult.stdout
|
|
7113
7230
|
};
|
|
7114
|
-
const pathDirs = (process.env.PATH || "").split(":");
|
|
7115
|
-
const claudeFoundIn = [];
|
|
7116
|
-
for (const dir of pathDirs) {
|
|
7117
|
-
const claudePath = path9.join(dir, "claude");
|
|
7118
|
-
if (fs11.existsSync(claudePath)) {
|
|
7119
|
-
claudeFoundIn.push(dir);
|
|
7120
|
-
}
|
|
7121
|
-
}
|
|
7122
|
-
details.claudeFoundInPathDirs = claudeFoundIn;
|
|
7123
|
-
const passed = whichResult.exitCode === 0 || claudeInNpmBin;
|
|
7124
7231
|
return {
|
|
7125
|
-
name: "
|
|
7126
|
-
passed,
|
|
7232
|
+
name: "file-system-structure",
|
|
7233
|
+
passed: true,
|
|
7234
|
+
// Info test, always passes
|
|
7127
7235
|
details,
|
|
7128
|
-
error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
|
|
7129
7236
|
durationMs: Date.now() - start
|
|
7130
7237
|
};
|
|
7131
7238
|
}
|
|
7132
|
-
async function
|
|
7239
|
+
async function testNetworkConnectivity(config) {
|
|
7133
7240
|
const start = Date.now();
|
|
7134
|
-
const
|
|
7135
|
-
const
|
|
7136
|
-
|
|
7137
|
-
|
|
7138
|
-
|
|
7139
|
-
|
|
7140
|
-
|
|
7241
|
+
const details = {};
|
|
7242
|
+
const dnsResult = await execCommand(
|
|
7243
|
+
"nslookup manage.wix.com 2>&1 | head -10"
|
|
7244
|
+
);
|
|
7245
|
+
details.dnsLookup = {
|
|
7246
|
+
command: "nslookup manage.wix.com",
|
|
7247
|
+
output: dnsResult.stdout || dnsResult.stderr,
|
|
7248
|
+
exitCode: dnsResult.exitCode
|
|
7141
7249
|
};
|
|
7142
|
-
const
|
|
7250
|
+
const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
|
|
7251
|
+
details.pingTest = {
|
|
7252
|
+
command: "ping -c 2 manage.wix.com",
|
|
7253
|
+
output: pingResult.stdout || pingResult.stderr,
|
|
7254
|
+
exitCode: pingResult.exitCode
|
|
7255
|
+
};
|
|
7256
|
+
const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
|
|
7257
|
+
const curlGatewayResult = await execCommand(
|
|
7258
|
+
`curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}" 2>&1 | tail -30`
|
|
7259
|
+
);
|
|
7260
|
+
details.aiGatewayTest = {
|
|
7261
|
+
url: gatewayUrl,
|
|
7262
|
+
output: curlGatewayResult.stdout,
|
|
7263
|
+
exitCode: curlGatewayResult.exitCode
|
|
7264
|
+
};
|
|
7265
|
+
const serverUrl = config.serverUrl;
|
|
7266
|
+
const curlServerResult = await execCommand(
|
|
7267
|
+
`curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health" 2>&1 | tail -30`
|
|
7268
|
+
);
|
|
7269
|
+
details.backendServerTest = {
|
|
7270
|
+
url: `${serverUrl}/health`,
|
|
7271
|
+
output: curlServerResult.stdout,
|
|
7272
|
+
exitCode: curlServerResult.exitCode
|
|
7273
|
+
};
|
|
7274
|
+
const httpsResult = await execCommand(
|
|
7275
|
+
'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
|
|
7276
|
+
);
|
|
7277
|
+
details.httpsBaseline = {
|
|
7278
|
+
command: "curl https://www.google.com",
|
|
7279
|
+
output: httpsResult.stdout,
|
|
7280
|
+
exitCode: httpsResult.exitCode
|
|
7281
|
+
};
|
|
7282
|
+
const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
|
|
7283
|
+
const gatewayReachable = curlGatewayResult.exitCode === 0;
|
|
7143
7284
|
return {
|
|
7144
|
-
name: "
|
|
7145
|
-
passed,
|
|
7285
|
+
name: "network-connectivity",
|
|
7286
|
+
passed: networkWorks && gatewayReachable,
|
|
7146
7287
|
details,
|
|
7147
|
-
error:
|
|
7288
|
+
error: networkWorks && gatewayReachable ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
|
|
7148
7289
|
durationMs: Date.now() - start
|
|
7149
7290
|
};
|
|
7150
7291
|
}
|
|
7151
|
-
async function
|
|
7292
|
+
async function testChildProcessSpawning() {
|
|
7152
7293
|
const start = Date.now();
|
|
7153
|
-
const
|
|
7154
|
-
|
|
7155
|
-
|
|
7156
|
-
|
|
7157
|
-
|
|
7158
|
-
|
|
7159
|
-
|
|
7160
|
-
|
|
7161
|
-
|
|
7162
|
-
|
|
7163
|
-
const curlResult = await execCommand(
|
|
7164
|
-
`curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
|
|
7294
|
+
const details = {};
|
|
7295
|
+
const echoResult = await execCommand('echo "DIAGNOSTIC_TEST_SUCCESS_12345"');
|
|
7296
|
+
details.echoTest = {
|
|
7297
|
+
command: 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"',
|
|
7298
|
+
output: echoResult.stdout,
|
|
7299
|
+
exitCode: echoResult.exitCode,
|
|
7300
|
+
passed: echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345"
|
|
7301
|
+
};
|
|
7302
|
+
const nodeResult = await execCommand(
|
|
7303
|
+
'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
|
|
7165
7304
|
);
|
|
7166
|
-
|
|
7167
|
-
|
|
7168
|
-
|
|
7305
|
+
details.nodeTest = {
|
|
7306
|
+
command: 'node -e "console.log(JSON.stringify({...}))"',
|
|
7307
|
+
output: nodeResult.stdout,
|
|
7308
|
+
exitCode: nodeResult.exitCode
|
|
7309
|
+
};
|
|
7310
|
+
const shellResult = await execCommand(
|
|
7311
|
+
'echo "PID: $$"; pwd; whoami; date; uname -a'
|
|
7169
7312
|
);
|
|
7170
|
-
|
|
7171
|
-
|
|
7172
|
-
|
|
7173
|
-
|
|
7174
|
-
serverUrl,
|
|
7175
|
-
serverHttpCode: serverResult.stdout,
|
|
7176
|
-
serverExitCode: serverResult.exitCode
|
|
7313
|
+
details.shellTest = {
|
|
7314
|
+
command: 'echo "PID: $$"; pwd; whoami; date; uname -a',
|
|
7315
|
+
output: shellResult.stdout,
|
|
7316
|
+
exitCode: shellResult.exitCode
|
|
7177
7317
|
};
|
|
7178
|
-
const
|
|
7318
|
+
const stderrResult = await execCommand(
|
|
7319
|
+
`node -e "console.error('stderr test')"`
|
|
7320
|
+
);
|
|
7321
|
+
details.stderrTest = {
|
|
7322
|
+
command: `node -e "console.error('stderr test')"`,
|
|
7323
|
+
stderr: stderrResult.stderr,
|
|
7324
|
+
exitCode: stderrResult.exitCode
|
|
7325
|
+
};
|
|
7326
|
+
const exitCodeResult = await execCommand("exit 42");
|
|
7327
|
+
details.exitCodeTest = {
|
|
7328
|
+
command: "exit 42",
|
|
7329
|
+
exitCode: exitCodeResult.exitCode,
|
|
7330
|
+
passed: exitCodeResult.exitCode === 42
|
|
7331
|
+
};
|
|
7332
|
+
const passed = echoResult.exitCode === 0 && echoResult.stdout === "DIAGNOSTIC_TEST_SUCCESS_12345";
|
|
7179
7333
|
return {
|
|
7180
|
-
name: "
|
|
7181
|
-
passed
|
|
7334
|
+
name: "child-process-spawning",
|
|
7335
|
+
passed,
|
|
7182
7336
|
details,
|
|
7183
|
-
error:
|
|
7337
|
+
error: passed ? void 0 : "Echo test failed",
|
|
7184
7338
|
durationMs: Date.now() - start
|
|
7185
7339
|
};
|
|
7186
7340
|
}
|
|
7187
7341
|
async function testSdkImport() {
|
|
7188
7342
|
const start = Date.now();
|
|
7343
|
+
const details = {};
|
|
7189
7344
|
try {
|
|
7190
7345
|
const sdk = await import("@anthropic-ai/claude-agent-sdk");
|
|
7191
|
-
|
|
7192
|
-
|
|
7193
|
-
|
|
7194
|
-
|
|
7195
|
-
|
|
7346
|
+
details.sdkImported = true;
|
|
7347
|
+
details.exportedKeys = Object.keys(sdk);
|
|
7348
|
+
details.hasQuery = typeof sdk.query === "function";
|
|
7349
|
+
if (typeof sdk.query === "function") {
|
|
7350
|
+
details.queryFunctionExists = true;
|
|
7351
|
+
details.queryFunctionType = typeof sdk.query;
|
|
7352
|
+
}
|
|
7196
7353
|
return {
|
|
7197
7354
|
name: "sdk-import",
|
|
7198
7355
|
passed: true,
|
|
@@ -7206,36 +7363,37 @@ async function testSdkImport() {
|
|
|
7206
7363
|
passed: false,
|
|
7207
7364
|
details: {
|
|
7208
7365
|
sdkImported: false,
|
|
7209
|
-
error
|
|
7366
|
+
error,
|
|
7367
|
+
stack: err instanceof Error ? err.stack?.split("\n").slice(0, 5) : void 0
|
|
7210
7368
|
},
|
|
7211
|
-
error: `Failed to import
|
|
7369
|
+
error: `Failed to import SDK: ${error}`,
|
|
7212
7370
|
durationMs: Date.now() - start
|
|
7213
7371
|
};
|
|
7214
7372
|
}
|
|
7215
7373
|
}
|
|
7216
|
-
async function
|
|
7374
|
+
async function testFileSystemWrite() {
|
|
7217
7375
|
const start = Date.now();
|
|
7218
|
-
const
|
|
7219
|
-
const
|
|
7376
|
+
const details = {};
|
|
7377
|
+
const testDir = "/tmp/evalforge-diagnostics-test";
|
|
7378
|
+
const testFile = path9.join(testDir, "test-file.txt");
|
|
7379
|
+
const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;
|
|
7220
7380
|
try {
|
|
7221
7381
|
if (!fs11.existsSync(testDir)) {
|
|
7222
7382
|
fs11.mkdirSync(testDir, { recursive: true });
|
|
7223
7383
|
}
|
|
7224
|
-
|
|
7225
|
-
|
|
7384
|
+
details.directoryCreated = true;
|
|
7385
|
+
fs11.writeFileSync(testFile, testContent);
|
|
7386
|
+
details.fileWritten = true;
|
|
7387
|
+
const readContent = fs11.readFileSync(testFile, "utf8");
|
|
7388
|
+
details.fileRead = true;
|
|
7389
|
+
details.contentMatches = readContent === testContent;
|
|
7390
|
+
const lsResult = await execCommand(`ls -la "${testDir}"`);
|
|
7391
|
+
details.directoryContents = lsResult.stdout;
|
|
7226
7392
|
fs11.unlinkSync(testFile);
|
|
7227
7393
|
fs11.rmdirSync(testDir);
|
|
7228
|
-
|
|
7229
|
-
canCreateDirectory: true,
|
|
7230
|
-
canWriteFile: true,
|
|
7231
|
-
canReadFile: content === "diagnostic-test",
|
|
7232
|
-
testDir,
|
|
7233
|
-
cwd: process.cwd(),
|
|
7234
|
-
cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
|
|
7235
|
-
// First 20 files
|
|
7236
|
-
};
|
|
7394
|
+
details.cleanedUp = true;
|
|
7237
7395
|
return {
|
|
7238
|
-
name: "file-system-
|
|
7396
|
+
name: "file-system-write",
|
|
7239
7397
|
passed: true,
|
|
7240
7398
|
details,
|
|
7241
7399
|
durationMs: Date.now() - start
|
|
@@ -7243,32 +7401,40 @@ async function testFileSystemAccess() {
|
|
|
7243
7401
|
} catch (err) {
|
|
7244
7402
|
const error = err instanceof Error ? err.message : String(err);
|
|
7245
7403
|
return {
|
|
7246
|
-
name: "file-system-
|
|
7404
|
+
name: "file-system-write",
|
|
7247
7405
|
passed: false,
|
|
7248
7406
|
details: {
|
|
7407
|
+
...details,
|
|
7249
7408
|
error,
|
|
7250
7409
|
testDir,
|
|
7251
|
-
|
|
7410
|
+
testFile
|
|
7252
7411
|
},
|
|
7253
|
-
error: `File system
|
|
7412
|
+
error: `File system write failed: ${error}`,
|
|
7254
7413
|
durationMs: Date.now() - start
|
|
7255
7414
|
};
|
|
7256
7415
|
}
|
|
7257
7416
|
}
|
|
7258
7417
|
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
|
|
7418
|
+
const truncatedResult = "summary" in result ? result : {
|
|
7419
|
+
...result,
|
|
7420
|
+
details: JSON.parse(
|
|
7421
|
+
JSON.stringify(
|
|
7422
|
+
result.details,
|
|
7423
|
+
(_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v
|
|
7424
|
+
)
|
|
7425
|
+
)
|
|
7426
|
+
};
|
|
7259
7427
|
const event = {
|
|
7260
7428
|
evalRunId: evalRunId2,
|
|
7261
7429
|
scenarioId: "diagnostics",
|
|
7262
7430
|
scenarioName: "Environment Diagnostics",
|
|
7263
7431
|
targetId: "system",
|
|
7264
|
-
targetName: "
|
|
7432
|
+
targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
|
|
7265
7433
|
stepNumber: 0,
|
|
7266
7434
|
type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
|
|
7267
|
-
outputPreview: JSON.stringify(
|
|
7268
|
-
// Limit size
|
|
7435
|
+
outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
|
|
7269
7436
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7270
7437
|
isComplete: "summary" in result
|
|
7271
|
-
// Complete if it's the full report
|
|
7272
7438
|
};
|
|
7273
7439
|
console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
|
|
7274
7440
|
if (tracePushUrl) {
|
|
@@ -7286,18 +7452,35 @@ function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader,
|
|
|
7286
7452
|
headers,
|
|
7287
7453
|
body: JSON.stringify([event])
|
|
7288
7454
|
}).catch((err) => {
|
|
7289
|
-
console.error(
|
|
7455
|
+
console.error(
|
|
7456
|
+
"[DIAGNOSTICS] Failed to push trace event to backend:",
|
|
7457
|
+
err
|
|
7458
|
+
);
|
|
7290
7459
|
});
|
|
7291
7460
|
}
|
|
7292
7461
|
}
|
|
7293
7462
|
async function runDiagnostics(config, evalRunId2) {
|
|
7294
7463
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7295
7464
|
const startTime = Date.now();
|
|
7296
|
-
console.error("
|
|
7465
|
+
console.error("");
|
|
7466
|
+
console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
7467
|
+
console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
|
|
7468
|
+
console.error("\u2551 (Results sent to backend via trace events) \u2551");
|
|
7469
|
+
console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
7470
|
+
console.error("");
|
|
7297
7471
|
const tests = [];
|
|
7298
|
-
const runTest = async (testFn) => {
|
|
7299
|
-
|
|
7472
|
+
const runTest = async (testName, testFn) => {
|
|
7473
|
+
console.error(`[DIAG] Running: ${testName}...`);
|
|
7474
|
+
const result = await safeRunTest(testName, testFn);
|
|
7300
7475
|
tests.push(result);
|
|
7476
|
+
const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
|
|
7477
|
+
console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
|
|
7478
|
+
console.error("[DIAG] Details:");
|
|
7479
|
+
console.error(JSON.stringify(result.details, null, 2));
|
|
7480
|
+
console.error("");
|
|
7481
|
+
if (!result.passed && result.error) {
|
|
7482
|
+
console.error(`[DIAG] ERROR: ${result.error}`);
|
|
7483
|
+
}
|
|
7301
7484
|
emitDiagnosticTraceEvent(
|
|
7302
7485
|
evalRunId2,
|
|
7303
7486
|
result,
|
|
@@ -7305,22 +7488,15 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7305
7488
|
config.routeHeader,
|
|
7306
7489
|
config.authToken
|
|
7307
7490
|
);
|
|
7308
|
-
const status = result.passed ? "\u2713" : "\u2717";
|
|
7309
|
-
console.error(
|
|
7310
|
-
`[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
|
|
7311
|
-
);
|
|
7312
|
-
if (!result.passed && result.error) {
|
|
7313
|
-
console.error(`[DIAGNOSTICS] Error: ${result.error}`);
|
|
7314
|
-
}
|
|
7315
7491
|
};
|
|
7316
|
-
await runTest(
|
|
7317
|
-
await runTest(
|
|
7318
|
-
await runTest(
|
|
7319
|
-
await runTest(
|
|
7320
|
-
await runTest(
|
|
7321
|
-
await runTest(
|
|
7322
|
-
await runTest(testSdkImport);
|
|
7323
|
-
await runTest(
|
|
7492
|
+
await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
|
|
7493
|
+
await runTest("claude-cli-execution", testClaudeExecution);
|
|
7494
|
+
await runTest("environment-dump", testEnvironmentDump);
|
|
7495
|
+
await runTest("file-system-structure", testFileSystemStructure);
|
|
7496
|
+
await runTest("network-connectivity", () => testNetworkConnectivity(config));
|
|
7497
|
+
await runTest("child-process-spawning", testChildProcessSpawning);
|
|
7498
|
+
await runTest("sdk-import", testSdkImport);
|
|
7499
|
+
await runTest("file-system-write", testFileSystemWrite);
|
|
7324
7500
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
7325
7501
|
const totalDurationMs = Date.now() - startTime;
|
|
7326
7502
|
const report = {
|
|
@@ -7341,9 +7517,16 @@ async function runDiagnostics(config, evalRunId2) {
|
|
|
7341
7517
|
config.routeHeader,
|
|
7342
7518
|
config.authToken
|
|
7343
7519
|
);
|
|
7520
|
+
console.error("");
|
|
7521
|
+
console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
|
|
7344
7522
|
console.error(
|
|
7345
|
-
|
|
7523
|
+
`\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
|
|
7524
|
+
60
|
|
7525
|
+
) + "\u2551"
|
|
7346
7526
|
);
|
|
7527
|
+
console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
|
|
7528
|
+
console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
|
|
7529
|
+
console.error("");
|
|
7347
7530
|
return report;
|
|
7348
7531
|
}
|
|
7349
7532
|
|