@wix/evalforge-evaluator 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +753 -82
- package/build/index.js.map +4 -4
- package/build/index.mjs +749 -78
- package/build/index.mjs.map +4 -4
- package/build/types/diagnostics.d.ts +48 -0
- package/build/types/error-reporter.d.ts +2 -0
- package/package.json +3 -3
package/build/index.mjs
CHANGED
|
@@ -64,8 +64,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
64
64
|
}
|
|
65
65
|
return headers;
|
|
66
66
|
}
|
|
67
|
-
async function fetchJson(
|
|
68
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
67
|
+
async function fetchJson(path10) {
|
|
68
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
69
69
|
console.error(`[API] GET ${url}`);
|
|
70
70
|
const headers = buildHeaders();
|
|
71
71
|
const response = await fetch(url, {
|
|
@@ -79,8 +79,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
79
79
|
}
|
|
80
80
|
return response.json();
|
|
81
81
|
}
|
|
82
|
-
async function postJson(
|
|
83
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
82
|
+
async function postJson(path10, body) {
|
|
83
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
84
84
|
console.error(`[API] POST ${url}`);
|
|
85
85
|
const response = await fetch(url, {
|
|
86
86
|
method: "POST",
|
|
@@ -94,8 +94,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
94
94
|
);
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
|
-
async function deleteRequest(
|
|
98
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
97
|
+
async function deleteRequest(path10) {
|
|
98
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
99
99
|
console.error(`[API] DELETE ${url}`);
|
|
100
100
|
const headers = buildHeaders();
|
|
101
101
|
const response = await fetch(url, {
|
|
@@ -109,8 +109,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
109
109
|
);
|
|
110
110
|
}
|
|
111
111
|
}
|
|
112
|
-
async function putJson(
|
|
113
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
112
|
+
async function putJson(path10, body) {
|
|
113
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
114
114
|
console.error(`[API] PUT ${url}`);
|
|
115
115
|
const response = await fetch(url, {
|
|
116
116
|
method: "PUT",
|
|
@@ -1136,17 +1136,17 @@ var ReadStream = class extends Minipass {
|
|
|
1136
1136
|
[_size];
|
|
1137
1137
|
[_remain];
|
|
1138
1138
|
[_autoClose];
|
|
1139
|
-
constructor(
|
|
1139
|
+
constructor(path10, opt) {
|
|
1140
1140
|
opt = opt || {};
|
|
1141
1141
|
super(opt);
|
|
1142
1142
|
this.readable = true;
|
|
1143
1143
|
this.writable = false;
|
|
1144
|
-
if (typeof
|
|
1144
|
+
if (typeof path10 !== "string") {
|
|
1145
1145
|
throw new TypeError("path must be a string");
|
|
1146
1146
|
}
|
|
1147
1147
|
this[_errored] = false;
|
|
1148
1148
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1149
|
-
this[_path] =
|
|
1149
|
+
this[_path] = path10;
|
|
1150
1150
|
this[_readSize] = opt.readSize || 16 * 1024 * 1024;
|
|
1151
1151
|
this[_reading] = false;
|
|
1152
1152
|
this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
|
|
@@ -1309,10 +1309,10 @@ var WriteStream = class extends EE {
|
|
|
1309
1309
|
[_flags];
|
|
1310
1310
|
[_finished] = false;
|
|
1311
1311
|
[_pos];
|
|
1312
|
-
constructor(
|
|
1312
|
+
constructor(path10, opt) {
|
|
1313
1313
|
opt = opt || {};
|
|
1314
1314
|
super(opt);
|
|
1315
|
-
this[_path] =
|
|
1315
|
+
this[_path] = path10;
|
|
1316
1316
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1317
1317
|
this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
|
|
1318
1318
|
this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
|
|
@@ -2205,10 +2205,10 @@ var Header = class {
|
|
|
2205
2205
|
}
|
|
2206
2206
|
const prefixSize = this.ctime || this.atime ? 130 : 155;
|
|
2207
2207
|
const split = splitPrefix(this.path || "", prefixSize);
|
|
2208
|
-
const
|
|
2208
|
+
const path10 = split[0];
|
|
2209
2209
|
const prefix = split[1];
|
|
2210
2210
|
this.needPax = !!split[2];
|
|
2211
|
-
this.needPax = encString(buf, off, 100,
|
|
2211
|
+
this.needPax = encString(buf, off, 100, path10) || this.needPax;
|
|
2212
2212
|
this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
|
|
2213
2213
|
this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
|
|
2214
2214
|
this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
|
|
@@ -3184,16 +3184,16 @@ var modeFix = (mode, isDir, portable) => {
|
|
|
3184
3184
|
// ../../node_modules/tar/dist/esm/strip-absolute-path.js
|
|
3185
3185
|
import { win32 } from "node:path";
|
|
3186
3186
|
var { isAbsolute, parse: parse3 } = win32;
|
|
3187
|
-
/**
 * Splits a path into the absolute root(s) that prefix it and the remainder.
 *
 * Roots are peeled off repeatedly until what is left is neither absolute nor
 * has a parsed root (using the win32 flavour of `isAbsolute` / `parse`).
 *
 * @param {string} path10 - Path to inspect.
 * @returns {[string, string]} `[strippedRoots, remainingPath]`.
 */
var stripAbsolutePath = (path10) => {
  let stripped = "";
  let rest = path10;
  for (let info = parse3(rest); isAbsolute(rest) || info.root; info = parse3(rest)) {
    let root;
    // A leading "/" is removed one character at a time, except for the
    // "//?/" prefix, where the parsed root is used instead.
    if (rest.charAt(0) === "/" && rest.slice(0, 4) !== "//?/") {
      root = "/";
    } else {
      root = info.root;
    }
    rest = rest.slice(root.length);
    stripped += root;
  }
  return [stripped, rest];
};
|
|
3198
3198
|
|
|
3199
3199
|
// ../../node_modules/tar/dist/esm/winchars.js
|
|
@@ -3205,12 +3205,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
|
|
|
3205
3205
|
var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
|
|
3206
3206
|
|
|
3207
3207
|
// ../../node_modules/tar/dist/esm/write-entry.js
|
|
3208
|
-
/**
 * Normalizes `path10` and, when a prefix is given, joins it under that prefix.
 *
 * @param {string} path10 - Entry path.
 * @param {string} [prefix] - Optional prefix; falsy means "no prefix".
 * @returns {string} The normalized path, or `<prefix>/<path>` with the
 *   prefix's trailing slashes stripped and a leading "./" (or a lone ".")
 *   removed from the path.
 */
var prefixPath = (path10, prefix) => {
  const normalized = normalizeWindowsPath(path10);
  if (prefix) {
    const relative = normalized.replace(/^\.(\/|$)/, "");
    return `${stripTrailingSlashes(prefix)}/${relative}`;
  }
  return normalized;
};
|
|
3215
3215
|
var maxReadSize = 16 * 1024 * 1024;
|
|
3216
3216
|
var PROCESS = /* @__PURE__ */ Symbol("process");
|
|
@@ -3355,8 +3355,8 @@ var WriteEntry = class extends Minipass {
|
|
|
3355
3355
|
[MODE](mode) {
|
|
3356
3356
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
3357
3357
|
}
|
|
3358
|
-
[PREFIX](
|
|
3359
|
-
return prefixPath(
|
|
3358
|
+
[PREFIX](path10) {
|
|
3359
|
+
return prefixPath(path10, this.prefix);
|
|
3360
3360
|
}
|
|
3361
3361
|
[HEADER]() {
|
|
3362
3362
|
if (!this.stat) {
|
|
@@ -3737,8 +3737,8 @@ var WriteEntryTar = class extends Minipass {
|
|
|
3737
3737
|
super.write(b);
|
|
3738
3738
|
readEntry.pipe(this);
|
|
3739
3739
|
}
|
|
3740
|
-
[PREFIX](
|
|
3741
|
-
return prefixPath(
|
|
3740
|
+
[PREFIX](path10) {
|
|
3741
|
+
return prefixPath(path10, this.prefix);
|
|
3742
3742
|
}
|
|
3743
3743
|
[MODE](mode) {
|
|
3744
3744
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
@@ -4162,8 +4162,8 @@ var PackJob = class {
|
|
|
4162
4162
|
pending = false;
|
|
4163
4163
|
ignore = false;
|
|
4164
4164
|
piped = false;
|
|
4165
|
-
constructor(
|
|
4166
|
-
this.path =
|
|
4165
|
+
constructor(path10, absolute) {
|
|
4166
|
+
this.path = path10 || "./";
|
|
4167
4167
|
this.absolute = absolute;
|
|
4168
4168
|
}
|
|
4169
4169
|
};
|
|
@@ -4291,21 +4291,21 @@ var Pack = class extends Minipass {
|
|
|
4291
4291
|
[WRITE](chunk) {
|
|
4292
4292
|
return super.write(chunk);
|
|
4293
4293
|
}
|
|
4294
|
-
add(
|
|
4295
|
-
this.write(
|
|
4294
|
+
add(path10) {
|
|
4295
|
+
this.write(path10);
|
|
4296
4296
|
return this;
|
|
4297
4297
|
}
|
|
4298
|
-
end(
|
|
4299
|
-
if (typeof
|
|
4300
|
-
cb =
|
|
4301
|
-
|
|
4298
|
+
end(path10, encoding, cb) {
|
|
4299
|
+
if (typeof path10 === "function") {
|
|
4300
|
+
cb = path10;
|
|
4301
|
+
path10 = void 0;
|
|
4302
4302
|
}
|
|
4303
4303
|
if (typeof encoding === "function") {
|
|
4304
4304
|
cb = encoding;
|
|
4305
4305
|
encoding = void 0;
|
|
4306
4306
|
}
|
|
4307
|
-
if (
|
|
4308
|
-
this.add(
|
|
4307
|
+
if (path10) {
|
|
4308
|
+
this.add(path10);
|
|
4309
4309
|
}
|
|
4310
4310
|
this[ENDED2] = true;
|
|
4311
4311
|
this[PROCESS2]();
|
|
@@ -4313,14 +4313,14 @@ var Pack = class extends Minipass {
|
|
|
4313
4313
|
cb();
|
|
4314
4314
|
return this;
|
|
4315
4315
|
}
|
|
4316
|
-
write(
|
|
4316
|
+
write(path10) {
|
|
4317
4317
|
if (this[ENDED2]) {
|
|
4318
4318
|
throw new Error("write after end");
|
|
4319
4319
|
}
|
|
4320
|
-
if (
|
|
4321
|
-
this[ADDTARENTRY](
|
|
4320
|
+
if (path10 instanceof ReadEntry) {
|
|
4321
|
+
this[ADDTARENTRY](path10);
|
|
4322
4322
|
} else {
|
|
4323
|
-
this[ADDFSENTRY](
|
|
4323
|
+
this[ADDFSENTRY](path10);
|
|
4324
4324
|
}
|
|
4325
4325
|
return this.flowing;
|
|
4326
4326
|
}
|
|
@@ -4663,9 +4663,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
|
|
|
4663
4663
|
// ../../node_modules/chownr/dist/esm/index.js
|
|
4664
4664
|
import fs6 from "node:fs";
|
|
4665
4665
|
import path4 from "node:path";
|
|
4666
|
-
var lchownSync = (
|
|
4666
|
+
var lchownSync = (path10, uid, gid) => {
|
|
4667
4667
|
try {
|
|
4668
|
-
return fs6.lchownSync(
|
|
4668
|
+
return fs6.lchownSync(path10, uid, gid);
|
|
4669
4669
|
} catch (er) {
|
|
4670
4670
|
if (er?.code !== "ENOENT")
|
|
4671
4671
|
throw er;
|
|
@@ -4748,9 +4748,9 @@ var CwdError = class extends Error {
|
|
|
4748
4748
|
path;
|
|
4749
4749
|
code;
|
|
4750
4750
|
syscall = "chdir";
|
|
4751
|
-
constructor(
|
|
4752
|
-
super(`${code2}: Cannot cd into '${
|
|
4753
|
-
this.path =
|
|
4751
|
+
constructor(path10, code2) {
|
|
4752
|
+
super(`${code2}: Cannot cd into '${path10}'`);
|
|
4753
|
+
this.path = path10;
|
|
4754
4754
|
this.code = code2;
|
|
4755
4755
|
}
|
|
4756
4756
|
get name() {
|
|
@@ -4764,10 +4764,10 @@ var SymlinkError = class extends Error {
|
|
|
4764
4764
|
symlink;
|
|
4765
4765
|
syscall = "symlink";
|
|
4766
4766
|
code = "TAR_SYMLINK_ERROR";
|
|
4767
|
-
constructor(symlink,
|
|
4767
|
+
constructor(symlink, path10) {
|
|
4768
4768
|
super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
|
|
4769
4769
|
this.symlink = symlink;
|
|
4770
|
-
this.path =
|
|
4770
|
+
this.path = path10;
|
|
4771
4771
|
}
|
|
4772
4772
|
get name() {
|
|
4773
4773
|
return "SymlinkError";
|
|
@@ -4949,13 +4949,13 @@ var normalizeUnicode = (s) => {
|
|
|
4949
4949
|
// ../../node_modules/tar/dist/esm/path-reservations.js
|
|
4950
4950
|
var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
4951
4951
|
var isWindows2 = platform3 === "win32";
|
|
4952
|
-
var getDirs = (
|
|
4953
|
-
const dirs =
|
|
4952
|
+
var getDirs = (path10) => {
|
|
4953
|
+
const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
|
|
4954
4954
|
const s = set[set.length - 1];
|
|
4955
4955
|
if (s !== void 0) {
|
|
4956
|
-
|
|
4956
|
+
path11 = join(s, path11);
|
|
4957
4957
|
}
|
|
4958
|
-
set.push(
|
|
4958
|
+
set.push(path11 || "/");
|
|
4959
4959
|
return set;
|
|
4960
4960
|
}, []);
|
|
4961
4961
|
return dirs;
|
|
@@ -4973,7 +4973,7 @@ var PathReservations = class {
|
|
|
4973
4973
|
paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
|
|
4974
4974
|
return stripTrailingSlashes(join(normalizeUnicode(p))).toLowerCase();
|
|
4975
4975
|
});
|
|
4976
|
-
const dirs = new Set(paths.map((
|
|
4976
|
+
const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
|
|
4977
4977
|
this.#reservations.set(fn, { dirs, paths });
|
|
4978
4978
|
for (const p of paths) {
|
|
4979
4979
|
const q = this.#queues.get(p);
|
|
@@ -5006,8 +5006,8 @@ var PathReservations = class {
|
|
|
5006
5006
|
throw new Error("function does not have any path reservations");
|
|
5007
5007
|
}
|
|
5008
5008
|
return {
|
|
5009
|
-
paths: res.paths.map((
|
|
5010
|
-
dirs: [...res.dirs].map((
|
|
5009
|
+
paths: res.paths.map((path10) => this.#queues.get(path10)),
|
|
5010
|
+
dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
|
|
5011
5011
|
};
|
|
5012
5012
|
}
|
|
5013
5013
|
// check if fn is first in line for all its paths, and is
|
|
@@ -5035,14 +5035,14 @@ var PathReservations = class {
|
|
|
5035
5035
|
}
|
|
5036
5036
|
const { paths, dirs } = res;
|
|
5037
5037
|
const next = /* @__PURE__ */ new Set();
|
|
5038
|
-
for (const
|
|
5039
|
-
const q = this.#queues.get(
|
|
5038
|
+
for (const path10 of paths) {
|
|
5039
|
+
const q = this.#queues.get(path10);
|
|
5040
5040
|
if (!q || q?.[0] !== fn) {
|
|
5041
5041
|
continue;
|
|
5042
5042
|
}
|
|
5043
5043
|
const q0 = q[1];
|
|
5044
5044
|
if (!q0) {
|
|
5045
|
-
this.#queues.delete(
|
|
5045
|
+
this.#queues.delete(path10);
|
|
5046
5046
|
continue;
|
|
5047
5047
|
}
|
|
5048
5048
|
q.shift();
|
|
@@ -5107,24 +5107,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
|
|
|
5107
5107
|
var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
5108
5108
|
var isWindows3 = platform4 === "win32";
|
|
5109
5109
|
var DEFAULT_MAX_DEPTH = 1024;
|
|
5110
|
-
/**
 * Removes a file asynchronously.
 *
 * When `isWindows3` is set, the file is first renamed to
 * `<path>.DELETE.<32 hex chars>` and the renamed file is then unlinked;
 * otherwise it is unlinked directly.
 *
 * @param {string} path10 - File to remove.
 * @param {(er?: Error | null) => void} cb - Completion callback.
 */
var unlinkFile = (path10, cb) => {
  if (isWindows3) {
    const tombstone = path10 + ".DELETE." + randomBytes(16).toString("hex");
    fs8.rename(path10, tombstone, (er) => {
      if (er) {
        return cb(er);
      }
      fs8.unlink(tombstone, cb);
    });
    return;
  }
  return fs8.unlink(path10, cb);
};
|
|
5122
|
-
/**
 * Synchronous counterpart of `unlinkFile`.
 *
 * When `isWindows3` is set, the file is renamed to
 * `<path>.DELETE.<32 hex chars>` and that name is unlinked; otherwise the
 * path is unlinked directly.
 *
 * @param {string} path10 - File to remove.
 */
var unlinkFileSync = (path10) => {
  if (isWindows3) {
    const tombstone = path10 + ".DELETE." + randomBytes(16).toString("hex");
    fs8.renameSync(path10, tombstone);
    fs8.unlinkSync(tombstone);
    return;
  }
  return fs8.unlinkSync(path10);
};
|
|
5130
5130
|
/**
 * Returns the first of `a`, `b` that is defined and equal to its own
 * unsigned 32-bit conversion (`v === v >>> 0`); otherwise returns `c`.
 */
var uint32 = (a, b, c) => {
  const isUint32 = (v) => v !== void 0 && v === v >>> 0;
  if (isUint32(a)) {
    return a;
  }
  return isUint32(b) ? b : c;
};
|
|
@@ -5222,24 +5222,24 @@ var Unpack = class extends Parser {
|
|
|
5222
5222
|
// return false if we need to skip this file
|
|
5223
5223
|
// return true if the field was successfully sanitized
|
|
5224
5224
|
[STRIPABSOLUTEPATH](entry, field) {
|
|
5225
|
-
const
|
|
5226
|
-
if (!
|
|
5225
|
+
const path10 = entry[field];
|
|
5226
|
+
if (!path10 || this.preservePaths)
|
|
5227
5227
|
return true;
|
|
5228
|
-
const parts =
|
|
5228
|
+
const parts = path10.split("/");
|
|
5229
5229
|
if (parts.includes("..") || /* c8 ignore next */
|
|
5230
5230
|
isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
|
|
5231
5231
|
this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
|
|
5232
5232
|
entry,
|
|
5233
|
-
[field]:
|
|
5233
|
+
[field]: path10
|
|
5234
5234
|
});
|
|
5235
5235
|
return false;
|
|
5236
5236
|
}
|
|
5237
|
-
const [root, stripped] = stripAbsolutePath(
|
|
5237
|
+
const [root, stripped] = stripAbsolutePath(path10);
|
|
5238
5238
|
if (root) {
|
|
5239
5239
|
entry[field] = String(stripped);
|
|
5240
5240
|
this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
|
|
5241
5241
|
entry,
|
|
5242
|
-
[field]:
|
|
5242
|
+
[field]: path10
|
|
5243
5243
|
});
|
|
5244
5244
|
}
|
|
5245
5245
|
return true;
|
|
@@ -6006,9 +6006,9 @@ var mtimeFilter = (opt) => {
|
|
|
6006
6006
|
if (!opt.mtimeCache) {
|
|
6007
6007
|
opt.mtimeCache = /* @__PURE__ */ new Map();
|
|
6008
6008
|
}
|
|
6009
|
-
opt.filter = filter ? (
|
|
6010
|
-
((opt.mtimeCache?.get(
|
|
6011
|
-
((opt.mtimeCache?.get(
|
|
6009
|
+
opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
|
|
6010
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
|
|
6011
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
|
|
6012
6012
|
};
|
|
6013
6013
|
|
|
6014
6014
|
// src/run-scenario/environment.ts
|
|
@@ -6307,6 +6307,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6307
6307
|
queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
|
|
6308
6308
|
);
|
|
6309
6309
|
console.log("[SDK-DEBUG] Calling SDK query()...");
|
|
6310
|
+
if (traceContext) {
|
|
6311
|
+
const preExecEvent = {
|
|
6312
|
+
evalRunId: traceContext.evalRunId,
|
|
6313
|
+
scenarioId: traceContext.scenarioId,
|
|
6314
|
+
scenarioName: traceContext.scenarioName,
|
|
6315
|
+
targetId: traceContext.targetId,
|
|
6316
|
+
targetName: traceContext.targetName,
|
|
6317
|
+
stepNumber: 0,
|
|
6318
|
+
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6319
|
+
outputPreview: JSON.stringify({
|
|
6320
|
+
event: "pre-sdk-execution",
|
|
6321
|
+
model: queryOptions.model,
|
|
6322
|
+
maxTurns: queryOptions.maxTurns,
|
|
6323
|
+
sdkEnv: {
|
|
6324
|
+
ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
|
|
6325
|
+
hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
|
|
6326
|
+
hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
|
|
6327
|
+
hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
|
|
6328
|
+
},
|
|
6329
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
6330
|
+
}),
|
|
6331
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6332
|
+
isComplete: false
|
|
6333
|
+
};
|
|
6334
|
+
emitTraceEvent(
|
|
6335
|
+
preExecEvent,
|
|
6336
|
+
traceContext.tracePushUrl,
|
|
6337
|
+
traceContext.routeHeader,
|
|
6338
|
+
traceContext.authToken
|
|
6339
|
+
);
|
|
6340
|
+
}
|
|
6310
6341
|
try {
|
|
6311
6342
|
for await (const message of query({
|
|
6312
6343
|
prompt: scenario.triggerPrompt,
|
|
@@ -6487,6 +6518,38 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6487
6518
|
sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
|
|
6488
6519
|
cause: causeInfo
|
|
6489
6520
|
};
|
|
6521
|
+
if (traceContext) {
|
|
6522
|
+
const errorTraceEvent = {
|
|
6523
|
+
evalRunId: traceContext.evalRunId,
|
|
6524
|
+
scenarioId: traceContext.scenarioId,
|
|
6525
|
+
scenarioName: traceContext.scenarioName,
|
|
6526
|
+
targetId: traceContext.targetId,
|
|
6527
|
+
targetName: traceContext.targetName,
|
|
6528
|
+
stepNumber: traceStepNumber + 1,
|
|
6529
|
+
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6530
|
+
outputPreview: JSON.stringify(
|
|
6531
|
+
{
|
|
6532
|
+
event: "sdk-execution-failed",
|
|
6533
|
+
error: errorMessage,
|
|
6534
|
+
errorName,
|
|
6535
|
+
messageCount,
|
|
6536
|
+
sdkEnv: sdkEnvDebug,
|
|
6537
|
+
sdkError: sdkSpecificInfo,
|
|
6538
|
+
cause: causeInfo
|
|
6539
|
+
},
|
|
6540
|
+
null,
|
|
6541
|
+
2
|
|
6542
|
+
).slice(0, 2e3),
|
|
6543
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6544
|
+
isComplete: true
|
|
6545
|
+
};
|
|
6546
|
+
emitTraceEvent(
|
|
6547
|
+
errorTraceEvent,
|
|
6548
|
+
traceContext.tracePushUrl,
|
|
6549
|
+
traceContext.routeHeader,
|
|
6550
|
+
traceContext.authToken
|
|
6551
|
+
);
|
|
6552
|
+
}
|
|
6490
6553
|
throw new Error(
|
|
6491
6554
|
`Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
|
|
6492
6555
|
Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
|
|
@@ -6853,6 +6916,8 @@ ${stackLines.join("\n")}`);
|
|
|
6853
6916
|
return parts.join(" ");
|
|
6854
6917
|
}
|
|
6855
6918
|
var ExecutionPhase = {
|
|
6919
|
+
/** Environment diagnostics phase (runs before execution) */
|
|
6920
|
+
DIAGNOSTICS: "diagnostics",
|
|
6856
6921
|
CONFIG: "config-loading",
|
|
6857
6922
|
API_CLIENT: "api-client-creation",
|
|
6858
6923
|
FETCH_EVAL_RUN: "fetch-eval-run",
|
|
@@ -6869,6 +6934,585 @@ var ExecutionPhase = {
|
|
|
6869
6934
|
UPDATE_STATUS: "update-status"
|
|
6870
6935
|
};
|
|
6871
6936
|
|
|
6937
|
+
// src/diagnostics.ts
|
|
6938
|
+
import { spawn } from "child_process";
|
|
6939
|
+
import * as fs11 from "fs";
|
|
6940
|
+
import * as path9 from "path";
|
|
6941
|
+
import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
|
|
6942
|
+
/**
 * Runs `command` through `sh -c` and captures its output. Never rejects:
 * every failure mode is reported through the resolved object.
 *
 * The command string is interpreted by the shell, so callers must only
 * interpolate trusted values into it.
 *
 * @param {string} command - Shell command line to execute.
 * @param {number} [timeoutMs=10000] - Forwarded to spawn's `timeout` option.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number, signal?: string}>}
 *   Trimmed stdout/stderr and the exit code. `exitCode` is -1 when the child
 *   reported no exit code or emitted an "error" event, and -99 when `spawn`
 *   itself threw. `signal` is only present when the child was terminated by
 *   a signal, which lets callers tell a kill/timeout from other failures.
 */
async function execCommand(command, timeoutMs = 1e4) {
  return new Promise((resolve) => {
    try {
      const proc2 = spawn("sh", ["-c", command], {
        timeout: timeoutMs
      });
      let stdout = "";
      let stderr = "";
      // Decode at the stream level so a multi-byte UTF-8 character that is
      // split across two chunks is not turned into replacement characters.
      proc2.stdout.setEncoding("utf8");
      proc2.stderr.setEncoding("utf8");
      proc2.stdout.on("data", (chunk) => {
        stdout += chunk;
      });
      proc2.stderr.on("data", (chunk) => {
        stderr += chunk;
      });
      proc2.on("close", (code2, signal) => {
        const result = {
          stdout: stdout.trim(),
          stderr: stderr.trim(),
          exitCode: code2 ?? -1
        };
        // Only add the key when a signal ended the process, so the shape of
        // ordinary results is unchanged.
        if (signal) {
          result.signal = signal;
        }
        resolve(result);
      });
      proc2.on("error", (err) => {
        resolve({
          stdout: "",
          stderr: err.message,
          exitCode: -1
        });
      });
    } catch (err) {
      resolve({
        stdout: "",
        stderr: err instanceof Error ? err.message : String(err),
        exitCode: -99
      });
    }
  });
}
|
|
6979
|
+
/**
 * Runs a diagnostic test and converts any throw/rejection into a failed
 * result object instead of propagating it.
 *
 * @param {string} testName - Name reported in the synthetic failure result.
 * @param {() => any} testFn - Test to run; its resolved value is returned as-is.
 * @returns {Promise<object>} Either the value produced by `testFn`, or
 *   `{ name, passed: false, details, error, durationMs }` describing the crash.
 */
async function safeRunTest(testName, testFn) {
  const startedAt = Date.now();
  let failure;
  try {
    return await testFn();
  } catch (err) {
    failure = err;
  }
  // Only reached when testFn threw or rejected.
  const isError = failure instanceof Error;
  const message = isError ? failure.message : String(failure);
  return {
    name: testName,
    passed: false,
    details: {
      testCrashed: true,
      error: message,
      stack: isError ? failure.stack : void 0
    },
    error: `Test crashed: ${message}`,
    durationMs: Date.now() - startedAt
  };
}
|
|
6998
|
+
/**
 * Diagnostic: looks for the `claude` binary, both under the globally
 * installed evaluator package's `node_modules/.bin` and on PATH.
 *
 * The test passes when the binary exists at the expected global location or
 * `which claude` succeeds. All intermediate findings are recorded in
 * `details` for troubleshooting.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 */
async function testClaudeBinaryDiscovery() {
  const start = Date.now();
  const details = {};
  const npmRootResult = await execCommand("npm root -g");
  const npmBinResult = await execCommand("npm bin -g");
  const npmRoot = npmRootResult.stdout;
  let npmBin = npmBinResult.stdout;
  if (!npmBin) {
    // `npm bin` no longer exists in npm 9+, so derive the global bin
    // directory from the global prefix instead (POSIX layout: <prefix>/bin).
    const npmPrefixResult = await execCommand("npm prefix -g");
    if (npmPrefixResult.exitCode === 0 && npmPrefixResult.stdout) {
      npmBin = path9.join(npmPrefixResult.stdout, "bin");
    }
  }
  details.npmRoot = npmRoot;
  details.npmRootExitCode = npmRootResult.exitCode;
  details.npmBin = npmBin;
  const evaluatorBinPath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin"
  );
  details.evaluatorBinPath = evaluatorBinPath;
  const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
  details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
  details.lsBinExitCode = lsBinResult.exitCode;
  const claudePath = path9.join(evaluatorBinPath, "claude");
  // With an empty npm root the joined path is relative and would be checked
  // against the current working directory, which says nothing about the
  // global install. Treat that case as "not found".
  const claudeExists = npmRoot !== "" && fs11.existsSync(claudePath);
  details.claudePath = claudePath;
  details.claudeExists = claudeExists;
  if (claudeExists) {
    const readlinkResult = await execCommand(
      `readlink -f "${claudePath}" 2>&1`
    );
    details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
    const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
    details.claudeStat = statResult.stdout || statResult.stderr;
    const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
    details.claudeFileInfo = lsClaudeResult.stdout;
  }
  const whichResult = await execCommand("which claude 2>&1");
  details.whichClaude = whichResult.stdout || "(not in PATH)";
  details.whichExitCode = whichResult.exitCode;
  const pathEntries = (process.env.PATH || "").split(":");
  details.currentPATH = pathEntries;
  details.pathLength = pathEntries.length;
  const passed = claudeExists || whichResult.exitCode === 0;
  return {
    name: "claude-binary-discovery",
    passed,
    details,
    error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
    durationMs: Date.now() - start
  };
}
|
|
7052
|
+
/**
 * Diagnostic: checks that the `claude` CLI under the globally installed
 * evaluator package can actually be executed.
 *
 * Runs `--version` and `--help` against the expected binary path and, when
 * `which claude` succeeds, also records `claude --version` from PATH. The
 * test passes when either the `--version` or the `--help` invocation of the
 * expected binary exits with status 0.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 */
async function testClaudeExecution() {
  const start = Date.now();
  const details = {};
  const npmRootResult = await execCommand("npm root -g");
  const npmRoot = npmRootResult.stdout;
  const claudePath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin",
    "claude"
  );
  details.claudePath = claudePath;
  const versionCommand = `"${claudePath}" --version`;
  const versionResult = await execCommand(`${versionCommand} 2>&1`, 15e3);
  details.versionCommand = {
    command: versionCommand,
    stdout: versionResult.stdout,
    stderr: versionResult.stderr,
    exitCode: versionResult.exitCode
  };
  // Do not pipe through `head`: a pipeline reports the exit status of its
  // last command, so the CLI's own failure would be masked by head's 0 and
  // this test could never fail. Limit the output to 50 lines here instead.
  const helpCommand = `"${claudePath}" --help`;
  const helpResult = await execCommand(`${helpCommand} 2>&1`, 15e3);
  const helpHead = helpResult.stdout.split("\n").slice(0, 50).join("\n");
  details.helpCommand = {
    command: helpCommand,
    stdout: helpHead.slice(0, 1500),
    stderr: helpResult.stderr.slice(0, 500),
    exitCode: helpResult.exitCode
  };
  const whichClaudeResult = await execCommand("which claude 2>&1");
  if (whichClaudeResult.exitCode === 0) {
    const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
    details.pathVersionCommand = {
      whichClaude: whichClaudeResult.stdout,
      stdout: pathVersionResult.stdout,
      stderr: pathVersionResult.stderr,
      exitCode: pathVersionResult.exitCode
    };
  }
  const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
  return {
    name: "claude-cli-execution",
    passed,
    details,
    error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
    durationMs: Date.now() - start
  };
}
|
|
7105
|
+
async function testEnvironmentDump() {
|
|
7106
|
+
const start = Date.now();
|
|
7107
|
+
const details = {};
|
|
7108
|
+
const importantVars = [
|
|
7109
|
+
"PATH",
|
|
7110
|
+
"HOME",
|
|
7111
|
+
"USER",
|
|
7112
|
+
"SHELL",
|
|
7113
|
+
"NODE_ENV",
|
|
7114
|
+
"PWD",
|
|
7115
|
+
"EVAL_SERVER_URL",
|
|
7116
|
+
"AI_GATEWAY_URL",
|
|
7117
|
+
"TRACE_PUSH_URL",
|
|
7118
|
+
"EVAL_AUTH_TOKEN",
|
|
7119
|
+
"ANTHROPIC_API_KEY",
|
|
7120
|
+
"ANTHROPIC_AUTH_TOKEN",
|
|
7121
|
+
"ANTHROPIC_BASE_URL",
|
|
7122
|
+
"ANTHROPIC_CUSTOM_HEADERS"
|
|
7123
|
+
];
|
|
7124
|
+
const capturedVars = {};
|
|
7125
|
+
for (const key of importantVars) {
|
|
7126
|
+
const value = process.env[key];
|
|
7127
|
+
if (value) {
|
|
7128
|
+
if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
|
|
7129
|
+
capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
|
|
7130
|
+
} else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
|
|
7131
|
+
capturedVars[key] = value.split("\n").map((h) => {
|
|
7132
|
+
const [name2, val] = h.split(":");
|
|
7133
|
+
return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
|
|
7134
|
+
}).join(" | ");
|
|
7135
|
+
} else if (key === "PATH") {
|
|
7136
|
+
const parts = value.split(":");
|
|
7137
|
+
capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
|
|
7138
|
+
} else {
|
|
7139
|
+
capturedVars[key] = value;
|
|
7140
|
+
}
|
|
7141
|
+
} else {
|
|
7142
|
+
capturedVars[key] = "(NOT SET)";
|
|
7143
|
+
}
|
|
7144
|
+
}
|
|
7145
|
+
details.importantVars = capturedVars;
|
|
7146
|
+
const envResult = await execCommand("env | sort | head -50");
|
|
7147
|
+
details.envCommandOutput = envResult.stdout;
|
|
7148
|
+
details.envExitCode = envResult.exitCode;
|
|
7149
|
+
details.nodeInfo = {
|
|
7150
|
+
version: process.version,
|
|
7151
|
+
platform: process.platform,
|
|
7152
|
+
arch: process.arch,
|
|
7153
|
+
pid: process.pid,
|
|
7154
|
+
cwd: process.cwd(),
|
|
7155
|
+
execPath: process.execPath
|
|
7156
|
+
};
|
|
7157
|
+
return {
|
|
7158
|
+
name: "environment-dump",
|
|
7159
|
+
passed: true,
|
|
7160
|
+
// Info test, always passes
|
|
7161
|
+
details,
|
|
7162
|
+
durationMs: Date.now() - start
|
|
7163
|
+
};
|
|
7164
|
+
}
|
|
7165
|
+
/**
 * Diagnostic: records directory listings for the current directory and for
 * the locations under the global npm root where the evaluator package and
 * its dependencies are expected. Informational only, so it always reports
 * `passed: true`.
 *
 * Each entry in `details` has the shape `{ path, contents }`, where
 * `contents` is the stdout of the corresponding `ls` command.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, durationMs: number}>}
 */
async function testFileSystemStructure() {
  const startedAt = Date.now();
  const details = {};
  // Runs one listing command and files its stdout under `key`.
  const record = async (key, dirPath, command) => {
    const { stdout } = await execCommand(command);
    details[key] = { path: dirPath, contents: stdout };
  };
  const { stdout: npmRoot } = await execCommand("npm root -g");
  const cwdListing = await execCommand("ls -la");
  details.currentDirectory = {
    path: process.cwd(),
    contents: cwdListing.stdout
  };
  await record("npmGlobalRoot", npmRoot, `ls -la "${npmRoot}" 2>&1 | head -30`);
  const wixPath = path9.join(npmRoot, "@wix");
  await record("wixPackages", wixPath, `ls -la "${wixPath}" 2>&1`);
  const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
  await record("evaluatorDir", evaluatorPath, `ls -la "${evaluatorPath}" 2>&1`);
  const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
  await record("evaluatorNodeModules", nodeModulesPath, `ls "${nodeModulesPath}" 2>&1 | head -30`);
  const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
  await record("anthropicPackages", anthropicPath, `ls -la "${anthropicPath}" 2>&1`);
  const binPath = path9.join(nodeModulesPath, ".bin");
  await record("binDirectory", binPath, `ls -la "${binPath}" 2>&1`);
  return {
    name: "file-system-structure",
    passed: true,
    // Info test, always passes
    details,
    durationMs: Date.now() - startedAt
  };
}
|
|
7222
|
+
/**
 * Diagnostic: probes DNS, ICMP and HTTPS connectivity plus the AI gateway and
 * backend health endpoints by shelling out through `execCommand`.
 *
 * The test passes when basic networking works (ping OR the HTTPS baseline
 * succeeded) AND curl could reach the AI gateway.
 *
 * @param {object} config - Evaluator config. Reads `aiGatewayUrl` (optional,
 *   falls back to the public gateway proxy URL) and `serverUrl`.
 * @returns {Promise<object>} Result with `name`, `passed`, `details`,
 *   `error` (undefined on success) and `durationMs`.
 */
async function testNetworkConnectivity(config) {
  const start = Date.now();
  const details = {};
  // Single-quote values for the shell so characters such as `$`, `"` or
  // backticks inside a configured URL reach curl literally.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  // Output is trimmed here instead of with `| head` / `| tail`: a shell
  // pipeline reports the exit status of its LAST command, so piping curl into
  // tail made every probe look successful even when curl failed.
  const firstLines = (text, count) => String(text ?? "").split("\n").slice(0, count).join("\n");
  const lastLines = (text, count) => String(text ?? "").trimEnd().split("\n").slice(-count).join("\n");
  const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
  const serverUrl = config.serverUrl;
  const healthUrl = `${serverUrl}/health`;
  const curlVerbose = "curl -v -s --connect-timeout 5 --max-time 10";
  // The probes are independent of each other, so run them concurrently.
  const [dnsResult, pingResult, curlGatewayResult, curlServerResult, httpsResult] = await Promise.all([
    execCommand("nslookup manage.wix.com 2>&1"),
    execCommand("ping -c 2 manage.wix.com 2>&1"),
    execCommand(`${curlVerbose} ${shellQuote(gatewayUrl)} 2>&1`),
    execCommand(`${curlVerbose} ${shellQuote(healthUrl)} 2>&1`),
    execCommand(
      'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
    )
  ]);
  details.dnsLookup = {
    command: "nslookup manage.wix.com",
    output: firstLines(dnsResult.stdout || dnsResult.stderr, 10),
    exitCode: dnsResult.exitCode
  };
  details.pingTest = {
    command: "ping -c 2 manage.wix.com",
    output: pingResult.stdout || pingResult.stderr,
    exitCode: pingResult.exitCode
  };
  details.aiGatewayTest = {
    url: gatewayUrl,
    output: lastLines(curlGatewayResult.stdout, 30),
    exitCode: curlGatewayResult.exitCode
  };
  details.backendServerTest = {
    url: healthUrl,
    output: lastLines(curlServerResult.stdout, 30),
    exitCode: curlServerResult.exitCode
  };
  details.httpsBaseline = {
    command: "curl https://www.google.com",
    output: httpsResult.stdout,
    exitCode: httpsResult.exitCode
  };
  // ICMP is often blocked, so a successful HTTPS request also counts as
  // "network works".
  const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
  // curl (without -f) exits 0 for any HTTP response, so this measures
  // reachability of the gateway, not the HTTP status it returned.
  const gatewayReachable = curlGatewayResult.exitCode === 0;
  const passed = networkWorks && gatewayReachable;
  return {
    name: "network-connectivity",
    passed,
    details,
    error: passed ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
    durationMs: Date.now() - start
  };
}
|
|
7275
|
+
/**
 * Diagnostic: verifies that child processes can be spawned through
 * `execCommand` and that stdout, stderr and exit codes come back.
 *
 * Only the echo round-trip decides `passed`; the node, shell, stderr and
 * exit-code probes are recorded in `details` for inspection.
 *
 * @returns {Promise<object>} Result with `name`, `passed`, `details`,
 *   `error` (undefined on success) and `durationMs`.
 */
async function testChildProcessSpawning() {
  const start = Date.now();
  const details = {};
  const marker = "DIAGNOSTIC_TEST_SUCCESS_12345";
  // Each command string is declared once so the command reported in `details`
  // is always the one that was actually executed.
  const echoCommand = `echo "${marker}"`;
  const shellCommand = 'echo "PID: $$"; pwd; whoami; date; uname -a';
  const stderrCommand = `node -e "console.error('stderr test')"`;
  const exitCommand = "exit 42";

  const echoResult = await execCommand(echoCommand);
  // Compare on trimmed output: `echo` terminates its output with a newline
  // (CRLF on some platforms), which must not fail the round-trip check.
  const echoMatches = String(echoResult.stdout ?? "").trim() === marker;
  details.echoTest = {
    command: echoCommand,
    output: echoResult.stdout,
    exitCode: echoResult.exitCode,
    passed: echoMatches
  };

  const nodeResult = await execCommand(
    'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
  );
  details.nodeTest = {
    // Abbreviated on purpose; the full script is the literal passed above.
    command: 'node -e "console.log(JSON.stringify({...}))"',
    output: nodeResult.stdout,
    exitCode: nodeResult.exitCode
  };

  const shellResult = await execCommand(shellCommand);
  details.shellTest = {
    command: shellCommand,
    output: shellResult.stdout,
    exitCode: shellResult.exitCode
  };

  const stderrResult = await execCommand(stderrCommand);
  details.stderrTest = {
    command: stderrCommand,
    stderr: stderrResult.stderr,
    exitCode: stderrResult.exitCode
  };

  const exitCodeResult = await execCommand(exitCommand);
  details.exitCodeTest = {
    command: exitCommand,
    exitCode: exitCodeResult.exitCode,
    passed: exitCodeResult.exitCode === 42
  };

  const passed = echoResult.exitCode === 0 && echoMatches;
  return {
    name: "child-process-spawning",
    passed,
    details,
    error: passed ? void 0 : "Echo test failed",
    durationMs: Date.now() - start
  };
}
|
|
7324
|
+
/**
 * Diagnostic: dynamically imports `@anthropic-ai/claude-agent-sdk` and records
 * what the module exposes.
 *
 * A successful import always passes, whether or not a `query` function is
 * exported; that fact is only reported in `details`. An import failure
 * yields a failing result carrying the message and the first five stack lines.
 *
 * @returns {Promise<object>} Result with `name`, `passed`, `details`,
 *   `durationMs`, and `error` when the import failed.
 */
async function testSdkImport() {
  const startedAt = Date.now();
  try {
    const sdkModule = await import("@anthropic-ai/claude-agent-sdk");
    const queryType = typeof sdkModule.query;
    const queryIsFunction = queryType === "function";
    const details = {
      sdkImported: true,
      exportedKeys: Object.keys(sdkModule),
      hasQuery: queryIsFunction,
      // These two keys are only present when `query` is callable.
      ...(queryIsFunction ? { queryFunctionExists: true, queryFunctionType: queryType } : {})
    };
    return {
      name: "sdk-import",
      passed: true,
      details,
      durationMs: Date.now() - startedAt
    };
  } catch (failure) {
    const isError = failure instanceof Error;
    const message = isError ? failure.message : String(failure);
    // Keep only the top of the stack so the report stays compact.
    const stack = isError ? failure.stack?.split("\n").slice(0, 5) : void 0;
    return {
      name: "sdk-import",
      passed: false,
      details: {
        sdkImported: false,
        error: message,
        stack
      },
      error: `Failed to import SDK: ${message}`,
      durationMs: Date.now() - startedAt
    };
  }
}
|
|
7357
|
+
/**
 * Diagnostic: checks that the process can create a directory under /tmp,
 * write a file, read it back unchanged, list the directory, and delete both.
 *
 * The test fails when any step throws OR when the content read back differs
 * from what was written. On failure the test file and directory are removed
 * on a best-effort basis so a broken run leaves nothing behind.
 *
 * @returns {Promise<object>} Result with `name`, `passed`, `details`,
 *   `error` (undefined on success) and `durationMs`.
 */
async function testFileSystemWrite() {
  const start = Date.now();
  const details = {};
  const testDir = "/tmp/evalforge-diagnostics-test";
  const testFile = path9.join(testDir, "test-file.txt");
  const testContent = `Diagnostic test at ${new Date().toISOString()}`;
  try {
    // `recursive: true` is a no-op for an existing directory, so no separate
    // existence check is needed.
    fs11.mkdirSync(testDir, { recursive: true });
    details.directoryCreated = true;
    fs11.writeFileSync(testFile, testContent);
    details.fileWritten = true;
    const readContent = fs11.readFileSync(testFile, "utf8");
    details.fileRead = true;
    details.contentMatches = readContent === testContent;
    const lsResult = await execCommand(`ls -la "${testDir}"`);
    details.directoryContents = lsResult.stdout;
    fs11.unlinkSync(testFile);
    fs11.rmdirSync(testDir);
    details.cleanedUp = true;
    // A write that cannot be read back intact is a failed write test.
    const passed = details.contentMatches;
    return {
      name: "file-system-write",
      passed,
      details,
      error: passed ? void 0 : "File system write failed: content read back does not match what was written",
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    // Best-effort cleanup: remove only our own file, then the directory if it
    // is empty. A cleanup problem is recorded, never thrown, so the original
    // failure stays the reported error.
    try {
      if (fs11.existsSync(testDir)) {
        fs11.rmSync(testFile, { force: true });
        fs11.rmdirSync(testDir);
      }
    } catch (cleanupErr) {
      details.cleanupError = cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
    }
    return {
      name: "file-system-write",
      passed: false,
      details: {
        ...details,
        error,
        testDir,
        testFile
      },
      error: `File system write failed: ${error}`,
      durationMs: Date.now() - start
    };
  }
}
|
|
7400
|
+
/**
 * Publishes one diagnostic result (or the final report) as a live trace event.
 *
 * The event is always written to stdout as a `TRACE_EVENT:<json>` line. When
 * `tracePushUrl` is given it is additionally POSTed there as a one-element
 * JSON array; push problems are logged and never thrown.
 *
 * @param {string} evalRunId2 - Evaluation run the event belongs to.
 * @param {object} result - A single test result, or the full report (detected
 *   by the presence of a `summary` key, which also marks the event complete).
 * @param {string} [tracePushUrl] - Endpoint to POST the event to.
 * @param {string} [routeHeader] - Value for the `x-wix-route` header.
 * @param {string} [authToken] - Bearer token for the `Authorization` header.
 * @returns {Promise<void>|undefined} Promise that settles (never rejects) once
 *   the push attempt finished, or undefined when no push URL was given.
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  const isSummary = "summary" in result;
  // Cap every string inside `details` at 500 characters to keep events small.
  const capLongStrings = (_, v) => typeof v === "string" && v.length > 500 ? v.slice(0, 500) + "... [truncated]" : v;
  let truncatedResult = result;
  if (!isSummary) {
    // JSON.stringify returns undefined for a missing / non-serializable value,
    // and JSON.parse(undefined) throws, so only round-trip real JSON text.
    const serializedDetails = JSON.stringify(result.details, capLongStrings);
    truncatedResult = {
      ...result,
      details: serializedDetails === void 0 ? void 0 : JSON.parse(serializedDetails)
    };
  }
  const event = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
    stepNumber: 0,
    type: LiveTraceEventType2.DIAGNOSTIC,
    outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
    timestamp: new Date().toISOString(),
    isComplete: isSummary
  };
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);
  if (!tracePushUrl) {
    return void 0;
  }
  const headers = {
    "Content-Type": "application/json"
  };
  if (routeHeader) {
    headers["x-wix-route"] = routeHeader;
  }
  if (authToken) {
    headers["Authorization"] = `Bearer ${authToken}`;
  }
  return fetch(tracePushUrl, {
    method: "POST",
    headers,
    body: JSON.stringify([event])
  }).then((response) => {
    // fetch only rejects on network errors; an HTTP error status must be
    // checked explicitly or a rejected push would go unnoticed.
    if (!response.ok) {
      console.error(
        `[DIAGNOSTICS] Backend rejected trace event: HTTP ${response.status} ${response.statusText}`
      );
    }
  }).catch((err) => {
    console.error(
      "[DIAGNOSTICS] Failed to push trace event to backend:",
      err
    );
  });
}
|
|
7445
|
+
/**
 * Runs every environment diagnostic in sequence, logs each outcome to stderr,
 * emits one trace event per test plus a final summary event, and returns the
 * aggregated report.
 *
 * @param {object} config - Evaluator config. `tracePushUrl`, `routeHeader` and
 *   `authToken` are forwarded to `emitDiagnosticTraceEvent`; the whole object
 *   is passed to the network connectivity test.
 * @param {string} evalRunId2 - Evaluation run the trace events belong to.
 * @returns {Promise<object>} Report with `startedAt`, `completedAt`,
 *   `totalDurationMs`, `tests` and `summary` ({ total, passed, failed }).
 */
async function runDiagnostics(config, evalRunId2) {
  const startedAt = new Date().toISOString();
  const startTime = Date.now();

  // All box rows and borders share one width (61 columns) so the header and
  // footer banners line up.
  const horizontalRule = "\u2550".repeat(59);
  const boxRow = (text) => `\u2551 ${text}`.padEnd(60) + "\u2551";
  const printBox = (rows) => {
    console.error("");
    console.error(`\u2554${horizontalRule}\u2557`);
    for (const row of rows) {
      console.error(boxRow(row));
    }
    console.error(`\u255A${horizontalRule}\u255D`);
    console.error("");
  };

  // Trace emission is reporting only: a failure here is logged and must not
  // abort the remaining diagnostics.
  const emit = (payload) => {
    try {
      emitDiagnosticTraceEvent(
        evalRunId2,
        payload,
        config.tracePushUrl,
        config.routeHeader,
        config.authToken
      );
    } catch (err) {
      console.error(
        "[DIAG] Failed to emit trace event:",
        err instanceof Error ? err.message : String(err)
      );
    }
  };

  printBox([
    "EVALFORGE ENVIRONMENT DIAGNOSTICS",
    "(Results sent to backend via trace events)"
  ]);

  const tests = [];
  const runTest = async (testName, testFn) => {
    console.error(`[DIAG] Running: ${testName}...`);
    const result = await safeRunTest(testName, testFn);
    tests.push(result);
    const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
    console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
    console.error("[DIAG] Details:");
    console.error(JSON.stringify(result.details, null, 2));
    console.error("");
    if (!result.passed && result.error) {
      console.error(`[DIAG] ERROR: ${result.error}`);
    }
    emit(result);
  };

  // Run sequentially so the stderr log reads top to bottom, one test at a time.
  await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
  await runTest("claude-cli-execution", testClaudeExecution);
  await runTest("environment-dump", testEnvironmentDump);
  await runTest("file-system-structure", testFileSystemStructure);
  await runTest("network-connectivity", () => testNetworkConnectivity(config));
  await runTest("child-process-spawning", testChildProcessSpawning);
  await runTest("sdk-import", testSdkImport);
  await runTest("file-system-write", testFileSystemWrite);

  const completedAt = new Date().toISOString();
  const totalDurationMs = Date.now() - startTime;
  const passedCount = tests.filter((t) => t.passed).length;
  const report = {
    startedAt,
    completedAt,
    totalDurationMs,
    tests,
    summary: {
      total: tests.length,
      passed: passedCount,
      failed: tests.length - passedCount
    }
  };
  emit(report);

  printBox([
    `DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`,
    `Total time: ${totalDurationMs}ms`
  ]);
  return report;
}
|
|
7515
|
+
|
|
6872
7516
|
// src/index.ts
|
|
6873
7517
|
console.error(
|
|
6874
7518
|
"[EVALUATOR-BOOT] Module loading started",
|
|
@@ -6936,6 +7580,33 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
6936
7580
|
`[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
|
|
6937
7581
|
);
|
|
6938
7582
|
}
|
|
7583
|
+
state.currentPhase = ExecutionPhase.DIAGNOSTICS;
|
|
7584
|
+
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
|
|
7585
|
+
console.error("[DEBUG-H1.5] Running environment diagnostics...");
|
|
7586
|
+
try {
|
|
7587
|
+
const diagnosticReport = await runDiagnostics(config, evalRunId2);
|
|
7588
|
+
console.error(
|
|
7589
|
+
"[DEBUG-H1.5] Diagnostics completed",
|
|
7590
|
+
JSON.stringify({
|
|
7591
|
+
passed: diagnosticReport.summary.passed,
|
|
7592
|
+
failed: diagnosticReport.summary.failed,
|
|
7593
|
+
total: diagnosticReport.summary.total,
|
|
7594
|
+
durationMs: diagnosticReport.totalDurationMs
|
|
7595
|
+
})
|
|
7596
|
+
);
|
|
7597
|
+
const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
|
|
7598
|
+
if (failedTests.length > 0) {
|
|
7599
|
+
console.error(
|
|
7600
|
+
"[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
|
|
7601
|
+
failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
|
|
7602
|
+
);
|
|
7603
|
+
}
|
|
7604
|
+
} catch (diagErr) {
|
|
7605
|
+
console.error(
|
|
7606
|
+
"[DEBUG-H1.5] Diagnostics failed (non-fatal):",
|
|
7607
|
+
diagErr instanceof Error ? diagErr.message : String(diagErr)
|
|
7608
|
+
);
|
|
7609
|
+
}
|
|
6939
7610
|
state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
|
|
6940
7611
|
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
|
|
6941
7612
|
console.error(
|