@wix/evalforge-evaluator 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +624 -93
- package/build/index.js.map +4 -4
- package/build/index.mjs +620 -89
- package/build/index.mjs.map +4 -4
- package/build/types/diagnostics.d.ts +52 -0
- package/build/types/error-reporter.d.ts +2 -0
- package/package.json +3 -3
package/build/index.mjs
CHANGED
|
@@ -64,8 +64,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
64
64
|
}
|
|
65
65
|
return headers;
|
|
66
66
|
}
|
|
67
|
-
async function fetchJson(
|
|
68
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
67
|
+
async function fetchJson(path10) {
|
|
68
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
69
69
|
console.error(`[API] GET ${url}`);
|
|
70
70
|
const headers = buildHeaders();
|
|
71
71
|
const response = await fetch(url, {
|
|
@@ -79,8 +79,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
79
79
|
}
|
|
80
80
|
return response.json();
|
|
81
81
|
}
|
|
82
|
-
async function postJson(
|
|
83
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
82
|
+
async function postJson(path10, body) {
|
|
83
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
84
84
|
console.error(`[API] POST ${url}`);
|
|
85
85
|
const response = await fetch(url, {
|
|
86
86
|
method: "POST",
|
|
@@ -94,8 +94,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
94
94
|
);
|
|
95
95
|
}
|
|
96
96
|
}
|
|
97
|
-
async function deleteRequest(
|
|
98
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
97
|
+
async function deleteRequest(path10) {
|
|
98
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
99
99
|
console.error(`[API] DELETE ${url}`);
|
|
100
100
|
const headers = buildHeaders();
|
|
101
101
|
const response = await fetch(url, {
|
|
@@ -109,8 +109,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
109
109
|
);
|
|
110
110
|
}
|
|
111
111
|
}
|
|
112
|
-
async function putJson(
|
|
113
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
112
|
+
async function putJson(path10, body) {
|
|
113
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
114
114
|
console.error(`[API] PUT ${url}`);
|
|
115
115
|
const response = await fetch(url, {
|
|
116
116
|
method: "PUT",
|
|
@@ -1136,17 +1136,17 @@ var ReadStream = class extends Minipass {
|
|
|
1136
1136
|
[_size];
|
|
1137
1137
|
[_remain];
|
|
1138
1138
|
[_autoClose];
|
|
1139
|
-
constructor(
|
|
1139
|
+
constructor(path10, opt) {
|
|
1140
1140
|
opt = opt || {};
|
|
1141
1141
|
super(opt);
|
|
1142
1142
|
this.readable = true;
|
|
1143
1143
|
this.writable = false;
|
|
1144
|
-
if (typeof
|
|
1144
|
+
if (typeof path10 !== "string") {
|
|
1145
1145
|
throw new TypeError("path must be a string");
|
|
1146
1146
|
}
|
|
1147
1147
|
this[_errored] = false;
|
|
1148
1148
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1149
|
-
this[_path] =
|
|
1149
|
+
this[_path] = path10;
|
|
1150
1150
|
this[_readSize] = opt.readSize || 16 * 1024 * 1024;
|
|
1151
1151
|
this[_reading] = false;
|
|
1152
1152
|
this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
|
|
@@ -1309,10 +1309,10 @@ var WriteStream = class extends EE {
|
|
|
1309
1309
|
[_flags];
|
|
1310
1310
|
[_finished] = false;
|
|
1311
1311
|
[_pos];
|
|
1312
|
-
constructor(
|
|
1312
|
+
constructor(path10, opt) {
|
|
1313
1313
|
opt = opt || {};
|
|
1314
1314
|
super(opt);
|
|
1315
|
-
this[_path] =
|
|
1315
|
+
this[_path] = path10;
|
|
1316
1316
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1317
1317
|
this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
|
|
1318
1318
|
this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
|
|
@@ -2205,10 +2205,10 @@ var Header = class {
|
|
|
2205
2205
|
}
|
|
2206
2206
|
const prefixSize = this.ctime || this.atime ? 130 : 155;
|
|
2207
2207
|
const split = splitPrefix(this.path || "", prefixSize);
|
|
2208
|
-
const
|
|
2208
|
+
const path10 = split[0];
|
|
2209
2209
|
const prefix = split[1];
|
|
2210
2210
|
this.needPax = !!split[2];
|
|
2211
|
-
this.needPax = encString(buf, off, 100,
|
|
2211
|
+
this.needPax = encString(buf, off, 100, path10) || this.needPax;
|
|
2212
2212
|
this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
|
|
2213
2213
|
this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
|
|
2214
2214
|
this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
|
|
@@ -3184,16 +3184,16 @@ var modeFix = (mode, isDir, portable) => {
|
|
|
3184
3184
|
// ../../node_modules/tar/dist/esm/strip-absolute-path.js
|
|
3185
3185
|
import { win32 } from "node:path";
|
|
3186
3186
|
var { isAbsolute, parse: parse3 } = win32;
|
|
3187
|
-
var stripAbsolutePath = (
|
|
3187
|
+
var stripAbsolutePath = (path10) => {
|
|
3188
3188
|
let r = "";
|
|
3189
|
-
let parsed = parse3(
|
|
3190
|
-
while (isAbsolute(
|
|
3191
|
-
const root =
|
|
3192
|
-
|
|
3189
|
+
let parsed = parse3(path10);
|
|
3190
|
+
while (isAbsolute(path10) || parsed.root) {
|
|
3191
|
+
const root = path10.charAt(0) === "/" && path10.slice(0, 4) !== "//?/" ? "/" : parsed.root;
|
|
3192
|
+
path10 = path10.slice(root.length);
|
|
3193
3193
|
r += root;
|
|
3194
|
-
parsed = parse3(
|
|
3194
|
+
parsed = parse3(path10);
|
|
3195
3195
|
}
|
|
3196
|
-
return [r,
|
|
3196
|
+
return [r, path10];
|
|
3197
3197
|
};
|
|
3198
3198
|
|
|
3199
3199
|
// ../../node_modules/tar/dist/esm/winchars.js
|
|
@@ -3205,12 +3205,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
|
|
|
3205
3205
|
var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
|
|
3206
3206
|
|
|
3207
3207
|
// ../../node_modules/tar/dist/esm/write-entry.js
|
|
3208
|
-
var prefixPath = (
|
|
3208
|
+
var prefixPath = (path10, prefix) => {
|
|
3209
3209
|
if (!prefix) {
|
|
3210
|
-
return normalizeWindowsPath(
|
|
3210
|
+
return normalizeWindowsPath(path10);
|
|
3211
3211
|
}
|
|
3212
|
-
|
|
3213
|
-
return stripTrailingSlashes(prefix) + "/" +
|
|
3212
|
+
path10 = normalizeWindowsPath(path10).replace(/^\.(\/|$)/, "");
|
|
3213
|
+
return stripTrailingSlashes(prefix) + "/" + path10;
|
|
3214
3214
|
};
|
|
3215
3215
|
var maxReadSize = 16 * 1024 * 1024;
|
|
3216
3216
|
var PROCESS = /* @__PURE__ */ Symbol("process");
|
|
@@ -3355,8 +3355,8 @@ var WriteEntry = class extends Minipass {
|
|
|
3355
3355
|
[MODE](mode) {
|
|
3356
3356
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
3357
3357
|
}
|
|
3358
|
-
[PREFIX](
|
|
3359
|
-
return prefixPath(
|
|
3358
|
+
[PREFIX](path10) {
|
|
3359
|
+
return prefixPath(path10, this.prefix);
|
|
3360
3360
|
}
|
|
3361
3361
|
[HEADER]() {
|
|
3362
3362
|
if (!this.stat) {
|
|
@@ -3737,8 +3737,8 @@ var WriteEntryTar = class extends Minipass {
|
|
|
3737
3737
|
super.write(b);
|
|
3738
3738
|
readEntry.pipe(this);
|
|
3739
3739
|
}
|
|
3740
|
-
[PREFIX](
|
|
3741
|
-
return prefixPath(
|
|
3740
|
+
[PREFIX](path10) {
|
|
3741
|
+
return prefixPath(path10, this.prefix);
|
|
3742
3742
|
}
|
|
3743
3743
|
[MODE](mode) {
|
|
3744
3744
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
@@ -4162,8 +4162,8 @@ var PackJob = class {
|
|
|
4162
4162
|
pending = false;
|
|
4163
4163
|
ignore = false;
|
|
4164
4164
|
piped = false;
|
|
4165
|
-
constructor(
|
|
4166
|
-
this.path =
|
|
4165
|
+
constructor(path10, absolute) {
|
|
4166
|
+
this.path = path10 || "./";
|
|
4167
4167
|
this.absolute = absolute;
|
|
4168
4168
|
}
|
|
4169
4169
|
};
|
|
@@ -4291,21 +4291,21 @@ var Pack = class extends Minipass {
|
|
|
4291
4291
|
[WRITE](chunk) {
|
|
4292
4292
|
return super.write(chunk);
|
|
4293
4293
|
}
|
|
4294
|
-
add(
|
|
4295
|
-
this.write(
|
|
4294
|
+
add(path10) {
|
|
4295
|
+
this.write(path10);
|
|
4296
4296
|
return this;
|
|
4297
4297
|
}
|
|
4298
|
-
end(
|
|
4299
|
-
if (typeof
|
|
4300
|
-
cb =
|
|
4301
|
-
|
|
4298
|
+
end(path10, encoding, cb) {
|
|
4299
|
+
if (typeof path10 === "function") {
|
|
4300
|
+
cb = path10;
|
|
4301
|
+
path10 = void 0;
|
|
4302
4302
|
}
|
|
4303
4303
|
if (typeof encoding === "function") {
|
|
4304
4304
|
cb = encoding;
|
|
4305
4305
|
encoding = void 0;
|
|
4306
4306
|
}
|
|
4307
|
-
if (
|
|
4308
|
-
this.add(
|
|
4307
|
+
if (path10) {
|
|
4308
|
+
this.add(path10);
|
|
4309
4309
|
}
|
|
4310
4310
|
this[ENDED2] = true;
|
|
4311
4311
|
this[PROCESS2]();
|
|
@@ -4313,14 +4313,14 @@ var Pack = class extends Minipass {
|
|
|
4313
4313
|
cb();
|
|
4314
4314
|
return this;
|
|
4315
4315
|
}
|
|
4316
|
-
write(
|
|
4316
|
+
write(path10) {
|
|
4317
4317
|
if (this[ENDED2]) {
|
|
4318
4318
|
throw new Error("write after end");
|
|
4319
4319
|
}
|
|
4320
|
-
if (
|
|
4321
|
-
this[ADDTARENTRY](
|
|
4320
|
+
if (path10 instanceof ReadEntry) {
|
|
4321
|
+
this[ADDTARENTRY](path10);
|
|
4322
4322
|
} else {
|
|
4323
|
-
this[ADDFSENTRY](
|
|
4323
|
+
this[ADDFSENTRY](path10);
|
|
4324
4324
|
}
|
|
4325
4325
|
return this.flowing;
|
|
4326
4326
|
}
|
|
@@ -4663,9 +4663,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
|
|
|
4663
4663
|
// ../../node_modules/chownr/dist/esm/index.js
|
|
4664
4664
|
import fs6 from "node:fs";
|
|
4665
4665
|
import path4 from "node:path";
|
|
4666
|
-
var lchownSync = (
|
|
4666
|
+
var lchownSync = (path10, uid, gid) => {
|
|
4667
4667
|
try {
|
|
4668
|
-
return fs6.lchownSync(
|
|
4668
|
+
return fs6.lchownSync(path10, uid, gid);
|
|
4669
4669
|
} catch (er) {
|
|
4670
4670
|
if (er?.code !== "ENOENT")
|
|
4671
4671
|
throw er;
|
|
@@ -4748,9 +4748,9 @@ var CwdError = class extends Error {
|
|
|
4748
4748
|
path;
|
|
4749
4749
|
code;
|
|
4750
4750
|
syscall = "chdir";
|
|
4751
|
-
constructor(
|
|
4752
|
-
super(`${code2}: Cannot cd into '${
|
|
4753
|
-
this.path =
|
|
4751
|
+
constructor(path10, code2) {
|
|
4752
|
+
super(`${code2}: Cannot cd into '${path10}'`);
|
|
4753
|
+
this.path = path10;
|
|
4754
4754
|
this.code = code2;
|
|
4755
4755
|
}
|
|
4756
4756
|
get name() {
|
|
@@ -4764,10 +4764,10 @@ var SymlinkError = class extends Error {
|
|
|
4764
4764
|
symlink;
|
|
4765
4765
|
syscall = "symlink";
|
|
4766
4766
|
code = "TAR_SYMLINK_ERROR";
|
|
4767
|
-
constructor(symlink,
|
|
4767
|
+
constructor(symlink, path10) {
|
|
4768
4768
|
super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
|
|
4769
4769
|
this.symlink = symlink;
|
|
4770
|
-
this.path =
|
|
4770
|
+
this.path = path10;
|
|
4771
4771
|
}
|
|
4772
4772
|
get name() {
|
|
4773
4773
|
return "SymlinkError";
|
|
@@ -4949,13 +4949,13 @@ var normalizeUnicode = (s) => {
|
|
|
4949
4949
|
// ../../node_modules/tar/dist/esm/path-reservations.js
|
|
4950
4950
|
var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
4951
4951
|
var isWindows2 = platform3 === "win32";
|
|
4952
|
-
var getDirs = (
|
|
4953
|
-
const dirs =
|
|
4952
|
+
var getDirs = (path10) => {
|
|
4953
|
+
const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
|
|
4954
4954
|
const s = set[set.length - 1];
|
|
4955
4955
|
if (s !== void 0) {
|
|
4956
|
-
|
|
4956
|
+
path11 = join(s, path11);
|
|
4957
4957
|
}
|
|
4958
|
-
set.push(
|
|
4958
|
+
set.push(path11 || "/");
|
|
4959
4959
|
return set;
|
|
4960
4960
|
}, []);
|
|
4961
4961
|
return dirs;
|
|
@@ -4973,7 +4973,7 @@ var PathReservations = class {
|
|
|
4973
4973
|
paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
|
|
4974
4974
|
return stripTrailingSlashes(join(normalizeUnicode(p))).toLowerCase();
|
|
4975
4975
|
});
|
|
4976
|
-
const dirs = new Set(paths.map((
|
|
4976
|
+
const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
|
|
4977
4977
|
this.#reservations.set(fn, { dirs, paths });
|
|
4978
4978
|
for (const p of paths) {
|
|
4979
4979
|
const q = this.#queues.get(p);
|
|
@@ -5006,8 +5006,8 @@ var PathReservations = class {
|
|
|
5006
5006
|
throw new Error("function does not have any path reservations");
|
|
5007
5007
|
}
|
|
5008
5008
|
return {
|
|
5009
|
-
paths: res.paths.map((
|
|
5010
|
-
dirs: [...res.dirs].map((
|
|
5009
|
+
paths: res.paths.map((path10) => this.#queues.get(path10)),
|
|
5010
|
+
dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
|
|
5011
5011
|
};
|
|
5012
5012
|
}
|
|
5013
5013
|
// check if fn is first in line for all its paths, and is
|
|
@@ -5035,14 +5035,14 @@ var PathReservations = class {
|
|
|
5035
5035
|
}
|
|
5036
5036
|
const { paths, dirs } = res;
|
|
5037
5037
|
const next = /* @__PURE__ */ new Set();
|
|
5038
|
-
for (const
|
|
5039
|
-
const q = this.#queues.get(
|
|
5038
|
+
for (const path10 of paths) {
|
|
5039
|
+
const q = this.#queues.get(path10);
|
|
5040
5040
|
if (!q || q?.[0] !== fn) {
|
|
5041
5041
|
continue;
|
|
5042
5042
|
}
|
|
5043
5043
|
const q0 = q[1];
|
|
5044
5044
|
if (!q0) {
|
|
5045
|
-
this.#queues.delete(
|
|
5045
|
+
this.#queues.delete(path10);
|
|
5046
5046
|
continue;
|
|
5047
5047
|
}
|
|
5048
5048
|
q.shift();
|
|
@@ -5107,24 +5107,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
|
|
|
5107
5107
|
var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
5108
5108
|
var isWindows3 = platform4 === "win32";
|
|
5109
5109
|
var DEFAULT_MAX_DEPTH = 1024;
|
|
5110
|
-
var unlinkFile = (
|
|
5110
|
+
var unlinkFile = (path10, cb) => {
|
|
5111
5111
|
if (!isWindows3) {
|
|
5112
|
-
return fs8.unlink(
|
|
5112
|
+
return fs8.unlink(path10, cb);
|
|
5113
5113
|
}
|
|
5114
|
-
const name2 =
|
|
5115
|
-
fs8.rename(
|
|
5114
|
+
const name2 = path10 + ".DELETE." + randomBytes(16).toString("hex");
|
|
5115
|
+
fs8.rename(path10, name2, (er) => {
|
|
5116
5116
|
if (er) {
|
|
5117
5117
|
return cb(er);
|
|
5118
5118
|
}
|
|
5119
5119
|
fs8.unlink(name2, cb);
|
|
5120
5120
|
});
|
|
5121
5121
|
};
|
|
5122
|
-
var unlinkFileSync = (
|
|
5122
|
+
var unlinkFileSync = (path10) => {
|
|
5123
5123
|
if (!isWindows3) {
|
|
5124
|
-
return fs8.unlinkSync(
|
|
5124
|
+
return fs8.unlinkSync(path10);
|
|
5125
5125
|
}
|
|
5126
|
-
const name2 =
|
|
5127
|
-
fs8.renameSync(
|
|
5126
|
+
const name2 = path10 + ".DELETE." + randomBytes(16).toString("hex");
|
|
5127
|
+
fs8.renameSync(path10, name2);
|
|
5128
5128
|
fs8.unlinkSync(name2);
|
|
5129
5129
|
};
|
|
5130
5130
|
var uint32 = (a, b, c) => a !== void 0 && a === a >>> 0 ? a : b !== void 0 && b === b >>> 0 ? b : c;
|
|
@@ -5222,24 +5222,24 @@ var Unpack = class extends Parser {
|
|
|
5222
5222
|
// return false if we need to skip this file
|
|
5223
5223
|
// return true if the field was successfully sanitized
|
|
5224
5224
|
[STRIPABSOLUTEPATH](entry, field) {
|
|
5225
|
-
const
|
|
5226
|
-
if (!
|
|
5225
|
+
const path10 = entry[field];
|
|
5226
|
+
if (!path10 || this.preservePaths)
|
|
5227
5227
|
return true;
|
|
5228
|
-
const parts =
|
|
5228
|
+
const parts = path10.split("/");
|
|
5229
5229
|
if (parts.includes("..") || /* c8 ignore next */
|
|
5230
5230
|
isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
|
|
5231
5231
|
this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
|
|
5232
5232
|
entry,
|
|
5233
|
-
[field]:
|
|
5233
|
+
[field]: path10
|
|
5234
5234
|
});
|
|
5235
5235
|
return false;
|
|
5236
5236
|
}
|
|
5237
|
-
const [root, stripped] = stripAbsolutePath(
|
|
5237
|
+
const [root, stripped] = stripAbsolutePath(path10);
|
|
5238
5238
|
if (root) {
|
|
5239
5239
|
entry[field] = String(stripped);
|
|
5240
5240
|
this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
|
|
5241
5241
|
entry,
|
|
5242
|
-
[field]:
|
|
5242
|
+
[field]: path10
|
|
5243
5243
|
});
|
|
5244
5244
|
}
|
|
5245
5245
|
return true;
|
|
@@ -6006,9 +6006,9 @@ var mtimeFilter = (opt) => {
|
|
|
6006
6006
|
if (!opt.mtimeCache) {
|
|
6007
6007
|
opt.mtimeCache = /* @__PURE__ */ new Map();
|
|
6008
6008
|
}
|
|
6009
|
-
opt.filter = filter ? (
|
|
6010
|
-
((opt.mtimeCache?.get(
|
|
6011
|
-
((opt.mtimeCache?.get(
|
|
6009
|
+
opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
|
|
6010
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
|
|
6011
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
|
|
6012
6012
|
};
|
|
6013
6013
|
|
|
6014
6014
|
// src/run-scenario/environment.ts
|
|
@@ -6307,6 +6307,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6307
6307
|
queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
|
|
6308
6308
|
);
|
|
6309
6309
|
console.log("[SDK-DEBUG] Calling SDK query()...");
|
|
6310
|
+
if (traceContext) {
|
|
6311
|
+
const preExecEvent = {
|
|
6312
|
+
evalRunId: traceContext.evalRunId,
|
|
6313
|
+
scenarioId: traceContext.scenarioId,
|
|
6314
|
+
scenarioName: traceContext.scenarioName,
|
|
6315
|
+
targetId: traceContext.targetId,
|
|
6316
|
+
targetName: traceContext.targetName,
|
|
6317
|
+
stepNumber: 0,
|
|
6318
|
+
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6319
|
+
outputPreview: JSON.stringify({
|
|
6320
|
+
event: "pre-sdk-execution",
|
|
6321
|
+
model: queryOptions.model,
|
|
6322
|
+
maxTurns: queryOptions.maxTurns,
|
|
6323
|
+
sdkEnv: {
|
|
6324
|
+
ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
|
|
6325
|
+
hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
|
|
6326
|
+
hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
|
|
6327
|
+
hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
|
|
6328
|
+
},
|
|
6329
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
6330
|
+
}),
|
|
6331
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6332
|
+
isComplete: false
|
|
6333
|
+
};
|
|
6334
|
+
emitTraceEvent(
|
|
6335
|
+
preExecEvent,
|
|
6336
|
+
traceContext.tracePushUrl,
|
|
6337
|
+
traceContext.routeHeader,
|
|
6338
|
+
traceContext.authToken
|
|
6339
|
+
);
|
|
6340
|
+
}
|
|
6310
6341
|
try {
|
|
6311
6342
|
for await (const message of query({
|
|
6312
6343
|
prompt: scenario.triggerPrompt,
|
|
@@ -6364,10 +6395,10 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6364
6395
|
console.error(errorStack);
|
|
6365
6396
|
}
|
|
6366
6397
|
if (sdkError && typeof sdkError === "object") {
|
|
6367
|
-
const
|
|
6398
|
+
const errObj2 = sdkError;
|
|
6368
6399
|
console.error("[SDK-ERROR] Error object properties:");
|
|
6369
|
-
for (const key of Object.keys(
|
|
6370
|
-
const value =
|
|
6400
|
+
for (const key of Object.keys(errObj2)) {
|
|
6401
|
+
const value = errObj2[key];
|
|
6371
6402
|
if (value !== void 0 && key !== "stack") {
|
|
6372
6403
|
try {
|
|
6373
6404
|
const valueStr = typeof value === "object" ? JSON.stringify(value, null, 2) : String(value);
|
|
@@ -6377,7 +6408,7 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6377
6408
|
}
|
|
6378
6409
|
}
|
|
6379
6410
|
}
|
|
6380
|
-
const
|
|
6411
|
+
const sdkErrorKeys2 = [
|
|
6381
6412
|
"code",
|
|
6382
6413
|
"status",
|
|
6383
6414
|
"statusCode",
|
|
@@ -6400,19 +6431,19 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6400
6431
|
"spawnargs"
|
|
6401
6432
|
];
|
|
6402
6433
|
const extraInfo = {};
|
|
6403
|
-
for (const key of
|
|
6404
|
-
if (key in
|
|
6405
|
-
extraInfo[key] =
|
|
6434
|
+
for (const key of sdkErrorKeys2) {
|
|
6435
|
+
if (key in errObj2 && errObj2[key] !== void 0) {
|
|
6436
|
+
extraInfo[key] = errObj2[key];
|
|
6406
6437
|
}
|
|
6407
6438
|
}
|
|
6408
6439
|
if (Object.keys(extraInfo).length > 0) {
|
|
6409
6440
|
console.error("[SDK-ERROR] SDK-specific error details:");
|
|
6410
6441
|
console.error(JSON.stringify(extraInfo, null, 2));
|
|
6411
6442
|
}
|
|
6412
|
-
if (
|
|
6443
|
+
if (errObj2.cause && typeof errObj2.cause === "object") {
|
|
6413
6444
|
console.error("[SDK-ERROR] Error cause:");
|
|
6414
6445
|
try {
|
|
6415
|
-
console.error(JSON.stringify(
|
|
6446
|
+
console.error(JSON.stringify(errObj2.cause, null, 2));
|
|
6416
6447
|
} catch {
|
|
6417
6448
|
console.error("[SDK-ERROR] Error cause: [cannot serialize]");
|
|
6418
6449
|
}
|
|
@@ -6436,17 +6467,92 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6436
6467
|
console.error("[SDK-ERROR] USER:", process.env.USER);
|
|
6437
6468
|
console.error("[SDK-ERROR] SHELL:", process.env.SHELL);
|
|
6438
6469
|
console.error("[SDK-ERROR] ==========================================");
|
|
6470
|
+
const errObj = sdkError;
|
|
6471
|
+
const sdkSpecificInfo = {};
|
|
6472
|
+
const sdkErrorKeys = [
|
|
6473
|
+
"exitCode",
|
|
6474
|
+
"stderr",
|
|
6475
|
+
"stdout",
|
|
6476
|
+
"signal",
|
|
6477
|
+
"killed",
|
|
6478
|
+
"code",
|
|
6479
|
+
"status",
|
|
6480
|
+
"errno",
|
|
6481
|
+
"syscall",
|
|
6482
|
+
"spawnargs"
|
|
6483
|
+
];
|
|
6484
|
+
for (const key of sdkErrorKeys) {
|
|
6485
|
+
if (errObj && key in errObj && errObj[key] !== void 0) {
|
|
6486
|
+
const val = errObj[key];
|
|
6487
|
+
if (typeof val === "string" && val.length > 500) {
|
|
6488
|
+
sdkSpecificInfo[key] = val.substring(0, 500) + "... [truncated]";
|
|
6489
|
+
} else {
|
|
6490
|
+
sdkSpecificInfo[key] = val;
|
|
6491
|
+
}
|
|
6492
|
+
}
|
|
6493
|
+
}
|
|
6494
|
+
let causeInfo;
|
|
6495
|
+
if (errObj?.cause && typeof errObj.cause === "object") {
|
|
6496
|
+
try {
|
|
6497
|
+
const causeStr = JSON.stringify(errObj.cause, null, 2);
|
|
6498
|
+
causeInfo = causeStr.length > 500 ? causeStr.substring(0, 500) + "... [truncated]" : causeStr;
|
|
6499
|
+
} catch {
|
|
6500
|
+
causeInfo = "[cannot serialize cause]";
|
|
6501
|
+
}
|
|
6502
|
+
}
|
|
6503
|
+
const sdkEnvDebug = {
|
|
6504
|
+
ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
|
|
6505
|
+
hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
|
|
6506
|
+
hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
|
|
6507
|
+
hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS,
|
|
6508
|
+
ANTHROPIC_CUSTOM_HEADERS_preview: sdkEnv.ANTHROPIC_CUSTOM_HEADERS ? sdkEnv.ANTHROPIC_CUSTOM_HEADERS.split("\n").map((h) => h.split(":")[0]).join(", ") : void 0
|
|
6509
|
+
};
|
|
6439
6510
|
const errorDetails = {
|
|
6440
6511
|
messageCount,
|
|
6441
6512
|
errorName,
|
|
6442
6513
|
errorMessage,
|
|
6443
6514
|
skillId: skill.id,
|
|
6444
6515
|
scenarioId: scenario.id,
|
|
6445
|
-
model: options.model || DEFAULT_MODEL
|
|
6516
|
+
model: options.model || DEFAULT_MODEL,
|
|
6517
|
+
sdkEnv: sdkEnvDebug,
|
|
6518
|
+
sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
|
|
6519
|
+
cause: causeInfo
|
|
6446
6520
|
};
|
|
6521
|
+
if (traceContext) {
|
|
6522
|
+
const errorTraceEvent = {
|
|
6523
|
+
evalRunId: traceContext.evalRunId,
|
|
6524
|
+
scenarioId: traceContext.scenarioId,
|
|
6525
|
+
scenarioName: traceContext.scenarioName,
|
|
6526
|
+
targetId: traceContext.targetId,
|
|
6527
|
+
targetName: traceContext.targetName,
|
|
6528
|
+
stepNumber: traceStepNumber + 1,
|
|
6529
|
+
type: LiveTraceEventType.DIAGNOSTIC,
|
|
6530
|
+
outputPreview: JSON.stringify(
|
|
6531
|
+
{
|
|
6532
|
+
event: "sdk-execution-failed",
|
|
6533
|
+
error: errorMessage,
|
|
6534
|
+
errorName,
|
|
6535
|
+
messageCount,
|
|
6536
|
+
sdkEnv: sdkEnvDebug,
|
|
6537
|
+
sdkError: sdkSpecificInfo,
|
|
6538
|
+
cause: causeInfo
|
|
6539
|
+
},
|
|
6540
|
+
null,
|
|
6541
|
+
2
|
|
6542
|
+
).slice(0, 2e3),
|
|
6543
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6544
|
+
isComplete: true
|
|
6545
|
+
};
|
|
6546
|
+
emitTraceEvent(
|
|
6547
|
+
errorTraceEvent,
|
|
6548
|
+
traceContext.tracePushUrl,
|
|
6549
|
+
traceContext.routeHeader,
|
|
6550
|
+
traceContext.authToken
|
|
6551
|
+
);
|
|
6552
|
+
}
|
|
6447
6553
|
throw new Error(
|
|
6448
6554
|
`Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
|
|
6449
|
-
Details: ${JSON.stringify(errorDetails)}` + (errorStack ? `
|
|
6555
|
+
Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
|
|
6450
6556
|
Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
|
|
6451
6557
|
);
|
|
6452
6558
|
}
|
|
@@ -6810,6 +6916,8 @@ ${stackLines.join("\n")}`);
|
|
|
6810
6916
|
return parts.join(" ");
|
|
6811
6917
|
}
|
|
6812
6918
|
var ExecutionPhase = {
|
|
6919
|
+
/** Environment diagnostics phase (runs before execution) */
|
|
6920
|
+
DIAGNOSTICS: "diagnostics",
|
|
6813
6921
|
CONFIG: "config-loading",
|
|
6814
6922
|
API_CLIENT: "api-client-creation",
|
|
6815
6923
|
FETCH_EVAL_RUN: "fetch-eval-run",
|
|
@@ -6826,6 +6934,402 @@ var ExecutionPhase = {
|
|
|
6826
6934
|
UPDATE_STATUS: "update-status"
|
|
6827
6935
|
};
|
|
6828
6936
|
|
|
6937
|
+
// src/diagnostics.ts
|
|
6938
|
+
import { spawn } from "child_process";
|
|
6939
|
+
import * as fs11 from "fs";
|
|
6940
|
+
import * as path9 from "path";
|
|
6941
|
+
import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
|
|
6942
|
+
/**
 * Run a command line through `sh -c` and capture its trimmed output.
 *
 * The returned promise never rejects: spawn failures and abnormal
 * terminations are reported through the resolved result so callers can
 * record them as diagnostic data.
 *
 * @param {string} command - Command line passed verbatim to `sh -c`.
 *   It is interpreted by the shell, so callers must not interpolate
 *   untrusted input into it.
 * @param {number} [timeoutMs=5000] - Forwarded to spawn's `timeout` option.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   `exitCode` is -1 when the process produced no exit code (spawn error,
 *   or terminated by a signal such as the timeout kill).
 */
async function execCommand(command, timeoutMs = 5e3) {
  return new Promise((resolve) => {
    const proc2 = spawn("sh", ["-c", command], {
      timeout: timeoutMs,
      // Nothing is ever written to the child. With the default piped stdin a
      // command that reads its input would block until the timeout fires.
      stdio: ["ignore", "pipe", "pipe"]
    });
    let stdout = "";
    let stderr = "";
    let settled = false;
    // "error" and "close" can both fire for one child; only the first counts.
    const settle = (result) => {
      if (settled) {
        return;
      }
      settled = true;
      resolve(result);
    };
    proc2.stdout.on("data", (data) => {
      stdout += data.toString();
    });
    proc2.stderr.on("data", (data) => {
      stderr += data.toString();
    });
    proc2.on("close", (code2, signal) => {
      const capturedStderr = stderr.trim();
      // A signal-terminated child (e.g. killed by the timeout) has a null exit
      // code and usually no stderr; say why instead of returning a bare -1.
      const signalNote = signal ? `Process terminated by signal ${signal}` : "";
      settle({
        stdout: stdout.trim(),
        stderr: capturedStderr || signalNote,
        exitCode: code2 ?? -1
      });
    });
    proc2.on("error", (err) => {
      settle({
        stdout: "",
        stderr: err.message,
        exitCode: -1
      });
    });
  });
}
|
|
6971
|
+
/**
 * Diagnostic: snapshot the environment variables relevant to an evaluation run.
 *
 * Values are sanitized before being recorded:
 * - keys containing SECRET, TOKEN or API_KEY are reported only as a length marker;
 * - PATH is split into its directory entries;
 * - ANTHROPIC_CUSTOM_HEADERS is reduced to the text before the first ":" on
 *   each line (the header names), so header values are not recorded.
 * Unset or empty variables are listed under `details.missingEnvVars`.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, durationMs: number}>}
 *   Always `passed: true`; this check is informational only.
 */
async function testEnvironmentVariables() {
  const start = Date.now();
  const envVars = [
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "NODE_ENV",
    "EVAL_SERVER_URL",
    "AI_GATEWAY_URL",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "ANTHROPIC_CUSTOM_HEADERS"
  ];
  const details = {};
  const missing = [];
  for (const key of envVars) {
    const value = process.env[key];
    if (!value) {
      missing.push(key);
      continue;
    }
    if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
      details[key] = `[SET - ${value.length} chars]`;
    } else if (key === "PATH") {
      // Use the platform delimiter: a hard-coded ":" would also split
      // Windows drive letters ("C:\...") apart.
      details[key] = value.split(path9.delimiter);
    } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
      details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
    } else {
      details[key] = value;
    }
  }
  details.missingEnvVars = missing;
  return {
    name: "environment-variables",
    passed: true,
    // Info only, doesn't fail
    details,
    durationMs: Date.now() - start
  };
}
|
|
7013
|
+
/**
 * Diagnostic: describe the running Node.js process (version, platform,
 * architecture, working directory, pid, uptime, memory usage, executable path).
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, durationMs: number}>}
 *   Always `passed: true`; the data is informational.
 */
async function testNodeEnvironment() {
  const startedAt = Date.now();
  const { version, platform, arch, pid, execPath } = process;
  const snapshot = {
    nodeVersion: version,
    platform,
    arch,
    cwd: process.cwd(),
    pid,
    uptime: process.uptime(),
    memoryUsage: process.memoryUsage(),
    execPath
  };
  const elapsedMs = Date.now() - startedAt;
  return {
    name: "node-environment",
    passed: true,
    details: snapshot,
    durationMs: elapsedMs
  };
}
|
|
7032
|
+
/**
 * Diagnostic: locate npm's global directories and check whether the evaluator
 * package, and the Claude Agent SDK nested under it, exist in the global root.
 *
 * The global bin directory is derived from `npm prefix -g`. The `npm bin`
 * command was removed in npm 9, where it exits non-zero and would make this
 * check fail on every current npm installation.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` is true when both npm commands exit with code 0; otherwise
 *   `error` carries the stderr of the first command that produced any.
 */
async function testNpmGlobalDirectory() {
  const start = Date.now();
  // The two lookups are independent, so run them concurrently.
  const [npmRootResult, npmPrefixResult] = await Promise.all([
    execCommand("npm root -g"),
    execCommand("npm prefix -g")
  ]);
  const npmRoot = npmRootResult.stdout;
  const npmPrefix = npmPrefixResult.exitCode === 0 ? npmPrefixResult.stdout : "";
  // npm links global executables into {prefix}/bin on Unix and directly into
  // {prefix} on Windows.
  let npmBin = "";
  if (npmPrefix) {
    npmBin = process.platform === "win32" ? npmPrefix : path9.join(npmPrefix, "bin");
  }
  const details = {
    npmRootGlobal: npmRoot,
    npmBinGlobal: npmBin,
    npmRootExitCode: npmRootResult.exitCode,
    npmBinExitCode: npmPrefixResult.exitCode
  };
  if (npmRoot) {
    const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
    const evaluatorExists = fs11.existsSync(evaluatorPath);
    details.evaluatorInstalled = evaluatorExists;
    if (evaluatorExists) {
      try {
        details.evaluatorFiles = fs11.readdirSync(evaluatorPath);
      } catch {
        // Best effort: an unreadable directory is itself diagnostic data.
        details.evaluatorFiles = "Failed to list files";
      }
    }
    const sdkPath = path9.join(
      evaluatorPath,
      "node_modules",
      "@anthropic-ai",
      "claude-agent-sdk"
    );
    details.claudeAgentSdkInstalled = fs11.existsSync(sdkPath);
  }
  const passed = npmRootResult.exitCode === 0 && npmPrefixResult.exitCode === 0;
  return {
    name: "npm-global-directory",
    passed,
    details,
    error: passed ? void 0 : npmRootResult.stderr || npmPrefixResult.stderr,
    durationMs: Date.now() - start
  };
}
|
|
7078
|
+
/**
 * Diagnostic: check whether a `claude` executable can be found.
 *
 * Looks it up with `which claude`, asks it for its version, checks npm's
 * global bin directory, and lists every PATH directory that contains an
 * entry named `claude`.
 *
 * The global bin directory is derived from `npm prefix -g` because the
 * `npm bin` command was removed in npm 9.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` is true when `which claude` exits 0 or `claude` exists in the
 *   npm global bin directory.
 */
async function testClaudeBinary() {
  const start = Date.now();
  // The three probes are independent, so run them concurrently.
  const [whichResult, versionResult, npmPrefixResult] = await Promise.all([
    execCommand("which claude"),
    execCommand("claude --version"),
    execCommand("npm prefix -g")
  ]);
  const npmPrefix = npmPrefixResult.exitCode === 0 ? npmPrefixResult.stdout : "";
  // npm links global executables into {prefix}/bin on Unix and directly into
  // {prefix} on Windows.
  let npmBin = "";
  if (npmPrefix) {
    npmBin = process.platform === "win32" ? npmPrefix : path9.join(npmPrefix, "bin");
  }
  const claudeInNpmBin = npmBin ? fs11.existsSync(path9.join(npmBin, "claude")) : false;
  const details = {
    whichClaude: whichResult.stdout || "(not found)",
    whichExitCode: whichResult.exitCode,
    claudeVersion: versionResult.stdout || versionResult.stderr,
    versionExitCode: versionResult.exitCode,
    claudeInNpmGlobalBin: claudeInNpmBin,
    npmGlobalBin: npmBin
  };
  // Split on the platform delimiter rather than a hard-coded ":".
  const pathDirs = (process.env.PATH || "").split(path9.delimiter);
  details.claudeFoundInPathDirs = pathDirs.filter(
    (dir) => fs11.existsSync(path9.join(dir, "claude"))
  );
  const passed = whichResult.exitCode === 0 || claudeInNpmBin;
  return {
    name: "claude-cli-binary",
    passed,
    details,
    error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
    durationMs: Date.now() - start
  };
}
|
|
7115
|
+
/**
 * Diagnostic: verify that child processes can be spawned.
 *
 * Runs an `echo` command and a small `node -e` script. Both outcomes are
 * recorded in `details`, but only the echo result (exit code 0 and the exact
 * expected output) decides `passed`.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testChildProcess() {
  const startedAt = Date.now();
  const echo = await execCommand('echo "diagnostic-test-success"');
  const node = await execCommand('node -e "console.log(process.pid)"');
  const echoSucceeded = echo.exitCode === 0 && echo.stdout === "diagnostic-test-success";
  const report = {
    name: "child-process-spawning",
    passed: echoSucceeded,
    details: {
      echoResult: echo.stdout,
      echoExitCode: echo.exitCode,
      nodeResult: node.stdout,
      nodeExitCode: node.exitCode
    },
    error: void 0,
    durationMs: 0
  };
  if (!echoSucceeded) {
    report.error = "Failed to spawn child process";
  }
  report.durationMs = Date.now() - startedAt;
  return report;
}
|
|
7134
|
+
/**
 * Diagnostic: probes the AI Gateway (and, when configured, the server's
 * `/health` endpoint) with `curl` and reports the HTTP status codes.
 *
 * The test passes when the gateway probe exits with 0 and the reported HTTP
 * code is not "000". The server probe is informational only.
 *
 * @param {{aiGatewayUrl?: string, serverUrl?: string}} config - Evaluator
 *   configuration; only `aiGatewayUrl` and `serverUrl` are read here.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testNetworkConnectivity(config) {
  const start = Date.now();
  const aiGatewayUrl = config.aiGatewayUrl;
  if (!aiGatewayUrl) {
    return {
      name: "network-connectivity",
      passed: false,
      details: { error: "No AI_GATEWAY_URL configured" },
      error: "No AI_GATEWAY_URL configured",
      durationMs: Date.now() - start
    };
  }

  // Single-quote the URL for the shell so characters such as `$`, backticks
  // or `"` inside a configured URL are passed to curl verbatim instead of
  // being expanded or terminating the argument.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const probe = (url) => execCommand(
    `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 ${shellQuote(url)} 2>&1`
  );

  const curlResult = await probe(aiGatewayUrl);

  // Without a server URL the original probed the literal "undefined/health";
  // skip the probe instead and mark the fields as not configured.
  const serverUrl = config.serverUrl;
  const serverResult = serverUrl ? await probe(`${serverUrl}/health`) : { stdout: "(not configured)", exitCode: null };

  const details = {
    aiGatewayUrl,
    aiGatewayHttpCode: curlResult.stdout,
    aiGatewayExitCode: curlResult.exitCode,
    serverUrl,
    serverHttpCode: serverResult.stdout,
    serverExitCode: serverResult.exitCode
  };

  // curl prints "000" as the HTTP code when no response was received.
  const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
  return {
    name: "network-connectivity",
    passed: gatewayReachable,
    details,
    error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
    durationMs: Date.now() - start
  };
}
|
|
7170
|
+
/**
 * Diagnostic: verifies that the Claude Agent SDK package can be imported.
 *
 * On success the result lists the module's exported keys and whether it
 * exposes a `query` function. Any error raised while importing or inspecting
 * the module yields a failed result carrying the error message.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 */
async function testSdkImport() {
  const startedAtMs = Date.now();
  try {
    const sdkModule = await import("@anthropic-ai/claude-agent-sdk");
    const hasQuery = typeof sdkModule.query === "function";
    const exportedKeys = Object.keys(sdkModule);
    return {
      name: "sdk-import",
      passed: true,
      details: { sdkImported: true, hasQuery, exportedKeys },
      durationMs: Date.now() - startedAtMs
    };
  } catch (importErr) {
    // Normalise anything that was thrown into a plain message string.
    let message;
    if (importErr instanceof Error) {
      message = importErr.message;
    } else {
      message = String(importErr);
    }
    return {
      name: "sdk-import",
      passed: false,
      details: { sdkImported: false, error: message },
      error: `Failed to import Claude Agent SDK: ${message}`,
      durationMs: Date.now() - startedAtMs
    };
  }
}
|
|
7199
|
+
/**
 * Diagnostic: verifies that the process can create a directory, write a file
 * and read it back, and records the current working directory's contents.
 *
 * A unique scratch directory is created under `/tmp` for every call, so a
 * pre-existing or concurrently used `/tmp/evalforge-diagnostics` directory
 * cannot cause a spurious failure. The scratch directory is always removed,
 * including when an intermediate step throws.
 *
 * The test passes when the content read back equals the content written.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 */
async function testFileSystemAccess() {
  const start = Date.now();
  const payload = "diagnostic-test";
  // Reported as-is if the scratch directory cannot even be created.
  let testDir = "/tmp/evalforge-diagnostics";
  let createdDir = null;
  try {
    createdDir = fs11.mkdtempSync("/tmp/evalforge-diagnostics-");
    testDir = createdDir;
    const testFile = path9.join(testDir, "test.txt");
    fs11.writeFileSync(testFile, payload);
    const content = fs11.readFileSync(testFile, "utf8");
    const canReadFile = content === payload;
    const details = {
      canCreateDirectory: true,
      canWriteFile: true,
      canReadFile,
      testDir,
      cwd: process.cwd(),
      cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
      // First 20 files
    };
    const result = {
      name: "file-system-access",
      // A read-back mismatch means the file system is not behaving correctly.
      passed: canReadFile,
      details,
      durationMs: Date.now() - start
    };
    if (!canReadFile) {
      result.error = "File system access failed: read-back content did not match what was written";
    }
    return result;
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    return {
      name: "file-system-access",
      passed: false,
      details: {
        error,
        testDir,
        cwd: process.cwd()
      },
      error: `File system access failed: ${error}`,
      durationMs: Date.now() - start
    };
  } finally {
    // Only remove a directory this call created; never let cleanup override
    // the diagnostic result.
    if (createdDir) {
      try {
        fs11.rmSync(createdDir, { recursive: true, force: true });
      } catch (cleanupErr) {
        console.error("[DIAGNOSTICS] Failed to clean up test directory:", cleanupErr);
      }
    }
  }
}
|
|
7241
|
+
/**
 * Publishes a diagnostic result as a live trace event.
 *
 * The event is always written to stdout as a `TRACE_EVENT:`-prefixed JSON
 * line. When `tracePushUrl` is set it is additionally POSTed there as a
 * one-element JSON array; the request is not awaited and a rejected request
 * is only logged.
 *
 * @param {string} evalRunId2 - Identifier of the evaluation run.
 * @param {object} result - A single test result or the full report; an object
 *   with a `summary` key marks the event as complete.
 * @param {string} [tracePushUrl] - Endpoint to push the event to.
 * @param {string} [routeHeader] - Value for the `x-wix-route` header.
 * @param {string} [authToken] - Bearer token for the `Authorization` header.
 * @returns {void}
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  const eventType = LiveTraceEventType2.DIAGNOSTIC;
  // Limit size
  const preview = JSON.stringify(result, null, 2).slice(0, 2000);
  const emittedAt = new Date().toISOString();
  // Complete if it's the full report
  const isFullReport = "summary" in result;

  const traceEvent = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "System",
    stepNumber: 0,
    type: eventType,
    outputPreview: preview,
    timestamp: emittedAt,
    isComplete: isFullReport
  };
  console.log(`TRACE_EVENT:${JSON.stringify(traceEvent)}`);

  if (!tracePushUrl) {
    return;
  }

  const requestHeaders = {
    "Content-Type": "application/json",
    ...(routeHeader ? { "x-wix-route": routeHeader } : {}),
    ...(authToken ? { "Authorization": `Bearer ${authToken}` } : {})
  };
  const request = fetch(tracePushUrl, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify([traceEvent])
  });
  // Fire-and-forget: a failed push must not interrupt diagnostics.
  request.catch((pushErr) => {
    console.error("[DIAGNOSTICS] Failed to push trace event:", pushErr);
  });
}
|
|
7276
|
+
/**
 * Runs every environment diagnostic in sequence and returns a report.
 *
 * Each test result is appended to the report, emitted as a trace event and
 * logged to stderr. A test function that throws (or rejects) no longer aborts
 * the whole run: the failure is recorded as a failed result for that test and
 * the remaining tests still execute.
 *
 * @param {object} config - Evaluator configuration; `tracePushUrl`,
 *   `routeHeader` and `authToken` are forwarded to the trace emitter, and the
 *   whole object is passed to the network connectivity test.
 * @param {string} evalRunId2 - Identifier of the evaluation run.
 * @returns {Promise<{startedAt: string, completedAt: string, totalDurationMs: number, tests: object[], summary: {total: number, passed: number, failed: number}}>}
 */
async function runDiagnostics(config, evalRunId2) {
  const startedAt = new Date().toISOString();
  const startTime = Date.now();
  console.error("[DIAGNOSTICS] Starting environment diagnostics...");
  const tests = [];

  // `fallbackName` labels the synthetic result when `testFn` throws before
  // producing its own result object.
  const runTest = async (testFn, fallbackName = testFn.name || "unknown-diagnostic") => {
    const testStart = Date.now();
    let result;
    try {
      result = await testFn();
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      result = {
        name: fallbackName,
        passed: false,
        details: { error: message },
        error: `Diagnostic test threw: ${message}`,
        durationMs: Date.now() - testStart
      };
    }
    tests.push(result);
    emitDiagnosticTraceEvent(
      evalRunId2,
      result,
      config.tracePushUrl,
      config.routeHeader,
      config.authToken
    );
    const status = result.passed ? "\u2713" : "\u2717";
    console.error(
      `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
    );
    if (!result.passed && result.error) {
      console.error(`[DIAGNOSTICS] Error: ${result.error}`);
    }
  };

  await runTest(testEnvironmentVariables);
  await runTest(testNodeEnvironment);
  await runTest(testNpmGlobalDirectory);
  await runTest(testClaudeBinary, "claude-cli-binary");
  await runTest(testChildProcess, "child-process-spawning");
  await runTest(() => testNetworkConnectivity(config), "network-connectivity");
  await runTest(testSdkImport, "sdk-import");
  await runTest(testFileSystemAccess, "file-system-access");

  const completedAt = new Date().toISOString();
  const totalDurationMs = Date.now() - startTime;
  const passedCount = tests.filter((t) => t.passed).length;
  const report = {
    startedAt,
    completedAt,
    totalDurationMs,
    tests,
    summary: {
      total: tests.length,
      passed: passedCount,
      failed: tests.length - passedCount
    }
  };
  // The object with a `summary` key is the final, complete event.
  emitDiagnosticTraceEvent(
    evalRunId2,
    report,
    config.tracePushUrl,
    config.routeHeader,
    config.authToken
  );
  console.error(
    `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
  );
  return report;
}
|
|
7332
|
+
|
|
6829
7333
|
// src/index.ts
|
|
6830
7334
|
console.error(
|
|
6831
7335
|
"[EVALUATOR-BOOT] Module loading started",
|
|
@@ -6893,6 +7397,33 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
6893
7397
|
`[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
|
|
6894
7398
|
);
|
|
6895
7399
|
}
|
|
7400
|
+
state.currentPhase = ExecutionPhase.DIAGNOSTICS;
|
|
7401
|
+
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
|
|
7402
|
+
console.error("[DEBUG-H1.5] Running environment diagnostics...");
|
|
7403
|
+
try {
|
|
7404
|
+
const diagnosticReport = await runDiagnostics(config, evalRunId2);
|
|
7405
|
+
console.error(
|
|
7406
|
+
"[DEBUG-H1.5] Diagnostics completed",
|
|
7407
|
+
JSON.stringify({
|
|
7408
|
+
passed: diagnosticReport.summary.passed,
|
|
7409
|
+
failed: diagnosticReport.summary.failed,
|
|
7410
|
+
total: diagnosticReport.summary.total,
|
|
7411
|
+
durationMs: diagnosticReport.totalDurationMs
|
|
7412
|
+
})
|
|
7413
|
+
);
|
|
7414
|
+
const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
|
|
7415
|
+
if (failedTests.length > 0) {
|
|
7416
|
+
console.error(
|
|
7417
|
+
"[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
|
|
7418
|
+
failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
|
|
7419
|
+
);
|
|
7420
|
+
}
|
|
7421
|
+
} catch (diagErr) {
|
|
7422
|
+
console.error(
|
|
7423
|
+
"[DEBUG-H1.5] Diagnostics failed (non-fatal):",
|
|
7424
|
+
diagErr instanceof Error ? diagErr.message : String(diagErr)
|
|
7425
|
+
);
|
|
7426
|
+
}
|
|
6896
7427
|
state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
|
|
6897
7428
|
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
|
|
6898
7429
|
console.error(
|