@wix/evalforge-evaluator 0.14.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +753 -82
- package/build/index.js.map +4 -4
- package/build/index.mjs +749 -78
- package/build/index.mjs.map +4 -4
- package/build/types/diagnostics.d.ts +48 -0
- package/build/types/error-reporter.d.ts +2 -0
- package/package.json +3 -3
package/build/index.js
CHANGED
|
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
24
24
|
));
|
|
25
25
|
|
|
26
26
|
// src/index.ts
|
|
27
|
-
var
|
|
27
|
+
var import_evalforge_types5 = require("@wix/evalforge-types");
|
|
28
28
|
|
|
29
29
|
// src/config.ts
|
|
30
30
|
function loadConfig() {
|
|
@@ -87,8 +87,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
87
87
|
}
|
|
88
88
|
return headers;
|
|
89
89
|
}
|
|
90
|
-
async function fetchJson(
|
|
91
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
90
|
+
async function fetchJson(path10) {
|
|
91
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
92
92
|
console.error(`[API] GET ${url}`);
|
|
93
93
|
const headers = buildHeaders();
|
|
94
94
|
const response = await fetch(url, {
|
|
@@ -102,8 +102,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
102
102
|
}
|
|
103
103
|
return response.json();
|
|
104
104
|
}
|
|
105
|
-
async function postJson(
|
|
106
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
105
|
+
async function postJson(path10, body) {
|
|
106
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
107
107
|
console.error(`[API] POST ${url}`);
|
|
108
108
|
const response = await fetch(url, {
|
|
109
109
|
method: "POST",
|
|
@@ -117,8 +117,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
117
117
|
);
|
|
118
118
|
}
|
|
119
119
|
}
|
|
120
|
-
async function deleteRequest(
|
|
121
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
120
|
+
async function deleteRequest(path10) {
|
|
121
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
122
122
|
console.error(`[API] DELETE ${url}`);
|
|
123
123
|
const headers = buildHeaders();
|
|
124
124
|
const response = await fetch(url, {
|
|
@@ -132,8 +132,8 @@ function createApiClient(serverUrl, options = "") {
|
|
|
132
132
|
);
|
|
133
133
|
}
|
|
134
134
|
}
|
|
135
|
-
async function putJson(
|
|
136
|
-
const url = `${serverUrl}${apiPrefix}${pathPrefix}${
|
|
135
|
+
async function putJson(path10, body) {
|
|
136
|
+
const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
|
|
137
137
|
console.error(`[API] PUT ${url}`);
|
|
138
138
|
const response = await fetch(url, {
|
|
139
139
|
method: "PUT",
|
|
@@ -1157,17 +1157,17 @@ var ReadStream = class extends Minipass {
|
|
|
1157
1157
|
[_size];
|
|
1158
1158
|
[_remain];
|
|
1159
1159
|
[_autoClose];
|
|
1160
|
-
constructor(
|
|
1160
|
+
constructor(path10, opt) {
|
|
1161
1161
|
opt = opt || {};
|
|
1162
1162
|
super(opt);
|
|
1163
1163
|
this.readable = true;
|
|
1164
1164
|
this.writable = false;
|
|
1165
|
-
if (typeof
|
|
1165
|
+
if (typeof path10 !== "string") {
|
|
1166
1166
|
throw new TypeError("path must be a string");
|
|
1167
1167
|
}
|
|
1168
1168
|
this[_errored] = false;
|
|
1169
1169
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1170
|
-
this[_path] =
|
|
1170
|
+
this[_path] = path10;
|
|
1171
1171
|
this[_readSize] = opt.readSize || 16 * 1024 * 1024;
|
|
1172
1172
|
this[_reading] = false;
|
|
1173
1173
|
this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
|
|
@@ -1330,10 +1330,10 @@ var WriteStream = class extends import_events.default {
|
|
|
1330
1330
|
[_flags];
|
|
1331
1331
|
[_finished] = false;
|
|
1332
1332
|
[_pos];
|
|
1333
|
-
constructor(
|
|
1333
|
+
constructor(path10, opt) {
|
|
1334
1334
|
opt = opt || {};
|
|
1335
1335
|
super(opt);
|
|
1336
|
-
this[_path] =
|
|
1336
|
+
this[_path] = path10;
|
|
1337
1337
|
this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
|
|
1338
1338
|
this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
|
|
1339
1339
|
this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
|
|
@@ -2226,10 +2226,10 @@ var Header = class {
|
|
|
2226
2226
|
}
|
|
2227
2227
|
const prefixSize = this.ctime || this.atime ? 130 : 155;
|
|
2228
2228
|
const split = splitPrefix(this.path || "", prefixSize);
|
|
2229
|
-
const
|
|
2229
|
+
const path10 = split[0];
|
|
2230
2230
|
const prefix = split[1];
|
|
2231
2231
|
this.needPax = !!split[2];
|
|
2232
|
-
this.needPax = encString(buf, off, 100,
|
|
2232
|
+
this.needPax = encString(buf, off, 100, path10) || this.needPax;
|
|
2233
2233
|
this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
|
|
2234
2234
|
this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
|
|
2235
2235
|
this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
|
|
@@ -3205,16 +3205,16 @@ var modeFix = (mode, isDir, portable) => {
|
|
|
3205
3205
|
// ../../node_modules/tar/dist/esm/strip-absolute-path.js
|
|
3206
3206
|
var import_node_path3 = require("node:path");
|
|
3207
3207
|
var { isAbsolute, parse: parse3 } = import_node_path3.win32;
|
|
3208
|
-
var stripAbsolutePath = (
|
|
3208
|
+
var stripAbsolutePath = (path10) => {
|
|
3209
3209
|
let r = "";
|
|
3210
|
-
let parsed = parse3(
|
|
3211
|
-
while (isAbsolute(
|
|
3212
|
-
const root =
|
|
3213
|
-
|
|
3210
|
+
let parsed = parse3(path10);
|
|
3211
|
+
while (isAbsolute(path10) || parsed.root) {
|
|
3212
|
+
const root = path10.charAt(0) === "/" && path10.slice(0, 4) !== "//?/" ? "/" : parsed.root;
|
|
3213
|
+
path10 = path10.slice(root.length);
|
|
3214
3214
|
r += root;
|
|
3215
|
-
parsed = parse3(
|
|
3215
|
+
parsed = parse3(path10);
|
|
3216
3216
|
}
|
|
3217
|
-
return [r,
|
|
3217
|
+
return [r, path10];
|
|
3218
3218
|
};
|
|
3219
3219
|
|
|
3220
3220
|
// ../../node_modules/tar/dist/esm/winchars.js
|
|
@@ -3226,12 +3226,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
|
|
|
3226
3226
|
var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
|
|
3227
3227
|
|
|
3228
3228
|
// ../../node_modules/tar/dist/esm/write-entry.js
|
|
3229
|
-
var prefixPath = (
|
|
3229
|
+
var prefixPath = (path10, prefix) => {
|
|
3230
3230
|
if (!prefix) {
|
|
3231
|
-
return normalizeWindowsPath(
|
|
3231
|
+
return normalizeWindowsPath(path10);
|
|
3232
3232
|
}
|
|
3233
|
-
|
|
3234
|
-
return stripTrailingSlashes(prefix) + "/" +
|
|
3233
|
+
path10 = normalizeWindowsPath(path10).replace(/^\.(\/|$)/, "");
|
|
3234
|
+
return stripTrailingSlashes(prefix) + "/" + path10;
|
|
3235
3235
|
};
|
|
3236
3236
|
var maxReadSize = 16 * 1024 * 1024;
|
|
3237
3237
|
var PROCESS = /* @__PURE__ */ Symbol("process");
|
|
@@ -3376,8 +3376,8 @@ var WriteEntry = class extends Minipass {
|
|
|
3376
3376
|
[MODE](mode) {
|
|
3377
3377
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
3378
3378
|
}
|
|
3379
|
-
[PREFIX](
|
|
3380
|
-
return prefixPath(
|
|
3379
|
+
[PREFIX](path10) {
|
|
3380
|
+
return prefixPath(path10, this.prefix);
|
|
3381
3381
|
}
|
|
3382
3382
|
[HEADER]() {
|
|
3383
3383
|
if (!this.stat) {
|
|
@@ -3758,8 +3758,8 @@ var WriteEntryTar = class extends Minipass {
|
|
|
3758
3758
|
super.write(b);
|
|
3759
3759
|
readEntry.pipe(this);
|
|
3760
3760
|
}
|
|
3761
|
-
[PREFIX](
|
|
3762
|
-
return prefixPath(
|
|
3761
|
+
[PREFIX](path10) {
|
|
3762
|
+
return prefixPath(path10, this.prefix);
|
|
3763
3763
|
}
|
|
3764
3764
|
[MODE](mode) {
|
|
3765
3765
|
return modeFix(mode, this.type === "Directory", this.portable);
|
|
@@ -4183,8 +4183,8 @@ var PackJob = class {
|
|
|
4183
4183
|
pending = false;
|
|
4184
4184
|
ignore = false;
|
|
4185
4185
|
piped = false;
|
|
4186
|
-
constructor(
|
|
4187
|
-
this.path =
|
|
4186
|
+
constructor(path10, absolute) {
|
|
4187
|
+
this.path = path10 || "./";
|
|
4188
4188
|
this.absolute = absolute;
|
|
4189
4189
|
}
|
|
4190
4190
|
};
|
|
@@ -4312,21 +4312,21 @@ var Pack = class extends Minipass {
|
|
|
4312
4312
|
[WRITE](chunk) {
|
|
4313
4313
|
return super.write(chunk);
|
|
4314
4314
|
}
|
|
4315
|
-
add(
|
|
4316
|
-
this.write(
|
|
4315
|
+
add(path10) {
|
|
4316
|
+
this.write(path10);
|
|
4317
4317
|
return this;
|
|
4318
4318
|
}
|
|
4319
|
-
end(
|
|
4320
|
-
if (typeof
|
|
4321
|
-
cb =
|
|
4322
|
-
|
|
4319
|
+
end(path10, encoding, cb) {
|
|
4320
|
+
if (typeof path10 === "function") {
|
|
4321
|
+
cb = path10;
|
|
4322
|
+
path10 = void 0;
|
|
4323
4323
|
}
|
|
4324
4324
|
if (typeof encoding === "function") {
|
|
4325
4325
|
cb = encoding;
|
|
4326
4326
|
encoding = void 0;
|
|
4327
4327
|
}
|
|
4328
|
-
if (
|
|
4329
|
-
this.add(
|
|
4328
|
+
if (path10) {
|
|
4329
|
+
this.add(path10);
|
|
4330
4330
|
}
|
|
4331
4331
|
this[ENDED2] = true;
|
|
4332
4332
|
this[PROCESS2]();
|
|
@@ -4334,14 +4334,14 @@ var Pack = class extends Minipass {
|
|
|
4334
4334
|
cb();
|
|
4335
4335
|
return this;
|
|
4336
4336
|
}
|
|
4337
|
-
write(
|
|
4337
|
+
write(path10) {
|
|
4338
4338
|
if (this[ENDED2]) {
|
|
4339
4339
|
throw new Error("write after end");
|
|
4340
4340
|
}
|
|
4341
|
-
if (
|
|
4342
|
-
this[ADDTARENTRY](
|
|
4341
|
+
if (path10 instanceof ReadEntry) {
|
|
4342
|
+
this[ADDTARENTRY](path10);
|
|
4343
4343
|
} else {
|
|
4344
|
-
this[ADDFSENTRY](
|
|
4344
|
+
this[ADDFSENTRY](path10);
|
|
4345
4345
|
}
|
|
4346
4346
|
return this.flowing;
|
|
4347
4347
|
}
|
|
@@ -4684,9 +4684,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
|
|
|
4684
4684
|
// ../../node_modules/chownr/dist/esm/index.js
|
|
4685
4685
|
var import_node_fs2 = __toESM(require("node:fs"), 1);
|
|
4686
4686
|
var import_node_path5 = __toESM(require("node:path"), 1);
|
|
4687
|
-
var lchownSync = (
|
|
4687
|
+
var lchownSync = (path10, uid, gid) => {
|
|
4688
4688
|
try {
|
|
4689
|
-
return import_node_fs2.default.lchownSync(
|
|
4689
|
+
return import_node_fs2.default.lchownSync(path10, uid, gid);
|
|
4690
4690
|
} catch (er) {
|
|
4691
4691
|
if (er?.code !== "ENOENT")
|
|
4692
4692
|
throw er;
|
|
@@ -4769,9 +4769,9 @@ var CwdError = class extends Error {
|
|
|
4769
4769
|
path;
|
|
4770
4770
|
code;
|
|
4771
4771
|
syscall = "chdir";
|
|
4772
|
-
constructor(
|
|
4773
|
-
super(`${code2}: Cannot cd into '${
|
|
4774
|
-
this.path =
|
|
4772
|
+
constructor(path10, code2) {
|
|
4773
|
+
super(`${code2}: Cannot cd into '${path10}'`);
|
|
4774
|
+
this.path = path10;
|
|
4775
4775
|
this.code = code2;
|
|
4776
4776
|
}
|
|
4777
4777
|
get name() {
|
|
@@ -4785,10 +4785,10 @@ var SymlinkError = class extends Error {
|
|
|
4785
4785
|
symlink;
|
|
4786
4786
|
syscall = "symlink";
|
|
4787
4787
|
code = "TAR_SYMLINK_ERROR";
|
|
4788
|
-
constructor(symlink,
|
|
4788
|
+
constructor(symlink, path10) {
|
|
4789
4789
|
super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
|
|
4790
4790
|
this.symlink = symlink;
|
|
4791
|
-
this.path =
|
|
4791
|
+
this.path = path10;
|
|
4792
4792
|
}
|
|
4793
4793
|
get name() {
|
|
4794
4794
|
return "SymlinkError";
|
|
@@ -4970,13 +4970,13 @@ var normalizeUnicode = (s) => {
|
|
|
4970
4970
|
// ../../node_modules/tar/dist/esm/path-reservations.js
|
|
4971
4971
|
var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
4972
4972
|
var isWindows2 = platform3 === "win32";
|
|
4973
|
-
var getDirs = (
|
|
4974
|
-
const dirs =
|
|
4973
|
+
var getDirs = (path10) => {
|
|
4974
|
+
const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
|
|
4975
4975
|
const s = set[set.length - 1];
|
|
4976
4976
|
if (s !== void 0) {
|
|
4977
|
-
|
|
4977
|
+
path11 = (0, import_node_path7.join)(s, path11);
|
|
4978
4978
|
}
|
|
4979
|
-
set.push(
|
|
4979
|
+
set.push(path11 || "/");
|
|
4980
4980
|
return set;
|
|
4981
4981
|
}, []);
|
|
4982
4982
|
return dirs;
|
|
@@ -4994,7 +4994,7 @@ var PathReservations = class {
|
|
|
4994
4994
|
paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
|
|
4995
4995
|
return stripTrailingSlashes((0, import_node_path7.join)(normalizeUnicode(p))).toLowerCase();
|
|
4996
4996
|
});
|
|
4997
|
-
const dirs = new Set(paths.map((
|
|
4997
|
+
const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
|
|
4998
4998
|
this.#reservations.set(fn, { dirs, paths });
|
|
4999
4999
|
for (const p of paths) {
|
|
5000
5000
|
const q = this.#queues.get(p);
|
|
@@ -5027,8 +5027,8 @@ var PathReservations = class {
|
|
|
5027
5027
|
throw new Error("function does not have any path reservations");
|
|
5028
5028
|
}
|
|
5029
5029
|
return {
|
|
5030
|
-
paths: res.paths.map((
|
|
5031
|
-
dirs: [...res.dirs].map((
|
|
5030
|
+
paths: res.paths.map((path10) => this.#queues.get(path10)),
|
|
5031
|
+
dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
|
|
5032
5032
|
};
|
|
5033
5033
|
}
|
|
5034
5034
|
// check if fn is first in line for all its paths, and is
|
|
@@ -5056,14 +5056,14 @@ var PathReservations = class {
|
|
|
5056
5056
|
}
|
|
5057
5057
|
const { paths, dirs } = res;
|
|
5058
5058
|
const next = /* @__PURE__ */ new Set();
|
|
5059
|
-
for (const
|
|
5060
|
-
const q = this.#queues.get(
|
|
5059
|
+
for (const path10 of paths) {
|
|
5060
|
+
const q = this.#queues.get(path10);
|
|
5061
5061
|
if (!q || q?.[0] !== fn) {
|
|
5062
5062
|
continue;
|
|
5063
5063
|
}
|
|
5064
5064
|
const q0 = q[1];
|
|
5065
5065
|
if (!q0) {
|
|
5066
|
-
this.#queues.delete(
|
|
5066
|
+
this.#queues.delete(path10);
|
|
5067
5067
|
continue;
|
|
5068
5068
|
}
|
|
5069
5069
|
q.shift();
|
|
@@ -5128,24 +5128,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
|
|
|
5128
5128
|
var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
|
|
5129
5129
|
var isWindows3 = platform4 === "win32";
|
|
5130
5130
|
var DEFAULT_MAX_DEPTH = 1024;
|
|
5131
|
-
var unlinkFile = (
|
|
5131
|
+
var unlinkFile = (path10, cb) => {
|
|
5132
5132
|
if (!isWindows3) {
|
|
5133
|
-
return import_node_fs4.default.unlink(
|
|
5133
|
+
return import_node_fs4.default.unlink(path10, cb);
|
|
5134
5134
|
}
|
|
5135
|
-
const name2 =
|
|
5136
|
-
import_node_fs4.default.rename(
|
|
5135
|
+
const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
|
|
5136
|
+
import_node_fs4.default.rename(path10, name2, (er) => {
|
|
5137
5137
|
if (er) {
|
|
5138
5138
|
return cb(er);
|
|
5139
5139
|
}
|
|
5140
5140
|
import_node_fs4.default.unlink(name2, cb);
|
|
5141
5141
|
});
|
|
5142
5142
|
};
|
|
5143
|
-
var unlinkFileSync = (
|
|
5143
|
+
var unlinkFileSync = (path10) => {
|
|
5144
5144
|
if (!isWindows3) {
|
|
5145
|
-
return import_node_fs4.default.unlinkSync(
|
|
5145
|
+
return import_node_fs4.default.unlinkSync(path10);
|
|
5146
5146
|
}
|
|
5147
|
-
const name2 =
|
|
5148
|
-
import_node_fs4.default.renameSync(
|
|
5147
|
+
const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
|
|
5148
|
+
import_node_fs4.default.renameSync(path10, name2);
|
|
5149
5149
|
import_node_fs4.default.unlinkSync(name2);
|
|
5150
5150
|
};
|
|
5151
5151
|
var uint32 = (a, b, c) => a !== void 0 && a === a >>> 0 ? a : b !== void 0 && b === b >>> 0 ? b : c;
|
|
@@ -5243,24 +5243,24 @@ var Unpack = class extends Parser {
|
|
|
5243
5243
|
// return false if we need to skip this file
|
|
5244
5244
|
// return true if the field was successfully sanitized
|
|
5245
5245
|
[STRIPABSOLUTEPATH](entry, field) {
|
|
5246
|
-
const
|
|
5247
|
-
if (!
|
|
5246
|
+
const path10 = entry[field];
|
|
5247
|
+
if (!path10 || this.preservePaths)
|
|
5248
5248
|
return true;
|
|
5249
|
-
const parts =
|
|
5249
|
+
const parts = path10.split("/");
|
|
5250
5250
|
if (parts.includes("..") || /* c8 ignore next */
|
|
5251
5251
|
isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
|
|
5252
5252
|
this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
|
|
5253
5253
|
entry,
|
|
5254
|
-
[field]:
|
|
5254
|
+
[field]: path10
|
|
5255
5255
|
});
|
|
5256
5256
|
return false;
|
|
5257
5257
|
}
|
|
5258
|
-
const [root, stripped] = stripAbsolutePath(
|
|
5258
|
+
const [root, stripped] = stripAbsolutePath(path10);
|
|
5259
5259
|
if (root) {
|
|
5260
5260
|
entry[field] = String(stripped);
|
|
5261
5261
|
this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
|
|
5262
5262
|
entry,
|
|
5263
|
-
[field]:
|
|
5263
|
+
[field]: path10
|
|
5264
5264
|
});
|
|
5265
5265
|
}
|
|
5266
5266
|
return true;
|
|
@@ -6027,9 +6027,9 @@ var mtimeFilter = (opt) => {
|
|
|
6027
6027
|
if (!opt.mtimeCache) {
|
|
6028
6028
|
opt.mtimeCache = /* @__PURE__ */ new Map();
|
|
6029
6029
|
}
|
|
6030
|
-
opt.filter = filter ? (
|
|
6031
|
-
((opt.mtimeCache?.get(
|
|
6032
|
-
((opt.mtimeCache?.get(
|
|
6030
|
+
opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
|
|
6031
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
|
|
6032
|
+
((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
|
|
6033
6033
|
};
|
|
6034
6034
|
|
|
6035
6035
|
// src/run-scenario/environment.ts
|
|
@@ -6324,6 +6324,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6324
6324
|
queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
|
|
6325
6325
|
);
|
|
6326
6326
|
console.log("[SDK-DEBUG] Calling SDK query()...");
|
|
6327
|
+
if (traceContext) {
|
|
6328
|
+
const preExecEvent = {
|
|
6329
|
+
evalRunId: traceContext.evalRunId,
|
|
6330
|
+
scenarioId: traceContext.scenarioId,
|
|
6331
|
+
scenarioName: traceContext.scenarioName,
|
|
6332
|
+
targetId: traceContext.targetId,
|
|
6333
|
+
targetName: traceContext.targetName,
|
|
6334
|
+
stepNumber: 0,
|
|
6335
|
+
type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
|
|
6336
|
+
outputPreview: JSON.stringify({
|
|
6337
|
+
event: "pre-sdk-execution",
|
|
6338
|
+
model: queryOptions.model,
|
|
6339
|
+
maxTurns: queryOptions.maxTurns,
|
|
6340
|
+
sdkEnv: {
|
|
6341
|
+
ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
|
|
6342
|
+
hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
|
|
6343
|
+
hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
|
|
6344
|
+
hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
|
|
6345
|
+
},
|
|
6346
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
6347
|
+
}),
|
|
6348
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6349
|
+
isComplete: false
|
|
6350
|
+
};
|
|
6351
|
+
emitTraceEvent(
|
|
6352
|
+
preExecEvent,
|
|
6353
|
+
traceContext.tracePushUrl,
|
|
6354
|
+
traceContext.routeHeader,
|
|
6355
|
+
traceContext.authToken
|
|
6356
|
+
);
|
|
6357
|
+
}
|
|
6327
6358
|
try {
|
|
6328
6359
|
for await (const message of query({
|
|
6329
6360
|
prompt: scenario.triggerPrompt,
|
|
@@ -6504,6 +6535,38 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6504
6535
|
sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
|
|
6505
6536
|
cause: causeInfo
|
|
6506
6537
|
};
|
|
6538
|
+
if (traceContext) {
|
|
6539
|
+
const errorTraceEvent = {
|
|
6540
|
+
evalRunId: traceContext.evalRunId,
|
|
6541
|
+
scenarioId: traceContext.scenarioId,
|
|
6542
|
+
scenarioName: traceContext.scenarioName,
|
|
6543
|
+
targetId: traceContext.targetId,
|
|
6544
|
+
targetName: traceContext.targetName,
|
|
6545
|
+
stepNumber: traceStepNumber + 1,
|
|
6546
|
+
type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
|
|
6547
|
+
outputPreview: JSON.stringify(
|
|
6548
|
+
{
|
|
6549
|
+
event: "sdk-execution-failed",
|
|
6550
|
+
error: errorMessage,
|
|
6551
|
+
errorName,
|
|
6552
|
+
messageCount,
|
|
6553
|
+
sdkEnv: sdkEnvDebug,
|
|
6554
|
+
sdkError: sdkSpecificInfo,
|
|
6555
|
+
cause: causeInfo
|
|
6556
|
+
},
|
|
6557
|
+
null,
|
|
6558
|
+
2
|
|
6559
|
+
).slice(0, 2e3),
|
|
6560
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6561
|
+
isComplete: true
|
|
6562
|
+
};
|
|
6563
|
+
emitTraceEvent(
|
|
6564
|
+
errorTraceEvent,
|
|
6565
|
+
traceContext.tracePushUrl,
|
|
6566
|
+
traceContext.routeHeader,
|
|
6567
|
+
traceContext.authToken
|
|
6568
|
+
);
|
|
6569
|
+
}
|
|
6507
6570
|
throw new Error(
|
|
6508
6571
|
`Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
|
|
6509
6572
|
Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
|
|
@@ -6870,6 +6933,8 @@ ${stackLines.join("\n")}`);
|
|
|
6870
6933
|
return parts.join(" ");
|
|
6871
6934
|
}
|
|
6872
6935
|
var ExecutionPhase = {
|
|
6936
|
+
/** Environment diagnostics phase (runs before execution) */
|
|
6937
|
+
DIAGNOSTICS: "diagnostics",
|
|
6873
6938
|
CONFIG: "config-loading",
|
|
6874
6939
|
API_CLIENT: "api-client-creation",
|
|
6875
6940
|
FETCH_EVAL_RUN: "fetch-eval-run",
|
|
@@ -6886,6 +6951,585 @@ var ExecutionPhase = {
|
|
|
6886
6951
|
UPDATE_STATUS: "update-status"
|
|
6887
6952
|
};
|
|
6888
6953
|
|
|
6954
|
+
// src/diagnostics.ts
|
|
6955
|
+
var import_child_process = require("child_process");
|
|
6956
|
+
var fs11 = __toESM(require("fs"));
|
|
6957
|
+
var path9 = __toESM(require("path"));
|
|
6958
|
+
var import_evalforge_types4 = require("@wix/evalforge-types");
|
|
6959
|
+
/**
 * Run a shell command through `sh -c` and capture its output.
 *
 * The returned promise never rejects: spawn failures and synchronous
 * exceptions are reported through the resolved result instead, so callers
 * can run diagnostics without wrapping every call in try/catch.
 *
 * @param {string} command - Shell command line passed verbatim to `sh -c`.
 * @param {number} [timeoutMs=10000] - Value handed to spawn's `timeout` option.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Trimmed stdout/stderr plus the exit code. `exitCode` is -1 when the
 *   process reports no exit code or emits an "error" event, and -99 when
 *   `spawn` itself throws synchronously.
 */
async function execCommand(command, timeoutMs = 1e4) {
  return new Promise((resolve) => {
    try {
      const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
        timeout: timeoutMs
      });
      let stdout = "";
      let stderr = "";
      // Decode at the stream level: calling toString() on each Buffer chunk
      // corrupts a multi-byte UTF-8 character that straddles two chunks.
      proc2.stdout.setEncoding("utf8");
      proc2.stderr.setEncoding("utf8");
      proc2.stdout.on("data", (data) => {
        stdout += data;
      });
      proc2.stderr.on("data", (data) => {
        stderr += data;
      });
      proc2.on("close", (code2) => {
        resolve({
          stdout: stdout.trim(),
          stderr: stderr.trim(),
          // code2 is null when the process ended without an exit code.
          exitCode: code2 ?? -1
        });
      });
      proc2.on("error", (err) => {
        resolve({
          stdout: "",
          stderr: err.message,
          exitCode: -1
        });
      });
    } catch (err) {
      resolve({
        stdout: "",
        stderr: err instanceof Error ? err.message : String(err),
        exitCode: -99
      });
    }
  });
}
|
|
6996
|
+
/**
 * Execute a diagnostic test function and convert any thrown error or
 * rejection into a failed test result instead of propagating it.
 *
 * @param {string} testName - Name reported in the synthesized failure result.
 * @param {Function} testFn - Test to run; its awaited value is returned as-is.
 * @returns {Promise<object>} The value produced by `testFn`, or a failure
 *   record `{ name, passed: false, details, error, durationMs }` if it threw.
 */
async function safeRunTest(testName, testFn) {
  const startedAt = Date.now();
  let outcome;
  try {
    outcome = await testFn();
  } catch (failure) {
    const isError = failure instanceof Error;
    const message = isError ? failure.message : String(failure);
    outcome = {
      name: testName,
      passed: false,
      details: {
        testCrashed: true,
        error: message,
        stack: isError ? failure.stack : void 0
      },
      error: `Test crashed: ${message}`,
      durationMs: Date.now() - startedAt
    };
  }
  return outcome;
}
|
|
7015
|
+
/**
 * Diagnostic: look for the `claude` binary, both under the evaluator's
 * `node_modules/.bin` inside the global npm root and on the current PATH.
 *
 * Collects the npm root/bin output, a listing of the evaluator's `.bin`
 * directory, details about the binary when it exists, the `which claude`
 * result, and the PATH entries.
 *
 * @returns {Promise<object>} Result named "claude-binary-discovery"; it passes
 *   when the binary exists at the expected path or `which claude` exits 0.
 */
async function testClaudeBinaryDiscovery() {
  const startedAt = Date.now();
  const details = {};

  const { stdout: npmRoot } = await execCommand("npm root -g");
  const { stdout: npmBin } = await execCommand("npm bin -g");
  details.npmRoot = npmRoot;
  details.npmBin = npmBin;

  const evaluatorBinPath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin"
  );
  details.evaluatorBinPath = evaluatorBinPath;
  const binListing = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
  details.evaluatorBinContents = binListing.stdout || binListing.stderr;
  details.lsBinExitCode = binListing.exitCode;

  const claudePath = path9.join(evaluatorBinPath, "claude");
  // A failing existence probe is treated the same as "not found".
  const claudeExists = (() => {
    try {
      return fs11.existsSync(claudePath);
    } catch {
      return false;
    }
  })();
  details.claudePath = claudePath;
  details.claudeExists = claudeExists;

  if (claudeExists) {
    const resolved = await execCommand(`readlink -f "${claudePath}" 2>&1`);
    details.claudeRealPath = resolved.stdout || resolved.stderr;
    const statInfo = await execCommand(`stat "${claudePath}" 2>&1`);
    details.claudeStat = statInfo.stdout || statInfo.stderr;
    const fileInfo = await execCommand(`ls -la "${claudePath}" 2>&1`);
    details.claudeFileInfo = fileInfo.stdout;
  }

  const whichOutcome = await execCommand("which claude 2>&1");
  details.whichClaude = whichOutcome.stdout || "(not in PATH)";
  details.whichExitCode = whichOutcome.exitCode;

  const pathEntries = (process.env.PATH || "").split(":");
  details.currentPATH = pathEntries;
  details.pathLength = pathEntries.length;

  const passed = claudeExists || whichOutcome.exitCode === 0;
  return {
    name: "claude-binary-discovery",
    passed,
    details,
    error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
    durationMs: Date.now() - startedAt
  };
}
|
|
7069
|
+
/**
 * Diagnostic: try to run the `claude` CLI from the evaluator's
 * `node_modules/.bin` under the global npm root (`--version` and `--help`),
 * and additionally `claude --version` from PATH when `which claude` succeeds.
 *
 * @returns {Promise<object>} Result named "claude-cli-execution"; it passes
 *   when either the `--version` or the `--help` invocation exits with code 0.
 */
async function testClaudeExecution() {
  const startedAt = Date.now();
  const cliTimeoutMs = 15e3;

  const { stdout: npmRoot } = await execCommand("npm root -g");
  const claudePath = path9.join(
    npmRoot,
    "@wix",
    "evalforge-evaluator",
    "node_modules",
    ".bin",
    "claude"
  );
  const details = { claudePath };

  const version = await execCommand(`"${claudePath}" --version 2>&1`, cliTimeoutMs);
  details.versionCommand = {
    command: `"${claudePath}" --version`,
    stdout: version.stdout,
    stderr: version.stderr,
    exitCode: version.exitCode
  };

  const help = await execCommand(`"${claudePath}" --help 2>&1 | head -50`, cliTimeoutMs);
  details.helpCommand = {
    command: `"${claudePath}" --help | head -50`,
    // Help text is capped so the diagnostics payload stays small.
    stdout: help.stdout.slice(0, 1500),
    stderr: help.stderr.slice(0, 500),
    exitCode: help.exitCode
  };

  const located = await execCommand("which claude 2>&1");
  if (located.exitCode === 0) {
    const viaPath = await execCommand("claude --version 2>&1", cliTimeoutMs);
    details.pathVersionCommand = {
      whichClaude: located.stdout,
      stdout: viaPath.stdout,
      stderr: viaPath.stderr,
      exitCode: viaPath.exitCode
    };
  }

  const passed = version.exitCode === 0 || help.exitCode === 0;
  return {
    name: "claude-cli-execution",
    passed,
    details,
    error: passed ? void 0 : `Claude CLI failed. Version exit: ${version.exitCode}, Help exit: ${help.exitCode}`,
    durationMs: Date.now() - startedAt
  };
}
|
|
7122
|
+
/**
 * Mask secret material in the text output of an `env` listing.
 *
 * Lines of the form `NAME=value` whose NAME looks sensitive have their value
 * replaced. Because `sort` scatters the continuation lines of multi-line
 * values, any output line equal to a continuation line of a sensitive
 * variable in `env` is masked as well.
 *
 * @param {string} listing - Raw output of the env command.
 * @param {object} [env=process.env] - Environment used to find multi-line secrets.
 * @returns {string} The listing with sensitive values replaced by "[REDACTED]".
 */
function redactEnvListing(listing, env = process.env) {
  const sensitiveName = /SECRET|TOKEN|API_?KEY|PASSWORD|PASSWD|CREDENTIAL|CUSTOM_HEADERS/i;
  const variableLine = /^([A-Za-z_][A-Za-z0-9_]*)=/;
  const secretContinuations = new Set();
  for (const [key, value] of Object.entries(env)) {
    if (!value || !sensitiveName.test(key)) {
      continue;
    }
    for (const line of value.split("\n").slice(1)) {
      const trimmed = line.trim();
      if (trimmed) {
        secretContinuations.add(trimmed);
      }
    }
  }
  return listing.split("\n").map((line) => {
    const match = variableLine.exec(line);
    if (match && sensitiveName.test(match[1])) {
      return `${match[1]}=[REDACTED]`;
    }
    return secretContinuations.has(line.trim()) ? "[REDACTED]" : line;
  }).join("\n");
}

/**
 * Diagnostic: capture a redacted snapshot of the process environment.
 *
 * Records a fixed list of important variables (secrets summarized, PATH
 * abbreviated, custom headers reduced to name + value length), the first 50
 * sorted lines of the shell's `env` output with sensitive values masked, and
 * basic Node.js process information.
 *
 * @returns {Promise<object>} Result named "environment-dump"; informational,
 *   so `passed` is always true.
 */
async function testEnvironmentDump() {
  const start = Date.now();
  const details = {};
  const importantVars = [
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "NODE_ENV",
    "PWD",
    "EVAL_SERVER_URL",
    "AI_GATEWAY_URL",
    "TRACE_PUSH_URL",
    "EVAL_AUTH_TOKEN",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "ANTHROPIC_CUSTOM_HEADERS"
  ];
  const capturedVars = {};
  for (const key of importantVars) {
    const value = process.env[key];
    if (value) {
      if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
        // Never show more than a quarter of the secret, so short values are
        // not disclosed in full by the "starts:" preview.
        const previewLength = Math.min(10, Math.floor(value.length / 4));
        capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, previewLength)}...]`;
      } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
        capturedVars[key] = value.split("\n").map((h) => {
          // Split on the first colon only: header values may contain ":".
          const separator = h.indexOf(":");
          const name2 = separator === -1 ? h : h.slice(0, separator);
          const val = separator === -1 ? void 0 : h.slice(separator + 1);
          return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
        }).join(" | ");
      } else if (key === "PATH") {
        const parts = value.split(":");
        capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
      } else {
        capturedVars[key] = value;
      }
    } else {
      capturedVars[key] = "(NOT SET)";
    }
  }
  details.importantVars = capturedVars;
  const envResult = await execCommand("env | sort | head -50");
  // The raw listing contains secrets verbatim; mask them before they are
  // attached to the diagnostics result.
  details.envCommandOutput = redactEnvListing(envResult.stdout);
  details.envExitCode = envResult.exitCode;
  details.nodeInfo = {
    version: process.version,
    platform: process.platform,
    arch: process.arch,
    pid: process.pid,
    cwd: process.cwd(),
    execPath: process.execPath
  };
  return {
    name: "environment-dump",
    passed: true,
    // Info test, always passes
    details,
    durationMs: Date.now() - start
  };
}
|
|
7182
|
+
/**
 * Diagnostic: list the directories relevant to locating the evaluator and
 * its dependencies — the current working directory, the global npm root,
 * the `@wix` scope, the evaluator package, its `node_modules`, the
 * `@anthropic-ai` scope and the `.bin` directory.
 *
 * @returns {Promise<object>} Result named "file-system-structure";
 *   informational, so `passed` is always true. Each `details` entry holds
 *   the probed `path` and the listing's stdout as `contents`.
 */
async function testFileSystemStructure() {
  const startedAt = Date.now();
  const { stdout: npmRoot } = await execCommand("npm root -g");

  const wixPath = path9.join(npmRoot, "@wix");
  const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
  const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
  const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
  const binPath = path9.join(nodeModulesPath, ".bin");

  const details = {};
  const cwdListing = await execCommand("ls -la");
  details.currentDirectory = {
    path: process.cwd(),
    contents: cwdListing.stdout
  };

  // [details key, probed directory, shell command] — run strictly in order.
  const probes = [
    ["npmGlobalRoot", npmRoot, `ls -la "${npmRoot}" 2>&1 | head -30`],
    ["wixPackages", wixPath, `ls -la "${wixPath}" 2>&1`],
    ["evaluatorDir", evaluatorPath, `ls -la "${evaluatorPath}" 2>&1`],
    ["evaluatorNodeModules", nodeModulesPath, `ls "${nodeModulesPath}" 2>&1 | head -30`],
    ["anthropicPackages", anthropicPath, `ls -la "${anthropicPath}" 2>&1`],
    ["binDirectory", binPath, `ls -la "${binPath}" 2>&1`]
  ];
  for (const [key, directory, command] of probes) {
    const listing = await execCommand(command);
    details[key] = {
      path: directory,
      contents: listing.stdout
    };
  }

  return {
    name: "file-system-structure",
    passed: true,
    // Info test, always passes
    details,
    durationMs: Date.now() - startedAt
  };
}
|
|
7239
|
+
/**
 * Diagnostic: probes network reachability from the evaluator environment.
 *
 * Runs, in order: a DNS lookup and a ping of manage.wix.com, a curl against
 * the AI gateway, a curl against the backend `/health` endpoint, and an HTTPS
 * baseline request to www.google.com.
 *
 * The test passes when basic networking works (ping OR the HTTPS baseline
 * exits 0) AND the gateway curl exits 0.
 *
 * @param {object} config - Evaluator config; reads `aiGatewayUrl` (optional,
 *   falls back to the manage.wix.com proxy URL) and `serverUrl`.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testNetworkConnectivity(config) {
  const start = Date.now();
  const details = {};

  // A plain `cmd 2>&1 | tail -30` reports the exit status of the LAST pipeline
  // stage (`tail`/`head`), which is practically always 0, so a failed probe
  // used to look successful. Capture the probe's own status first, then
  // truncate the output, then exit with the captured status.
  // NOTE(review): assumes execCommand runs the string through a POSIX shell;
  // the pipes and redirections used by the surrounding diagnostics already rely on one.
  const withTruncatedOutput = (command, filter) =>
    `out=$(${command} 2>&1); code=$?; printf '%s\\n' "$out" | ${filter}; exit $code`;

  const dnsResult = await execCommand(
    withTruncatedOutput("nslookup manage.wix.com", "head -10")
  );
  details.dnsLookup = {
    command: "nslookup manage.wix.com",
    output: dnsResult.stdout || dnsResult.stderr,
    exitCode: dnsResult.exitCode
  };

  const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
  details.pingTest = {
    command: "ping -c 2 manage.wix.com",
    output: pingResult.stdout || pingResult.stderr,
    exitCode: pingResult.exitCode
  };

  // `||` (not `??`) on purpose: an empty configured URL also falls back to the default.
  const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
  const curlGatewayResult = await execCommand(
    withTruncatedOutput(
      `curl -v -s --connect-timeout 5 --max-time 10 "${gatewayUrl}"`,
      "tail -30"
    )
  );
  details.aiGatewayTest = {
    url: gatewayUrl,
    output: curlGatewayResult.stdout,
    exitCode: curlGatewayResult.exitCode
  };

  const serverUrl = config.serverUrl;
  const curlServerResult = await execCommand(
    withTruncatedOutput(
      `curl -v -s --connect-timeout 5 --max-time 10 "${serverUrl}/health"`,
      "tail -30"
    )
  );
  details.backendServerTest = {
    url: `${serverUrl}/health`,
    output: curlServerResult.stdout,
    exitCode: curlServerResult.exitCode
  };

  const httpsResult = await execCommand(
    'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
  );
  details.httpsBaseline = {
    command: "curl https://www.google.com",
    output: httpsResult.stdout,
    exitCode: httpsResult.exitCode
  };

  const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
  // curl exits 0 once an HTTP response is received (no --fail is passed), so
  // this reflects connectivity, not the HTTP status.
  const gatewayReachable = curlGatewayResult.exitCode === 0;
  const passed = networkWorks && gatewayReachable;

  return {
    name: "network-connectivity",
    passed,
    details,
    error: passed ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
    durationMs: Date.now() - start
  };
}
|
|
7292
|
+
/**
 * Diagnostic: verifies that child processes can be spawned and that their
 * stdout, stderr and exit codes are observable.
 *
 * Five probes run in sequence (echo, node stdout, shell builtins, node
 * stderr, explicit exit code). Only the echo probe decides the overall
 * verdict; the others are recorded for inspection.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testChildProcessSpawning() {
  const start = Date.now();
  const details = {};
  const marker = "DIAGNOSTIC_TEST_SUCCESS_12345";

  // Probe 1: plain echo; its output must equal the marker exactly.
  const echoCommand = `echo "${marker}"`;
  const echoRun = await execCommand(echoCommand);
  const echoMatches = echoRun.stdout === marker;
  details.echoTest = {
    command: echoCommand,
    output: echoRun.stdout,
    exitCode: echoRun.exitCode,
    passed: echoMatches
  };

  // Probe 2: spawn node and print basic process info (the recorded command is abbreviated).
  const nodeRun = await execCommand(
    'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
  );
  details.nodeTest = {
    command: 'node -e "console.log(JSON.stringify({...}))"',
    output: nodeRun.stdout,
    exitCode: nodeRun.exitCode
  };

  // Probe 3: several shell commands chained in one invocation.
  const shellCommand = 'echo "PID: $$"; pwd; whoami; date; uname -a';
  const shellRun = await execCommand(shellCommand);
  details.shellTest = {
    command: shellCommand,
    output: shellRun.stdout,
    exitCode: shellRun.exitCode
  };

  // Probe 4: stderr capture.
  const stderrCommand = `node -e "console.error('stderr test')"`;
  const stderrRun = await execCommand(stderrCommand);
  details.stderrTest = {
    command: stderrCommand,
    stderr: stderrRun.stderr,
    exitCode: stderrRun.exitCode
  };

  // Probe 5: a non-zero exit code must be propagated unchanged.
  const exitCommand = "exit 42";
  const exitRun = await execCommand(exitCommand);
  details.exitCodeTest = {
    command: exitCommand,
    exitCode: exitRun.exitCode,
    passed: exitRun.exitCode === 42
  };

  const passed = echoRun.exitCode === 0 && echoMatches;
  const outcome = {
    name: "child-process-spawning",
    passed,
    details,
    error: void 0,
    durationMs: 0
  };
  if (!passed) {
    outcome.error = "Echo test failed";
  }
  outcome.durationMs = Date.now() - start;
  return outcome;
}
|
|
7341
|
+
/**
 * Diagnostic: checks that `@anthropic-ai/claude-agent-sdk` can be dynamically
 * imported and records what it exports.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 *   On success `details` lists the exported keys and whether `query` is a
 *   function. On failure `details` carries the error message and the first
 *   five stack lines, and `error` is prefixed with "Failed to import SDK: ".
 */
async function testSdkImport() {
  const start = Date.now();
  const name = "sdk-import";
  const details = {};
  try {
    const sdk = await import("@anthropic-ai/claude-agent-sdk");
    const queryIsFunction = typeof sdk.query === "function";
    Object.assign(details, {
      sdkImported: true,
      exportedKeys: Object.keys(sdk),
      hasQuery: queryIsFunction
    });
    if (queryIsFunction) {
      Object.assign(details, {
        queryFunctionExists: true,
        queryFunctionType: typeof sdk.query
      });
    }
    return { name, passed: true, details, durationMs: Date.now() - start };
  } catch (err) {
    const isError = err instanceof Error;
    const error = isError ? err.message : String(err);
    // Keep only the top of the stack so the report stays small.
    const stack = isError ? err.stack?.split("\n").slice(0, 5) : void 0;
    return {
      name,
      passed: false,
      details: { sdkImported: false, error, stack },
      error: `Failed to import SDK: ${error}`,
      durationMs: Date.now() - start
    };
  }
}
|
|
7374
|
+
/**
 * Diagnostic: verifies that the process can create a directory under /tmp,
 * write a file into it, read the file back unchanged, and remove everything.
 *
 * The test passes only when the content read back equals the content written
 * and the scratch directory was removed. The scratch directory is removed on
 * both the success and the failure path, so a failed run does not leave
 * residue that would break the next run.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   Result named "file-system-write"; `details` records which steps completed.
 */
async function testFileSystemWrite() {
  const start = Date.now();
  const details = {};
  const testDir = "/tmp/evalforge-diagnostics-test";
  const testFile = path9.join(testDir, "test-file.txt");
  const testContent = `Diagnostic test at ${(/* @__PURE__ */ new Date()).toISOString()}`;

  // Removes the scratch directory and records the outcome in `details`.
  // Recursive + force: tolerates a missing directory and leftover files from
  // an earlier interrupted run (a plain rmdir would fail with ENOTEMPTY).
  const cleanUp = () => {
    try {
      fs11.rmSync(testDir, { recursive: true, force: true });
      details.cleanedUp = true;
    } catch (cleanupErr) {
      details.cleanedUp = false;
      details.cleanupError = cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
    }
  };

  try {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs11.mkdirSync(testDir, { recursive: true });
    details.directoryCreated = true;
    fs11.writeFileSync(testFile, testContent);
    details.fileWritten = true;
    const readContent = fs11.readFileSync(testFile, "utf8");
    details.fileRead = true;
    details.contentMatches = readContent === testContent;
    const lsResult = await execCommand(`ls -la "${testDir}"`);
    details.directoryContents = lsResult.stdout;
    cleanUp();

    // A read-back mismatch or a failed cleanup is a real failure, not a pass.
    const passed = details.contentMatches && details.cleanedUp;
    let error;
    if (!details.contentMatches) {
      error = "File system write failed: content read back does not match content written";
    } else if (!details.cleanedUp) {
      error = `File system write failed: ${details.cleanupError}`;
    }
    return {
      name: "file-system-write",
      passed,
      details,
      error,
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    // Best effort: do not leak the scratch directory when a step failed midway.
    cleanUp();
    return {
      name: "file-system-write",
      passed: false,
      details: {
        ...details,
        error,
        testDir,
        testFile
      },
      error: `File system write failed: ${error}`,
      durationMs: Date.now() - start
    };
  }
}
|
|
7417
|
+
/**
 * Publishes one diagnostic result (or the final report) as a live trace event.
 *
 * The event is always written to stdout as a `TRACE_EVENT:`-prefixed JSON
 * line. When `tracePushUrl` is set it is additionally POSTed there as a
 * one-element JSON array; the request is not awaited and a rejected request
 * is only logged.
 *
 * @param {string} evalRunId2 - Eval run the event belongs to.
 * @param {object} result - A single test result, or the full report (recognised
 *   by its `summary` key). For a single result, strings inside `details`
 *   longer than 500 characters are truncated.
 * @param {string} [tracePushUrl] - Endpoint to push the event to, if any.
 * @param {string} [routeHeader] - Value for the `x-wix-route` header, if any.
 * @param {string} [authToken] - Bearer token for the `Authorization` header, if any.
 * @returns {void}
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  const isSummary = "summary" in result;

  // JSON replacer that caps every long string value.
  const clipLongStrings = (_key, value) => {
    if (typeof value === "string" && value.length > 500) {
      return value.slice(0, 500) + "... [truncated]";
    }
    return value;
  };

  let truncatedResult = result;
  if (!isSummary) {
    truncatedResult = {
      ...result,
      // The stringify/parse round trip applies the replacer to every nested value.
      details: JSON.parse(JSON.stringify(result.details, clipLongStrings))
    };
  }

  const event = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
    stepNumber: 0,
    type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
    outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    isComplete: isSummary
  };
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);

  if (!tracePushUrl) {
    return;
  }

  const headers = {
    "Content-Type": "application/json",
    ...(routeHeader ? { "x-wix-route": routeHeader } : {}),
    ...(authToken ? { "Authorization": `Bearer ${authToken}` } : {})
  };
  // Fire-and-forget: diagnostics must not block on, or fail because of, the push.
  fetch(tracePushUrl, {
    method: "POST",
    headers,
    body: JSON.stringify([event])
  }).catch((err) => {
    console.error(
      "[DIAGNOSTICS] Failed to push trace event to backend:",
      err
    );
  });
}
|
|
7462
|
+
/**
 * Runs every environment diagnostic in a fixed order, logs each outcome to
 * stderr, emits one trace event per test plus a final summary event, and
 * returns the aggregated report.
 *
 * @param {object} config - Evaluator config; `tracePushUrl`, `routeHeader` and
 *   `authToken` are forwarded to the trace emitter, and the whole object is
 *   passed to the network connectivity test.
 * @param {string} evalRunId2 - Eval run the trace events are attributed to.
 * @returns {Promise<{startedAt: string, completedAt: string, totalDurationMs: number, tests: object[], summary: {total: number, passed: number, failed: number}}>}
 */
async function runDiagnostics(config, evalRunId2) {
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
  const startTime = Date.now();

  // Box-drawing borders shared by the opening banner and the closing summary.
  const horizontal = "\u2550".repeat(59);
  const topBorder = `\u2554${horizontal}\u2557`;
  const bottomBorder = `\u255A${horizontal}\u255D`;

  const publish = (payload) => emitDiagnosticTraceEvent(
    evalRunId2,
    payload,
    config.tracePushUrl,
    config.routeHeader,
    config.authToken
  );

  const banner = [
    "",
    topBorder,
    "\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551",
    "\u2551 (Results sent to backend via trace events) \u2551",
    bottomBorder,
    ""
  ];
  for (const line of banner) {
    console.error(line);
  }

  // Execution order matters for log readability: binary discovery first, write test last.
  const plan = [
    ["claude-binary-discovery", testClaudeBinaryDiscovery],
    ["claude-cli-execution", testClaudeExecution],
    ["environment-dump", testEnvironmentDump],
    ["file-system-structure", testFileSystemStructure],
    ["network-connectivity", () => testNetworkConnectivity(config)],
    ["child-process-spawning", testChildProcessSpawning],
    ["sdk-import", testSdkImport],
    ["file-system-write", testFileSystemWrite]
  ];

  const tests = [];
  for (const [testName, testFn] of plan) {
    console.error(`[DIAG] Running: ${testName}...`);
    const result = await safeRunTest(testName, testFn);
    tests.push(result);

    const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
    console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
    console.error("[DIAG] Details:");
    console.error(JSON.stringify(result.details, null, 2));
    console.error("");
    if (!result.passed && result.error) {
      console.error(`[DIAG] ERROR: ${result.error}`);
    }
    publish(result);
  }

  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
  const totalDurationMs = Date.now() - startTime;
  const passedCount = tests.filter((t) => t.passed).length;
  const report = {
    startedAt,
    completedAt,
    totalDurationMs,
    tests,
    summary: {
      total: tests.length,
      passed: passedCount,
      failed: tests.length - passedCount
    }
  };
  // The report carries a `summary` key, which marks the trace event as the final one.
  publish(report);

  const { summary } = report;
  console.error("");
  console.error(topBorder);
  console.error(
    `\u2551 DIAGNOSTICS COMPLETE: ${summary.passed}/${summary.total} passed, ${summary.failed} failed`.padEnd(
      60
    ) + "\u2551"
  );
  console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
  console.error(bottomBorder);
  console.error("");
  return report;
}
|
|
7532
|
+
|
|
6889
7533
|
// src/index.ts
|
|
6890
7534
|
console.error(
|
|
6891
7535
|
"[EVALUATOR-BOOT] Module loading started",
|
|
@@ -6953,6 +7597,33 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
6953
7597
|
`[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
|
|
6954
7598
|
);
|
|
6955
7599
|
}
|
|
7600
|
+
state.currentPhase = ExecutionPhase.DIAGNOSTICS;
|
|
7601
|
+
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
|
|
7602
|
+
console.error("[DEBUG-H1.5] Running environment diagnostics...");
|
|
7603
|
+
try {
|
|
7604
|
+
const diagnosticReport = await runDiagnostics(config, evalRunId2);
|
|
7605
|
+
console.error(
|
|
7606
|
+
"[DEBUG-H1.5] Diagnostics completed",
|
|
7607
|
+
JSON.stringify({
|
|
7608
|
+
passed: diagnosticReport.summary.passed,
|
|
7609
|
+
failed: diagnosticReport.summary.failed,
|
|
7610
|
+
total: diagnosticReport.summary.total,
|
|
7611
|
+
durationMs: diagnosticReport.totalDurationMs
|
|
7612
|
+
})
|
|
7613
|
+
);
|
|
7614
|
+
const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
|
|
7615
|
+
if (failedTests.length > 0) {
|
|
7616
|
+
console.error(
|
|
7617
|
+
"[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
|
|
7618
|
+
failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
|
|
7619
|
+
);
|
|
7620
|
+
}
|
|
7621
|
+
} catch (diagErr) {
|
|
7622
|
+
console.error(
|
|
7623
|
+
"[DEBUG-H1.5] Diagnostics failed (non-fatal):",
|
|
7624
|
+
diagErr instanceof Error ? diagErr.message : String(diagErr)
|
|
7625
|
+
);
|
|
7626
|
+
}
|
|
6956
7627
|
state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
|
|
6957
7628
|
state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
|
|
6958
7629
|
console.error(
|
|
@@ -7072,7 +7743,7 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
7072
7743
|
};
|
|
7073
7744
|
try {
|
|
7074
7745
|
await api.updateEvalRun(projectId2, evalRunId2, {
|
|
7075
|
-
status:
|
|
7746
|
+
status: import_evalforge_types5.EvalStatus.COMPLETED,
|
|
7076
7747
|
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
7077
7748
|
});
|
|
7078
7749
|
} catch (updateErr) {
|
|
@@ -7113,7 +7784,7 @@ runEvaluation(projectId, evalRunId).then(() => {
|
|
|
7113
7784
|
authToken: config.authToken
|
|
7114
7785
|
});
|
|
7115
7786
|
await api.updateEvalRun(projectId, evalRunId, {
|
|
7116
|
-
status:
|
|
7787
|
+
status: import_evalforge_types5.EvalStatus.FAILED,
|
|
7117
7788
|
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7118
7789
|
jobError,
|
|
7119
7790
|
jobStatus: "FAILED"
|
|
@@ -7136,7 +7807,7 @@ runEvaluation(projectId, evalRunId).then(() => {
|
|
|
7136
7807
|
authToken
|
|
7137
7808
|
});
|
|
7138
7809
|
await api.updateEvalRun(projectId, evalRunId, {
|
|
7139
|
-
status:
|
|
7810
|
+
status: import_evalforge_types5.EvalStatus.FAILED,
|
|
7140
7811
|
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
7141
7812
|
jobError: `Config load failed, then: ${jobError}`,
|
|
7142
7813
|
jobStatus: "FAILED"
|