@wix/evalforge-evaluator 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -64,8 +64,8 @@ function createApiClient(serverUrl, options = "") {
64
64
  }
65
65
  return headers;
66
66
  }
67
- async function fetchJson(path9) {
68
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
67
+ async function fetchJson(path10) {
68
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
69
69
  console.error(`[API] GET ${url}`);
70
70
  const headers = buildHeaders();
71
71
  const response = await fetch(url, {
@@ -79,8 +79,8 @@ function createApiClient(serverUrl, options = "") {
79
79
  }
80
80
  return response.json();
81
81
  }
82
- async function postJson(path9, body) {
83
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
82
+ async function postJson(path10, body) {
83
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
84
84
  console.error(`[API] POST ${url}`);
85
85
  const response = await fetch(url, {
86
86
  method: "POST",
@@ -94,8 +94,8 @@ function createApiClient(serverUrl, options = "") {
94
94
  );
95
95
  }
96
96
  }
97
- async function deleteRequest(path9) {
98
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
97
+ async function deleteRequest(path10) {
98
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
99
99
  console.error(`[API] DELETE ${url}`);
100
100
  const headers = buildHeaders();
101
101
  const response = await fetch(url, {
@@ -109,8 +109,8 @@ function createApiClient(serverUrl, options = "") {
109
109
  );
110
110
  }
111
111
  }
112
- async function putJson(path9, body) {
113
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
112
+ async function putJson(path10, body) {
113
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
114
114
  console.error(`[API] PUT ${url}`);
115
115
  const response = await fetch(url, {
116
116
  method: "PUT",
@@ -1136,17 +1136,17 @@ var ReadStream = class extends Minipass {
1136
1136
  [_size];
1137
1137
  [_remain];
1138
1138
  [_autoClose];
1139
- constructor(path9, opt) {
1139
+ constructor(path10, opt) {
1140
1140
  opt = opt || {};
1141
1141
  super(opt);
1142
1142
  this.readable = true;
1143
1143
  this.writable = false;
1144
- if (typeof path9 !== "string") {
1144
+ if (typeof path10 !== "string") {
1145
1145
  throw new TypeError("path must be a string");
1146
1146
  }
1147
1147
  this[_errored] = false;
1148
1148
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1149
- this[_path] = path9;
1149
+ this[_path] = path10;
1150
1150
  this[_readSize] = opt.readSize || 16 * 1024 * 1024;
1151
1151
  this[_reading] = false;
1152
1152
  this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
@@ -1309,10 +1309,10 @@ var WriteStream = class extends EE {
1309
1309
  [_flags];
1310
1310
  [_finished] = false;
1311
1311
  [_pos];
1312
- constructor(path9, opt) {
1312
+ constructor(path10, opt) {
1313
1313
  opt = opt || {};
1314
1314
  super(opt);
1315
- this[_path] = path9;
1315
+ this[_path] = path10;
1316
1316
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1317
1317
  this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
1318
1318
  this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
@@ -2205,10 +2205,10 @@ var Header = class {
2205
2205
  }
2206
2206
  const prefixSize = this.ctime || this.atime ? 130 : 155;
2207
2207
  const split = splitPrefix(this.path || "", prefixSize);
2208
- const path9 = split[0];
2208
+ const path10 = split[0];
2209
2209
  const prefix = split[1];
2210
2210
  this.needPax = !!split[2];
2211
- this.needPax = encString(buf, off, 100, path9) || this.needPax;
2211
+ this.needPax = encString(buf, off, 100, path10) || this.needPax;
2212
2212
  this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
2213
2213
  this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
2214
2214
  this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
@@ -3184,16 +3184,16 @@ var modeFix = (mode, isDir, portable) => {
3184
3184
  // ../../node_modules/tar/dist/esm/strip-absolute-path.js
3185
3185
  import { win32 } from "node:path";
3186
3186
  var { isAbsolute, parse: parse3 } = win32;
3187
- var stripAbsolutePath = (path9) => {
3187
+ var stripAbsolutePath = (path10) => {
3188
3188
  let r = "";
3189
- let parsed = parse3(path9);
3190
- while (isAbsolute(path9) || parsed.root) {
3191
- const root = path9.charAt(0) === "/" && path9.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3192
- path9 = path9.slice(root.length);
3189
+ let parsed = parse3(path10);
3190
+ while (isAbsolute(path10) || parsed.root) {
3191
+ const root = path10.charAt(0) === "/" && path10.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3192
+ path10 = path10.slice(root.length);
3193
3193
  r += root;
3194
- parsed = parse3(path9);
3194
+ parsed = parse3(path10);
3195
3195
  }
3196
- return [r, path9];
3196
+ return [r, path10];
3197
3197
  };
3198
3198
 
3199
3199
  // ../../node_modules/tar/dist/esm/winchars.js
@@ -3205,12 +3205,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
3205
3205
  var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
3206
3206
 
3207
3207
  // ../../node_modules/tar/dist/esm/write-entry.js
3208
- var prefixPath = (path9, prefix) => {
3208
+ var prefixPath = (path10, prefix) => {
3209
3209
  if (!prefix) {
3210
- return normalizeWindowsPath(path9);
3210
+ return normalizeWindowsPath(path10);
3211
3211
  }
3212
- path9 = normalizeWindowsPath(path9).replace(/^\.(\/|$)/, "");
3213
- return stripTrailingSlashes(prefix) + "/" + path9;
3212
+ path10 = normalizeWindowsPath(path10).replace(/^\.(\/|$)/, "");
3213
+ return stripTrailingSlashes(prefix) + "/" + path10;
3214
3214
  };
3215
3215
  var maxReadSize = 16 * 1024 * 1024;
3216
3216
  var PROCESS = /* @__PURE__ */ Symbol("process");
@@ -3355,8 +3355,8 @@ var WriteEntry = class extends Minipass {
3355
3355
  [MODE](mode) {
3356
3356
  return modeFix(mode, this.type === "Directory", this.portable);
3357
3357
  }
3358
- [PREFIX](path9) {
3359
- return prefixPath(path9, this.prefix);
3358
+ [PREFIX](path10) {
3359
+ return prefixPath(path10, this.prefix);
3360
3360
  }
3361
3361
  [HEADER]() {
3362
3362
  if (!this.stat) {
@@ -3737,8 +3737,8 @@ var WriteEntryTar = class extends Minipass {
3737
3737
  super.write(b);
3738
3738
  readEntry.pipe(this);
3739
3739
  }
3740
- [PREFIX](path9) {
3741
- return prefixPath(path9, this.prefix);
3740
+ [PREFIX](path10) {
3741
+ return prefixPath(path10, this.prefix);
3742
3742
  }
3743
3743
  [MODE](mode) {
3744
3744
  return modeFix(mode, this.type === "Directory", this.portable);
@@ -4162,8 +4162,8 @@ var PackJob = class {
4162
4162
  pending = false;
4163
4163
  ignore = false;
4164
4164
  piped = false;
4165
- constructor(path9, absolute) {
4166
- this.path = path9 || "./";
4165
+ constructor(path10, absolute) {
4166
+ this.path = path10 || "./";
4167
4167
  this.absolute = absolute;
4168
4168
  }
4169
4169
  };
@@ -4291,21 +4291,21 @@ var Pack = class extends Minipass {
4291
4291
  [WRITE](chunk) {
4292
4292
  return super.write(chunk);
4293
4293
  }
4294
- add(path9) {
4295
- this.write(path9);
4294
+ add(path10) {
4295
+ this.write(path10);
4296
4296
  return this;
4297
4297
  }
4298
- end(path9, encoding, cb) {
4299
- if (typeof path9 === "function") {
4300
- cb = path9;
4301
- path9 = void 0;
4298
+ end(path10, encoding, cb) {
4299
+ if (typeof path10 === "function") {
4300
+ cb = path10;
4301
+ path10 = void 0;
4302
4302
  }
4303
4303
  if (typeof encoding === "function") {
4304
4304
  cb = encoding;
4305
4305
  encoding = void 0;
4306
4306
  }
4307
- if (path9) {
4308
- this.add(path9);
4307
+ if (path10) {
4308
+ this.add(path10);
4309
4309
  }
4310
4310
  this[ENDED2] = true;
4311
4311
  this[PROCESS2]();
@@ -4313,14 +4313,14 @@ var Pack = class extends Minipass {
4313
4313
  cb();
4314
4314
  return this;
4315
4315
  }
4316
- write(path9) {
4316
+ write(path10) {
4317
4317
  if (this[ENDED2]) {
4318
4318
  throw new Error("write after end");
4319
4319
  }
4320
- if (path9 instanceof ReadEntry) {
4321
- this[ADDTARENTRY](path9);
4320
+ if (path10 instanceof ReadEntry) {
4321
+ this[ADDTARENTRY](path10);
4322
4322
  } else {
4323
- this[ADDFSENTRY](path9);
4323
+ this[ADDFSENTRY](path10);
4324
4324
  }
4325
4325
  return this.flowing;
4326
4326
  }
@@ -4663,9 +4663,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
4663
4663
  // ../../node_modules/chownr/dist/esm/index.js
4664
4664
  import fs6 from "node:fs";
4665
4665
  import path4 from "node:path";
4666
- var lchownSync = (path9, uid, gid) => {
4666
+ var lchownSync = (path10, uid, gid) => {
4667
4667
  try {
4668
- return fs6.lchownSync(path9, uid, gid);
4668
+ return fs6.lchownSync(path10, uid, gid);
4669
4669
  } catch (er) {
4670
4670
  if (er?.code !== "ENOENT")
4671
4671
  throw er;
@@ -4748,9 +4748,9 @@ var CwdError = class extends Error {
4748
4748
  path;
4749
4749
  code;
4750
4750
  syscall = "chdir";
4751
- constructor(path9, code2) {
4752
- super(`${code2}: Cannot cd into '${path9}'`);
4753
- this.path = path9;
4751
+ constructor(path10, code2) {
4752
+ super(`${code2}: Cannot cd into '${path10}'`);
4753
+ this.path = path10;
4754
4754
  this.code = code2;
4755
4755
  }
4756
4756
  get name() {
@@ -4764,10 +4764,10 @@ var SymlinkError = class extends Error {
4764
4764
  symlink;
4765
4765
  syscall = "symlink";
4766
4766
  code = "TAR_SYMLINK_ERROR";
4767
- constructor(symlink, path9) {
4767
+ constructor(symlink, path10) {
4768
4768
  super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
4769
4769
  this.symlink = symlink;
4770
- this.path = path9;
4770
+ this.path = path10;
4771
4771
  }
4772
4772
  get name() {
4773
4773
  return "SymlinkError";
@@ -4949,13 +4949,13 @@ var normalizeUnicode = (s) => {
4949
4949
  // ../../node_modules/tar/dist/esm/path-reservations.js
4950
4950
  var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
4951
4951
  var isWindows2 = platform3 === "win32";
4952
- var getDirs = (path9) => {
4953
- const dirs = path9.split("/").slice(0, -1).reduce((set, path10) => {
4952
+ var getDirs = (path10) => {
4953
+ const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
4954
4954
  const s = set[set.length - 1];
4955
4955
  if (s !== void 0) {
4956
- path10 = join(s, path10);
4956
+ path11 = join(s, path11);
4957
4957
  }
4958
- set.push(path10 || "/");
4958
+ set.push(path11 || "/");
4959
4959
  return set;
4960
4960
  }, []);
4961
4961
  return dirs;
@@ -4973,7 +4973,7 @@ var PathReservations = class {
4973
4973
  paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
4974
4974
  return stripTrailingSlashes(join(normalizeUnicode(p))).toLowerCase();
4975
4975
  });
4976
- const dirs = new Set(paths.map((path9) => getDirs(path9)).reduce((a, b) => a.concat(b)));
4976
+ const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
4977
4977
  this.#reservations.set(fn, { dirs, paths });
4978
4978
  for (const p of paths) {
4979
4979
  const q = this.#queues.get(p);
@@ -5006,8 +5006,8 @@ var PathReservations = class {
5006
5006
  throw new Error("function does not have any path reservations");
5007
5007
  }
5008
5008
  return {
5009
- paths: res.paths.map((path9) => this.#queues.get(path9)),
5010
- dirs: [...res.dirs].map((path9) => this.#queues.get(path9))
5009
+ paths: res.paths.map((path10) => this.#queues.get(path10)),
5010
+ dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
5011
5011
  };
5012
5012
  }
5013
5013
  // check if fn is first in line for all its paths, and is
@@ -5035,14 +5035,14 @@ var PathReservations = class {
5035
5035
  }
5036
5036
  const { paths, dirs } = res;
5037
5037
  const next = /* @__PURE__ */ new Set();
5038
- for (const path9 of paths) {
5039
- const q = this.#queues.get(path9);
5038
+ for (const path10 of paths) {
5039
+ const q = this.#queues.get(path10);
5040
5040
  if (!q || q?.[0] !== fn) {
5041
5041
  continue;
5042
5042
  }
5043
5043
  const q0 = q[1];
5044
5044
  if (!q0) {
5045
- this.#queues.delete(path9);
5045
+ this.#queues.delete(path10);
5046
5046
  continue;
5047
5047
  }
5048
5048
  q.shift();
@@ -5107,24 +5107,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
5107
5107
  var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
5108
5108
  var isWindows3 = platform4 === "win32";
5109
5109
  var DEFAULT_MAX_DEPTH = 1024;
5110
- var unlinkFile = (path9, cb) => {
5110
+ var unlinkFile = (path10, cb) => {
5111
5111
  if (!isWindows3) {
5112
- return fs8.unlink(path9, cb);
5112
+ return fs8.unlink(path10, cb);
5113
5113
  }
5114
- const name2 = path9 + ".DELETE." + randomBytes(16).toString("hex");
5115
- fs8.rename(path9, name2, (er) => {
5114
+ const name2 = path10 + ".DELETE." + randomBytes(16).toString("hex");
5115
+ fs8.rename(path10, name2, (er) => {
5116
5116
  if (er) {
5117
5117
  return cb(er);
5118
5118
  }
5119
5119
  fs8.unlink(name2, cb);
5120
5120
  });
5121
5121
  };
5122
- var unlinkFileSync = (path9) => {
5122
+ var unlinkFileSync = (path10) => {
5123
5123
  if (!isWindows3) {
5124
- return fs8.unlinkSync(path9);
5124
+ return fs8.unlinkSync(path10);
5125
5125
  }
5126
- const name2 = path9 + ".DELETE." + randomBytes(16).toString("hex");
5127
- fs8.renameSync(path9, name2);
5126
+ const name2 = path10 + ".DELETE." + randomBytes(16).toString("hex");
5127
+ fs8.renameSync(path10, name2);
5128
5128
  fs8.unlinkSync(name2);
5129
5129
  };
5130
5130
  var uint32 = (a, b, c) => a !== void 0 && a === a >>> 0 ? a : b !== void 0 && b === b >>> 0 ? b : c;
@@ -5222,24 +5222,24 @@ var Unpack = class extends Parser {
5222
5222
  // return false if we need to skip this file
5223
5223
  // return true if the field was successfully sanitized
5224
5224
  [STRIPABSOLUTEPATH](entry, field) {
5225
- const path9 = entry[field];
5226
- if (!path9 || this.preservePaths)
5225
+ const path10 = entry[field];
5226
+ if (!path10 || this.preservePaths)
5227
5227
  return true;
5228
- const parts = path9.split("/");
5228
+ const parts = path10.split("/");
5229
5229
  if (parts.includes("..") || /* c8 ignore next */
5230
5230
  isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
5231
5231
  this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
5232
5232
  entry,
5233
- [field]: path9
5233
+ [field]: path10
5234
5234
  });
5235
5235
  return false;
5236
5236
  }
5237
- const [root, stripped] = stripAbsolutePath(path9);
5237
+ const [root, stripped] = stripAbsolutePath(path10);
5238
5238
  if (root) {
5239
5239
  entry[field] = String(stripped);
5240
5240
  this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
5241
5241
  entry,
5242
- [field]: path9
5242
+ [field]: path10
5243
5243
  });
5244
5244
  }
5245
5245
  return true;
@@ -6006,9 +6006,9 @@ var mtimeFilter = (opt) => {
6006
6006
  if (!opt.mtimeCache) {
6007
6007
  opt.mtimeCache = /* @__PURE__ */ new Map();
6008
6008
  }
6009
- opt.filter = filter ? (path9, stat) => filter(path9, stat) && !/* c8 ignore start */
6010
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path9, stat) => !/* c8 ignore start */
6011
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6009
+ opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
6010
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
6011
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6012
6012
  };
6013
6013
 
6014
6014
  // src/run-scenario/environment.ts
@@ -6307,6 +6307,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
6307
6307
  queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
6308
6308
  );
6309
6309
  console.log("[SDK-DEBUG] Calling SDK query()...");
6310
+ if (traceContext) {
6311
+ const preExecEvent = {
6312
+ evalRunId: traceContext.evalRunId,
6313
+ scenarioId: traceContext.scenarioId,
6314
+ scenarioName: traceContext.scenarioName,
6315
+ targetId: traceContext.targetId,
6316
+ targetName: traceContext.targetName,
6317
+ stepNumber: 0,
6318
+ type: LiveTraceEventType.DIAGNOSTIC,
6319
+ outputPreview: JSON.stringify({
6320
+ event: "pre-sdk-execution",
6321
+ model: queryOptions.model,
6322
+ maxTurns: queryOptions.maxTurns,
6323
+ sdkEnv: {
6324
+ ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
6325
+ hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
6326
+ hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
6327
+ hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
6328
+ },
6329
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
6330
+ }),
6331
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6332
+ isComplete: false
6333
+ };
6334
+ emitTraceEvent(
6335
+ preExecEvent,
6336
+ traceContext.tracePushUrl,
6337
+ traceContext.routeHeader,
6338
+ traceContext.authToken
6339
+ );
6340
+ }
6310
6341
  try {
6311
6342
  for await (const message of query({
6312
6343
  prompt: scenario.triggerPrompt,
@@ -6364,10 +6395,10 @@ async function executeWithClaudeCode(skill, scenario, options) {
6364
6395
  console.error(errorStack);
6365
6396
  }
6366
6397
  if (sdkError && typeof sdkError === "object") {
6367
- const errObj = sdkError;
6398
+ const errObj2 = sdkError;
6368
6399
  console.error("[SDK-ERROR] Error object properties:");
6369
- for (const key of Object.keys(errObj)) {
6370
- const value = errObj[key];
6400
+ for (const key of Object.keys(errObj2)) {
6401
+ const value = errObj2[key];
6371
6402
  if (value !== void 0 && key !== "stack") {
6372
6403
  try {
6373
6404
  const valueStr = typeof value === "object" ? JSON.stringify(value, null, 2) : String(value);
@@ -6377,7 +6408,7 @@ async function executeWithClaudeCode(skill, scenario, options) {
6377
6408
  }
6378
6409
  }
6379
6410
  }
6380
- const sdkErrorKeys = [
6411
+ const sdkErrorKeys2 = [
6381
6412
  "code",
6382
6413
  "status",
6383
6414
  "statusCode",
@@ -6400,19 +6431,19 @@ async function executeWithClaudeCode(skill, scenario, options) {
6400
6431
  "spawnargs"
6401
6432
  ];
6402
6433
  const extraInfo = {};
6403
- for (const key of sdkErrorKeys) {
6404
- if (key in errObj && errObj[key] !== void 0) {
6405
- extraInfo[key] = errObj[key];
6434
+ for (const key of sdkErrorKeys2) {
6435
+ if (key in errObj2 && errObj2[key] !== void 0) {
6436
+ extraInfo[key] = errObj2[key];
6406
6437
  }
6407
6438
  }
6408
6439
  if (Object.keys(extraInfo).length > 0) {
6409
6440
  console.error("[SDK-ERROR] SDK-specific error details:");
6410
6441
  console.error(JSON.stringify(extraInfo, null, 2));
6411
6442
  }
6412
- if (errObj.cause && typeof errObj.cause === "object") {
6443
+ if (errObj2.cause && typeof errObj2.cause === "object") {
6413
6444
  console.error("[SDK-ERROR] Error cause:");
6414
6445
  try {
6415
- console.error(JSON.stringify(errObj.cause, null, 2));
6446
+ console.error(JSON.stringify(errObj2.cause, null, 2));
6416
6447
  } catch {
6417
6448
  console.error("[SDK-ERROR] Error cause: [cannot serialize]");
6418
6449
  }
@@ -6436,17 +6467,92 @@ async function executeWithClaudeCode(skill, scenario, options) {
6436
6467
  console.error("[SDK-ERROR] USER:", process.env.USER);
6437
6468
  console.error("[SDK-ERROR] SHELL:", process.env.SHELL);
6438
6469
  console.error("[SDK-ERROR] ==========================================");
6470
+ const errObj = sdkError;
6471
+ const sdkSpecificInfo = {};
6472
+ const sdkErrorKeys = [
6473
+ "exitCode",
6474
+ "stderr",
6475
+ "stdout",
6476
+ "signal",
6477
+ "killed",
6478
+ "code",
6479
+ "status",
6480
+ "errno",
6481
+ "syscall",
6482
+ "spawnargs"
6483
+ ];
6484
+ for (const key of sdkErrorKeys) {
6485
+ if (errObj && key in errObj && errObj[key] !== void 0) {
6486
+ const val = errObj[key];
6487
+ if (typeof val === "string" && val.length > 500) {
6488
+ sdkSpecificInfo[key] = val.substring(0, 500) + "... [truncated]";
6489
+ } else {
6490
+ sdkSpecificInfo[key] = val;
6491
+ }
6492
+ }
6493
+ }
6494
+ let causeInfo;
6495
+ if (errObj?.cause && typeof errObj.cause === "object") {
6496
+ try {
6497
+ const causeStr = JSON.stringify(errObj.cause, null, 2);
6498
+ causeInfo = causeStr.length > 500 ? causeStr.substring(0, 500) + "... [truncated]" : causeStr;
6499
+ } catch {
6500
+ causeInfo = "[cannot serialize cause]";
6501
+ }
6502
+ }
6503
+ const sdkEnvDebug = {
6504
+ ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
6505
+ hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
6506
+ hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
6507
+ hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS,
6508
+ ANTHROPIC_CUSTOM_HEADERS_preview: sdkEnv.ANTHROPIC_CUSTOM_HEADERS ? sdkEnv.ANTHROPIC_CUSTOM_HEADERS.split("\n").map((h) => h.split(":")[0]).join(", ") : void 0
6509
+ };
6439
6510
  const errorDetails = {
6440
6511
  messageCount,
6441
6512
  errorName,
6442
6513
  errorMessage,
6443
6514
  skillId: skill.id,
6444
6515
  scenarioId: scenario.id,
6445
- model: options.model || DEFAULT_MODEL
6516
+ model: options.model || DEFAULT_MODEL,
6517
+ sdkEnv: sdkEnvDebug,
6518
+ sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
6519
+ cause: causeInfo
6446
6520
  };
6521
+ if (traceContext) {
6522
+ const errorTraceEvent = {
6523
+ evalRunId: traceContext.evalRunId,
6524
+ scenarioId: traceContext.scenarioId,
6525
+ scenarioName: traceContext.scenarioName,
6526
+ targetId: traceContext.targetId,
6527
+ targetName: traceContext.targetName,
6528
+ stepNumber: traceStepNumber + 1,
6529
+ type: LiveTraceEventType.DIAGNOSTIC,
6530
+ outputPreview: JSON.stringify(
6531
+ {
6532
+ event: "sdk-execution-failed",
6533
+ error: errorMessage,
6534
+ errorName,
6535
+ messageCount,
6536
+ sdkEnv: sdkEnvDebug,
6537
+ sdkError: sdkSpecificInfo,
6538
+ cause: causeInfo
6539
+ },
6540
+ null,
6541
+ 2
6542
+ ).slice(0, 2e3),
6543
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6544
+ isComplete: true
6545
+ };
6546
+ emitTraceEvent(
6547
+ errorTraceEvent,
6548
+ traceContext.tracePushUrl,
6549
+ traceContext.routeHeader,
6550
+ traceContext.authToken
6551
+ );
6552
+ }
6447
6553
  throw new Error(
6448
6554
  `Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
6449
- Details: ${JSON.stringify(errorDetails)}` + (errorStack ? `
6555
+ Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
6450
6556
  Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
6451
6557
  );
6452
6558
  }
@@ -6810,6 +6916,8 @@ ${stackLines.join("\n")}`);
6810
6916
  return parts.join(" ");
6811
6917
  }
6812
6918
  var ExecutionPhase = {
6919
+ /** Environment diagnostics phase (runs before execution) */
6920
+ DIAGNOSTICS: "diagnostics",
6813
6921
  CONFIG: "config-loading",
6814
6922
  API_CLIENT: "api-client-creation",
6815
6923
  FETCH_EVAL_RUN: "fetch-eval-run",
@@ -6826,6 +6934,402 @@ var ExecutionPhase = {
6826
6934
  UPDATE_STATUS: "update-status"
6827
6935
  };
6828
6936
 
6937
+ // src/diagnostics.ts
6938
+ import { spawn } from "child_process";
6939
+ import * as fs11 from "fs";
6940
+ import * as path9 from "path";
6941
+ import { LiveTraceEventType as LiveTraceEventType2 } from "@wix/evalforge-types";
6942
+ async function execCommand(command, timeoutMs = 5e3) {
6943
+ return new Promise((resolve) => {
6944
+ const proc2 = spawn("sh", ["-c", command], {
6945
+ timeout: timeoutMs
6946
+ });
6947
+ let stdout = "";
6948
+ let stderr = "";
6949
+ proc2.stdout.on("data", (data) => {
6950
+ stdout += data.toString();
6951
+ });
6952
+ proc2.stderr.on("data", (data) => {
6953
+ stderr += data.toString();
6954
+ });
6955
+ proc2.on("close", (code2) => {
6956
+ resolve({
6957
+ stdout: stdout.trim(),
6958
+ stderr: stderr.trim(),
6959
+ exitCode: code2 ?? -1
6960
+ });
6961
+ });
6962
+ proc2.on("error", (err) => {
6963
+ resolve({
6964
+ stdout: "",
6965
+ stderr: err.message,
6966
+ exitCode: -1
6967
+ });
6968
+ });
6969
+ });
6970
+ }
6971
+ async function testEnvironmentVariables() {
6972
+ const start = Date.now();
6973
+ const envVars = [
6974
+ "PATH",
6975
+ "HOME",
6976
+ "USER",
6977
+ "SHELL",
6978
+ "NODE_ENV",
6979
+ "EVAL_SERVER_URL",
6980
+ "AI_GATEWAY_URL",
6981
+ "ANTHROPIC_API_KEY",
6982
+ "ANTHROPIC_AUTH_TOKEN",
6983
+ "ANTHROPIC_BASE_URL",
6984
+ "ANTHROPIC_CUSTOM_HEADERS"
6985
+ ];
6986
+ const details = {};
6987
+ const missing = [];
6988
+ for (const key of envVars) {
6989
+ const value = process.env[key];
6990
+ if (value) {
6991
+ if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
6992
+ details[key] = `[SET - ${value.length} chars]`;
6993
+ } else if (key === "PATH") {
6994
+ details[key] = value.split(":");
6995
+ } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
6996
+ details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
6997
+ } else {
6998
+ details[key] = value;
6999
+ }
7000
+ } else {
7001
+ missing.push(key);
7002
+ }
7003
+ }
7004
+ details.missingEnvVars = missing;
7005
+ return {
7006
+ name: "environment-variables",
7007
+ passed: true,
7008
+ // Info only, doesn't fail
7009
+ details,
7010
+ durationMs: Date.now() - start
7011
+ };
7012
+ }
7013
+ async function testNodeEnvironment() {
7014
+ const start = Date.now();
7015
+ const details = {
7016
+ nodeVersion: process.version,
7017
+ platform: process.platform,
7018
+ arch: process.arch,
7019
+ cwd: process.cwd(),
7020
+ pid: process.pid,
7021
+ uptime: process.uptime(),
7022
+ memoryUsage: process.memoryUsage(),
7023
+ execPath: process.execPath
7024
+ };
7025
+ return {
7026
+ name: "node-environment",
7027
+ passed: true,
7028
+ details,
7029
+ durationMs: Date.now() - start
7030
+ };
7031
+ }
7032
+ async function testNpmGlobalDirectory() {
7033
+ const start = Date.now();
7034
+ const npmRootResult = await execCommand("npm root -g");
7035
+ const npmBinResult = await execCommand("npm bin -g");
7036
+ const npmRoot = npmRootResult.stdout;
7037
+ const npmBin = npmBinResult.stdout;
7038
+ const details = {
7039
+ npmRootGlobal: npmRoot,
7040
+ npmBinGlobal: npmBin,
7041
+ npmRootExitCode: npmRootResult.exitCode,
7042
+ npmBinExitCode: npmBinResult.exitCode
7043
+ };
7044
+ if (npmRoot) {
7045
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7046
+ const evaluatorExists = fs11.existsSync(evaluatorPath);
7047
+ details.evaluatorInstalled = evaluatorExists;
7048
+ if (evaluatorExists) {
7049
+ try {
7050
+ const files = fs11.readdirSync(evaluatorPath);
7051
+ details.evaluatorFiles = files;
7052
+ } catch {
7053
+ details.evaluatorFiles = "Failed to list files";
7054
+ }
7055
+ }
7056
+ }
7057
+ if (npmRoot) {
7058
+ const sdkPath = path9.join(
7059
+ npmRoot,
7060
+ "@wix",
7061
+ "evalforge-evaluator",
7062
+ "node_modules",
7063
+ "@anthropic-ai",
7064
+ "claude-agent-sdk"
7065
+ );
7066
+ const sdkExists = fs11.existsSync(sdkPath);
7067
+ details.claudeAgentSdkInstalled = sdkExists;
7068
+ }
7069
+ const passed = npmRootResult.exitCode === 0 && npmBinResult.exitCode === 0;
7070
+ return {
7071
+ name: "npm-global-directory",
7072
+ passed,
7073
+ details,
7074
+ error: passed ? void 0 : npmRootResult.stderr || npmBinResult.stderr,
7075
+ durationMs: Date.now() - start
7076
+ };
7077
+ }
7078
+ async function testClaudeBinary() {
7079
+ const start = Date.now();
7080
+ const whichResult = await execCommand("which claude");
7081
+ const versionResult = await execCommand("claude --version");
7082
+ const npmBinResult = await execCommand("npm bin -g");
7083
+ const npmBin = npmBinResult.stdout;
7084
+ let claudeInNpmBin = false;
7085
+ if (npmBin) {
7086
+ const claudePath = path9.join(npmBin, "claude");
7087
+ claudeInNpmBin = fs11.existsSync(claudePath);
7088
+ }
7089
+ const details = {
7090
+ whichClaude: whichResult.stdout || "(not found)",
7091
+ whichExitCode: whichResult.exitCode,
7092
+ claudeVersion: versionResult.stdout || versionResult.stderr,
7093
+ versionExitCode: versionResult.exitCode,
7094
+ claudeInNpmGlobalBin: claudeInNpmBin,
7095
+ npmGlobalBin: npmBin
7096
+ };
7097
+ const pathDirs = (process.env.PATH || "").split(":");
7098
+ const claudeFoundIn = [];
7099
+ for (const dir of pathDirs) {
7100
+ const claudePath = path9.join(dir, "claude");
7101
+ if (fs11.existsSync(claudePath)) {
7102
+ claudeFoundIn.push(dir);
7103
+ }
7104
+ }
7105
+ details.claudeFoundInPathDirs = claudeFoundIn;
7106
+ const passed = whichResult.exitCode === 0 || claudeInNpmBin;
7107
+ return {
7108
+ name: "claude-cli-binary",
7109
+ passed,
7110
+ details,
7111
+ error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
7112
+ durationMs: Date.now() - start
7113
+ };
7114
+ }
7115
+ async function testChildProcess() {
7116
+ const start = Date.now();
7117
+ const echoResult = await execCommand('echo "diagnostic-test-success"');
7118
+ const nodeResult = await execCommand('node -e "console.log(process.pid)"');
7119
+ const details = {
7120
+ echoResult: echoResult.stdout,
7121
+ echoExitCode: echoResult.exitCode,
7122
+ nodeResult: nodeResult.stdout,
7123
+ nodeExitCode: nodeResult.exitCode
7124
+ };
7125
+ const passed = echoResult.exitCode === 0 && echoResult.stdout === "diagnostic-test-success";
7126
+ return {
7127
+ name: "child-process-spawning",
7128
+ passed,
7129
+ details,
7130
+ error: passed ? void 0 : "Failed to spawn child process",
7131
+ durationMs: Date.now() - start
7132
+ };
7133
+ }
7134
+ async function testNetworkConnectivity(config) {
7135
+ const start = Date.now();
7136
+ const aiGatewayUrl = config.aiGatewayUrl;
7137
+ if (!aiGatewayUrl) {
7138
+ return {
7139
+ name: "network-connectivity",
7140
+ passed: false,
7141
+ details: { error: "No AI_GATEWAY_URL configured" },
7142
+ error: "No AI_GATEWAY_URL configured",
7143
+ durationMs: Date.now() - start
7144
+ };
7145
+ }
7146
+ const curlResult = await execCommand(
7147
+ `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${aiGatewayUrl}" 2>&1`
7148
+ );
7149
+ const serverUrl = config.serverUrl;
7150
+ const serverResult = await execCommand(
7151
+ `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 "${serverUrl}/health" 2>&1`
7152
+ );
7153
+ const details = {
7154
+ aiGatewayUrl,
7155
+ aiGatewayHttpCode: curlResult.stdout,
7156
+ aiGatewayExitCode: curlResult.exitCode,
7157
+ serverUrl,
7158
+ serverHttpCode: serverResult.stdout,
7159
+ serverExitCode: serverResult.exitCode
7160
+ };
7161
+ const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
7162
+ return {
7163
+ name: "network-connectivity",
7164
+ passed: gatewayReachable,
7165
+ details,
7166
+ error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
7167
+ durationMs: Date.now() - start
7168
+ };
7169
+ }
7170
+ async function testSdkImport() {
7171
+ const start = Date.now();
7172
+ try {
7173
+ const sdk = await import("@anthropic-ai/claude-agent-sdk");
7174
+ const details = {
7175
+ sdkImported: true,
7176
+ hasQuery: typeof sdk.query === "function",
7177
+ exportedKeys: Object.keys(sdk)
7178
+ };
7179
+ return {
7180
+ name: "sdk-import",
7181
+ passed: true,
7182
+ details,
7183
+ durationMs: Date.now() - start
7184
+ };
7185
+ } catch (err) {
7186
+ const error = err instanceof Error ? err.message : String(err);
7187
+ return {
7188
+ name: "sdk-import",
7189
+ passed: false,
7190
+ details: {
7191
+ sdkImported: false,
7192
+ error
7193
+ },
7194
+ error: `Failed to import Claude Agent SDK: ${error}`,
7195
+ durationMs: Date.now() - start
7196
+ };
7197
+ }
7198
+ }
7199
/**
 * Diagnostic check: verifies that the process can create a directory, write a
 * file, read it back and clean up under /tmp/evalforge-diagnostics.
 *
 * Differences from a naive write/read/delete sequence:
 *  - cleanup runs in `finally`, so a failure midway does not leave the probe
 *    file behind;
 *  - cleanup is best-effort: a pre-existing, non-empty probe directory (e.g.
 *    left by another run) no longer turns a successful check into a failure;
 *  - `passed` reflects the read-back comparison instead of being always true.
 *
 * @returns {Promise<object>} A result object named "file-system-access" with
 *   `passed`, `details`, `durationMs`, and an `error` string when it failed.
 */
async function testFileSystemAccess() {
  const start = Date.now();
  const testDir = "/tmp/evalforge-diagnostics";
  const testFile = path9.join(testDir, "test.txt");
  const payload = "diagnostic-test";
  try {
    if (!fs11.existsSync(testDir)) {
      fs11.mkdirSync(testDir, { recursive: true });
    }
    fs11.writeFileSync(testFile, payload);
    const content = fs11.readFileSync(testFile, "utf8");
    const canReadFile = content === payload;
    const details = {
      canCreateDirectory: true,
      canWriteFile: true,
      canReadFile,
      testDir,
      cwd: process.cwd(),
      // Only the first 20 entries, to keep the report small.
      cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
    };
    return {
      name: "file-system-access",
      passed: canReadFile,
      details,
      error: canReadFile ? void 0 : "File system access failed: read-back content did not match written content",
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    return {
      name: "file-system-access",
      passed: false,
      details: {
        error,
        testDir,
        cwd: process.cwd()
      },
      error: `File system access failed: ${error}`,
      durationMs: Date.now() - start
    };
  } finally {
    // Best-effort cleanup: failures here are deliberately ignored because they
    // say nothing about whether the file system is usable for the evaluation.
    try {
      fs11.rmSync(testFile, { force: true });
    } catch {
      // ignore — the probe file could not be removed
    }
    try {
      // Fails with ENOTEMPTY when the directory holds unrelated files; those
      // are left untouched on purpose.
      fs11.rmdirSync(testDir);
    } catch {
      // ignore — directory missing or not empty
    }
  }
}
7241
/**
 * Publishes one diagnostic result (or the final report) as a live trace event.
 *
 * The event is always written to stdout as a single `TRACE_EVENT:<json>` line.
 * When `tracePushUrl` is truthy the same event is additionally POSTed there as
 * a one-element JSON array; the request is not awaited and a rejection is only
 * logged to stderr.
 *
 * @param {string} evalRunId2 - Identifier copied into the event's `evalRunId`.
 * @param {object} result - A single test result or the full report; the event
 *   is flagged complete when the object has a `summary` property.
 * @param {string} [tracePushUrl] - Endpoint to POST the event to, if any.
 * @param {string} [routeHeader] - Value for the `x-wix-route` header, if any.
 * @param {string} [authToken] - Bearer token for the `Authorization` header, if any.
 * @returns {void}
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  // Cap the serialized payload so the preview stays at most 2000 characters.
  const preview = JSON.stringify(result, null, 2).slice(0, 2e3);
  const traceEvent = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "System",
    stepNumber: 0,
    type: LiveTraceEventType2.DIAGNOSTIC,
    outputPreview: preview,
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    // Only the full report carries a `summary` key.
    isComplete: "summary" in result
  };
  console.log(`TRACE_EVENT:${JSON.stringify(traceEvent)}`);

  if (!tracePushUrl) {
    return;
  }

  const requestHeaders = {
    "Content-Type": "application/json",
    ...(routeHeader ? { "x-wix-route": routeHeader } : {}),
    ...(authToken ? { "Authorization": `Bearer ${authToken}` } : {})
  };
  const pushOptions = {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify([traceEvent])
  };
  // Fire-and-forget: failures are logged, never propagated.
  fetch(tracePushUrl, pushOptions).catch((err) => {
    console.error("[DIAGNOSTICS] Failed to push trace event:", err);
  });
}
7276
/**
 * Runs every environment diagnostic in sequence and returns an aggregate report.
 *
 * Each individual result — and finally the whole report — is forwarded to
 * `emitDiagnosticTraceEvent` and summarised on stderr.
 *
 * A diagnostic that throws (or rejects) no longer aborts the remaining checks:
 * it is recorded as a failed result labelled with the function's name (or the
 * explicit label passed to `runTest`), so the report always covers every check
 * and the final summary event is always emitted.
 *
 * @param {object} config - Evaluator configuration; `tracePushUrl`,
 *   `routeHeader` and `authToken` are read here, and the object is passed on to
 *   `testNetworkConnectivity`.
 * @param {string} evalRunId2 - Identifier attached to every emitted trace event.
 * @returns {Promise<object>} Report with `startedAt`, `completedAt`,
 *   `totalDurationMs`, `tests` and a `summary` of total/passed/failed counts.
 */
async function runDiagnostics(config, evalRunId2) {
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
  const startTime = Date.now();
  console.error("[DIAGNOSTICS] Starting environment diagnostics...");
  const tests = [];
  const runTest = async (testFn, label = testFn.name || "unknown-test") => {
    const testStart = Date.now();
    let result;
    try {
      result = await testFn();
    } catch (err) {
      // Convert an unexpected throw into a regular failed result so one broken
      // check cannot hide the outcome of all the others.
      const error = err instanceof Error ? err.message : String(err);
      result = {
        name: label,
        passed: false,
        details: { error },
        error: `Diagnostic threw unexpectedly: ${error}`,
        durationMs: Date.now() - testStart
      };
    }
    tests.push(result);
    emitDiagnosticTraceEvent(
      evalRunId2,
      result,
      config.tracePushUrl,
      config.routeHeader,
      config.authToken
    );
    const status = result.passed ? "\u2713" : "\u2717";
    console.error(
      `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
    );
    if (!result.passed && result.error) {
      console.error(`[DIAGNOSTICS]   Error: ${result.error}`);
    }
  };
  // Order is intentional: cheap environment checks first, then process,
  // network, SDK and file-system probes.
  await runTest(testEnvironmentVariables);
  await runTest(testNodeEnvironment);
  await runTest(testNpmGlobalDirectory);
  await runTest(testClaudeBinary);
  await runTest(testChildProcess);
  await runTest(() => testNetworkConnectivity(config), "network-connectivity");
  await runTest(testSdkImport);
  await runTest(testFileSystemAccess);
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
  const totalDurationMs = Date.now() - startTime;
  const passedCount = tests.filter((t) => t.passed).length;
  const report = {
    startedAt,
    completedAt,
    totalDurationMs,
    tests,
    summary: {
      total: tests.length,
      passed: passedCount,
      failed: tests.length - passedCount
    }
  };
  emitDiagnosticTraceEvent(
    evalRunId2,
    report,
    config.tracePushUrl,
    config.routeHeader,
    config.authToken
  );
  console.error(
    `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
  );
  return report;
}
7332
+
6829
7333
  // src/index.ts
6830
7334
  console.error(
6831
7335
  "[EVALUATOR-BOOT] Module loading started",
@@ -6893,6 +7397,33 @@ async function runEvaluation(projectId2, evalRunId2) {
6893
7397
  `[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
6894
7398
  );
6895
7399
  }
7400
+ state.currentPhase = ExecutionPhase.DIAGNOSTICS;
7401
+ state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
7402
+ console.error("[DEBUG-H1.5] Running environment diagnostics...");
7403
+ try {
7404
+ const diagnosticReport = await runDiagnostics(config, evalRunId2);
7405
+ console.error(
7406
+ "[DEBUG-H1.5] Diagnostics completed",
7407
+ JSON.stringify({
7408
+ passed: diagnosticReport.summary.passed,
7409
+ failed: diagnosticReport.summary.failed,
7410
+ total: diagnosticReport.summary.total,
7411
+ durationMs: diagnosticReport.totalDurationMs
7412
+ })
7413
+ );
7414
+ const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
7415
+ if (failedTests.length > 0) {
7416
+ console.error(
7417
+ "[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
7418
+ failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
7419
+ );
7420
+ }
7421
+ } catch (diagErr) {
7422
+ console.error(
7423
+ "[DEBUG-H1.5] Diagnostics failed (non-fatal):",
7424
+ diagErr instanceof Error ? diagErr.message : String(diagErr)
7425
+ );
7426
+ }
6896
7427
  state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
6897
7428
  state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
6898
7429
  console.error(