@wix/evalforge-evaluator 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/index.ts
27
- var import_evalforge_types4 = require("@wix/evalforge-types");
27
+ var import_evalforge_types5 = require("@wix/evalforge-types");
28
28
 
29
29
  // src/config.ts
30
30
  function loadConfig() {
@@ -87,8 +87,8 @@ function createApiClient(serverUrl, options = "") {
87
87
  }
88
88
  return headers;
89
89
  }
90
- async function fetchJson(path9) {
91
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
90
+ async function fetchJson(path10) {
91
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
92
92
  console.error(`[API] GET ${url}`);
93
93
  const headers = buildHeaders();
94
94
  const response = await fetch(url, {
@@ -102,8 +102,8 @@ function createApiClient(serverUrl, options = "") {
102
102
  }
103
103
  return response.json();
104
104
  }
105
- async function postJson(path9, body) {
106
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
105
+ async function postJson(path10, body) {
106
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
107
107
  console.error(`[API] POST ${url}`);
108
108
  const response = await fetch(url, {
109
109
  method: "POST",
@@ -117,8 +117,8 @@ function createApiClient(serverUrl, options = "") {
117
117
  );
118
118
  }
119
119
  }
120
- async function deleteRequest(path9) {
121
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
120
+ async function deleteRequest(path10) {
121
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
122
122
  console.error(`[API] DELETE ${url}`);
123
123
  const headers = buildHeaders();
124
124
  const response = await fetch(url, {
@@ -132,8 +132,8 @@ function createApiClient(serverUrl, options = "") {
132
132
  );
133
133
  }
134
134
  }
135
- async function putJson(path9, body) {
136
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
135
+ async function putJson(path10, body) {
136
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
137
137
  console.error(`[API] PUT ${url}`);
138
138
  const response = await fetch(url, {
139
139
  method: "PUT",
@@ -1157,17 +1157,17 @@ var ReadStream = class extends Minipass {
1157
1157
  [_size];
1158
1158
  [_remain];
1159
1159
  [_autoClose];
1160
- constructor(path9, opt) {
1160
+ constructor(path10, opt) {
1161
1161
  opt = opt || {};
1162
1162
  super(opt);
1163
1163
  this.readable = true;
1164
1164
  this.writable = false;
1165
- if (typeof path9 !== "string") {
1165
+ if (typeof path10 !== "string") {
1166
1166
  throw new TypeError("path must be a string");
1167
1167
  }
1168
1168
  this[_errored] = false;
1169
1169
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1170
- this[_path] = path9;
1170
+ this[_path] = path10;
1171
1171
  this[_readSize] = opt.readSize || 16 * 1024 * 1024;
1172
1172
  this[_reading] = false;
1173
1173
  this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
@@ -1330,10 +1330,10 @@ var WriteStream = class extends import_events.default {
1330
1330
  [_flags];
1331
1331
  [_finished] = false;
1332
1332
  [_pos];
1333
- constructor(path9, opt) {
1333
+ constructor(path10, opt) {
1334
1334
  opt = opt || {};
1335
1335
  super(opt);
1336
- this[_path] = path9;
1336
+ this[_path] = path10;
1337
1337
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1338
1338
  this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
1339
1339
  this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
@@ -2226,10 +2226,10 @@ var Header = class {
2226
2226
  }
2227
2227
  const prefixSize = this.ctime || this.atime ? 130 : 155;
2228
2228
  const split = splitPrefix(this.path || "", prefixSize);
2229
- const path9 = split[0];
2229
+ const path10 = split[0];
2230
2230
  const prefix = split[1];
2231
2231
  this.needPax = !!split[2];
2232
- this.needPax = encString(buf, off, 100, path9) || this.needPax;
2232
+ this.needPax = encString(buf, off, 100, path10) || this.needPax;
2233
2233
  this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
2234
2234
  this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
2235
2235
  this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
@@ -3205,16 +3205,16 @@ var modeFix = (mode, isDir, portable) => {
3205
3205
  // ../../node_modules/tar/dist/esm/strip-absolute-path.js
3206
3206
  var import_node_path3 = require("node:path");
3207
3207
  var { isAbsolute, parse: parse3 } = import_node_path3.win32;
3208
- var stripAbsolutePath = (path9) => {
3208
+ var stripAbsolutePath = (path10) => {
3209
3209
  let r = "";
3210
- let parsed = parse3(path9);
3211
- while (isAbsolute(path9) || parsed.root) {
3212
- const root = path9.charAt(0) === "/" && path9.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3213
- path9 = path9.slice(root.length);
3210
+ let parsed = parse3(path10);
3211
+ while (isAbsolute(path10) || parsed.root) {
3212
+ const root = path10.charAt(0) === "/" && path10.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3213
+ path10 = path10.slice(root.length);
3214
3214
  r += root;
3215
- parsed = parse3(path9);
3215
+ parsed = parse3(path10);
3216
3216
  }
3217
- return [r, path9];
3217
+ return [r, path10];
3218
3218
  };
3219
3219
 
3220
3220
  // ../../node_modules/tar/dist/esm/winchars.js
@@ -3226,12 +3226,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
3226
3226
  var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
3227
3227
 
3228
3228
  // ../../node_modules/tar/dist/esm/write-entry.js
3229
- var prefixPath = (path9, prefix) => {
3229
+ var prefixPath = (path10, prefix) => {
3230
3230
  if (!prefix) {
3231
- return normalizeWindowsPath(path9);
3231
+ return normalizeWindowsPath(path10);
3232
3232
  }
3233
- path9 = normalizeWindowsPath(path9).replace(/^\.(\/|$)/, "");
3234
- return stripTrailingSlashes(prefix) + "/" + path9;
3233
+ path10 = normalizeWindowsPath(path10).replace(/^\.(\/|$)/, "");
3234
+ return stripTrailingSlashes(prefix) + "/" + path10;
3235
3235
  };
3236
3236
  var maxReadSize = 16 * 1024 * 1024;
3237
3237
  var PROCESS = /* @__PURE__ */ Symbol("process");
@@ -3376,8 +3376,8 @@ var WriteEntry = class extends Minipass {
3376
3376
  [MODE](mode) {
3377
3377
  return modeFix(mode, this.type === "Directory", this.portable);
3378
3378
  }
3379
- [PREFIX](path9) {
3380
- return prefixPath(path9, this.prefix);
3379
+ [PREFIX](path10) {
3380
+ return prefixPath(path10, this.prefix);
3381
3381
  }
3382
3382
  [HEADER]() {
3383
3383
  if (!this.stat) {
@@ -3758,8 +3758,8 @@ var WriteEntryTar = class extends Minipass {
3758
3758
  super.write(b);
3759
3759
  readEntry.pipe(this);
3760
3760
  }
3761
- [PREFIX](path9) {
3762
- return prefixPath(path9, this.prefix);
3761
+ [PREFIX](path10) {
3762
+ return prefixPath(path10, this.prefix);
3763
3763
  }
3764
3764
  [MODE](mode) {
3765
3765
  return modeFix(mode, this.type === "Directory", this.portable);
@@ -4183,8 +4183,8 @@ var PackJob = class {
4183
4183
  pending = false;
4184
4184
  ignore = false;
4185
4185
  piped = false;
4186
- constructor(path9, absolute) {
4187
- this.path = path9 || "./";
4186
+ constructor(path10, absolute) {
4187
+ this.path = path10 || "./";
4188
4188
  this.absolute = absolute;
4189
4189
  }
4190
4190
  };
@@ -4312,21 +4312,21 @@ var Pack = class extends Minipass {
4312
4312
  [WRITE](chunk) {
4313
4313
  return super.write(chunk);
4314
4314
  }
4315
- add(path9) {
4316
- this.write(path9);
4315
+ add(path10) {
4316
+ this.write(path10);
4317
4317
  return this;
4318
4318
  }
4319
- end(path9, encoding, cb) {
4320
- if (typeof path9 === "function") {
4321
- cb = path9;
4322
- path9 = void 0;
4319
+ end(path10, encoding, cb) {
4320
+ if (typeof path10 === "function") {
4321
+ cb = path10;
4322
+ path10 = void 0;
4323
4323
  }
4324
4324
  if (typeof encoding === "function") {
4325
4325
  cb = encoding;
4326
4326
  encoding = void 0;
4327
4327
  }
4328
- if (path9) {
4329
- this.add(path9);
4328
+ if (path10) {
4329
+ this.add(path10);
4330
4330
  }
4331
4331
  this[ENDED2] = true;
4332
4332
  this[PROCESS2]();
@@ -4334,14 +4334,14 @@ var Pack = class extends Minipass {
4334
4334
  cb();
4335
4335
  return this;
4336
4336
  }
4337
- write(path9) {
4337
+ write(path10) {
4338
4338
  if (this[ENDED2]) {
4339
4339
  throw new Error("write after end");
4340
4340
  }
4341
- if (path9 instanceof ReadEntry) {
4342
- this[ADDTARENTRY](path9);
4341
+ if (path10 instanceof ReadEntry) {
4342
+ this[ADDTARENTRY](path10);
4343
4343
  } else {
4344
- this[ADDFSENTRY](path9);
4344
+ this[ADDFSENTRY](path10);
4345
4345
  }
4346
4346
  return this.flowing;
4347
4347
  }
@@ -4684,9 +4684,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
4684
4684
  // ../../node_modules/chownr/dist/esm/index.js
4685
4685
  var import_node_fs2 = __toESM(require("node:fs"), 1);
4686
4686
  var import_node_path5 = __toESM(require("node:path"), 1);
4687
- var lchownSync = (path9, uid, gid) => {
4687
+ var lchownSync = (path10, uid, gid) => {
4688
4688
  try {
4689
- return import_node_fs2.default.lchownSync(path9, uid, gid);
4689
+ return import_node_fs2.default.lchownSync(path10, uid, gid);
4690
4690
  } catch (er) {
4691
4691
  if (er?.code !== "ENOENT")
4692
4692
  throw er;
@@ -4769,9 +4769,9 @@ var CwdError = class extends Error {
4769
4769
  path;
4770
4770
  code;
4771
4771
  syscall = "chdir";
4772
- constructor(path9, code2) {
4773
- super(`${code2}: Cannot cd into '${path9}'`);
4774
- this.path = path9;
4772
+ constructor(path10, code2) {
4773
+ super(`${code2}: Cannot cd into '${path10}'`);
4774
+ this.path = path10;
4775
4775
  this.code = code2;
4776
4776
  }
4777
4777
  get name() {
@@ -4785,10 +4785,10 @@ var SymlinkError = class extends Error {
4785
4785
  symlink;
4786
4786
  syscall = "symlink";
4787
4787
  code = "TAR_SYMLINK_ERROR";
4788
- constructor(symlink, path9) {
4788
+ constructor(symlink, path10) {
4789
4789
  super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
4790
4790
  this.symlink = symlink;
4791
- this.path = path9;
4791
+ this.path = path10;
4792
4792
  }
4793
4793
  get name() {
4794
4794
  return "SymlinkError";
@@ -4970,13 +4970,13 @@ var normalizeUnicode = (s) => {
4970
4970
  // ../../node_modules/tar/dist/esm/path-reservations.js
4971
4971
  var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
4972
4972
  var isWindows2 = platform3 === "win32";
4973
- var getDirs = (path9) => {
4974
- const dirs = path9.split("/").slice(0, -1).reduce((set, path10) => {
4973
+ var getDirs = (path10) => {
4974
+ const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
4975
4975
  const s = set[set.length - 1];
4976
4976
  if (s !== void 0) {
4977
- path10 = (0, import_node_path7.join)(s, path10);
4977
+ path11 = (0, import_node_path7.join)(s, path11);
4978
4978
  }
4979
- set.push(path10 || "/");
4979
+ set.push(path11 || "/");
4980
4980
  return set;
4981
4981
  }, []);
4982
4982
  return dirs;
@@ -4994,7 +4994,7 @@ var PathReservations = class {
4994
4994
  paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
4995
4995
  return stripTrailingSlashes((0, import_node_path7.join)(normalizeUnicode(p))).toLowerCase();
4996
4996
  });
4997
- const dirs = new Set(paths.map((path9) => getDirs(path9)).reduce((a, b) => a.concat(b)));
4997
+ const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
4998
4998
  this.#reservations.set(fn, { dirs, paths });
4999
4999
  for (const p of paths) {
5000
5000
  const q = this.#queues.get(p);
@@ -5027,8 +5027,8 @@ var PathReservations = class {
5027
5027
  throw new Error("function does not have any path reservations");
5028
5028
  }
5029
5029
  return {
5030
- paths: res.paths.map((path9) => this.#queues.get(path9)),
5031
- dirs: [...res.dirs].map((path9) => this.#queues.get(path9))
5030
+ paths: res.paths.map((path10) => this.#queues.get(path10)),
5031
+ dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
5032
5032
  };
5033
5033
  }
5034
5034
  // check if fn is first in line for all its paths, and is
@@ -5056,14 +5056,14 @@ var PathReservations = class {
5056
5056
  }
5057
5057
  const { paths, dirs } = res;
5058
5058
  const next = /* @__PURE__ */ new Set();
5059
- for (const path9 of paths) {
5060
- const q = this.#queues.get(path9);
5059
+ for (const path10 of paths) {
5060
+ const q = this.#queues.get(path10);
5061
5061
  if (!q || q?.[0] !== fn) {
5062
5062
  continue;
5063
5063
  }
5064
5064
  const q0 = q[1];
5065
5065
  if (!q0) {
5066
- this.#queues.delete(path9);
5066
+ this.#queues.delete(path10);
5067
5067
  continue;
5068
5068
  }
5069
5069
  q.shift();
@@ -5128,24 +5128,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
5128
5128
  var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
5129
5129
  var isWindows3 = platform4 === "win32";
5130
5130
  var DEFAULT_MAX_DEPTH = 1024;
5131
- var unlinkFile = (path9, cb) => {
5131
+ var unlinkFile = (path10, cb) => {
5132
5132
  if (!isWindows3) {
5133
- return import_node_fs4.default.unlink(path9, cb);
5133
+ return import_node_fs4.default.unlink(path10, cb);
5134
5134
  }
5135
- const name2 = path9 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5136
- import_node_fs4.default.rename(path9, name2, (er) => {
5135
+ const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5136
+ import_node_fs4.default.rename(path10, name2, (er) => {
5137
5137
  if (er) {
5138
5138
  return cb(er);
5139
5139
  }
5140
5140
  import_node_fs4.default.unlink(name2, cb);
5141
5141
  });
5142
5142
  };
5143
- var unlinkFileSync = (path9) => {
5143
+ var unlinkFileSync = (path10) => {
5144
5144
  if (!isWindows3) {
5145
- return import_node_fs4.default.unlinkSync(path9);
5145
+ return import_node_fs4.default.unlinkSync(path10);
5146
5146
  }
5147
- const name2 = path9 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5148
- import_node_fs4.default.renameSync(path9, name2);
5147
+ const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5148
+ import_node_fs4.default.renameSync(path10, name2);
5149
5149
  import_node_fs4.default.unlinkSync(name2);
5150
5150
  };
5151
5151
  var uint32 = (a, b, c) => a !== void 0 && a === a >>> 0 ? a : b !== void 0 && b === b >>> 0 ? b : c;
@@ -5243,24 +5243,24 @@ var Unpack = class extends Parser {
5243
5243
  // return false if we need to skip this file
5244
5244
  // return true if the field was successfully sanitized
5245
5245
  [STRIPABSOLUTEPATH](entry, field) {
5246
- const path9 = entry[field];
5247
- if (!path9 || this.preservePaths)
5246
+ const path10 = entry[field];
5247
+ if (!path10 || this.preservePaths)
5248
5248
  return true;
5249
- const parts = path9.split("/");
5249
+ const parts = path10.split("/");
5250
5250
  if (parts.includes("..") || /* c8 ignore next */
5251
5251
  isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
5252
5252
  this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
5253
5253
  entry,
5254
- [field]: path9
5254
+ [field]: path10
5255
5255
  });
5256
5256
  return false;
5257
5257
  }
5258
- const [root, stripped] = stripAbsolutePath(path9);
5258
+ const [root, stripped] = stripAbsolutePath(path10);
5259
5259
  if (root) {
5260
5260
  entry[field] = String(stripped);
5261
5261
  this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
5262
5262
  entry,
5263
- [field]: path9
5263
+ [field]: path10
5264
5264
  });
5265
5265
  }
5266
5266
  return true;
@@ -6027,9 +6027,9 @@ var mtimeFilter = (opt) => {
6027
6027
  if (!opt.mtimeCache) {
6028
6028
  opt.mtimeCache = /* @__PURE__ */ new Map();
6029
6029
  }
6030
- opt.filter = filter ? (path9, stat) => filter(path9, stat) && !/* c8 ignore start */
6031
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path9, stat) => !/* c8 ignore start */
6032
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6030
+ opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
6031
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
6032
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6033
6033
  };
6034
6034
 
6035
6035
  // src/run-scenario/environment.ts
@@ -6324,6 +6324,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
6324
6324
  queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
6325
6325
  );
6326
6326
  console.log("[SDK-DEBUG] Calling SDK query()...");
6327
+ if (traceContext) {
6328
+ const preExecEvent = {
6329
+ evalRunId: traceContext.evalRunId,
6330
+ scenarioId: traceContext.scenarioId,
6331
+ scenarioName: traceContext.scenarioName,
6332
+ targetId: traceContext.targetId,
6333
+ targetName: traceContext.targetName,
6334
+ stepNumber: 0,
6335
+ type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
6336
+ outputPreview: JSON.stringify({
6337
+ event: "pre-sdk-execution",
6338
+ model: queryOptions.model,
6339
+ maxTurns: queryOptions.maxTurns,
6340
+ sdkEnv: {
6341
+ ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
6342
+ hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
6343
+ hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
6344
+ hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
6345
+ },
6346
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
6347
+ }),
6348
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6349
+ isComplete: false
6350
+ };
6351
+ emitTraceEvent(
6352
+ preExecEvent,
6353
+ traceContext.tracePushUrl,
6354
+ traceContext.routeHeader,
6355
+ traceContext.authToken
6356
+ );
6357
+ }
6327
6358
  try {
6328
6359
  for await (const message of query({
6329
6360
  prompt: scenario.triggerPrompt,
@@ -6504,6 +6535,38 @@ async function executeWithClaudeCode(skill, scenario, options) {
6504
6535
  sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
6505
6536
  cause: causeInfo
6506
6537
  };
6538
+ if (traceContext) {
6539
+ const errorTraceEvent = {
6540
+ evalRunId: traceContext.evalRunId,
6541
+ scenarioId: traceContext.scenarioId,
6542
+ scenarioName: traceContext.scenarioName,
6543
+ targetId: traceContext.targetId,
6544
+ targetName: traceContext.targetName,
6545
+ stepNumber: traceStepNumber + 1,
6546
+ type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
6547
+ outputPreview: JSON.stringify(
6548
+ {
6549
+ event: "sdk-execution-failed",
6550
+ error: errorMessage,
6551
+ errorName,
6552
+ messageCount,
6553
+ sdkEnv: sdkEnvDebug,
6554
+ sdkError: sdkSpecificInfo,
6555
+ cause: causeInfo
6556
+ },
6557
+ null,
6558
+ 2
6559
+ ).slice(0, 2e3),
6560
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6561
+ isComplete: true
6562
+ };
6563
+ emitTraceEvent(
6564
+ errorTraceEvent,
6565
+ traceContext.tracePushUrl,
6566
+ traceContext.routeHeader,
6567
+ traceContext.authToken
6568
+ );
6569
+ }
6507
6570
  throw new Error(
6508
6571
  `Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
6509
6572
  Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
@@ -6870,6 +6933,8 @@ ${stackLines.join("\n")}`);
6870
6933
  return parts.join(" ");
6871
6934
  }
6872
6935
  var ExecutionPhase = {
6936
+ /** Environment diagnostics phase (runs before execution) */
6937
+ DIAGNOSTICS: "diagnostics",
6873
6938
  CONFIG: "config-loading",
6874
6939
  API_CLIENT: "api-client-creation",
6875
6940
  FETCH_EVAL_RUN: "fetch-eval-run",
@@ -6886,6 +6951,402 @@ var ExecutionPhase = {
6886
6951
  UPDATE_STATUS: "update-status"
6887
6952
  };
6888
6953
 
6954
+ // src/diagnostics.ts
6955
+ var import_child_process = require("child_process");
6956
+ var fs11 = __toESM(require("fs"));
6957
+ var path9 = __toESM(require("path"));
6958
+ var import_evalforge_types4 = require("@wix/evalforge-types");
6959
/**
 * Runs `command` through `sh -c` and captures what it prints.
 *
 * The returned promise never rejects: a spawn failure is reported as a result
 * whose `stderr` holds the error message and whose `exitCode` is -1.
 *
 * @param {string} command - Command line handed verbatim to `sh -c`.
 * @param {number} [timeoutMs=5000] - Forwarded to spawn's `timeout` option.
 * @returns {Promise<{stdout: string, stderr: string, exitCode: number}>}
 *   Trimmed stdout/stderr; `exitCode` is -1 when the process closes without a
 *   numeric exit code.
 */
async function execCommand(command, timeoutMs = 5e3) {
  return new Promise((resolve) => {
    const child = import_child_process.spawn("sh", ["-c", command], {
      timeout: timeoutMs
    });
    let stdout = "";
    let stderr = "";
    // Decode at the stream level: converting each Buffer chunk separately
    // corrupts a multi-byte UTF-8 character that straddles two chunks.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk) => {
      stderr += chunk;
    });
    // "error" and "close" may both fire; only the first resolve() call has
    // any effect, so the earliest outcome wins.
    child.on("close", (code2) => {
      resolve({
        stdout: stdout.trim(),
        stderr: stderr.trim(),
        exitCode: code2 ?? -1
      });
    });
    child.on("error", (err) => {
      resolve({
        stdout: "",
        stderr: err.message,
        exitCode: -1
      });
    });
  });
}
6988
/**
 * Reports which of a fixed list of environment variables are set.
 *
 * Values whose name contains SECRET, TOKEN or API_KEY are replaced by their
 * length, PATH is split into its entries, and ANTHROPIC_CUSTOM_HEADERS is
 * reduced to the text before the first ":" on each line. Unset or empty
 * variables are listed under `details.missingEnvVars`.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, durationMs: number}>}
 *   Always `passed: true`; this check is informational only.
 */
async function testEnvironmentVariables() {
  const start = Date.now();
  const envVars = [
    "PATH",
    "HOME",
    "USER",
    "SHELL",
    "NODE_ENV",
    "EVAL_SERVER_URL",
    "AI_GATEWAY_URL",
    "ANTHROPIC_API_KEY",
    "ANTHROPIC_AUTH_TOKEN",
    "ANTHROPIC_BASE_URL",
    "ANTHROPIC_CUSTOM_HEADERS"
  ];
  const details = {};
  const missing = [];
  for (const key of envVars) {
    const value = process.env[key];
    if (!value) {
      missing.push(key);
      continue;
    }
    if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
      // Never echo credential values; the length is enough to show it is set.
      details[key] = `[SET - ${value.length} chars]`;
    } else if (key === "PATH") {
      // Use the platform's separator rather than a hard-coded ":".
      details[key] = value.split(path9.delimiter);
    } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
      // Keep header names only; header values may carry credentials.
      details[key] = value.split("\n").map((h) => h.split(":")[0]).join(", ");
    } else {
      details[key] = value;
    }
  }
  details.missingEnvVars = missing;
  return {
    name: "environment-variables",
    passed: true,
    // Info only, doesn't fail
    details,
    durationMs: Date.now() - start
  };
}
7030
/**
 * Captures a snapshot of the running Node.js process (version, platform,
 * architecture, working directory, pid, uptime, memory usage, executable).
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, durationMs: number}>}
 *   Always `passed: true`.
 */
async function testNodeEnvironment() {
  const startedAt = Date.now();
  const { version, platform, arch, pid, execPath } = process;
  const snapshot = {
    nodeVersion: version,
    platform,
    arch,
    cwd: process.cwd(),
    pid,
    uptime: process.uptime(),
    memoryUsage: process.memoryUsage(),
    execPath
  };
  const elapsed = Date.now() - startedAt;
  return {
    name: "node-environment",
    passed: true,
    details: snapshot,
    durationMs: elapsed
  };
}
7049
/**
 * Inspects npm's global install locations and checks whether this evaluator
 * package and its bundled Claude Agent SDK are present under the global root.
 *
 * The global bin directory is derived from `npm prefix -g` because the
 * `npm bin` command was removed in npm 9 and fails on current npm versions.
 * The `npmBinGlobal` / `npmBinExitCode` detail keys are kept for consumers of
 * the report; the exit code is that of `npm prefix -g`.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` is true when both npm commands exit with code 0.
 */
async function testNpmGlobalDirectory() {
  const start = Date.now();
  // The two lookups are independent, so run them concurrently.
  const [npmRootResult, npmPrefixResult] = await Promise.all([
    execCommand("npm root -g"),
    execCommand("npm prefix -g")
  ]);
  const npmRoot = npmRootResult.stdout;
  const npmPrefix = npmPrefixResult.stdout;
  let npmBin = "";
  if (npmPrefix) {
    // npm links global executables into {prefix}/bin on Unix and directly
    // into {prefix} on Windows.
    npmBin = process.platform === "win32" ? npmPrefix : path9.join(npmPrefix, "bin");
  }
  const details = {
    npmRootGlobal: npmRoot,
    npmBinGlobal: npmBin,
    npmRootExitCode: npmRootResult.exitCode,
    npmBinExitCode: npmPrefixResult.exitCode
  };
  if (npmRoot) {
    const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
    const evaluatorExists = fs11.existsSync(evaluatorPath);
    details.evaluatorInstalled = evaluatorExists;
    if (evaluatorExists) {
      try {
        details.evaluatorFiles = fs11.readdirSync(evaluatorPath);
      } catch {
        // Listing is informational; record the failure instead of aborting.
        details.evaluatorFiles = "Failed to list files";
      }
    }
    const sdkPath = path9.join(
      evaluatorPath,
      "node_modules",
      "@anthropic-ai",
      "claude-agent-sdk"
    );
    details.claudeAgentSdkInstalled = fs11.existsSync(sdkPath);
  }
  const passed = npmRootResult.exitCode === 0 && npmPrefixResult.exitCode === 0;
  return {
    name: "npm-global-directory",
    passed,
    details,
    error: passed ? void 0 : npmRootResult.stderr || npmPrefixResult.stderr,
    durationMs: Date.now() - start
  };
}
7095
/**
 * Looks for the `claude` CLI: via `which`, via `claude --version`, inside
 * npm's global bin directory, and in every directory listed in PATH.
 *
 * The global bin directory is derived from `npm prefix -g` because the
 * `npm bin` command was removed in npm 9.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` is true when `which claude` succeeds or a `claude` entry exists
 *   in npm's global bin directory.
 */
async function testClaudeBinary() {
  const start = Date.now();
  // The three probes are independent, so run them concurrently.
  const [whichResult, versionResult, npmPrefixResult] = await Promise.all([
    execCommand("which claude"),
    execCommand("claude --version"),
    execCommand("npm prefix -g")
  ]);
  const npmPrefix = npmPrefixResult.stdout;
  let npmBin = "";
  if (npmPrefix) {
    // npm links global executables into {prefix}/bin on Unix and directly
    // into {prefix} on Windows.
    npmBin = process.platform === "win32" ? npmPrefix : path9.join(npmPrefix, "bin");
  }
  const claudeInNpmBin = npmBin ? fs11.existsSync(path9.join(npmBin, "claude")) : false;
  const details = {
    whichClaude: whichResult.stdout || "(not found)",
    whichExitCode: whichResult.exitCode,
    // Some CLIs print their version on stderr, so fall back to it.
    claudeVersion: versionResult.stdout || versionResult.stderr,
    versionExitCode: versionResult.exitCode,
    claudeInNpmGlobalBin: claudeInNpmBin,
    npmGlobalBin: npmBin
  };
  // Split on the platform's separator rather than a hard-coded ":".
  const pathDirs = (process.env.PATH || "").split(path9.delimiter);
  details.claudeFoundInPathDirs = pathDirs.filter(
    (dir) => fs11.existsSync(path9.join(dir, "claude"))
  );
  const passed = whichResult.exitCode === 0 || claudeInNpmBin;
  return {
    name: "claude-cli-binary",
    passed,
    details,
    error: passed ? void 0 : "Claude CLI binary not found in PATH. The SDK will fail to spawn it.",
    durationMs: Date.now() - start
  };
}
7132
/**
 * Checks that child processes can be spawned by running an `echo` and a
 * `node -e` command through execCommand.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` depends only on the echo command (exit code 0 and the expected
 *   output); the node command's result is reported in `details` only.
 */
async function testChildProcess() {
  const startedAt = Date.now();
  const echo = await execCommand('echo "diagnostic-test-success"');
  const node = await execCommand('node -e "console.log(process.pid)"');
  const spawned = echo.exitCode === 0 && echo.stdout === "diagnostic-test-success";
  let error;
  if (!spawned) {
    error = "Failed to spawn child process";
  }
  return {
    name: "child-process-spawning",
    passed: spawned,
    details: {
      echoResult: echo.stdout,
      echoExitCode: echo.exitCode,
      nodeResult: node.stdout,
      nodeExitCode: node.exitCode
    },
    error,
    durationMs: Date.now() - startedAt
  };
}
7151
/**
 * Probes the AI Gateway URL and `${serverUrl}/health` with curl and reports
 * the HTTP status codes.
 *
 * @param {{aiGatewayUrl?: string, serverUrl?: string}} config - Only these two
 *   fields are read.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 *   `passed` reflects the gateway probe only (curl exit code 0 and a status
 *   other than "000"); the server probe is informational. Fails immediately
 *   when no gateway URL is configured.
 */
async function testNetworkConnectivity(config) {
  const start = Date.now();
  const aiGatewayUrl = config.aiGatewayUrl;
  if (!aiGatewayUrl) {
    return {
      name: "network-connectivity",
      passed: false,
      details: { error: "No AI_GATEWAY_URL configured" },
      error: "No AI_GATEWAY_URL configured",
      durationMs: Date.now() - start
    };
  }
  // The command line is interpreted by `sh -c`. Single-quote the URL (and
  // escape embedded single quotes) so characters such as `$`, backticks or
  // `"` in a configured URL are passed to curl literally instead of being
  // expanded or executed by the shell.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const probe = (url) => execCommand(
    `curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 ${shellQuote(url)} 2>&1`
  );
  const serverUrl = config.serverUrl;
  // The two probes are independent, so run them concurrently.
  const [curlResult, serverResult] = await Promise.all([
    probe(aiGatewayUrl),
    probe(`${serverUrl}/health`)
  ]);
  const details = {
    aiGatewayUrl,
    aiGatewayHttpCode: curlResult.stdout,
    aiGatewayExitCode: curlResult.exitCode,
    serverUrl,
    serverHttpCode: serverResult.stdout,
    serverExitCode: serverResult.exitCode
  };
  // curl reports "000" as the status code when no HTTP response was received.
  const gatewayReachable = curlResult.exitCode === 0 && curlResult.stdout !== "000";
  return {
    name: "network-connectivity",
    passed: gatewayReachable,
    details,
    error: gatewayReachable ? void 0 : "Failed to reach AI Gateway",
    durationMs: Date.now() - start
  };
}
7187
/**
 * Attempts a dynamic import of `@anthropic-ai/claude-agent-sdk` and reports
 * whether it loaded, whether it exposes a `query` function, and its exported
 * keys.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 *   `passed` is false, with the import error message, when the import throws.
 */
async function testSdkImport() {
  const name = "sdk-import";
  const startedAt = Date.now();
  try {
    const sdkModule = await import("@anthropic-ai/claude-agent-sdk");
    return {
      name,
      passed: true,
      details: {
        sdkImported: true,
        hasQuery: typeof sdkModule.query === "function",
        exportedKeys: Object.keys(sdkModule)
      },
      durationMs: Date.now() - startedAt
    };
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return {
      name,
      passed: false,
      details: {
        sdkImported: false,
        error: reason
      },
      error: `Failed to import Claude Agent SDK: ${reason}`,
      durationMs: Date.now() - startedAt
    };
  }
}
7216
/**
 * Verifies basic file-system operations: create a scratch directory, write a
 * file, read it back, delete both, and list the current working directory.
 *
 * A uniquely named scratch directory (prefix `/tmp/evalforge-diagnostics-`)
 * is created for each call so that concurrent runs, or leftovers from an
 * earlier crashed run, cannot interfere with each other.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error?: string, durationMs: number}>}
 *   `passed` is false, with the failure message, when any operation throws.
 */
async function testFileSystemAccess() {
  const start = Date.now();
  const testDirPrefix = "/tmp/evalforge-diagnostics-";
  let testDir = testDirPrefix;
  let created = false;
  try {
    testDir = fs11.mkdtempSync(testDirPrefix);
    created = true;
    const testFile = path9.join(testDir, "test.txt");
    fs11.writeFileSync(testFile, "diagnostic-test");
    const content = fs11.readFileSync(testFile, "utf8");
    // Deleting is part of what is being verified, so failures here count.
    fs11.unlinkSync(testFile);
    fs11.rmdirSync(testDir);
    const details = {
      canCreateDirectory: true,
      canWriteFile: true,
      canReadFile: content === "diagnostic-test",
      testDir,
      cwd: process.cwd(),
      cwdContents: fs11.readdirSync(process.cwd()).slice(0, 20)
      // First 20 files
    };
    return {
      name: "file-system-access",
      passed: true,
      details,
      durationMs: Date.now() - start
    };
  } catch (err) {
    const error = err instanceof Error ? err.message : String(err);
    return {
      name: "file-system-access",
      passed: false,
      details: {
        error,
        testDir,
        cwd: process.cwd()
      },
      error: `File system access failed: ${error}`,
      durationMs: Date.now() - start
    };
  } finally {
    if (created) {
      try {
        // Safety net for the failure path; a no-op once the directory is gone.
        fs11.rmSync(testDir, { recursive: true, force: true });
      } catch {
        // Best-effort cleanup: the test outcome has already been decided.
      }
    }
  }
}
7258
/**
 * Publishes one diagnostic result as a DIAGNOSTIC live-trace event.
 *
 * The event is always written to stdout with a `TRACE_EVENT:` prefix. When
 * `tracePushUrl` is given it is also POSTed there as a one-element JSON array,
 * fire-and-forget; a failed push is logged and otherwise ignored.
 *
 * @param {string} evalRunId2 - Eval run the event belongs to.
 * @param {object} result - A single test result or the full report; the event
 *   is marked complete when it has a `summary` property.
 * @param {string} [tracePushUrl] - Endpoint to POST the event to.
 * @param {string} [routeHeader] - Sent as the `x-wix-route` header when set.
 * @param {string} [authToken] - Sent as a Bearer token when set.
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  const traceEvent = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "System",
    stepNumber: 0,
    type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
    // Preview is capped at 2000 characters
    outputPreview: JSON.stringify(result, null, 2).slice(0, 2e3),
    timestamp: (/* @__PURE__ */ new Date()).toISOString(),
    // Only the full report carries a summary
    isComplete: "summary" in result
  };
  console.log(`TRACE_EVENT:${JSON.stringify(traceEvent)}`);
  if (!tracePushUrl) {
    return;
  }
  const requestHeaders = {
    "Content-Type": "application/json",
    ...(routeHeader ? { "x-wix-route": routeHeader } : {}),
    ...(authToken ? { "Authorization": `Bearer ${authToken}` } : {})
  };
  const payload = JSON.stringify([traceEvent]);
  fetch(tracePushUrl, {
    method: "POST",
    headers: requestHeaders,
    body: payload
  }).catch((err) => {
    console.error("[DIAGNOSTICS] Failed to push trace event:", err);
  });
}
7293
/**
 * Runs every environment diagnostic in sequence, emits a trace event per
 * result plus one for the final report, and logs progress to stderr.
 *
 * Each test is isolated: if a test function throws, a failed result is
 * recorded for it and the remaining tests still run, so the report always
 * covers the whole suite.
 *
 * @param {object} config - Passed to the network test; `tracePushUrl`,
 *   `routeHeader` and `authToken` are forwarded to the trace emitter.
 * @param {string} evalRunId2 - Eval run the trace events are attached to.
 * @returns {Promise<{startedAt: string, completedAt: string, totalDurationMs: number, tests: object[], summary: {total: number, passed: number, failed: number}}>}
 */
async function runDiagnostics(config, evalRunId2) {
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
  const startTime = Date.now();
  console.error("[DIAGNOSTICS] Starting environment diagnostics...");
  const tests = [];
  const emit = (payload) => emitDiagnosticTraceEvent(
    evalRunId2,
    payload,
    config.tracePushUrl,
    config.routeHeader,
    config.authToken
  );
  const runTest = async (name, testFn) => {
    const testStart = Date.now();
    let result;
    try {
      result = await testFn();
    } catch (err) {
      // A crashing diagnostic is itself a finding; record it and carry on
      // rather than losing the rest of the suite.
      const message = err instanceof Error ? err.message : String(err);
      result = {
        name,
        passed: false,
        details: { error: message },
        error: `Diagnostic test threw: ${message}`,
        durationMs: Date.now() - testStart
      };
    }
    tests.push(result);
    emit(result);
    const status = result.passed ? "\u2713" : "\u2717";
    console.error(
      `[DIAGNOSTICS] ${status} ${result.name} (${result.durationMs}ms)`
    );
    if (!result.passed && result.error) {
      console.error(`[DIAGNOSTICS] Error: ${result.error}`);
    }
  };
  // Names mirror the `name` each test reports; they are only used when a
  // test throws before it can produce its own result.
  const suite = [
    ["environment-variables", testEnvironmentVariables],
    ["node-environment", testNodeEnvironment],
    ["npm-global-directory", testNpmGlobalDirectory],
    ["claude-cli-binary", testClaudeBinary],
    ["child-process-spawning", testChildProcess],
    ["network-connectivity", () => testNetworkConnectivity(config)],
    ["sdk-import", testSdkImport],
    ["file-system-access", testFileSystemAccess]
  ];
  for (const [name, testFn] of suite) {
    await runTest(name, testFn);
  }
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
  const totalDurationMs = Date.now() - startTime;
  const passedCount = tests.filter((t) => t.passed).length;
  const report = {
    startedAt,
    completedAt,
    totalDurationMs,
    tests,
    summary: {
      total: tests.length,
      passed: passedCount,
      failed: tests.length - passedCount
    }
  };
  emit(report);
  console.error(
    `[DIAGNOSTICS] Completed: ${report.summary.passed}/${report.summary.total} tests passed (${totalDurationMs}ms)`
  );
  return report;
}
7349
+
6889
7350
  // src/index.ts
6890
7351
  console.error(
6891
7352
  "[EVALUATOR-BOOT] Module loading started",
@@ -6953,6 +7414,33 @@ async function runEvaluation(projectId2, evalRunId2) {
6953
7414
  `[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
6954
7415
  );
6955
7416
  }
7417
+ state.currentPhase = ExecutionPhase.DIAGNOSTICS;
7418
+ state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
7419
+ console.error("[DEBUG-H1.5] Running environment diagnostics...");
7420
+ try {
7421
+ const diagnosticReport = await runDiagnostics(config, evalRunId2);
7422
+ console.error(
7423
+ "[DEBUG-H1.5] Diagnostics completed",
7424
+ JSON.stringify({
7425
+ passed: diagnosticReport.summary.passed,
7426
+ failed: diagnosticReport.summary.failed,
7427
+ total: diagnosticReport.summary.total,
7428
+ durationMs: diagnosticReport.totalDurationMs
7429
+ })
7430
+ );
7431
+ const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
7432
+ if (failedTests.length > 0) {
7433
+ console.error(
7434
+ "[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
7435
+ failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
7436
+ );
7437
+ }
7438
+ } catch (diagErr) {
7439
+ console.error(
7440
+ "[DEBUG-H1.5] Diagnostics failed (non-fatal):",
7441
+ diagErr instanceof Error ? diagErr.message : String(diagErr)
7442
+ );
7443
+ }
6956
7444
  state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
6957
7445
  state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
6958
7446
  console.error(
@@ -7072,7 +7560,7 @@ async function runEvaluation(projectId2, evalRunId2) {
7072
7560
  };
7073
7561
  try {
7074
7562
  await api.updateEvalRun(projectId2, evalRunId2, {
7075
- status: import_evalforge_types4.EvalStatus.COMPLETED,
7563
+ status: import_evalforge_types5.EvalStatus.COMPLETED,
7076
7564
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
7077
7565
  });
7078
7566
  } catch (updateErr) {
@@ -7113,7 +7601,7 @@ runEvaluation(projectId, evalRunId).then(() => {
7113
7601
  authToken: config.authToken
7114
7602
  });
7115
7603
  await api.updateEvalRun(projectId, evalRunId, {
7116
- status: import_evalforge_types4.EvalStatus.FAILED,
7604
+ status: import_evalforge_types5.EvalStatus.FAILED,
7117
7605
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
7118
7606
  jobError,
7119
7607
  jobStatus: "FAILED"
@@ -7136,7 +7624,7 @@ runEvaluation(projectId, evalRunId).then(() => {
7136
7624
  authToken
7137
7625
  });
7138
7626
  await api.updateEvalRun(projectId, evalRunId, {
7139
- status: import_evalforge_types4.EvalStatus.FAILED,
7627
+ status: import_evalforge_types5.EvalStatus.FAILED,
7140
7628
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
7141
7629
  jobError: `Config load failed, then: ${jobError}`,
7142
7630
  jobStatus: "FAILED"