@wix/evalforge-evaluator 0.14.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -24,7 +24,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
24
24
  ));
25
25
 
26
26
  // src/index.ts
27
- var import_evalforge_types4 = require("@wix/evalforge-types");
27
+ var import_evalforge_types5 = require("@wix/evalforge-types");
28
28
 
29
29
  // src/config.ts
30
30
  function loadConfig() {
@@ -87,8 +87,8 @@ function createApiClient(serverUrl, options = "") {
87
87
  }
88
88
  return headers;
89
89
  }
90
- async function fetchJson(path9) {
91
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
90
+ async function fetchJson(path10) {
91
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
92
92
  console.error(`[API] GET ${url}`);
93
93
  const headers = buildHeaders();
94
94
  const response = await fetch(url, {
@@ -102,8 +102,8 @@ function createApiClient(serverUrl, options = "") {
102
102
  }
103
103
  return response.json();
104
104
  }
105
- async function postJson(path9, body) {
106
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
105
+ async function postJson(path10, body) {
106
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
107
107
  console.error(`[API] POST ${url}`);
108
108
  const response = await fetch(url, {
109
109
  method: "POST",
@@ -117,8 +117,8 @@ function createApiClient(serverUrl, options = "") {
117
117
  );
118
118
  }
119
119
  }
120
- async function deleteRequest(path9) {
121
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
120
+ async function deleteRequest(path10) {
121
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
122
122
  console.error(`[API] DELETE ${url}`);
123
123
  const headers = buildHeaders();
124
124
  const response = await fetch(url, {
@@ -132,8 +132,8 @@ function createApiClient(serverUrl, options = "") {
132
132
  );
133
133
  }
134
134
  }
135
- async function putJson(path9, body) {
136
- const url = `${serverUrl}${apiPrefix}${pathPrefix}${path9}`;
135
+ async function putJson(path10, body) {
136
+ const url = `${serverUrl}${apiPrefix}${pathPrefix}${path10}`;
137
137
  console.error(`[API] PUT ${url}`);
138
138
  const response = await fetch(url, {
139
139
  method: "PUT",
@@ -1157,17 +1157,17 @@ var ReadStream = class extends Minipass {
1157
1157
  [_size];
1158
1158
  [_remain];
1159
1159
  [_autoClose];
1160
- constructor(path9, opt) {
1160
+ constructor(path10, opt) {
1161
1161
  opt = opt || {};
1162
1162
  super(opt);
1163
1163
  this.readable = true;
1164
1164
  this.writable = false;
1165
- if (typeof path9 !== "string") {
1165
+ if (typeof path10 !== "string") {
1166
1166
  throw new TypeError("path must be a string");
1167
1167
  }
1168
1168
  this[_errored] = false;
1169
1169
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1170
- this[_path] = path9;
1170
+ this[_path] = path10;
1171
1171
  this[_readSize] = opt.readSize || 16 * 1024 * 1024;
1172
1172
  this[_reading] = false;
1173
1173
  this[_size] = typeof opt.size === "number" ? opt.size : Infinity;
@@ -1330,10 +1330,10 @@ var WriteStream = class extends import_events.default {
1330
1330
  [_flags];
1331
1331
  [_finished] = false;
1332
1332
  [_pos];
1333
- constructor(path9, opt) {
1333
+ constructor(path10, opt) {
1334
1334
  opt = opt || {};
1335
1335
  super(opt);
1336
- this[_path] = path9;
1336
+ this[_path] = path10;
1337
1337
  this[_fd] = typeof opt.fd === "number" ? opt.fd : void 0;
1338
1338
  this[_mode] = opt.mode === void 0 ? 438 : opt.mode;
1339
1339
  this[_pos] = typeof opt.start === "number" ? opt.start : void 0;
@@ -2226,10 +2226,10 @@ var Header = class {
2226
2226
  }
2227
2227
  const prefixSize = this.ctime || this.atime ? 130 : 155;
2228
2228
  const split = splitPrefix(this.path || "", prefixSize);
2229
- const path9 = split[0];
2229
+ const path10 = split[0];
2230
2230
  const prefix = split[1];
2231
2231
  this.needPax = !!split[2];
2232
- this.needPax = encString(buf, off, 100, path9) || this.needPax;
2232
+ this.needPax = encString(buf, off, 100, path10) || this.needPax;
2233
2233
  this.needPax = encNumber(buf, off + 100, 8, this.mode) || this.needPax;
2234
2234
  this.needPax = encNumber(buf, off + 108, 8, this.uid) || this.needPax;
2235
2235
  this.needPax = encNumber(buf, off + 116, 8, this.gid) || this.needPax;
@@ -3205,16 +3205,16 @@ var modeFix = (mode, isDir, portable) => {
3205
3205
  // ../../node_modules/tar/dist/esm/strip-absolute-path.js
3206
3206
  var import_node_path3 = require("node:path");
3207
3207
  var { isAbsolute, parse: parse3 } = import_node_path3.win32;
3208
- var stripAbsolutePath = (path9) => {
3208
+ var stripAbsolutePath = (path10) => {
3209
3209
  let r = "";
3210
- let parsed = parse3(path9);
3211
- while (isAbsolute(path9) || parsed.root) {
3212
- const root = path9.charAt(0) === "/" && path9.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3213
- path9 = path9.slice(root.length);
3210
+ let parsed = parse3(path10);
3211
+ while (isAbsolute(path10) || parsed.root) {
3212
+ const root = path10.charAt(0) === "/" && path10.slice(0, 4) !== "//?/" ? "/" : parsed.root;
3213
+ path10 = path10.slice(root.length);
3214
3214
  r += root;
3215
- parsed = parse3(path9);
3215
+ parsed = parse3(path10);
3216
3216
  }
3217
- return [r, path9];
3217
+ return [r, path10];
3218
3218
  };
3219
3219
 
3220
3220
  // ../../node_modules/tar/dist/esm/winchars.js
@@ -3226,12 +3226,12 @@ var encode2 = (s) => raw.reduce((s2, c) => s2.split(c).join(toWin.get(c)), s);
3226
3226
  var decode = (s) => win.reduce((s2, c) => s2.split(c).join(toRaw.get(c)), s);
3227
3227
 
3228
3228
  // ../../node_modules/tar/dist/esm/write-entry.js
3229
- var prefixPath = (path9, prefix) => {
3229
+ var prefixPath = (path10, prefix) => {
3230
3230
  if (!prefix) {
3231
- return normalizeWindowsPath(path9);
3231
+ return normalizeWindowsPath(path10);
3232
3232
  }
3233
- path9 = normalizeWindowsPath(path9).replace(/^\.(\/|$)/, "");
3234
- return stripTrailingSlashes(prefix) + "/" + path9;
3233
+ path10 = normalizeWindowsPath(path10).replace(/^\.(\/|$)/, "");
3234
+ return stripTrailingSlashes(prefix) + "/" + path10;
3235
3235
  };
3236
3236
  var maxReadSize = 16 * 1024 * 1024;
3237
3237
  var PROCESS = /* @__PURE__ */ Symbol("process");
@@ -3376,8 +3376,8 @@ var WriteEntry = class extends Minipass {
3376
3376
  [MODE](mode) {
3377
3377
  return modeFix(mode, this.type === "Directory", this.portable);
3378
3378
  }
3379
- [PREFIX](path9) {
3380
- return prefixPath(path9, this.prefix);
3379
+ [PREFIX](path10) {
3380
+ return prefixPath(path10, this.prefix);
3381
3381
  }
3382
3382
  [HEADER]() {
3383
3383
  if (!this.stat) {
@@ -3758,8 +3758,8 @@ var WriteEntryTar = class extends Minipass {
3758
3758
  super.write(b);
3759
3759
  readEntry.pipe(this);
3760
3760
  }
3761
- [PREFIX](path9) {
3762
- return prefixPath(path9, this.prefix);
3761
+ [PREFIX](path10) {
3762
+ return prefixPath(path10, this.prefix);
3763
3763
  }
3764
3764
  [MODE](mode) {
3765
3765
  return modeFix(mode, this.type === "Directory", this.portable);
@@ -4183,8 +4183,8 @@ var PackJob = class {
4183
4183
  pending = false;
4184
4184
  ignore = false;
4185
4185
  piped = false;
4186
- constructor(path9, absolute) {
4187
- this.path = path9 || "./";
4186
+ constructor(path10, absolute) {
4187
+ this.path = path10 || "./";
4188
4188
  this.absolute = absolute;
4189
4189
  }
4190
4190
  };
@@ -4312,21 +4312,21 @@ var Pack = class extends Minipass {
4312
4312
  [WRITE](chunk) {
4313
4313
  return super.write(chunk);
4314
4314
  }
4315
- add(path9) {
4316
- this.write(path9);
4315
+ add(path10) {
4316
+ this.write(path10);
4317
4317
  return this;
4318
4318
  }
4319
- end(path9, encoding, cb) {
4320
- if (typeof path9 === "function") {
4321
- cb = path9;
4322
- path9 = void 0;
4319
+ end(path10, encoding, cb) {
4320
+ if (typeof path10 === "function") {
4321
+ cb = path10;
4322
+ path10 = void 0;
4323
4323
  }
4324
4324
  if (typeof encoding === "function") {
4325
4325
  cb = encoding;
4326
4326
  encoding = void 0;
4327
4327
  }
4328
- if (path9) {
4329
- this.add(path9);
4328
+ if (path10) {
4329
+ this.add(path10);
4330
4330
  }
4331
4331
  this[ENDED2] = true;
4332
4332
  this[PROCESS2]();
@@ -4334,14 +4334,14 @@ var Pack = class extends Minipass {
4334
4334
  cb();
4335
4335
  return this;
4336
4336
  }
4337
- write(path9) {
4337
+ write(path10) {
4338
4338
  if (this[ENDED2]) {
4339
4339
  throw new Error("write after end");
4340
4340
  }
4341
- if (path9 instanceof ReadEntry) {
4342
- this[ADDTARENTRY](path9);
4341
+ if (path10 instanceof ReadEntry) {
4342
+ this[ADDTARENTRY](path10);
4343
4343
  } else {
4344
- this[ADDFSENTRY](path9);
4344
+ this[ADDFSENTRY](path10);
4345
4345
  }
4346
4346
  return this.flowing;
4347
4347
  }
@@ -4684,9 +4684,9 @@ var getWriteFlag = !fMapEnabled ? () => "w" : (size) => size < fMapLimit ? fMapF
4684
4684
  // ../../node_modules/chownr/dist/esm/index.js
4685
4685
  var import_node_fs2 = __toESM(require("node:fs"), 1);
4686
4686
  var import_node_path5 = __toESM(require("node:path"), 1);
4687
- var lchownSync = (path9, uid, gid) => {
4687
+ var lchownSync = (path10, uid, gid) => {
4688
4688
  try {
4689
- return import_node_fs2.default.lchownSync(path9, uid, gid);
4689
+ return import_node_fs2.default.lchownSync(path10, uid, gid);
4690
4690
  } catch (er) {
4691
4691
  if (er?.code !== "ENOENT")
4692
4692
  throw er;
@@ -4769,9 +4769,9 @@ var CwdError = class extends Error {
4769
4769
  path;
4770
4770
  code;
4771
4771
  syscall = "chdir";
4772
- constructor(path9, code2) {
4773
- super(`${code2}: Cannot cd into '${path9}'`);
4774
- this.path = path9;
4772
+ constructor(path10, code2) {
4773
+ super(`${code2}: Cannot cd into '${path10}'`);
4774
+ this.path = path10;
4775
4775
  this.code = code2;
4776
4776
  }
4777
4777
  get name() {
@@ -4785,10 +4785,10 @@ var SymlinkError = class extends Error {
4785
4785
  symlink;
4786
4786
  syscall = "symlink";
4787
4787
  code = "TAR_SYMLINK_ERROR";
4788
- constructor(symlink, path9) {
4788
+ constructor(symlink, path10) {
4789
4789
  super("TAR_SYMLINK_ERROR: Cannot extract through symbolic link");
4790
4790
  this.symlink = symlink;
4791
- this.path = path9;
4791
+ this.path = path10;
4792
4792
  }
4793
4793
  get name() {
4794
4794
  return "SymlinkError";
@@ -4970,13 +4970,13 @@ var normalizeUnicode = (s) => {
4970
4970
  // ../../node_modules/tar/dist/esm/path-reservations.js
4971
4971
  var platform3 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
4972
4972
  var isWindows2 = platform3 === "win32";
4973
- var getDirs = (path9) => {
4974
- const dirs = path9.split("/").slice(0, -1).reduce((set, path10) => {
4973
+ var getDirs = (path10) => {
4974
+ const dirs = path10.split("/").slice(0, -1).reduce((set, path11) => {
4975
4975
  const s = set[set.length - 1];
4976
4976
  if (s !== void 0) {
4977
- path10 = (0, import_node_path7.join)(s, path10);
4977
+ path11 = (0, import_node_path7.join)(s, path11);
4978
4978
  }
4979
- set.push(path10 || "/");
4979
+ set.push(path11 || "/");
4980
4980
  return set;
4981
4981
  }, []);
4982
4982
  return dirs;
@@ -4994,7 +4994,7 @@ var PathReservations = class {
4994
4994
  paths = isWindows2 ? ["win32 parallelization disabled"] : paths.map((p) => {
4995
4995
  return stripTrailingSlashes((0, import_node_path7.join)(normalizeUnicode(p))).toLowerCase();
4996
4996
  });
4997
- const dirs = new Set(paths.map((path9) => getDirs(path9)).reduce((a, b) => a.concat(b)));
4997
+ const dirs = new Set(paths.map((path10) => getDirs(path10)).reduce((a, b) => a.concat(b)));
4998
4998
  this.#reservations.set(fn, { dirs, paths });
4999
4999
  for (const p of paths) {
5000
5000
  const q = this.#queues.get(p);
@@ -5027,8 +5027,8 @@ var PathReservations = class {
5027
5027
  throw new Error("function does not have any path reservations");
5028
5028
  }
5029
5029
  return {
5030
- paths: res.paths.map((path9) => this.#queues.get(path9)),
5031
- dirs: [...res.dirs].map((path9) => this.#queues.get(path9))
5030
+ paths: res.paths.map((path10) => this.#queues.get(path10)),
5031
+ dirs: [...res.dirs].map((path10) => this.#queues.get(path10))
5032
5032
  };
5033
5033
  }
5034
5034
  // check if fn is first in line for all its paths, and is
@@ -5056,14 +5056,14 @@ var PathReservations = class {
5056
5056
  }
5057
5057
  const { paths, dirs } = res;
5058
5058
  const next = /* @__PURE__ */ new Set();
5059
- for (const path9 of paths) {
5060
- const q = this.#queues.get(path9);
5059
+ for (const path10 of paths) {
5060
+ const q = this.#queues.get(path10);
5061
5061
  if (!q || q?.[0] !== fn) {
5062
5062
  continue;
5063
5063
  }
5064
5064
  const q0 = q[1];
5065
5065
  if (!q0) {
5066
- this.#queues.delete(path9);
5066
+ this.#queues.delete(path10);
5067
5067
  continue;
5068
5068
  }
5069
5069
  q.shift();
@@ -5128,24 +5128,24 @@ var CHECKED_CWD = /* @__PURE__ */ Symbol("checkedCwd");
5128
5128
  var platform4 = process.env.TESTING_TAR_FAKE_PLATFORM || process.platform;
5129
5129
  var isWindows3 = platform4 === "win32";
5130
5130
  var DEFAULT_MAX_DEPTH = 1024;
5131
- var unlinkFile = (path9, cb) => {
5131
+ var unlinkFile = (path10, cb) => {
5132
5132
  if (!isWindows3) {
5133
- return import_node_fs4.default.unlink(path9, cb);
5133
+ return import_node_fs4.default.unlink(path10, cb);
5134
5134
  }
5135
- const name2 = path9 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5136
- import_node_fs4.default.rename(path9, name2, (er) => {
5135
+ const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5136
+ import_node_fs4.default.rename(path10, name2, (er) => {
5137
5137
  if (er) {
5138
5138
  return cb(er);
5139
5139
  }
5140
5140
  import_node_fs4.default.unlink(name2, cb);
5141
5141
  });
5142
5142
  };
5143
- var unlinkFileSync = (path9) => {
5143
+ var unlinkFileSync = (path10) => {
5144
5144
  if (!isWindows3) {
5145
- return import_node_fs4.default.unlinkSync(path9);
5145
+ return import_node_fs4.default.unlinkSync(path10);
5146
5146
  }
5147
- const name2 = path9 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5148
- import_node_fs4.default.renameSync(path9, name2);
5147
+ const name2 = path10 + ".DELETE." + (0, import_node_crypto.randomBytes)(16).toString("hex");
5148
+ import_node_fs4.default.renameSync(path10, name2);
5149
5149
  import_node_fs4.default.unlinkSync(name2);
5150
5150
  };
5151
5151
  var uint32 = (a, b, c) => a !== void 0 && a === a >>> 0 ? a : b !== void 0 && b === b >>> 0 ? b : c;
@@ -5243,24 +5243,24 @@ var Unpack = class extends Parser {
5243
5243
  // return false if we need to skip this file
5244
5244
  // return true if the field was successfully sanitized
5245
5245
  [STRIPABSOLUTEPATH](entry, field) {
5246
- const path9 = entry[field];
5247
- if (!path9 || this.preservePaths)
5246
+ const path10 = entry[field];
5247
+ if (!path10 || this.preservePaths)
5248
5248
  return true;
5249
- const parts = path9.split("/");
5249
+ const parts = path10.split("/");
5250
5250
  if (parts.includes("..") || /* c8 ignore next */
5251
5251
  isWindows3 && /^[a-z]:\.\.$/i.test(parts[0] ?? "")) {
5252
5252
  this.warn("TAR_ENTRY_ERROR", `${field} contains '..'`, {
5253
5253
  entry,
5254
- [field]: path9
5254
+ [field]: path10
5255
5255
  });
5256
5256
  return false;
5257
5257
  }
5258
- const [root, stripped] = stripAbsolutePath(path9);
5258
+ const [root, stripped] = stripAbsolutePath(path10);
5259
5259
  if (root) {
5260
5260
  entry[field] = String(stripped);
5261
5261
  this.warn("TAR_ENTRY_INFO", `stripping ${root} from absolute ${field}`, {
5262
5262
  entry,
5263
- [field]: path9
5263
+ [field]: path10
5264
5264
  });
5265
5265
  }
5266
5266
  return true;
@@ -6027,9 +6027,9 @@ var mtimeFilter = (opt) => {
6027
6027
  if (!opt.mtimeCache) {
6028
6028
  opt.mtimeCache = /* @__PURE__ */ new Map();
6029
6029
  }
6030
- opt.filter = filter ? (path9, stat) => filter(path9, stat) && !/* c8 ignore start */
6031
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path9, stat) => !/* c8 ignore start */
6032
- ((opt.mtimeCache?.get(path9) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6030
+ opt.filter = filter ? (path10, stat) => filter(path10, stat) && !/* c8 ignore start */
6031
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0)) : (path10, stat) => !/* c8 ignore start */
6032
+ ((opt.mtimeCache?.get(path10) ?? stat.mtime ?? 0) > (stat.mtime ?? 0));
6033
6033
  };
6034
6034
 
6035
6035
  // src/run-scenario/environment.ts
@@ -6324,6 +6324,37 @@ async function executeWithClaudeCode(skill, scenario, options) {
6324
6324
  queryOptions.mcpServers ? Object.keys(queryOptions.mcpServers) : "none"
6325
6325
  );
6326
6326
  console.log("[SDK-DEBUG] Calling SDK query()...");
6327
+ if (traceContext) {
6328
+ const preExecEvent = {
6329
+ evalRunId: traceContext.evalRunId,
6330
+ scenarioId: traceContext.scenarioId,
6331
+ scenarioName: traceContext.scenarioName,
6332
+ targetId: traceContext.targetId,
6333
+ targetName: traceContext.targetName,
6334
+ stepNumber: 0,
6335
+ type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
6336
+ outputPreview: JSON.stringify({
6337
+ event: "pre-sdk-execution",
6338
+ model: queryOptions.model,
6339
+ maxTurns: queryOptions.maxTurns,
6340
+ sdkEnv: {
6341
+ ANTHROPIC_BASE_URL: sdkEnv.ANTHROPIC_BASE_URL,
6342
+ hasANTHROPIC_API_KEY: !!sdkEnv.ANTHROPIC_API_KEY,
6343
+ hasANTHROPIC_AUTH_TOKEN: !!sdkEnv.ANTHROPIC_AUTH_TOKEN,
6344
+ hasANTHROPIC_CUSTOM_HEADERS: !!sdkEnv.ANTHROPIC_CUSTOM_HEADERS
6345
+ },
6346
+ timestamp: (/* @__PURE__ */ new Date()).toISOString()
6347
+ }),
6348
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6349
+ isComplete: false
6350
+ };
6351
+ emitTraceEvent(
6352
+ preExecEvent,
6353
+ traceContext.tracePushUrl,
6354
+ traceContext.routeHeader,
6355
+ traceContext.authToken
6356
+ );
6357
+ }
6327
6358
  try {
6328
6359
  for await (const message of query({
6329
6360
  prompt: scenario.triggerPrompt,
@@ -6504,6 +6535,38 @@ async function executeWithClaudeCode(skill, scenario, options) {
6504
6535
  sdkError: Object.keys(sdkSpecificInfo).length > 0 ? sdkSpecificInfo : void 0,
6505
6536
  cause: causeInfo
6506
6537
  };
6538
+ if (traceContext) {
6539
+ const errorTraceEvent = {
6540
+ evalRunId: traceContext.evalRunId,
6541
+ scenarioId: traceContext.scenarioId,
6542
+ scenarioName: traceContext.scenarioName,
6543
+ targetId: traceContext.targetId,
6544
+ targetName: traceContext.targetName,
6545
+ stepNumber: traceStepNumber + 1,
6546
+ type: import_evalforge_types.LiveTraceEventType.DIAGNOSTIC,
6547
+ outputPreview: JSON.stringify(
6548
+ {
6549
+ event: "sdk-execution-failed",
6550
+ error: errorMessage,
6551
+ errorName,
6552
+ messageCount,
6553
+ sdkEnv: sdkEnvDebug,
6554
+ sdkError: sdkSpecificInfo,
6555
+ cause: causeInfo
6556
+ },
6557
+ null,
6558
+ 2
6559
+ ).slice(0, 2e3),
6560
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6561
+ isComplete: true
6562
+ };
6563
+ emitTraceEvent(
6564
+ errorTraceEvent,
6565
+ traceContext.tracePushUrl,
6566
+ traceContext.routeHeader,
6567
+ traceContext.authToken
6568
+ );
6569
+ }
6507
6570
  throw new Error(
6508
6571
  `Claude SDK execution failed after ${messageCount} messages: ${errorMessage}
6509
6572
  Details: ${JSON.stringify(errorDetails, null, 2)}` + (errorStack ? `
@@ -6870,6 +6933,8 @@ ${stackLines.join("\n")}`);
6870
6933
  return parts.join(" ");
6871
6934
  }
6872
6935
  var ExecutionPhase = {
6936
+ /** Environment diagnostics phase (runs before execution) */
6937
+ DIAGNOSTICS: "diagnostics",
6873
6938
  CONFIG: "config-loading",
6874
6939
  API_CLIENT: "api-client-creation",
6875
6940
  FETCH_EVAL_RUN: "fetch-eval-run",
@@ -6886,6 +6951,585 @@ var ExecutionPhase = {
6886
6951
  UPDATE_STATUS: "update-status"
6887
6952
  };
6888
6953
 
6954
+ // src/diagnostics.ts
6955
+ var import_child_process = require("child_process");
6956
+ var fs11 = __toESM(require("fs"));
6957
+ var path9 = __toESM(require("path"));
6958
+ var import_evalforge_types4 = require("@wix/evalforge-types");
6959
+ async function execCommand(command, timeoutMs = 1e4) {
6960
+ return new Promise((resolve) => {
6961
+ try {
6962
+ const proc2 = (0, import_child_process.spawn)("sh", ["-c", command], {
6963
+ timeout: timeoutMs
6964
+ });
6965
+ let stdout = "";
6966
+ let stderr = "";
6967
+ proc2.stdout.on("data", (data) => {
6968
+ stdout += data.toString();
6969
+ });
6970
+ proc2.stderr.on("data", (data) => {
6971
+ stderr += data.toString();
6972
+ });
6973
+ proc2.on("close", (code2) => {
6974
+ resolve({
6975
+ stdout: stdout.trim(),
6976
+ stderr: stderr.trim(),
6977
+ exitCode: code2 ?? -1
6978
+ });
6979
+ });
6980
+ proc2.on("error", (err) => {
6981
+ resolve({
6982
+ stdout: "",
6983
+ stderr: err.message,
6984
+ exitCode: -1
6985
+ });
6986
+ });
6987
+ } catch (err) {
6988
+ resolve({
6989
+ stdout: "",
6990
+ stderr: err instanceof Error ? err.message : String(err),
6991
+ exitCode: -99
6992
+ });
6993
+ }
6994
+ });
6995
+ }
6996
+ async function safeRunTest(testName, testFn) {
6997
+ const start = Date.now();
6998
+ try {
6999
+ return await testFn();
7000
+ } catch (err) {
7001
+ const error = err instanceof Error ? err.message : String(err);
7002
+ return {
7003
+ name: testName,
7004
+ passed: false,
7005
+ details: {
7006
+ testCrashed: true,
7007
+ error,
7008
+ stack: err instanceof Error ? err.stack : void 0
7009
+ },
7010
+ error: `Test crashed: ${error}`,
7011
+ durationMs: Date.now() - start
7012
+ };
7013
+ }
7014
+ }
7015
+ async function testClaudeBinaryDiscovery() {
7016
+ const start = Date.now();
7017
+ const details = {};
7018
+ const npmRootResult = await execCommand("npm root -g");
7019
+ const npmBinResult = await execCommand("npm bin -g");
7020
+ const npmRoot = npmRootResult.stdout;
7021
+ const npmBin = npmBinResult.stdout;
7022
+ details.npmRoot = npmRoot;
7023
+ details.npmBin = npmBin;
7024
+ const evaluatorBinPath = path9.join(
7025
+ npmRoot,
7026
+ "@wix",
7027
+ "evalforge-evaluator",
7028
+ "node_modules",
7029
+ ".bin"
7030
+ );
7031
+ details.evaluatorBinPath = evaluatorBinPath;
7032
+ const lsBinResult = await execCommand(`ls -la "${evaluatorBinPath}" 2>&1`);
7033
+ details.evaluatorBinContents = lsBinResult.stdout || lsBinResult.stderr;
7034
+ details.lsBinExitCode = lsBinResult.exitCode;
7035
+ const claudePath = path9.join(evaluatorBinPath, "claude");
7036
+ let claudeExists = false;
7037
+ try {
7038
+ claudeExists = fs11.existsSync(claudePath);
7039
+ } catch {
7040
+ claudeExists = false;
7041
+ }
7042
+ details.claudePath = claudePath;
7043
+ details.claudeExists = claudeExists;
7044
+ if (claudeExists) {
7045
+ const readlinkResult = await execCommand(
7046
+ `readlink -f "${claudePath}" 2>&1`
7047
+ );
7048
+ details.claudeRealPath = readlinkResult.stdout || readlinkResult.stderr;
7049
+ const statResult = await execCommand(`stat "${claudePath}" 2>&1`);
7050
+ details.claudeStat = statResult.stdout || statResult.stderr;
7051
+ const lsClaudeResult = await execCommand(`ls -la "${claudePath}" 2>&1`);
7052
+ details.claudeFileInfo = lsClaudeResult.stdout;
7053
+ }
7054
+ const whichResult = await execCommand("which claude 2>&1");
7055
+ details.whichClaude = whichResult.stdout || "(not in PATH)";
7056
+ details.whichExitCode = whichResult.exitCode;
7057
+ const currentPath = process.env.PATH || "";
7058
+ details.currentPATH = currentPath.split(":");
7059
+ details.pathLength = currentPath.split(":").length;
7060
+ const passed = claudeExists || whichResult.exitCode === 0;
7061
+ return {
7062
+ name: "claude-binary-discovery",
7063
+ passed,
7064
+ details,
7065
+ error: passed ? void 0 : `Claude binary not found at ${claudePath}`,
7066
+ durationMs: Date.now() - start
7067
+ };
7068
+ }
7069
+ async function testClaudeExecution() {
7070
+ const start = Date.now();
7071
+ const details = {};
7072
+ const npmRootResult = await execCommand("npm root -g");
7073
+ const npmRoot = npmRootResult.stdout;
7074
+ const claudePath = path9.join(
7075
+ npmRoot,
7076
+ "@wix",
7077
+ "evalforge-evaluator",
7078
+ "node_modules",
7079
+ ".bin",
7080
+ "claude"
7081
+ );
7082
+ details.claudePath = claudePath;
7083
+ const versionResult = await execCommand(
7084
+ `"${claudePath}" --version 2>&1`,
7085
+ 15e3
7086
+ );
7087
+ details.versionCommand = {
7088
+ command: `"${claudePath}" --version`,
7089
+ stdout: versionResult.stdout,
7090
+ stderr: versionResult.stderr,
7091
+ exitCode: versionResult.exitCode
7092
+ };
7093
+ const helpResult = await execCommand(
7094
+ `"${claudePath}" --help 2>&1 | head -50`,
7095
+ 15e3
7096
+ );
7097
+ details.helpCommand = {
7098
+ command: `"${claudePath}" --help | head -50`,
7099
+ stdout: helpResult.stdout.slice(0, 1500),
7100
+ stderr: helpResult.stderr.slice(0, 500),
7101
+ exitCode: helpResult.exitCode
7102
+ };
7103
+ const whichClaudeResult = await execCommand("which claude 2>&1");
7104
+ if (whichClaudeResult.exitCode === 0) {
7105
+ const pathVersionResult = await execCommand("claude --version 2>&1", 15e3);
7106
+ details.pathVersionCommand = {
7107
+ whichClaude: whichClaudeResult.stdout,
7108
+ stdout: pathVersionResult.stdout,
7109
+ stderr: pathVersionResult.stderr,
7110
+ exitCode: pathVersionResult.exitCode
7111
+ };
7112
+ }
7113
+ const passed = versionResult.exitCode === 0 || helpResult.exitCode === 0;
7114
+ return {
7115
+ name: "claude-cli-execution",
7116
+ passed,
7117
+ details,
7118
+ error: passed ? void 0 : `Claude CLI failed. Version exit: ${versionResult.exitCode}, Help exit: ${helpResult.exitCode}`,
7119
+ durationMs: Date.now() - start
7120
+ };
7121
+ }
7122
+ async function testEnvironmentDump() {
7123
+ const start = Date.now();
7124
+ const details = {};
7125
+ const importantVars = [
7126
+ "PATH",
7127
+ "HOME",
7128
+ "USER",
7129
+ "SHELL",
7130
+ "NODE_ENV",
7131
+ "PWD",
7132
+ "EVAL_SERVER_URL",
7133
+ "AI_GATEWAY_URL",
7134
+ "TRACE_PUSH_URL",
7135
+ "EVAL_AUTH_TOKEN",
7136
+ "ANTHROPIC_API_KEY",
7137
+ "ANTHROPIC_AUTH_TOKEN",
7138
+ "ANTHROPIC_BASE_URL",
7139
+ "ANTHROPIC_CUSTOM_HEADERS"
7140
+ ];
7141
+ const capturedVars = {};
7142
+ for (const key of importantVars) {
7143
+ const value = process.env[key];
7144
+ if (value) {
7145
+ if (key.includes("SECRET") || key.includes("TOKEN") || key.includes("API_KEY")) {
7146
+ capturedVars[key] = `[REDACTED - ${value.length} chars, starts: ${value.slice(0, 10)}...]`;
7147
+ } else if (key === "ANTHROPIC_CUSTOM_HEADERS") {
7148
+ capturedVars[key] = value.split("\n").map((h) => {
7149
+ const [name2, val] = h.split(":");
7150
+ return `${name2}: ${val ? "[" + val.length + " chars]" : "(empty)"}`;
7151
+ }).join(" | ");
7152
+ } else if (key === "PATH") {
7153
+ const parts = value.split(":");
7154
+ capturedVars[key] = `[${parts.length} entries] First: ${parts.slice(0, 3).join(":")} ... Last: ${parts.slice(-2).join(":")}`;
7155
+ } else {
7156
+ capturedVars[key] = value;
7157
+ }
7158
+ } else {
7159
+ capturedVars[key] = "(NOT SET)";
7160
+ }
7161
+ }
7162
+ details.importantVars = capturedVars;
7163
+ const envResult = await execCommand("env | sort | head -50");
7164
+ details.envCommandOutput = envResult.stdout;
7165
+ details.envExitCode = envResult.exitCode;
7166
+ details.nodeInfo = {
7167
+ version: process.version,
7168
+ platform: process.platform,
7169
+ arch: process.arch,
7170
+ pid: process.pid,
7171
+ cwd: process.cwd(),
7172
+ execPath: process.execPath
7173
+ };
7174
+ return {
7175
+ name: "environment-dump",
7176
+ passed: true,
7177
+ // Info test, always passes
7178
+ details,
7179
+ durationMs: Date.now() - start
7180
+ };
7181
+ }
7182
+ async function testFileSystemStructure() {
7183
+ const start = Date.now();
7184
+ const details = {};
7185
+ const npmRootResult = await execCommand("npm root -g");
7186
+ const npmRoot = npmRootResult.stdout;
7187
+ const lsCwdResult = await execCommand("ls -la");
7188
+ details.currentDirectory = {
7189
+ path: process.cwd(),
7190
+ contents: lsCwdResult.stdout
7191
+ };
7192
+ const lsNpmRootResult = await execCommand(
7193
+ `ls -la "${npmRoot}" 2>&1 | head -30`
7194
+ );
7195
+ details.npmGlobalRoot = {
7196
+ path: npmRoot,
7197
+ contents: lsNpmRootResult.stdout
7198
+ };
7199
+ const wixPath = path9.join(npmRoot, "@wix");
7200
+ const lsWixResult = await execCommand(`ls -la "${wixPath}" 2>&1`);
7201
+ details.wixPackages = {
7202
+ path: wixPath,
7203
+ contents: lsWixResult.stdout
7204
+ };
7205
+ const evaluatorPath = path9.join(npmRoot, "@wix", "evalforge-evaluator");
7206
+ const lsEvaluatorResult = await execCommand(`ls -la "${evaluatorPath}" 2>&1`);
7207
+ details.evaluatorDir = {
7208
+ path: evaluatorPath,
7209
+ contents: lsEvaluatorResult.stdout
7210
+ };
7211
+ const nodeModulesPath = path9.join(evaluatorPath, "node_modules");
7212
+ const lsNodeModulesResult = await execCommand(
7213
+ `ls "${nodeModulesPath}" 2>&1 | head -30`
7214
+ );
7215
+ details.evaluatorNodeModules = {
7216
+ path: nodeModulesPath,
7217
+ contents: lsNodeModulesResult.stdout
7218
+ };
7219
+ const anthropicPath = path9.join(nodeModulesPath, "@anthropic-ai");
7220
+ const lsAnthropicResult = await execCommand(`ls -la "${anthropicPath}" 2>&1`);
7221
+ details.anthropicPackages = {
7222
+ path: anthropicPath,
7223
+ contents: lsAnthropicResult.stdout
7224
+ };
7225
+ const binPath = path9.join(nodeModulesPath, ".bin");
7226
+ const lsBinResult = await execCommand(`ls -la "${binPath}" 2>&1`);
7227
+ details.binDirectory = {
7228
+ path: binPath,
7229
+ contents: lsBinResult.stdout
7230
+ };
7231
+ return {
7232
+ name: "file-system-structure",
7233
+ passed: true,
7234
+ // Info test, always passes
7235
+ details,
7236
+ durationMs: Date.now() - start
7237
+ };
7238
+ }
7239
/**
 * Diagnostic: probes DNS, ICMP, the AI gateway, the backend server and a
 * public HTTPS endpoint, recording each command's output and exit code.
 *
 * The test passes when basic networking works (ping OR the HTTPS baseline
 * succeeded) AND curl could reach the AI gateway.
 *
 * @param {object} config - Evaluator config; reads `aiGatewayUrl` (optional,
 *   falls back to the default gateway proxy URL) and `serverUrl`.
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testNetworkConnectivity(config) {
  const start = Date.now();
  const details = {};

  // Quote a value for safe use as a single shell word (URLs come from config).
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;

  // Output is trimmed in JS rather than with `| head` / `| tail`: a shell
  // pipeline reports the exit status of its LAST command, so piping would
  // hide the exit code of nslookup/curl and make failures look like success.
  const splitLines = (text) => text.replace(/\n+$/, "").split("\n");
  const firstLines = (text, count) =>
    typeof text === "string" ? splitLines(text).slice(0, count).join("\n") : text;
  const lastLines = (text, count) =>
    typeof text === "string" ? splitLines(text).slice(-count).join("\n") : text;

  const dnsResult = await execCommand("nslookup manage.wix.com 2>&1");
  details.dnsLookup = {
    command: "nslookup manage.wix.com",
    output: firstLines(dnsResult.stdout || dnsResult.stderr, 10),
    exitCode: dnsResult.exitCode
  };

  const pingResult = await execCommand("ping -c 2 manage.wix.com 2>&1");
  details.pingTest = {
    command: "ping -c 2 manage.wix.com",
    output: pingResult.stdout || pingResult.stderr,
    exitCode: pingResult.exitCode
  };

  // `||` (not `??`) on purpose: an empty string must also fall back.
  const gatewayUrl = config.aiGatewayUrl || "https://manage.wix.com/_api/eval-wix-ai-gateway-proxy";
  const curlGatewayResult = await execCommand(
    `curl -v -s --connect-timeout 5 --max-time 10 ${shellQuote(gatewayUrl)} 2>&1`
  );
  details.aiGatewayTest = {
    url: gatewayUrl,
    output: lastLines(curlGatewayResult.stdout, 30),
    exitCode: curlGatewayResult.exitCode
  };

  const serverUrl = config.serverUrl;
  const healthUrl = `${serverUrl}/health`;
  const curlServerResult = await execCommand(
    `curl -v -s --connect-timeout 5 --max-time 10 ${shellQuote(healthUrl)} 2>&1`
  );
  details.backendServerTest = {
    url: healthUrl,
    output: lastLines(curlServerResult.stdout, 30),
    exitCode: curlServerResult.exitCode
  };

  const httpsResult = await execCommand(
    'curl -s --connect-timeout 5 -o /dev/null -w "HTTP_CODE:%{http_code} TIME:%{time_total}s" https://www.google.com 2>&1'
  );
  details.httpsBaseline = {
    command: "curl https://www.google.com",
    output: httpsResult.stdout,
    exitCode: httpsResult.exitCode
  };

  // ICMP may be blocked in some environments, so HTTPS is an accepted fallback.
  const networkWorks = pingResult.exitCode === 0 || httpsResult.exitCode === 0;
  // curl exits 0 once it got any HTTP response, i.e. the gateway is reachable.
  const gatewayReachable = curlGatewayResult.exitCode === 0;
  const passed = networkWorks && gatewayReachable;
  return {
    name: "network-connectivity",
    passed,
    details,
    error: passed ? void 0 : `Network: ${networkWorks ? "OK" : "FAILED"}, Gateway: ${gatewayReachable ? "OK" : "FAILED"}`,
    durationMs: Date.now() - start
  };
}
7292
/**
 * Diagnostic: checks that child processes can be spawned by running a few
 * shell and node commands and recording their output and exit codes.
 *
 * Only the echo probe decides the verdict; the remaining probes are recorded
 * in `details` for inspection.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testChildProcessSpawning() {
  const startedAt = Date.now();
  const expectedEcho = "DIAGNOSTIC_TEST_SUCCESS_12345";
  const echoCommand = 'echo "DIAGNOSTIC_TEST_SUCCESS_12345"';
  const shellCommand = 'echo "PID: $$"; pwd; whoami; date; uname -a';
  const stderrCommand = `node -e "console.error('stderr test')"`;
  const exitCommand = "exit 42";

  // Probes run strictly one after another, in this order.
  const echoRun = await execCommand(echoCommand);
  const nodeRun = await execCommand(
    'node -e "console.log(JSON.stringify({pid: process.pid, version: process.version, platform: process.platform}))"'
  );
  const shellRun = await execCommand(shellCommand);
  const stderrRun = await execCommand(stderrCommand);
  const exitRun = await execCommand(exitCommand);

  const echoMatches = echoRun.stdout === expectedEcho;
  const succeeded = echoRun.exitCode === 0 && echoMatches;

  const report = {
    echoTest: {
      command: echoCommand,
      output: echoRun.stdout,
      exitCode: echoRun.exitCode,
      passed: echoMatches
    },
    nodeTest: {
      command: 'node -e "console.log(JSON.stringify({...}))"',
      output: nodeRun.stdout,
      exitCode: nodeRun.exitCode
    },
    shellTest: {
      command: shellCommand,
      output: shellRun.stdout,
      exitCode: shellRun.exitCode
    },
    stderrTest: {
      command: stderrCommand,
      stderr: stderrRun.stderr,
      exitCode: stderrRun.exitCode
    },
    exitCodeTest: {
      command: exitCommand,
      exitCode: exitRun.exitCode,
      passed: exitRun.exitCode === 42
    }
  };

  let failureMessage;
  if (!succeeded) {
    failureMessage = "Echo test failed";
  }
  return {
    name: "child-process-spawning",
    passed: succeeded,
    details: report,
    error: failureMessage,
    durationMs: Date.now() - startedAt
  };
}
7341
/**
 * Diagnostic: attempts a dynamic import of "@anthropic-ai/claude-agent-sdk"
 * and reports which names it exports and whether `query` is a function.
 *
 * A successful import always passes; any thrown error yields a failed result
 * carrying the message and the first five stack lines.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testSdkImport() {
  const startedAt = Date.now();
  const elapsed = () => Date.now() - startedAt;
  try {
    const sdkModule = await import("@anthropic-ai/claude-agent-sdk");
    const queryType = typeof sdkModule.query;
    const queryIsFunction = queryType === "function";
    const info = {
      sdkImported: true,
      exportedKeys: Object.keys(sdkModule),
      hasQuery: queryIsFunction
    };
    if (queryIsFunction) {
      info.queryFunctionExists = true;
      info.queryFunctionType = queryType;
    }
    return {
      name: "sdk-import",
      passed: true,
      details: info,
      durationMs: elapsed()
    };
  } catch (err) {
    const isErrorObject = err instanceof Error;
    const message = isErrorObject ? err.message : String(err);
    // Keep only the top of the stack to bound the size of the report.
    const stackHead = isErrorObject ? err.stack?.split("\n").slice(0, 5) : void 0;
    return {
      name: "sdk-import",
      passed: false,
      details: {
        sdkImported: false,
        error: message,
        stack: stackHead
      },
      error: `Failed to import SDK: ${message}`,
      durationMs: elapsed()
    };
  }
}
7374
/**
 * Diagnostic: verifies that a directory and file can be created under /tmp,
 * written, read back with identical content, listed, and removed again.
 *
 * Cleanup always runs, including after a failed step, so a failing run does
 * not leave the scratch directory behind. The test fails when any step
 * throws, when the content read back differs from what was written, or when
 * cleanup fails.
 *
 * @returns {Promise<{name: string, passed: boolean, details: object, error: (string|undefined), durationMs: number}>}
 */
async function testFileSystemWrite() {
  const start = Date.now();
  const details = {};
  const testDir = "/tmp/evalforge-diagnostics-test";
  const testFile = path9.join(testDir, "test-file.txt");
  const testContent = `Diagnostic test at ${new Date().toISOString()}`;
  const describeError = (err) => err instanceof Error ? err.message : String(err);

  // Message of the first failure, or undefined while everything succeeded.
  let failure;
  try {
    // `recursive: true` makes this a no-op when the directory already exists.
    fs11.mkdirSync(testDir, { recursive: true });
    details.directoryCreated = true;
    fs11.writeFileSync(testFile, testContent);
    details.fileWritten = true;
    const readContent = fs11.readFileSync(testFile, "utf8");
    details.fileRead = true;
    details.contentMatches = readContent === testContent;
    if (!details.contentMatches) {
      failure = "content read back does not match content written";
    }
    const lsResult = await execCommand(`ls -la "${testDir}"`);
    details.directoryContents = lsResult.stdout;
  } catch (err) {
    failure = describeError(err);
  }

  try {
    // Recursive + force: also removes leftovers from an earlier aborted run
    // and does not throw when the directory was never created.
    fs11.rmSync(testDir, { recursive: true, force: true });
    details.cleanedUp = true;
  } catch (cleanupErr) {
    details.cleanedUp = false;
    details.cleanupError = describeError(cleanupErr);
    if (failure === undefined) {
      failure = details.cleanupError;
    }
  }

  if (failure === undefined) {
    return {
      name: "file-system-write",
      passed: true,
      details,
      durationMs: Date.now() - start
    };
  }
  return {
    name: "file-system-write",
    passed: false,
    details: {
      ...details,
      error: failure,
      testDir,
      testFile
    },
    error: `File system write failed: ${failure}`,
    durationMs: Date.now() - start
  };
}
7417
/**
 * Emits one diagnostic trace event: always prints it to stdout as a
 * `TRACE_EVENT:` line and, when a push URL is given, also POSTs it to the
 * backend without awaiting the request (fire-and-forget).
 *
 * For a single test result, string values inside `details` longer than 500
 * characters are truncated; a summary report (anything with a `summary` key)
 * is passed through unchanged. The serialized preview is capped at 3000
 * characters.
 *
 * @param {string} evalRunId2 - Eval run the event belongs to.
 * @param {object} result - A single test result or the final summary report.
 * @param {string} [tracePushUrl] - Backend endpoint; push is skipped when falsy.
 * @param {string} [routeHeader] - Value for the `x-wix-route` header, if any.
 * @param {string} [authToken] - Bearer token for the `Authorization` header, if any.
 * @returns {void}
 */
function emitDiagnosticTraceEvent(evalRunId2, result, tracePushUrl, routeHeader, authToken) {
  const isSummary = "summary" in result;
  const clipLongStrings = (_key, value) => typeof value === "string" && value.length > 500 ? value.slice(0, 500) + "... [truncated]" : value;

  let truncatedResult = result;
  if (!isSummary) {
    // JSON.stringify yields undefined for a missing/unserializable value and
    // JSON.parse(undefined) throws, so only round-trip when there is output.
    const serializedDetails = JSON.stringify(result.details, clipLongStrings);
    truncatedResult = {
      ...result,
      details: serializedDetails === void 0 ? void 0 : JSON.parse(serializedDetails)
    };
  }

  const event = {
    evalRunId: evalRunId2,
    scenarioId: "diagnostics",
    scenarioName: "Environment Diagnostics",
    targetId: "system",
    targetName: "name" in truncatedResult ? truncatedResult.name : "Summary",
    stepNumber: 0,
    type: import_evalforge_types4.LiveTraceEventType.DIAGNOSTIC,
    outputPreview: JSON.stringify(truncatedResult, null, 2).slice(0, 3e3),
    timestamp: new Date().toISOString(),
    isComplete: isSummary
  };
  console.log(`TRACE_EVENT:${JSON.stringify(event)}`);

  if (!tracePushUrl) {
    return;
  }
  const headers = {
    "Content-Type": "application/json"
  };
  if (routeHeader) {
    headers["x-wix-route"] = routeHeader;
  }
  if (authToken) {
    headers["Authorization"] = `Bearer ${authToken}`;
  }
  // Deliberately not awaited: diagnostics must not block on the backend.
  fetch(tracePushUrl, {
    method: "POST",
    headers,
    body: JSON.stringify([event])
  }).then((response) => {
    // fetch only rejects on network errors; surface HTTP-level failures too.
    if (!response.ok) {
      console.error(
        `[DIAGNOSTICS] Trace event push rejected by backend: HTTP ${response.status}`
      );
    }
  }).catch((err) => {
    console.error(
      "[DIAGNOSTICS] Failed to push trace event to backend:",
      err
    );
  });
}
7462
+ async function runDiagnostics(config, evalRunId2) {
7463
+ const startedAt = (/* @__PURE__ */ new Date()).toISOString();
7464
+ const startTime = Date.now();
7465
+ console.error("");
7466
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7467
+ console.error("\u2551 EVALFORGE ENVIRONMENT DIAGNOSTICS \u2551");
7468
+ console.error("\u2551 (Results sent to backend via trace events) \u2551");
7469
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7470
+ console.error("");
7471
+ const tests = [];
7472
+ const runTest = async (testName, testFn) => {
7473
+ console.error(`[DIAG] Running: ${testName}...`);
7474
+ const result = await safeRunTest(testName, testFn);
7475
+ tests.push(result);
7476
+ const status = result.passed ? "\u2713 PASS" : "\u2717 FAIL";
7477
+ console.error(`[DIAG] ${status}: ${result.name} (${result.durationMs}ms)`);
7478
+ console.error("[DIAG] Details:");
7479
+ console.error(JSON.stringify(result.details, null, 2));
7480
+ console.error("");
7481
+ if (!result.passed && result.error) {
7482
+ console.error(`[DIAG] ERROR: ${result.error}`);
7483
+ }
7484
+ emitDiagnosticTraceEvent(
7485
+ evalRunId2,
7486
+ result,
7487
+ config.tracePushUrl,
7488
+ config.routeHeader,
7489
+ config.authToken
7490
+ );
7491
+ };
7492
+ await runTest("claude-binary-discovery", testClaudeBinaryDiscovery);
7493
+ await runTest("claude-cli-execution", testClaudeExecution);
7494
+ await runTest("environment-dump", testEnvironmentDump);
7495
+ await runTest("file-system-structure", testFileSystemStructure);
7496
+ await runTest("network-connectivity", () => testNetworkConnectivity(config));
7497
+ await runTest("child-process-spawning", testChildProcessSpawning);
7498
+ await runTest("sdk-import", testSdkImport);
7499
+ await runTest("file-system-write", testFileSystemWrite);
7500
+ const completedAt = (/* @__PURE__ */ new Date()).toISOString();
7501
+ const totalDurationMs = Date.now() - startTime;
7502
+ const report = {
7503
+ startedAt,
7504
+ completedAt,
7505
+ totalDurationMs,
7506
+ tests,
7507
+ summary: {
7508
+ total: tests.length,
7509
+ passed: tests.filter((t) => t.passed).length,
7510
+ failed: tests.filter((t) => !t.passed).length
7511
+ }
7512
+ };
7513
+ emitDiagnosticTraceEvent(
7514
+ evalRunId2,
7515
+ report,
7516
+ config.tracePushUrl,
7517
+ config.routeHeader,
7518
+ config.authToken
7519
+ );
7520
+ console.error("");
7521
+ console.error("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
7522
+ console.error(
7523
+ `\u2551 DIAGNOSTICS COMPLETE: ${report.summary.passed}/${report.summary.total} passed, ${report.summary.failed} failed`.padEnd(
7524
+ 60
7525
+ ) + "\u2551"
7526
+ );
7527
+ console.error(`\u2551 Total time: ${totalDurationMs}ms`.padEnd(60) + "\u2551");
7528
+ console.error("\u255A\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u255D");
7529
+ console.error("");
7530
+ return report;
7531
+ }
7532
+
6889
7533
  // src/index.ts
6890
7534
  console.error(
6891
7535
  "[EVALUATOR-BOOT] Module loading started",
@@ -6953,6 +7597,33 @@ async function runEvaluation(projectId2, evalRunId2) {
6953
7597
  `[${ExecutionPhase.API_CLIENT}] Failed to create API client: ${apiErr instanceof Error ? apiErr.message : String(apiErr)}`
6954
7598
  );
6955
7599
  }
7600
+ state.currentPhase = ExecutionPhase.DIAGNOSTICS;
7601
+ state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, phase: "diagnostics" };
7602
+ console.error("[DEBUG-H1.5] Running environment diagnostics...");
7603
+ try {
7604
+ const diagnosticReport = await runDiagnostics(config, evalRunId2);
7605
+ console.error(
7606
+ "[DEBUG-H1.5] Diagnostics completed",
7607
+ JSON.stringify({
7608
+ passed: diagnosticReport.summary.passed,
7609
+ failed: diagnosticReport.summary.failed,
7610
+ total: diagnosticReport.summary.total,
7611
+ durationMs: diagnosticReport.totalDurationMs
7612
+ })
7613
+ );
7614
+ const failedTests = diagnosticReport.tests.filter((t) => !t.passed);
7615
+ if (failedTests.length > 0) {
7616
+ console.error(
7617
+ "[DEBUG-H1.5] FAILED DIAGNOSTIC TESTS:",
7618
+ failedTests.map((t) => `${t.name}: ${t.error}`).join("\n")
7619
+ );
7620
+ }
7621
+ } catch (diagErr) {
7622
+ console.error(
7623
+ "[DEBUG-H1.5] Diagnostics failed (non-fatal):",
7624
+ diagErr instanceof Error ? diagErr.message : String(diagErr)
7625
+ );
7626
+ }
6956
7627
  state.currentPhase = ExecutionPhase.FETCH_EVAL_RUN;
6957
7628
  state.currentContext = { projectId: projectId2, evalRunId: evalRunId2, serverUrl: config.serverUrl };
6958
7629
  console.error(
@@ -7072,7 +7743,7 @@ async function runEvaluation(projectId2, evalRunId2) {
7072
7743
  };
7073
7744
  try {
7074
7745
  await api.updateEvalRun(projectId2, evalRunId2, {
7075
- status: import_evalforge_types4.EvalStatus.COMPLETED,
7746
+ status: import_evalforge_types5.EvalStatus.COMPLETED,
7076
7747
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
7077
7748
  });
7078
7749
  } catch (updateErr) {
@@ -7113,7 +7784,7 @@ runEvaluation(projectId, evalRunId).then(() => {
7113
7784
  authToken: config.authToken
7114
7785
  });
7115
7786
  await api.updateEvalRun(projectId, evalRunId, {
7116
- status: import_evalforge_types4.EvalStatus.FAILED,
7787
+ status: import_evalforge_types5.EvalStatus.FAILED,
7117
7788
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
7118
7789
  jobError,
7119
7790
  jobStatus: "FAILED"
@@ -7136,7 +7807,7 @@ runEvaluation(projectId, evalRunId).then(() => {
7136
7807
  authToken
7137
7808
  });
7138
7809
  await api.updateEvalRun(projectId, evalRunId, {
7139
- status: import_evalforge_types4.EvalStatus.FAILED,
7810
+ status: import_evalforge_types5.EvalStatus.FAILED,
7140
7811
  completedAt: (/* @__PURE__ */ new Date()).toISOString(),
7141
7812
  jobError: `Config load failed, then: ${jobError}`,
7142
7813
  jobStatus: "FAILED"