@wix/evalforge-evaluator 0.60.0 → 0.61.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -137,6 +137,16 @@ function createApiClient(serverUrl, options = "") {
137
137
  getSkill(projectId2, id) {
138
138
  return fetchJson(`/projects/${projectId2}/skills/${id}`);
139
139
  },
140
+ getSkillVersion(projectId2, skillId, versionId) {
141
+ return fetchJson(
142
+ `/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
143
+ );
144
+ },
145
+ getLatestSkillVersion(projectId2, skillId) {
146
+ return fetchJson(
147
+ `/projects/${projectId2}/skills/${skillId}/versions/latest`
148
+ );
149
+ },
140
150
  getAgent(projectId2, id) {
141
151
  return fetchJson(`/projects/${projectId2}/agents/${id}`);
142
152
  },
@@ -274,6 +284,29 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
274
284
  skillsGroup.skillIds.map((id) => api.getSkill(projectId2, id))
275
285
  );
276
286
  }
287
+ if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
288
+ skills = await Promise.all(
289
+ skills.map(async (skill) => {
290
+ const versionId = evalRun.skillVersions?.[skill.id];
291
+ if (versionId) {
292
+ const version = await api.getSkillVersion(
293
+ projectId2,
294
+ skill.id,
295
+ versionId
296
+ );
297
+ return { ...skill, latestVersion: version };
298
+ }
299
+ return skill;
300
+ })
301
+ );
302
+ }
303
+ skills = skills.map((skill) => {
304
+ const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
305
+ if (!hasPinnedVersion && skill.source) {
306
+ return { ...skill, latestVersion: void 0 };
307
+ }
308
+ return skill;
309
+ });
277
310
  }
278
311
  let mcps = [];
279
312
  if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
@@ -1088,10 +1121,10 @@ var Minipass = class extends EventEmitter {
1088
1121
  * Return a void Promise that resolves once the stream ends.
1089
1122
  */
1090
1123
  async promise() {
1091
- return new Promise((resolve, reject) => {
1124
+ return new Promise((resolve2, reject) => {
1092
1125
  this.on(DESTROYED, () => reject(new Error("stream destroyed")));
1093
1126
  this.on("error", (er) => reject(er));
1094
- this.on("end", () => resolve());
1127
+ this.on("end", () => resolve2());
1095
1128
  });
1096
1129
  }
1097
1130
  /**
@@ -1115,7 +1148,7 @@ var Minipass = class extends EventEmitter {
1115
1148
  return Promise.resolve({ done: false, value: res });
1116
1149
  if (this[EOF])
1117
1150
  return stop();
1118
- let resolve;
1151
+ let resolve2;
1119
1152
  let reject;
1120
1153
  const onerr = (er) => {
1121
1154
  this.off("data", ondata);
@@ -1129,19 +1162,19 @@ var Minipass = class extends EventEmitter {
1129
1162
  this.off("end", onend);
1130
1163
  this.off(DESTROYED, ondestroy);
1131
1164
  this.pause();
1132
- resolve({ value, done: !!this[EOF] });
1165
+ resolve2({ value, done: !!this[EOF] });
1133
1166
  };
1134
1167
  const onend = () => {
1135
1168
  this.off("error", onerr);
1136
1169
  this.off("data", ondata);
1137
1170
  this.off(DESTROYED, ondestroy);
1138
1171
  stop();
1139
- resolve({ done: true, value: void 0 });
1172
+ resolve2({ done: true, value: void 0 });
1140
1173
  };
1141
1174
  const ondestroy = () => onerr(new Error("stream destroyed"));
1142
1175
  return new Promise((res2, rej) => {
1143
1176
  reject = rej;
1144
- resolve = res2;
1177
+ resolve2 = res2;
1145
1178
  this.once(DESTROYED, ondestroy);
1146
1179
  this.once("error", onerr);
1147
1180
  this.once("end", onend);
@@ -3269,9 +3302,9 @@ var listFile = (opt, _files) => {
3269
3302
  const parse4 = new Parser(opt);
3270
3303
  const readSize = opt.maxReadSize || 16 * 1024 * 1024;
3271
3304
  const file = opt.file;
3272
- const p = new Promise((resolve, reject) => {
3305
+ const p = new Promise((resolve2, reject) => {
3273
3306
  parse4.on("error", reject);
3274
- parse4.on("end", resolve);
3307
+ parse4.on("end", resolve2);
3275
3308
  fs2.stat(file, (er, stat) => {
3276
3309
  if (er) {
3277
3310
  reject(er);
@@ -5912,9 +5945,9 @@ var extractFile = (opt, _) => {
5912
5945
  const u = new Unpack(opt);
5913
5946
  const readSize = opt.maxReadSize || 16 * 1024 * 1024;
5914
5947
  const file = opt.file;
5915
- const p = new Promise((resolve, reject) => {
5948
+ const p = new Promise((resolve2, reject) => {
5916
5949
  u.on("error", reject);
5917
- u.on("close", resolve);
5950
+ u.on("close", resolve2);
5918
5951
  fs9.stat(file, (er, stat) => {
5919
5952
  if (er) {
5920
5953
  reject(er);
@@ -6048,7 +6081,7 @@ var replaceAsync = (opt, files) => {
6048
6081
  };
6049
6082
  fs10.read(fd, headBuf, 0, 512, position, onread);
6050
6083
  };
6051
- const promise = new Promise((resolve, reject) => {
6084
+ const promise = new Promise((resolve2, reject) => {
6052
6085
  p.on("error", reject);
6053
6086
  let flag = "r+";
6054
6087
  const onopen = (er, fd) => {
@@ -6073,7 +6106,7 @@ var replaceAsync = (opt, files) => {
6073
6106
  });
6074
6107
  p.pipe(stream);
6075
6108
  stream.on("error", reject);
6076
- stream.on("close", resolve);
6109
+ stream.on("close", resolve2);
6077
6110
  addFilesAsync2(p, files);
6078
6111
  });
6079
6112
  });
@@ -6344,13 +6377,68 @@ import {
6344
6377
  TRACE_EVENT_PREFIX,
6345
6378
  AVAILABLE_MODELS
6346
6379
  } from "@wix/evalforge-types";
6380
+
6381
+ // src/run-scenario/agents/claude-code/write-skills.ts
6382
+ import { mkdir as mkdir2, writeFile } from "fs/promises";
6383
+ import { dirname as dirname2, join as join2, resolve, sep } from "path";
6384
+ import { fetchSkillFolderRaw } from "@wix/evalforge-github-client";
6385
/**
 * Write every entry of `skills` by delegating to `writeSkillToFilesystem`.
 *
 * All per-skill writes are started together; the returned promise settles
 * when `Promise.all` over them settles, so the first rejection rejects it.
 *
 * @param cwd - Forwarded unchanged to `writeSkillToFilesystem`.
 * @param skills - Array of skill records, one delegated call per element.
 * @param [fetchFn=fetchSkillFolderRaw] - Forwarded unchanged to each call.
 * @returns {Promise<void>}
 */
async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchSkillFolderRaw) {
  const pendingWrites = skills.map((entry) => {
    return writeSkillToFilesystem(cwd, entry, fetchFn);
  });
  await Promise.all(pendingWrites);
}
6390
/**
 * Materialise a single skill under `<cwd>/.claude/skills/<skill.name>`.
 *
 * Content is chosen in this order:
 *   1. `skill.latestVersion.files`, when that list is non-empty, is written
 *      as-is (logged as "from snapshot");
 *   2. otherwise, when `skill.source` is set, `fetchFn(skill.source, ...)` is
 *      awaited and its result is written (logged as "from GitHub (live)");
 *   3. otherwise the call fails because there is nothing to write.
 *
 * @param {string} cwd - Directory that receives the `.claude/skills` tree.
 * @param {object} skill - Skill record; reads `name`, `latestVersion?.files`
 *   and `source`.
 * @param {Function} [fetchFn=fetchSkillFolderRaw] - Called as
 *   `fetchFn(skill.source, { userAgent })`. Its resolved value is passed to
 *   `writeSkillFiles`, so it presumably resolves to an array of
 *   `{ path, content }` entries - confirm against the GitHub client.
 * @returns {Promise<void>}
 * @throws {Error} When the skill name does not resolve to a directory strictly
 *   inside `.claude/skills`, when fetching or writing the fetched files fails
 *   (original error attached as `cause`), or when the skill has neither files
 *   nor a source.
 */
async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchSkillFolderRaw) {
  const skillName = skill.name;
  const skillsRoot = resolve(cwd, ".claude", "skills");
  const skillDir = resolve(skillsRoot, skillName);
  // The name comes from the skill record, so it gets the same containment rule
  // that writeSkillFiles applies to file paths: a name like "../../x" (or "."
  // / "", which resolve to the skills root itself) must not decide where we
  // create directories.
  if (!skillDir.startsWith(skillsRoot + sep)) {
    throw new Error(
      `Invalid skill name "${skillName}": resolves outside the skills directory`
    );
  }
  await mkdir2(skillDir, { recursive: true });
  const version = skill.latestVersion;
  if (version?.files && version.files.length > 0) {
    await writeSkillFiles(skillDir, version.files);
    console.log(
      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
    );
  } else if (skill.source) {
    try {
      const files = await fetchFn(skill.source, {
        userAgent: "EvalForge-Evaluator"
      });
      await writeSkillFiles(skillDir, files);
      console.log(
        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      console.error(
        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
      );
      // Keep the original error reachable for callers that want the stack.
      throw new Error(
        `Failed to write skill ${skillName} to filesystem: ${message}`,
        { cause: error }
      );
    }
  } else {
    throw new Error(`Skill ${skillName} has no files and no source configured`);
  }
}
6422
/**
 * Write a list of `{ path, content }` entries below `skillDir`.
 *
 * Every path is resolved against `skillDir` and validated BEFORE anything is
 * written, so a bad entry anywhere in the list leaves the disk untouched.
 * Parent directories are created as needed; files are written one after
 * another as UTF-8.
 *
 * @param {string} skillDir - Directory all files must end up inside.
 * @param {Iterable<{path: string, content: string}>} files - Entries to write;
 *   `path` is interpreted relative to `skillDir`.
 * @returns {Promise<void>}
 * @throws {Error} When a path resolves outside `skillDir` (path traversal) or
 *   resolves to `skillDir` itself and therefore names no file.
 */
async function writeSkillFiles(skillDir, files) {
  const resolvedBase = resolve(skillDir);
  const insidePrefix = resolvedBase + sep;
  const targets = [];
  for (const file of files) {
    const filePath = resolve(skillDir, file.path);
    if (filePath === resolvedBase) {
      // "", "." or "a/.." point at the directory itself; writing to it would
      // only fail later with an opaque EISDIR, so reject it up front.
      throw new Error(
        `Invalid skill file path: "${file.path}" does not name a file inside the skill directory`
      );
    }
    if (!filePath.startsWith(insidePrefix)) {
      throw new Error(
        `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
      );
    }
    targets.push({ filePath, content: file.content });
  }
  // Only reached when every path passed validation.
  for (const { filePath, content } of targets) {
    await mkdir2(dirname2(filePath), { recursive: true });
    await writeFile(filePath, content, "utf-8");
  }
}
6435
+
6436
+ // src/run-scenario/agents/claude-code/execute.ts
6347
6437
  import { randomUUID } from "crypto";
6348
- import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
6349
- import { join as join4 } from "path";
6350
6438
 
6351
6439
  // src/run-scenario/agents/claude-code/write-mcp.ts
6352
- import { writeFile } from "fs/promises";
6353
- import { join as join2 } from "path";
6440
+ import { writeFile as writeFile2 } from "fs/promises";
6441
+ import { join as join3 } from "path";
6354
6442
  import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
6355
6443
  async function writeMcpToFilesystem(cwd, mcps) {
6356
6444
  if (mcps.length === 0) return;
@@ -6371,14 +6459,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
6371
6459
  null,
6372
6460
  2
6373
6461
  );
6374
- const filePath = join2(cwd, ".mcp.json");
6375
- await writeFile(filePath, content, "utf8");
6462
+ const filePath = join3(cwd, ".mcp.json");
6463
+ await writeFile2(filePath, content, "utf8");
6376
6464
  console.log(`[MCP] Written to ${filePath}`);
6377
6465
  }
6378
6466
 
6379
6467
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6380
- import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
6381
- import { join as join3 } from "path";
6468
+ import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
6469
+ import { join as join4 } from "path";
6382
6470
  var AGENTS_DIR = ".claude/agents";
6383
6471
  function toAgentFilename(name2, index, nameCount) {
6384
6472
  const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -6388,13 +6476,13 @@ function toAgentFilename(name2, index, nameCount) {
6388
6476
  }
6389
6477
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
6390
6478
  if (subAgents.length === 0) return;
6391
- const agentsDir = join3(cwd, AGENTS_DIR);
6392
- await mkdir2(agentsDir, { recursive: true });
6479
+ const agentsDir = join4(cwd, AGENTS_DIR);
6480
+ await mkdir3(agentsDir, { recursive: true });
6393
6481
  const nameCount = /* @__PURE__ */ new Map();
6394
6482
  for (const [i, agent] of subAgents.entries()) {
6395
6483
  const filename = toAgentFilename(agent.name, i, nameCount);
6396
- const filePath = join3(agentsDir, `${filename}.md`);
6397
- await writeFile2(filePath, agent.subAgentMd, "utf8");
6484
+ const filePath = join4(agentsDir, `${filename}.md`);
6485
+ await writeFile3(filePath, agent.subAgentMd, "utf8");
6398
6486
  }
6399
6487
  console.log(`[SubAgents] Written to ${agentsDir}`);
6400
6488
  }
@@ -7173,16 +7261,6 @@ Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
7173
7261
  llmTrace
7174
7262
  };
7175
7263
  }
7176
- async function writeSkillsToFilesystem(cwd, skills) {
7177
- for (const skill of skills) {
7178
- const skillName = skill.name;
7179
- const skillDir = join4(cwd, ".claude", "skills", skillName);
7180
- await mkdir3(skillDir, { recursive: true });
7181
- const skillPath = join4(skillDir, "SKILL.md");
7182
- await writeFile3(skillPath, skill.skillMd, "utf-8");
7183
- console.log(`[Skill] Written to ${skillPath}`);
7184
- }
7185
- }
7186
7264
  function buildSdkEnvironment(options) {
7187
7265
  const env = { ...process.env };
7188
7266
  const placeholderApiKey = "sk-ant-api03-placeholder-auth-handled-by-gateway-000000000000000000000000";
@@ -8180,7 +8258,7 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
8180
8258
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
8181
8259
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
8182
8260
  };
8183
- const result = await adapter.execute(executionContext);
8261
+ const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
8184
8262
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
8185
8263
  const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
8186
8264
  const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
@@ -8192,13 +8270,13 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
8192
8270
  scenarioId: scenario.id,
8193
8271
  scenarioName: scenario.name,
8194
8272
  modelConfig: agent?.modelConfig,
8195
- duration: result.durationMs,
8196
- outputText: result.outputText,
8273
+ duration: durationMs,
8274
+ outputText,
8197
8275
  fileDiffs: fileDiffs.length > 0 ? fileDiffs : void 0,
8198
8276
  templateFiles: templateFiles && templateFiles.length > 0 ? templateFiles : void 0,
8199
8277
  startedAt,
8200
8278
  completedAt,
8201
- llmTrace: result.llmTrace
8279
+ llmTrace
8202
8280
  };
8203
8281
  }
8204
8282