@wix/evalforge-evaluator 0.60.0 → 0.61.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -160,6 +160,16 @@ function createApiClient(serverUrl, options = "") {
160
160
  getSkill(projectId2, id) {
161
161
  return fetchJson(`/projects/${projectId2}/skills/${id}`);
162
162
  },
163
+ getSkillVersion(projectId2, skillId, versionId) {
164
+ return fetchJson(
165
+ `/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
166
+ );
167
+ },
168
+ getLatestSkillVersion(projectId2, skillId) {
169
+ return fetchJson(
170
+ `/projects/${projectId2}/skills/${skillId}/versions/latest`
171
+ );
172
+ },
163
173
  getAgent(projectId2, id) {
164
174
  return fetchJson(`/projects/${projectId2}/agents/${id}`);
165
175
  },
@@ -294,6 +304,29 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
294
304
  skillsGroup.skillIds.map((id) => api.getSkill(projectId2, id))
295
305
  );
296
306
  }
307
+ if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
308
+ skills = await Promise.all(
309
+ skills.map(async (skill) => {
310
+ const versionId = evalRun.skillVersions?.[skill.id];
311
+ if (versionId) {
312
+ const version = await api.getSkillVersion(
313
+ projectId2,
314
+ skill.id,
315
+ versionId
316
+ );
317
+ return { ...skill, latestVersion: version };
318
+ }
319
+ return skill;
320
+ })
321
+ );
322
+ }
323
+ skills = skills.map((skill) => {
324
+ const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
325
+ if (!hasPinnedVersion && skill.source) {
326
+ return { ...skill, latestVersion: void 0 };
327
+ }
328
+ return skill;
329
+ });
297
330
  }
298
331
  let mcps = [];
299
332
  if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
@@ -1106,10 +1139,10 @@ var Minipass = class extends import_node_events.EventEmitter {
1106
1139
  * Return a void Promise that resolves once the stream ends.
1107
1140
  */
1108
1141
  async promise() {
1109
- return new Promise((resolve, reject) => {
1142
+ return new Promise((resolve2, reject) => {
1110
1143
  this.on(DESTROYED, () => reject(new Error("stream destroyed")));
1111
1144
  this.on("error", (er) => reject(er));
1112
- this.on("end", () => resolve());
1145
+ this.on("end", () => resolve2());
1113
1146
  });
1114
1147
  }
1115
1148
  /**
@@ -1133,7 +1166,7 @@ var Minipass = class extends import_node_events.EventEmitter {
1133
1166
  return Promise.resolve({ done: false, value: res });
1134
1167
  if (this[EOF])
1135
1168
  return stop();
1136
- let resolve;
1169
+ let resolve2;
1137
1170
  let reject;
1138
1171
  const onerr = (er) => {
1139
1172
  this.off("data", ondata);
@@ -1147,19 +1180,19 @@ var Minipass = class extends import_node_events.EventEmitter {
1147
1180
  this.off("end", onend);
1148
1181
  this.off(DESTROYED, ondestroy);
1149
1182
  this.pause();
1150
- resolve({ value, done: !!this[EOF] });
1183
+ resolve2({ value, done: !!this[EOF] });
1151
1184
  };
1152
1185
  const onend = () => {
1153
1186
  this.off("error", onerr);
1154
1187
  this.off("data", ondata);
1155
1188
  this.off(DESTROYED, ondestroy);
1156
1189
  stop();
1157
- resolve({ done: true, value: void 0 });
1190
+ resolve2({ done: true, value: void 0 });
1158
1191
  };
1159
1192
  const ondestroy = () => onerr(new Error("stream destroyed"));
1160
1193
  return new Promise((res2, rej) => {
1161
1194
  reject = rej;
1162
- resolve = res2;
1195
+ resolve2 = res2;
1163
1196
  this.once(DESTROYED, ondestroy);
1164
1197
  this.once("error", onerr);
1165
1198
  this.once("end", onend);
@@ -3287,9 +3320,9 @@ var listFile = (opt, _files) => {
3287
3320
  const parse4 = new Parser(opt);
3288
3321
  const readSize = opt.maxReadSize || 16 * 1024 * 1024;
3289
3322
  const file = opt.file;
3290
- const p = new Promise((resolve, reject) => {
3323
+ const p = new Promise((resolve2, reject) => {
3291
3324
  parse4.on("error", reject);
3292
- parse4.on("end", resolve);
3325
+ parse4.on("end", resolve2);
3293
3326
  import_node_fs.default.stat(file, (er, stat) => {
3294
3327
  if (er) {
3295
3328
  reject(er);
@@ -5930,9 +5963,9 @@ var extractFile = (opt, _) => {
5930
5963
  const u = new Unpack(opt);
5931
5964
  const readSize = opt.maxReadSize || 16 * 1024 * 1024;
5932
5965
  const file = opt.file;
5933
- const p = new Promise((resolve, reject) => {
5966
+ const p = new Promise((resolve2, reject) => {
5934
5967
  u.on("error", reject);
5935
- u.on("close", resolve);
5968
+ u.on("close", resolve2);
5936
5969
  import_node_fs5.default.stat(file, (er, stat) => {
5937
5970
  if (er) {
5938
5971
  reject(er);
@@ -6066,7 +6099,7 @@ var replaceAsync = (opt, files) => {
6066
6099
  };
6067
6100
  import_node_fs6.default.read(fd, headBuf, 0, 512, position, onread);
6068
6101
  };
6069
- const promise = new Promise((resolve, reject) => {
6102
+ const promise = new Promise((resolve2, reject) => {
6070
6103
  p.on("error", reject);
6071
6104
  let flag = "r+";
6072
6105
  const onopen = (er, fd) => {
@@ -6091,7 +6124,7 @@ var replaceAsync = (opt, files) => {
6091
6124
  });
6092
6125
  p.pipe(stream);
6093
6126
  stream.on("error", reject);
6094
- stream.on("close", resolve);
6127
+ stream.on("close", resolve2);
6095
6128
  addFilesAsync2(p, files);
6096
6129
  });
6097
6130
  });
@@ -6357,13 +6390,68 @@ var import_evalforge_types4 = require("@wix/evalforge-types");
6357
6390
 
6358
6391
  // src/run-scenario/agents/claude-code/execute.ts
6359
6392
  var import_evalforge_types3 = require("@wix/evalforge-types");
6360
- var import_crypto = require("crypto");
6361
- var import_promises5 = require("fs/promises");
6362
- var import_path7 = require("path");
6363
6393
 
6364
- // src/run-scenario/agents/claude-code/write-mcp.ts
6394
+ // src/run-scenario/agents/claude-code/write-skills.ts
6365
6395
  var import_promises3 = require("fs/promises");
6366
6396
  var import_path5 = require("path");
6397
+ var import_evalforge_github_client = require("@wix/evalforge-github-client");
6398
/**
 * Writes every skill in `skills` under `cwd`, starting all writes at once.
 *
 * Each skill is handed to `writeSkillToFilesystem`; the returned promise
 * rejects as soon as any single skill fails (`Promise.all` semantics).
 *
 * @param cwd - Working directory the skills are written beneath.
 * @param skills - Array of skill records to materialize.
 * @param fetchFn - Fetcher forwarded to `writeSkillToFilesystem`; defaults to
 *   the GitHub client's `fetchSkillFolderRaw`.
 */
async function writeSkillsToFilesystem(cwd, skills, fetchFn = import_evalforge_github_client.fetchSkillFolderRaw) {
  const writeOne = (skill) => writeSkillToFilesystem(cwd, skill, fetchFn);
  const pendingWrites = skills.map(writeOne);
  await Promise.all(pendingWrites);
}
6403
/**
 * Materializes a single skill under `<cwd>/.claude/skills/<skill.name>`.
 *
 * File contents come from, in order of preference:
 *   1. `skill.latestVersion.files` when it is a non-empty list (snapshot);
 *   2. `fetchFn(skill.source, ...)` when the skill has a `source` (live fetch).
 * A skill with neither is an error.
 *
 * @param cwd - Working directory the skill is written beneath.
 * @param skill - Skill record; reads `name`, `latestVersion` and `source`.
 * @param fetchFn - Fetcher called with `(skill.source, { userAgent })` that
 *   resolves to the list of files to write; defaults to the GitHub client's
 *   `fetchSkillFolderRaw`.
 * @throws {Error} If the skill name does not resolve to a subdirectory of the
 *   skills directory, if the live fetch or the subsequent write fails (the
 *   original error is attached as `cause`), or if the skill has neither files
 *   nor a source.
 */
async function writeSkillToFilesystem(cwd, skill, fetchFn = import_evalforge_github_client.fetchSkillFolderRaw) {
  const skillName = skill.name;
  const skillsRoot = (0, import_path5.resolve)(cwd, ".claude", "skills");
  const skillDir = (0, import_path5.join)(cwd, ".claude", "skills", skillName);
  // The skill name is external data used as a path segment: refuse names that
  // escape the skills directory ("../..") or collapse onto it ("" or ".")
  // before anything is created on disk.
  const resolvedSkillDir = (0, import_path5.resolve)(skillDir);
  if (!resolvedSkillDir.startsWith(skillsRoot + import_path5.sep)) {
    throw new Error(
      `Invalid skill name "${skillName}": must resolve to a subdirectory of .claude/skills`
    );
  }
  await (0, import_promises3.mkdir)(skillDir, { recursive: true });
  const version = skill.latestVersion;
  if (version?.files && version.files.length > 0) {
    await writeSkillFiles(skillDir, version.files);
    console.log(
      `[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
    );
  } else if (skill.source) {
    try {
      const files = await fetchFn(skill.source, {
        userAgent: "EvalForge-Evaluator"
      });
      await writeSkillFiles(skillDir, files);
      console.log(
        `[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
      );
    } catch (error) {
      const message = error instanceof Error ? error.message : "Unknown error";
      console.error(
        `[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
      );
      // Keep the underlying failure reachable for callers and logs.
      throw new Error(
        `Failed to write skill ${skillName} to filesystem: ${message}`,
        { cause: error }
      );
    }
  } else {
    throw new Error(`Skill ${skillName} has no files and no source configured`);
  }
}
6435
/**
 * Writes `files` beneath `skillDir`, creating parent directories as needed.
 *
 * Every path is validated before anything is written, so a rejected entry
 * never leaves a partially written skill behind. Files are then written one
 * after another, in order, with "utf-8" encoding.
 *
 * @param skillDir - Directory all files must end up inside.
 * @param files - Iterable of `{ path, content }` entries; `path` is resolved
 *   relative to `skillDir`.
 * @throws {Error} If any `path` resolves outside `skillDir` (path traversal)
 *   or onto `skillDir` itself (not a file location).
 */
async function writeSkillFiles(skillDir, files) {
  const resolvedBase = (0, import_path5.resolve)(skillDir);
  // Avoid a doubled separator when the base is a filesystem root.
  const basePrefix = resolvedBase.endsWith(import_path5.sep) ? resolvedBase : resolvedBase + import_path5.sep;
  const targets = [];
  for (const file of files) {
    const filePath = (0, import_path5.resolve)(skillDir, file.path);
    if (filePath === resolvedBase) {
      // Writing "to" the directory would otherwise surface as an opaque EISDIR.
      throw new Error(
        `Invalid skill file path: "${file.path}" resolves to the skill directory itself`
      );
    }
    if (!filePath.startsWith(basePrefix)) {
      throw new Error(
        `Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
      );
    }
    targets.push({ filePath, content: file.content });
  }
  for (const { filePath, content } of targets) {
    await (0, import_promises3.mkdir)((0, import_path5.dirname)(filePath), { recursive: true });
    await (0, import_promises3.writeFile)(filePath, content, "utf-8");
  }
}
6448
+
6449
+ // src/run-scenario/agents/claude-code/execute.ts
6450
+ var import_crypto = require("crypto");
6451
+
6452
+ // src/run-scenario/agents/claude-code/write-mcp.ts
6453
+ var import_promises4 = require("fs/promises");
6454
+ var import_path6 = require("path");
6367
6455
  var import_evalforge_types2 = require("@wix/evalforge-types");
6368
6456
  async function writeMcpToFilesystem(cwd, mcps) {
6369
6457
  if (mcps.length === 0) return;
@@ -6384,14 +6472,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
6384
6472
  null,
6385
6473
  2
6386
6474
  );
6387
- const filePath = (0, import_path5.join)(cwd, ".mcp.json");
6388
- await (0, import_promises3.writeFile)(filePath, content, "utf8");
6475
+ const filePath = (0, import_path6.join)(cwd, ".mcp.json");
6476
+ await (0, import_promises4.writeFile)(filePath, content, "utf8");
6389
6477
  console.log(`[MCP] Written to ${filePath}`);
6390
6478
  }
6391
6479
 
6392
6480
  // src/run-scenario/agents/claude-code/write-sub-agents.ts
6393
- var import_promises4 = require("fs/promises");
6394
- var import_path6 = require("path");
6481
+ var import_promises5 = require("fs/promises");
6482
+ var import_path7 = require("path");
6395
6483
  var AGENTS_DIR = ".claude/agents";
6396
6484
  function toAgentFilename(name2, index, nameCount) {
6397
6485
  const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
@@ -6401,13 +6489,13 @@ function toAgentFilename(name2, index, nameCount) {
6401
6489
  }
6402
6490
  async function writeSubAgentsToFilesystem(cwd, subAgents) {
6403
6491
  if (subAgents.length === 0) return;
6404
- const agentsDir = (0, import_path6.join)(cwd, AGENTS_DIR);
6405
- await (0, import_promises4.mkdir)(agentsDir, { recursive: true });
6492
+ const agentsDir = (0, import_path7.join)(cwd, AGENTS_DIR);
6493
+ await (0, import_promises5.mkdir)(agentsDir, { recursive: true });
6406
6494
  const nameCount = /* @__PURE__ */ new Map();
6407
6495
  for (const [i, agent] of subAgents.entries()) {
6408
6496
  const filename = toAgentFilename(agent.name, i, nameCount);
6409
- const filePath = (0, import_path6.join)(agentsDir, `${filename}.md`);
6410
- await (0, import_promises4.writeFile)(filePath, agent.subAgentMd, "utf8");
6497
+ const filePath = (0, import_path7.join)(agentsDir, `${filename}.md`);
6498
+ await (0, import_promises5.writeFile)(filePath, agent.subAgentMd, "utf8");
6411
6499
  }
6412
6500
  console.log(`[SubAgents] Written to ${agentsDir}`);
6413
6501
  }
@@ -7186,16 +7274,6 @@ Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
7186
7274
  llmTrace
7187
7275
  };
7188
7276
  }
7189
- async function writeSkillsToFilesystem(cwd, skills) {
7190
- for (const skill of skills) {
7191
- const skillName = skill.name;
7192
- const skillDir = (0, import_path7.join)(cwd, ".claude", "skills", skillName);
7193
- await (0, import_promises5.mkdir)(skillDir, { recursive: true });
7194
- const skillPath = (0, import_path7.join)(skillDir, "SKILL.md");
7195
- await (0, import_promises5.writeFile)(skillPath, skill.skillMd, "utf-8");
7196
- console.log(`[Skill] Written to ${skillPath}`);
7197
- }
7198
- }
7199
7277
  function buildSdkEnvironment(options) {
7200
7278
  const env = { ...process.env };
7201
7279
  const placeholderApiKey = "sk-ant-api03-placeholder-auth-handled-by-gateway-000000000000000000000000";
@@ -8193,7 +8271,7 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
8193
8271
  mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
8194
8272
  subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
8195
8273
  };
8196
- const result = await adapter.execute(executionContext);
8274
+ const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
8197
8275
  const completedAt = (/* @__PURE__ */ new Date()).toISOString();
8198
8276
  const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
8199
8277
  const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
@@ -8205,13 +8283,13 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
8205
8283
  scenarioId: scenario.id,
8206
8284
  scenarioName: scenario.name,
8207
8285
  modelConfig: agent?.modelConfig,
8208
- duration: result.durationMs,
8209
- outputText: result.outputText,
8286
+ duration: durationMs,
8287
+ outputText,
8210
8288
  fileDiffs: fileDiffs.length > 0 ? fileDiffs : void 0,
8211
8289
  templateFiles: templateFiles && templateFiles.length > 0 ? templateFiles : void 0,
8212
8290
  startedAt,
8213
8291
  completedAt,
8214
- llmTrace: result.llmTrace
8292
+ llmTrace
8215
8293
  };
8216
8294
  }
8217
8295