@wix/evalforge-evaluator 0.60.0 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +116 -38
- package/build/index.js.map +4 -4
- package/build/index.mjs +116 -38
- package/build/index.mjs.map +4 -4
- package/build/types/api-client.d.ts +4 -2
- package/build/types/fetch-evaluation-data.d.ts +2 -2
- package/build/types/run-scenario/agents/claude-code/execute.d.ts +2 -2
- package/build/types/run-scenario/agents/claude-code/write-skills.d.ts +21 -0
- package/package.json +6 -5
package/build/index.mjs
CHANGED
|
@@ -137,6 +137,16 @@ function createApiClient(serverUrl, options = "") {
|
|
|
137
137
|
getSkill(projectId2, id) {
|
|
138
138
|
return fetchJson(`/projects/${projectId2}/skills/${id}`);
|
|
139
139
|
},
|
|
140
|
+
getSkillVersion(projectId2, skillId, versionId) {
|
|
141
|
+
return fetchJson(
|
|
142
|
+
`/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
|
|
143
|
+
);
|
|
144
|
+
},
|
|
145
|
+
getLatestSkillVersion(projectId2, skillId) {
|
|
146
|
+
return fetchJson(
|
|
147
|
+
`/projects/${projectId2}/skills/${skillId}/versions/latest`
|
|
148
|
+
);
|
|
149
|
+
},
|
|
140
150
|
getAgent(projectId2, id) {
|
|
141
151
|
return fetchJson(`/projects/${projectId2}/agents/${id}`);
|
|
142
152
|
},
|
|
@@ -274,6 +284,29 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
274
284
|
skillsGroup.skillIds.map((id) => api.getSkill(projectId2, id))
|
|
275
285
|
);
|
|
276
286
|
}
|
|
287
|
+
if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
|
|
288
|
+
skills = await Promise.all(
|
|
289
|
+
skills.map(async (skill) => {
|
|
290
|
+
const versionId = evalRun.skillVersions?.[skill.id];
|
|
291
|
+
if (versionId) {
|
|
292
|
+
const version = await api.getSkillVersion(
|
|
293
|
+
projectId2,
|
|
294
|
+
skill.id,
|
|
295
|
+
versionId
|
|
296
|
+
);
|
|
297
|
+
return { ...skill, latestVersion: version };
|
|
298
|
+
}
|
|
299
|
+
return skill;
|
|
300
|
+
})
|
|
301
|
+
);
|
|
302
|
+
}
|
|
303
|
+
skills = skills.map((skill) => {
|
|
304
|
+
const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
|
|
305
|
+
if (!hasPinnedVersion && skill.source) {
|
|
306
|
+
return { ...skill, latestVersion: void 0 };
|
|
307
|
+
}
|
|
308
|
+
return skill;
|
|
309
|
+
});
|
|
277
310
|
}
|
|
278
311
|
let mcps = [];
|
|
279
312
|
if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
|
|
@@ -1088,10 +1121,10 @@ var Minipass = class extends EventEmitter {
|
|
|
1088
1121
|
* Return a void Promise that resolves once the stream ends.
|
|
1089
1122
|
*/
|
|
1090
1123
|
async promise() {
|
|
1091
|
-
return new Promise((resolve, reject) => {
|
|
1124
|
+
return new Promise((resolve2, reject) => {
|
|
1092
1125
|
this.on(DESTROYED, () => reject(new Error("stream destroyed")));
|
|
1093
1126
|
this.on("error", (er) => reject(er));
|
|
1094
|
-
this.on("end", () => resolve());
|
|
1127
|
+
this.on("end", () => resolve2());
|
|
1095
1128
|
});
|
|
1096
1129
|
}
|
|
1097
1130
|
/**
|
|
@@ -1115,7 +1148,7 @@ var Minipass = class extends EventEmitter {
|
|
|
1115
1148
|
return Promise.resolve({ done: false, value: res });
|
|
1116
1149
|
if (this[EOF])
|
|
1117
1150
|
return stop();
|
|
1118
|
-
let resolve;
|
|
1151
|
+
let resolve2;
|
|
1119
1152
|
let reject;
|
|
1120
1153
|
const onerr = (er) => {
|
|
1121
1154
|
this.off("data", ondata);
|
|
@@ -1129,19 +1162,19 @@ var Minipass = class extends EventEmitter {
|
|
|
1129
1162
|
this.off("end", onend);
|
|
1130
1163
|
this.off(DESTROYED, ondestroy);
|
|
1131
1164
|
this.pause();
|
|
1132
|
-
resolve({ value, done: !!this[EOF] });
|
1165
|
+
resolve2({ value, done: !!this[EOF] });
|
|
1133
1166
|
};
|
|
1134
1167
|
const onend = () => {
|
|
1135
1168
|
this.off("error", onerr);
|
|
1136
1169
|
this.off("data", ondata);
|
|
1137
1170
|
this.off(DESTROYED, ondestroy);
|
|
1138
1171
|
stop();
|
|
1139
|
-
resolve({ done: true, value: void 0 });
|
1172
|
+
resolve2({ done: true, value: void 0 });
|
|
1140
1173
|
};
|
|
1141
1174
|
const ondestroy = () => onerr(new Error("stream destroyed"));
|
|
1142
1175
|
return new Promise((res2, rej) => {
|
|
1143
1176
|
reject = rej;
|
|
1144
|
-
resolve = res2;
|
1177
|
+
resolve2 = res2;
|
|
1145
1178
|
this.once(DESTROYED, ondestroy);
|
|
1146
1179
|
this.once("error", onerr);
|
|
1147
1180
|
this.once("end", onend);
|
|
@@ -3269,9 +3302,9 @@ var listFile = (opt, _files) => {
|
|
|
3269
3302
|
const parse4 = new Parser(opt);
|
|
3270
3303
|
const readSize = opt.maxReadSize || 16 * 1024 * 1024;
|
|
3271
3304
|
const file = opt.file;
|
|
3272
|
-
const p = new Promise((resolve, reject) => {
|
|
3305
|
+
const p = new Promise((resolve2, reject) => {
|
|
3273
3306
|
parse4.on("error", reject);
|
|
3274
|
-
parse4.on("end", resolve);
|
|
3307
|
+
parse4.on("end", resolve2);
|
|
3275
3308
|
fs2.stat(file, (er, stat) => {
|
|
3276
3309
|
if (er) {
|
|
3277
3310
|
reject(er);
|
|
@@ -5912,9 +5945,9 @@ var extractFile = (opt, _) => {
|
|
|
5912
5945
|
const u = new Unpack(opt);
|
|
5913
5946
|
const readSize = opt.maxReadSize || 16 * 1024 * 1024;
|
|
5914
5947
|
const file = opt.file;
|
|
5915
|
-
const p = new Promise((resolve, reject) => {
|
|
5948
|
+
const p = new Promise((resolve2, reject) => {
|
|
5916
5949
|
u.on("error", reject);
|
|
5917
|
-
u.on("close", resolve);
|
|
5950
|
+
u.on("close", resolve2);
|
|
5918
5951
|
fs9.stat(file, (er, stat) => {
|
|
5919
5952
|
if (er) {
|
|
5920
5953
|
reject(er);
|
|
@@ -6048,7 +6081,7 @@ var replaceAsync = (opt, files) => {
|
|
|
6048
6081
|
};
|
|
6049
6082
|
fs10.read(fd, headBuf, 0, 512, position, onread);
|
|
6050
6083
|
};
|
|
6051
|
-
const promise = new Promise((resolve, reject) => {
|
|
6084
|
+
const promise = new Promise((resolve2, reject) => {
|
|
6052
6085
|
p.on("error", reject);
|
|
6053
6086
|
let flag = "r+";
|
|
6054
6087
|
const onopen = (er, fd) => {
|
|
@@ -6073,7 +6106,7 @@ var replaceAsync = (opt, files) => {
|
|
|
6073
6106
|
});
|
|
6074
6107
|
p.pipe(stream);
|
|
6075
6108
|
stream.on("error", reject);
|
|
6076
|
-
stream.on("close", resolve);
|
|
6109
|
+
stream.on("close", resolve2);
|
|
6077
6110
|
addFilesAsync2(p, files);
|
|
6078
6111
|
});
|
|
6079
6112
|
});
|
|
@@ -6344,13 +6377,68 @@ import {
|
|
|
6344
6377
|
TRACE_EVENT_PREFIX,
|
|
6345
6378
|
AVAILABLE_MODELS
|
|
6346
6379
|
} from "@wix/evalforge-types";
|
|
6380
|
+
|
|
6381
|
+
// src/run-scenario/agents/claude-code/write-skills.ts
|
|
6382
|
+
import { mkdir as mkdir2, writeFile } from "fs/promises";
|
|
6383
|
+
import { dirname as dirname2, join as join2, resolve, sep } from "path";
|
|
6384
|
+
import { fetchSkillFolderRaw } from "@wix/evalforge-github-client";
|
|
6385
|
+
async function writeSkillsToFilesystem(cwd, skills, fetchFn = fetchSkillFolderRaw) {
|
|
6386
|
+
await Promise.all(
|
|
6387
|
+
skills.map((skill) => writeSkillToFilesystem(cwd, skill, fetchFn))
|
|
6388
|
+
);
|
|
6389
|
+
}
|
|
6390
|
+
async function writeSkillToFilesystem(cwd, skill, fetchFn = fetchSkillFolderRaw) {
|
|
6391
|
+
const skillName = skill.name;
|
|
6392
|
+
const skillDir = join2(cwd, ".claude", "skills", skillName);
|
|
6393
|
+
await mkdir2(skillDir, { recursive: true });
|
|
6394
|
+
const version = skill.latestVersion;
|
|
6395
|
+
if (version?.files && version.files.length > 0) {
|
|
6396
|
+
await writeSkillFiles(skillDir, version.files);
|
|
6397
|
+
console.log(
|
|
6398
|
+
`[Skill] ${skillName}: wrote ${version.files.length} file(s) from snapshot`
|
|
6399
|
+
);
|
|
6400
|
+
} else if (skill.source) {
|
|
6401
|
+
try {
|
|
6402
|
+
const files = await fetchFn(skill.source, {
|
|
6403
|
+
userAgent: "EvalForge-Evaluator"
|
|
6404
|
+
});
|
|
6405
|
+
await writeSkillFiles(skillDir, files);
|
|
6406
|
+
console.log(
|
|
6407
|
+
`[Skill] ${skillName}: wrote ${files.length} file(s) from GitHub (live)`
|
|
6408
|
+
);
|
|
6409
|
+
} catch (error) {
|
|
6410
|
+
const message = error instanceof Error ? error.message : "Unknown error";
|
|
6411
|
+
console.error(
|
|
6412
|
+
`[Skill] ${skillName}: GitHub fetch failed: ${message}, no files to fall back to`
|
|
6413
|
+
);
|
|
6414
|
+
throw new Error(
|
|
6415
|
+
`Failed to write skill ${skillName} to filesystem: ${message}`
|
|
6416
|
+
);
|
|
6417
|
+
}
|
|
6418
|
+
} else {
|
|
6419
|
+
throw new Error(`Skill ${skillName} has no files and no source configured`);
|
|
6420
|
+
}
|
|
6421
|
+
}
|
|
6422
|
+
async function writeSkillFiles(skillDir, files) {
|
|
6423
|
+
const resolvedBase = resolve(skillDir);
|
|
6424
|
+
for (const file of files) {
|
|
6425
|
+
const filePath = resolve(skillDir, file.path);
|
|
6426
|
+
if (!filePath.startsWith(resolvedBase + sep) && filePath !== resolvedBase) {
|
|
6427
|
+
throw new Error(
|
|
6428
|
+
`Path traversal detected in skill file: "${file.path}" resolves outside skill directory`
|
|
6429
|
+
);
|
|
6430
|
+
}
|
|
6431
|
+
await mkdir2(dirname2(filePath), { recursive: true });
|
|
6432
|
+
await writeFile(filePath, file.content, "utf-8");
|
|
6433
|
+
}
|
|
6434
|
+
}
|
|
6435
|
+
|
|
6436
|
+
// src/run-scenario/agents/claude-code/execute.ts
|
|
6347
6437
|
import { randomUUID } from "crypto";
|
|
6348
|
-
import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
|
|
6349
|
-
import { join as join4 } from "path";
|
|
6350
6438
|
|
|
6351
6439
|
// src/run-scenario/agents/claude-code/write-mcp.ts
|
|
6352
|
-
import { writeFile } from "fs/promises";
|
|
6353
|
-
import { join as join2 } from "path";
|
|
6440
|
+
import { writeFile as writeFile2 } from "fs/promises";
|
|
6441
|
+
import { join as join3 } from "path";
|
|
6354
6442
|
import { MCP_SERVERS_JSON_KEY } from "@wix/evalforge-types";
|
|
6355
6443
|
async function writeMcpToFilesystem(cwd, mcps) {
|
|
6356
6444
|
if (mcps.length === 0) return;
|
|
@@ -6371,14 +6459,14 @@ async function writeMcpToFilesystem(cwd, mcps) {
|
|
|
6371
6459
|
null,
|
|
6372
6460
|
2
|
|
6373
6461
|
);
|
|
6374
|
-
const filePath = join2(cwd, ".mcp.json");
|
|
6375
|
-
await writeFile(filePath, content, "utf8");
|
|
6462
|
+
const filePath = join3(cwd, ".mcp.json");
|
|
6463
|
+
await writeFile2(filePath, content, "utf8");
|
|
6376
6464
|
console.log(`[MCP] Written to ${filePath}`);
|
|
6377
6465
|
}
|
|
6378
6466
|
|
|
6379
6467
|
// src/run-scenario/agents/claude-code/write-sub-agents.ts
|
|
6380
|
-
import { mkdir as mkdir2, writeFile as writeFile2 } from "fs/promises";
|
|
6381
|
-
import { join as join3 } from "path";
|
|
6468
|
+
import { mkdir as mkdir3, writeFile as writeFile3 } from "fs/promises";
|
|
6469
|
+
import { join as join4 } from "path";
|
|
6382
6470
|
var AGENTS_DIR = ".claude/agents";
|
|
6383
6471
|
function toAgentFilename(name2, index, nameCount) {
|
|
6384
6472
|
const base = (name2 || "").toLowerCase().replace(/\s+/g, "-").replace(/[^a-z0-9-]/g, "").replace(/^-+|-+$/g, "") || `sub-agent-${index}`;
|
|
@@ -6388,13 +6476,13 @@ function toAgentFilename(name2, index, nameCount) {
|
|
|
6388
6476
|
}
|
|
6389
6477
|
async function writeSubAgentsToFilesystem(cwd, subAgents) {
|
|
6390
6478
|
if (subAgents.length === 0) return;
|
|
6391
|
-
const agentsDir = join3(cwd, AGENTS_DIR);
|
|
6392
|
-
await mkdir2(agentsDir, { recursive: true });
|
|
6479
|
+
const agentsDir = join4(cwd, AGENTS_DIR);
|
|
6480
|
+
await mkdir3(agentsDir, { recursive: true });
|
|
6393
6481
|
const nameCount = /* @__PURE__ */ new Map();
|
|
6394
6482
|
for (const [i, agent] of subAgents.entries()) {
|
|
6395
6483
|
const filename = toAgentFilename(agent.name, i, nameCount);
|
|
6396
|
-
const filePath = join3(agentsDir, `${filename}.md`);
|
|
6397
|
-
await writeFile2(filePath, agent.subAgentMd, "utf8");
|
|
6484
|
+
const filePath = join4(agentsDir, `${filename}.md`);
|
|
6485
|
+
await writeFile3(filePath, agent.subAgentMd, "utf8");
|
|
6398
6486
|
}
|
|
6399
6487
|
console.log(`[SubAgents] Written to ${agentsDir}`);
|
|
6400
6488
|
}
|
|
@@ -7173,16 +7261,6 @@ Stack: ${errorStack.split("\n").slice(0, 5).join("\n")}` : "")
|
|
|
7173
7261
|
llmTrace
|
|
7174
7262
|
};
|
|
7175
7263
|
}
|
|
7176
|
-
async function writeSkillsToFilesystem(cwd, skills) {
|
|
7177
|
-
for (const skill of skills) {
|
|
7178
|
-
const skillName = skill.name;
|
|
7179
|
-
const skillDir = join4(cwd, ".claude", "skills", skillName);
|
|
7180
|
-
await mkdir3(skillDir, { recursive: true });
|
|
7181
|
-
const skillPath = join4(skillDir, "SKILL.md");
|
|
7182
|
-
await writeFile3(skillPath, skill.skillMd, "utf-8");
|
|
7183
|
-
console.log(`[Skill] Written to ${skillPath}`);
|
|
7184
|
-
}
|
|
7185
|
-
}
|
|
7186
7264
|
function buildSdkEnvironment(options) {
|
|
7187
7265
|
const env = { ...process.env };
|
|
7188
7266
|
const placeholderApiKey = "sk-ant-api03-placeholder-auth-handled-by-gateway-000000000000000000000000";
|
|
@@ -8180,7 +8258,7 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
8180
8258
|
mcps: evalData.mcps.length > 0 ? evalData.mcps : void 0,
|
|
8181
8259
|
subAgents: evalData.subAgents.length > 0 ? evalData.subAgents : void 0
|
|
8182
8260
|
};
|
|
8183
|
-
const
|
|
8261
|
+
const { outputText, durationMs, llmTrace } = await adapter.execute(executionContext);
|
|
8184
8262
|
const completedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
8185
8263
|
const afterSnapshot = workDir ? snapshotDirectory(workDir) : {};
|
|
8186
8264
|
const fileDiffs = diffSnapshots(beforeSnapshot, afterSnapshot);
|
|
@@ -8192,13 +8270,13 @@ async function runAgentWithContext(config, evalRunId2, scenario, evalData, workD
|
|
|
8192
8270
|
scenarioId: scenario.id,
|
|
8193
8271
|
scenarioName: scenario.name,
|
|
8194
8272
|
modelConfig: agent?.modelConfig,
|
|
8195
|
-
duration:
|
|
8196
|
-
outputText
|
|
8273
|
+
duration: durationMs,
|
|
8274
|
+
outputText,
|
|
8197
8275
|
fileDiffs: fileDiffs.length > 0 ? fileDiffs : void 0,
|
|
8198
8276
|
templateFiles: templateFiles && templateFiles.length > 0 ? templateFiles : void 0,
|
|
8199
8277
|
startedAt,
|
|
8200
8278
|
completedAt,
|
|
8201
|
-
llmTrace
|
|
8279
|
+
llmTrace
|
|
8202
8280
|
};
|
|
8203
8281
|
}
|
|
8204
8282
|
|