@wix/evalforge-evaluator 0.143.0 → 0.145.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +49 -68
- package/build/index.js.map +2 -2
- package/build/index.mjs +54 -69
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +3 -7
- package/package.json +5 -5
package/build/index.js
CHANGED
|
@@ -155,37 +155,23 @@ function createApiClient(serverUrl, options = "") {
|
|
|
155
155
|
getScenario(projectId2, id) {
|
|
156
156
|
return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
|
|
157
157
|
},
|
|
158
|
-
getSkill(projectId2, id) {
|
|
159
|
-
return fetchJson(`/projects/${projectId2}/skills/${id}`);
|
|
160
|
-
},
|
|
161
|
-
getSkillVersion(projectId2, skillId, versionId) {
|
|
162
|
-
return fetchJson(
|
|
163
|
-
`/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
|
|
164
|
-
);
|
|
165
|
-
},
|
|
166
|
-
getLatestSkillVersion(projectId2, skillId) {
|
|
167
|
-
return fetchJson(
|
|
168
|
-
`/projects/${projectId2}/skills/${skillId}/versions/latest`
|
|
169
|
-
);
|
|
170
|
-
},
|
|
171
158
|
getAgent(projectId2, id) {
|
|
172
159
|
return fetchJson(`/projects/${projectId2}/agents/${id}`);
|
|
173
160
|
},
|
|
174
161
|
getTemplate(projectId2, id) {
|
|
175
162
|
return fetchJson(`/projects/${projectId2}/templates/${id}`);
|
|
176
163
|
},
|
|
177
|
-
getMcp(projectId2, id) {
|
|
178
|
-
return fetchJson(`/projects/${projectId2}/mcps/${id}`);
|
|
179
|
-
},
|
|
180
|
-
getSubAgent(projectId2, id) {
|
|
181
|
-
return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
|
|
182
|
-
},
|
|
183
|
-
getRule(projectId2, id) {
|
|
184
|
-
return fetchJson(`/projects/${projectId2}/rules/${id}`);
|
|
185
|
-
},
|
|
186
164
|
getPreset(projectId2, id) {
|
|
187
165
|
return fetchJson(`/projects/${projectId2}/presets/${id}`);
|
|
188
166
|
},
|
|
167
|
+
getCapability(projectId2, id) {
|
|
168
|
+
return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
|
|
169
|
+
},
|
|
170
|
+
getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
171
|
+
return fetchJson(
|
|
172
|
+
`/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
|
|
173
|
+
);
|
|
174
|
+
},
|
|
189
175
|
addResult(projectId2, evalRunId2, result) {
|
|
190
176
|
return postJson(
|
|
191
177
|
`/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
|
|
@@ -345,55 +331,50 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
345
331
|
agent = await api.getAgent(projectId2, evalRun.agentId);
|
|
346
332
|
}
|
|
347
333
|
let skills = [];
|
|
348
|
-
const resolvedSkillIds = evalRun.skillIds ?? [];
|
|
349
|
-
if (resolvedSkillIds.length > 0) {
|
|
350
|
-
const fetchResults = await Promise.allSettled(
|
|
351
|
-
resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
|
|
352
|
-
);
|
|
353
|
-
skills = fetchResults.filter(
|
|
354
|
-
(r) => r.status === "fulfilled"
|
|
355
|
-
).map((r) => r.value).filter((s) => !s.deleted);
|
|
356
|
-
if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
|
|
357
|
-
skills = await Promise.all(
|
|
358
|
-
skills.map(async (skill) => {
|
|
359
|
-
const versionId = evalRun.skillVersions?.[skill.id];
|
|
360
|
-
if (versionId) {
|
|
361
|
-
const version = await api.getSkillVersion(
|
|
362
|
-
projectId2,
|
|
363
|
-
skill.id,
|
|
364
|
-
versionId
|
|
365
|
-
);
|
|
366
|
-
return { ...skill, latestVersion: version };
|
|
367
|
-
}
|
|
368
|
-
return skill;
|
|
369
|
-
})
|
|
370
|
-
);
|
|
371
|
-
}
|
|
372
|
-
skills = skills.map((skill) => {
|
|
373
|
-
const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
|
|
374
|
-
if (!hasPinnedVersion && skill.source) {
|
|
375
|
-
return { ...skill, latestVersion: void 0 };
|
|
376
|
-
}
|
|
377
|
-
return skill;
|
|
378
|
-
});
|
|
379
|
-
}
|
|
380
334
|
let mcps = [];
|
|
381
|
-
if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
|
|
382
|
-
mcps = await Promise.all(
|
|
383
|
-
evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
|
|
384
|
-
);
|
|
385
|
-
}
|
|
386
335
|
let subAgents = [];
|
|
387
|
-
if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
|
|
388
|
-
subAgents = await Promise.all(
|
|
389
|
-
evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
|
|
390
|
-
);
|
|
391
|
-
}
|
|
392
336
|
let rules = [];
|
|
393
|
-
if (evalRun.
|
|
394
|
-
|
|
395
|
-
evalRun.
|
|
337
|
+
if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
|
|
338
|
+
const fetchResults = await Promise.allSettled(
|
|
339
|
+
evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
|
|
340
|
+
);
|
|
341
|
+
const capabilities = fetchResults.filter(
|
|
342
|
+
(r) => r.status === "fulfilled"
|
|
343
|
+
).map((r) => r.value).filter((c) => !c.deleted);
|
|
344
|
+
const pinned = evalRun.capabilityVersions ?? {};
|
|
345
|
+
const withVersions = await Promise.all(
|
|
346
|
+
capabilities.map(async (cap) => {
|
|
347
|
+
const versionId = pinned[cap.id];
|
|
348
|
+
if (versionId) {
|
|
349
|
+
const version = await api.getCapabilityVersion(
|
|
350
|
+
projectId2,
|
|
351
|
+
cap.id,
|
|
352
|
+
versionId
|
|
353
|
+
);
|
|
354
|
+
return { ...cap, latestVersion: version };
|
|
355
|
+
}
|
|
356
|
+
if (cap.source) {
|
|
357
|
+
return { ...cap, latestVersion: void 0 };
|
|
358
|
+
}
|
|
359
|
+
return cap;
|
|
360
|
+
})
|
|
396
361
|
);
|
|
362
|
+
for (const cap of withVersions) {
|
|
363
|
+
switch (cap.capabilityType) {
|
|
364
|
+
case "SKILL":
|
|
365
|
+
skills.push((0, import_evalforge_types.capabilityToSkillWithLatestVersion)(cap));
|
|
366
|
+
break;
|
|
367
|
+
case "SUB_AGENT":
|
|
368
|
+
subAgents.push((0, import_evalforge_types.capabilityToSubAgent)(cap));
|
|
369
|
+
break;
|
|
370
|
+
case "RULE":
|
|
371
|
+
rules.push((0, import_evalforge_types.capabilityToRule)(cap));
|
|
372
|
+
break;
|
|
373
|
+
case "MCP":
|
|
374
|
+
mcps.push((0, import_evalforge_types.capabilityToMcp)(cap));
|
|
375
|
+
break;
|
|
376
|
+
}
|
|
377
|
+
}
|
|
397
378
|
}
|
|
398
379
|
const templateIds = [
|
|
399
380
|
...new Set(
|
|
@@ -4978,7 +4959,7 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
4978
4959
|
hasAgent: !!agent,
|
|
4979
4960
|
agentId: evalData.evalRun.agentId,
|
|
4980
4961
|
presetId: evalData.evalRun.presetId,
|
|
4981
|
-
|
|
4962
|
+
capabilityIds: evalData.evalRun.capabilityIds
|
|
4982
4963
|
};
|
|
4983
4964
|
if (scenarioItems.length > 0 && !agent) {
|
|
4984
4965
|
throw new Error(
|