@wix/evalforge-evaluator 0.142.0 → 0.144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +95 -44
- package/build/index.js.map +2 -2
- package/build/index.mjs +100 -45
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +3 -1
- package/package.json +5 -5
package/build/index.js
CHANGED
|
@@ -186,6 +186,14 @@ function createApiClient(serverUrl, options = "") {
|
|
|
186
186
|
// Looks up a single preset of a project.
// Passes the path `/projects/<projectId2>/presets/<id>` to fetchJson and
// returns whatever fetchJson returns (presumably a promise of the parsed
// response body; confirm against fetchJson, which is defined outside this view).
// Both ids are interpolated into the path as-is; nothing here URL-encodes them.
getPreset(projectId2, id) {
|
|
187
187
|
return fetchJson(`/projects/${projectId2}/presets/${id}`);
|
|
188
188
|
},
|
|
189
|
+
// Looks up a single capability of a project.
// Passes the path `/projects/<projectId2>/capabilities/<id>` to fetchJson and
// returns whatever fetchJson returns (presumably a promise of the parsed
// response body; confirm against fetchJson, which is defined outside this view).
// Both ids are interpolated into the path as-is; nothing here URL-encodes them.
getCapability(projectId2, id) {
|
|
190
|
+
return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
|
|
191
|
+
},
|
|
192
|
+
// Looks up one specific version of a capability.
// Passes the path
// `/projects/<projectId2>/capabilities/<capabilityId>/versions/<versionId>`
// to fetchJson and returns whatever fetchJson returns (presumably a promise of
// the parsed response body; confirm against fetchJson, defined outside this view).
// All three ids are interpolated into the path as-is; nothing here URL-encodes them.
getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
193
|
+
return fetchJson(
|
|
194
|
+
`/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
|
|
195
|
+
);
|
|
196
|
+
},
|
|
189
197
|
addResult(projectId2, evalRunId2, result) {
|
|
190
198
|
return postJson(
|
|
191
199
|
`/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
|
|
@@ -345,55 +353,98 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
345
353
|
agent = await api.getAgent(projectId2, evalRun.agentId);
|
|
346
354
|
}
|
|
347
355
|
let skills = [];
|
|
348
|
-
|
|
349
|
-
|
|
356
|
+
let mcps = [];
|
|
357
|
+
let subAgents = [];
|
|
358
|
+
let rules = [];
|
|
359
|
+
if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
|
|
350
360
|
const fetchResults = await Promise.allSettled(
|
|
351
|
-
|
|
361
|
+
evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
|
|
352
362
|
);
|
|
353
|
-
|
|
363
|
+
const capabilities = fetchResults.filter(
|
|
354
364
|
(r) => r.status === "fulfilled"
|
|
355
|
-
).map((r) => r.value).filter((
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
365
|
+
).map((r) => r.value).filter((c) => !c.deleted);
|
|
366
|
+
const pinned = evalRun.capabilityVersions ?? {};
|
|
367
|
+
const withVersions = await Promise.all(
|
|
368
|
+
capabilities.map(async (cap) => {
|
|
369
|
+
const versionId = pinned[cap.id];
|
|
370
|
+
if (versionId) {
|
|
371
|
+
const version = await api.getCapabilityVersion(
|
|
372
|
+
projectId2,
|
|
373
|
+
cap.id,
|
|
374
|
+
versionId
|
|
375
|
+
);
|
|
376
|
+
return { ...cap, latestVersion: version };
|
|
377
|
+
}
|
|
378
|
+
if (cap.source) {
|
|
379
|
+
return { ...cap, latestVersion: void 0 };
|
|
380
|
+
}
|
|
381
|
+
return cap;
|
|
382
|
+
})
|
|
383
|
+
);
|
|
384
|
+
for (const cap of withVersions) {
|
|
385
|
+
switch (cap.capabilityType) {
|
|
386
|
+
case "SKILL":
|
|
387
|
+
skills.push((0, import_evalforge_types.capabilityToSkillWithLatestVersion)(cap));
|
|
388
|
+
break;
|
|
389
|
+
case "SUB_AGENT":
|
|
390
|
+
subAgents.push((0, import_evalforge_types.capabilityToSubAgent)(cap));
|
|
391
|
+
break;
|
|
392
|
+
case "RULE":
|
|
393
|
+
rules.push((0, import_evalforge_types.capabilityToRule)(cap));
|
|
394
|
+
break;
|
|
395
|
+
case "MCP":
|
|
396
|
+
mcps.push((0, import_evalforge_types.capabilityToMcp)(cap));
|
|
397
|
+
break;
|
|
398
|
+
}
|
|
371
399
|
}
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
400
|
+
} else {
|
|
401
|
+
const resolvedSkillIds = evalRun.skillIds ?? [];
|
|
402
|
+
if (resolvedSkillIds.length > 0) {
|
|
403
|
+
const fetchResults = await Promise.allSettled(
|
|
404
|
+
resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
|
|
405
|
+
);
|
|
406
|
+
skills = fetchResults.filter(
|
|
407
|
+
(r) => r.status === "fulfilled"
|
|
408
|
+
).map((r) => r.value).filter((s) => !s.deleted);
|
|
409
|
+
if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
|
|
410
|
+
skills = await Promise.all(
|
|
411
|
+
skills.map(async (skill) => {
|
|
412
|
+
const versionId = evalRun.skillVersions?.[skill.id];
|
|
413
|
+
if (versionId) {
|
|
414
|
+
const version = await api.getSkillVersion(
|
|
415
|
+
projectId2,
|
|
416
|
+
skill.id,
|
|
417
|
+
versionId
|
|
418
|
+
);
|
|
419
|
+
return { ...skill, latestVersion: version };
|
|
420
|
+
}
|
|
421
|
+
return skill;
|
|
422
|
+
})
|
|
423
|
+
);
|
|
376
424
|
}
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
)
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
425
|
+
skills = skills.map((skill) => {
|
|
426
|
+
const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
|
|
427
|
+
if (!hasPinnedVersion && skill.source) {
|
|
428
|
+
return { ...skill, latestVersion: void 0 };
|
|
429
|
+
}
|
|
430
|
+
return skill;
|
|
431
|
+
});
|
|
432
|
+
}
|
|
433
|
+
if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
|
|
434
|
+
mcps = await Promise.all(
|
|
435
|
+
evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
|
|
436
|
+
);
|
|
437
|
+
}
|
|
438
|
+
if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
|
|
439
|
+
subAgents = await Promise.all(
|
|
440
|
+
evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
|
|
441
|
+
);
|
|
442
|
+
}
|
|
443
|
+
if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
|
|
444
|
+
rules = await Promise.all(
|
|
445
|
+
evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
|
|
446
|
+
);
|
|
447
|
+
}
|
|
397
448
|
}
|
|
398
449
|
const templateIds = [
|
|
399
450
|
...new Set(
|