@wix/evalforge-evaluator 0.143.0 → 0.144.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -186,6 +186,14 @@ function createApiClient(serverUrl, options = "") {
186
186
  getPreset(projectId2, id) {
187
187
  return fetchJson(`/projects/${projectId2}/presets/${id}`);
188
188
  },
189
+ getCapability(projectId2, id) {
190
+ return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
191
+ },
192
+ getCapabilityVersion(projectId2, capabilityId, versionId) {
193
+ return fetchJson(
194
+ `/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
195
+ );
196
+ },
189
197
  addResult(projectId2, evalRunId2, result) {
190
198
  return postJson(
191
199
  `/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
@@ -345,55 +353,98 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
345
353
  agent = await api.getAgent(projectId2, evalRun.agentId);
346
354
  }
347
355
  let skills = [];
348
- const resolvedSkillIds = evalRun.skillIds ?? [];
349
- if (resolvedSkillIds.length > 0) {
356
+ let mcps = [];
357
+ let subAgents = [];
358
+ let rules = [];
359
+ if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
350
360
  const fetchResults = await Promise.allSettled(
351
- resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
361
+ evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
352
362
  );
353
- skills = fetchResults.filter(
363
+ const capabilities = fetchResults.filter(
354
364
  (r) => r.status === "fulfilled"
355
- ).map((r) => r.value).filter((s) => !s.deleted);
356
- if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
357
- skills = await Promise.all(
358
- skills.map(async (skill) => {
359
- const versionId = evalRun.skillVersions?.[skill.id];
360
- if (versionId) {
361
- const version = await api.getSkillVersion(
362
- projectId2,
363
- skill.id,
364
- versionId
365
- );
366
- return { ...skill, latestVersion: version };
367
- }
368
- return skill;
369
- })
370
- );
365
+ ).map((r) => r.value).filter((c) => !c.deleted);
366
+ const pinned = evalRun.capabilityVersions ?? {};
367
+ const withVersions = await Promise.all(
368
+ capabilities.map(async (cap) => {
369
+ const versionId = pinned[cap.id];
370
+ if (versionId) {
371
+ const version = await api.getCapabilityVersion(
372
+ projectId2,
373
+ cap.id,
374
+ versionId
375
+ );
376
+ return { ...cap, latestVersion: version };
377
+ }
378
+ if (cap.source) {
379
+ return { ...cap, latestVersion: void 0 };
380
+ }
381
+ return cap;
382
+ })
383
+ );
384
+ for (const cap of withVersions) {
385
+ switch (cap.capabilityType) {
386
+ case "SKILL":
387
+ skills.push((0, import_evalforge_types.capabilityToSkillWithLatestVersion)(cap));
388
+ break;
389
+ case "SUB_AGENT":
390
+ subAgents.push((0, import_evalforge_types.capabilityToSubAgent)(cap));
391
+ break;
392
+ case "RULE":
393
+ rules.push((0, import_evalforge_types.capabilityToRule)(cap));
394
+ break;
395
+ case "MCP":
396
+ mcps.push((0, import_evalforge_types.capabilityToMcp)(cap));
397
+ break;
398
+ }
371
399
  }
372
- skills = skills.map((skill) => {
373
- const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
374
- if (!hasPinnedVersion && skill.source) {
375
- return { ...skill, latestVersion: void 0 };
400
+ } else {
401
+ const resolvedSkillIds = evalRun.skillIds ?? [];
402
+ if (resolvedSkillIds.length > 0) {
403
+ const fetchResults = await Promise.allSettled(
404
+ resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
405
+ );
406
+ skills = fetchResults.filter(
407
+ (r) => r.status === "fulfilled"
408
+ ).map((r) => r.value).filter((s) => !s.deleted);
409
+ if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
410
+ skills = await Promise.all(
411
+ skills.map(async (skill) => {
412
+ const versionId = evalRun.skillVersions?.[skill.id];
413
+ if (versionId) {
414
+ const version = await api.getSkillVersion(
415
+ projectId2,
416
+ skill.id,
417
+ versionId
418
+ );
419
+ return { ...skill, latestVersion: version };
420
+ }
421
+ return skill;
422
+ })
423
+ );
376
424
  }
377
- return skill;
378
- });
379
- }
380
- let mcps = [];
381
- if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
382
- mcps = await Promise.all(
383
- evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
384
- );
385
- }
386
- let subAgents = [];
387
- if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
388
- subAgents = await Promise.all(
389
- evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
390
- );
391
- }
392
- let rules = [];
393
- if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
394
- rules = await Promise.all(
395
- evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
396
- );
425
+ skills = skills.map((skill) => {
426
+ const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
427
+ if (!hasPinnedVersion && skill.source) {
428
+ return { ...skill, latestVersion: void 0 };
429
+ }
430
+ return skill;
431
+ });
432
+ }
433
+ if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
434
+ mcps = await Promise.all(
435
+ evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
436
+ );
437
+ }
438
+ if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
439
+ subAgents = await Promise.all(
440
+ evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
441
+ );
442
+ }
443
+ if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
444
+ rules = await Promise.all(
445
+ evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
446
+ );
447
+ }
397
448
  }
398
449
  const templateIds = [
399
450
  ...new Set(