@wix/evalforge-evaluator 0.143.0 → 0.145.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -155,37 +155,23 @@ function createApiClient(serverUrl, options = "") {
155
155
  getScenario(projectId2, id) {
156
156
  return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
157
157
  },
158
- getSkill(projectId2, id) {
159
- return fetchJson(`/projects/${projectId2}/skills/${id}`);
160
- },
161
- getSkillVersion(projectId2, skillId, versionId) {
162
- return fetchJson(
163
- `/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
164
- );
165
- },
166
- getLatestSkillVersion(projectId2, skillId) {
167
- return fetchJson(
168
- `/projects/${projectId2}/skills/${skillId}/versions/latest`
169
- );
170
- },
171
158
  getAgent(projectId2, id) {
172
159
  return fetchJson(`/projects/${projectId2}/agents/${id}`);
173
160
  },
174
161
  getTemplate(projectId2, id) {
175
162
  return fetchJson(`/projects/${projectId2}/templates/${id}`);
176
163
  },
177
- getMcp(projectId2, id) {
178
- return fetchJson(`/projects/${projectId2}/mcps/${id}`);
179
- },
180
- getSubAgent(projectId2, id) {
181
- return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
182
- },
183
- getRule(projectId2, id) {
184
- return fetchJson(`/projects/${projectId2}/rules/${id}`);
185
- },
186
164
  getPreset(projectId2, id) {
187
165
  return fetchJson(`/projects/${projectId2}/presets/${id}`);
188
166
  },
167
+ getCapability(projectId2, id) {
168
+ return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
169
+ },
170
+ getCapabilityVersion(projectId2, capabilityId, versionId) {
171
+ return fetchJson(
172
+ `/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
173
+ );
174
+ },
189
175
  addResult(projectId2, evalRunId2, result) {
190
176
  return postJson(
191
177
  `/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
@@ -345,55 +331,50 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
345
331
  agent = await api.getAgent(projectId2, evalRun.agentId);
346
332
  }
347
333
  let skills = [];
348
- const resolvedSkillIds = evalRun.skillIds ?? [];
349
- if (resolvedSkillIds.length > 0) {
350
- const fetchResults = await Promise.allSettled(
351
- resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
352
- );
353
- skills = fetchResults.filter(
354
- (r) => r.status === "fulfilled"
355
- ).map((r) => r.value).filter((s) => !s.deleted);
356
- if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
357
- skills = await Promise.all(
358
- skills.map(async (skill) => {
359
- const versionId = evalRun.skillVersions?.[skill.id];
360
- if (versionId) {
361
- const version = await api.getSkillVersion(
362
- projectId2,
363
- skill.id,
364
- versionId
365
- );
366
- return { ...skill, latestVersion: version };
367
- }
368
- return skill;
369
- })
370
- );
371
- }
372
- skills = skills.map((skill) => {
373
- const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
374
- if (!hasPinnedVersion && skill.source) {
375
- return { ...skill, latestVersion: void 0 };
376
- }
377
- return skill;
378
- });
379
- }
380
334
  let mcps = [];
381
- if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
382
- mcps = await Promise.all(
383
- evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
384
- );
385
- }
386
335
  let subAgents = [];
387
- if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
388
- subAgents = await Promise.all(
389
- evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
390
- );
391
- }
392
336
  let rules = [];
393
- if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
394
- rules = await Promise.all(
395
- evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
337
+ if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
338
+ const fetchResults = await Promise.allSettled(
339
+ evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
340
+ );
341
+ const capabilities = fetchResults.filter(
342
+ (r) => r.status === "fulfilled"
343
+ ).map((r) => r.value).filter((c) => !c.deleted);
344
+ const pinned = evalRun.capabilityVersions ?? {};
345
+ const withVersions = await Promise.all(
346
+ capabilities.map(async (cap) => {
347
+ const versionId = pinned[cap.id];
348
+ if (versionId) {
349
+ const version = await api.getCapabilityVersion(
350
+ projectId2,
351
+ cap.id,
352
+ versionId
353
+ );
354
+ return { ...cap, latestVersion: version };
355
+ }
356
+ if (cap.source) {
357
+ return { ...cap, latestVersion: void 0 };
358
+ }
359
+ return cap;
360
+ })
396
361
  );
362
+ for (const cap of withVersions) {
363
+ switch (cap.capabilityType) {
364
+ case "SKILL":
365
+ skills.push((0, import_evalforge_types.capabilityToSkillWithLatestVersion)(cap));
366
+ break;
367
+ case "SUB_AGENT":
368
+ subAgents.push((0, import_evalforge_types.capabilityToSubAgent)(cap));
369
+ break;
370
+ case "RULE":
371
+ rules.push((0, import_evalforge_types.capabilityToRule)(cap));
372
+ break;
373
+ case "MCP":
374
+ mcps.push((0, import_evalforge_types.capabilityToMcp)(cap));
375
+ break;
376
+ }
377
+ }
397
378
  }
398
379
  const templateIds = [
399
380
  ...new Set(
@@ -4978,7 +4959,7 @@ async function runEvaluation(projectId2, evalRunId2) {
4978
4959
  hasAgent: !!agent,
4979
4960
  agentId: evalData.evalRun.agentId,
4980
4961
  presetId: evalData.evalRun.presetId,
4981
- skillIds: evalData.evalRun.skillIds
4962
+ capabilityIds: evalData.evalRun.capabilityIds
4982
4963
  };
4983
4964
  if (scenarioItems.length > 0 && !agent) {
4984
4965
  throw new Error(