@wix/evalforge-evaluator 0.143.0 → 0.145.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -132,37 +132,23 @@ function createApiClient(serverUrl, options = "") {
132
132
  getScenario(projectId2, id) {
133
133
  return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
134
134
  },
135
- getSkill(projectId2, id) {
136
- return fetchJson(`/projects/${projectId2}/skills/${id}`);
137
- },
138
- getSkillVersion(projectId2, skillId, versionId) {
139
- return fetchJson(
140
- `/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
141
- );
142
- },
143
- getLatestSkillVersion(projectId2, skillId) {
144
- return fetchJson(
145
- `/projects/${projectId2}/skills/${skillId}/versions/latest`
146
- );
147
- },
148
135
  getAgent(projectId2, id) {
149
136
  return fetchJson(`/projects/${projectId2}/agents/${id}`);
150
137
  },
151
138
  getTemplate(projectId2, id) {
152
139
  return fetchJson(`/projects/${projectId2}/templates/${id}`);
153
140
  },
154
- getMcp(projectId2, id) {
155
- return fetchJson(`/projects/${projectId2}/mcps/${id}`);
156
- },
157
- getSubAgent(projectId2, id) {
158
- return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
159
- },
160
- getRule(projectId2, id) {
161
- return fetchJson(`/projects/${projectId2}/rules/${id}`);
162
- },
163
141
  getPreset(projectId2, id) {
164
142
  return fetchJson(`/projects/${projectId2}/presets/${id}`);
165
143
  },
144
+ getCapability(projectId2, id) {
145
+ return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
146
+ },
147
+ getCapabilityVersion(projectId2, capabilityId, versionId) {
148
+ return fetchJson(
149
+ `/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
150
+ );
151
+ },
166
152
  addResult(projectId2, evalRunId2, result) {
167
153
  return postJson(
168
154
  `/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
@@ -184,7 +170,11 @@ function createApiClient(serverUrl, options = "") {
184
170
  import {
185
171
  isSystemAssertionId,
186
172
  SYSTEM_ASSERTIONS,
187
- isAllowedBuildCommandString
173
+ isAllowedBuildCommandString,
174
+ capabilityToSkillWithLatestVersion,
175
+ capabilityToSubAgent,
176
+ capabilityToRule,
177
+ capabilityToMcp
188
178
  } from "@wix/evalforge-types";
189
179
 
190
180
  // src/resolve-placeholders.ts
@@ -326,55 +316,50 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
326
316
  agent = await api.getAgent(projectId2, evalRun.agentId);
327
317
  }
328
318
  let skills = [];
329
- const resolvedSkillIds = evalRun.skillIds ?? [];
330
- if (resolvedSkillIds.length > 0) {
331
- const fetchResults = await Promise.allSettled(
332
- resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
333
- );
334
- skills = fetchResults.filter(
335
- (r) => r.status === "fulfilled"
336
- ).map((r) => r.value).filter((s) => !s.deleted);
337
- if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
338
- skills = await Promise.all(
339
- skills.map(async (skill) => {
340
- const versionId = evalRun.skillVersions?.[skill.id];
341
- if (versionId) {
342
- const version = await api.getSkillVersion(
343
- projectId2,
344
- skill.id,
345
- versionId
346
- );
347
- return { ...skill, latestVersion: version };
348
- }
349
- return skill;
350
- })
351
- );
352
- }
353
- skills = skills.map((skill) => {
354
- const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
355
- if (!hasPinnedVersion && skill.source) {
356
- return { ...skill, latestVersion: void 0 };
357
- }
358
- return skill;
359
- });
360
- }
361
319
  let mcps = [];
362
- if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
363
- mcps = await Promise.all(
364
- evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
365
- );
366
- }
367
320
  let subAgents = [];
368
- if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
369
- subAgents = await Promise.all(
370
- evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
371
- );
372
- }
373
321
  let rules = [];
374
- if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
375
- rules = await Promise.all(
376
- evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
322
+ if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
323
+ const fetchResults = await Promise.allSettled(
324
+ evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
325
+ );
326
+ const capabilities = fetchResults.filter(
327
+ (r) => r.status === "fulfilled"
328
+ ).map((r) => r.value).filter((c) => !c.deleted);
329
+ const pinned = evalRun.capabilityVersions ?? {};
330
+ const withVersions = await Promise.all(
331
+ capabilities.map(async (cap) => {
332
+ const versionId = pinned[cap.id];
333
+ if (versionId) {
334
+ const version = await api.getCapabilityVersion(
335
+ projectId2,
336
+ cap.id,
337
+ versionId
338
+ );
339
+ return { ...cap, latestVersion: version };
340
+ }
341
+ if (cap.source) {
342
+ return { ...cap, latestVersion: void 0 };
343
+ }
344
+ return cap;
345
+ })
377
346
  );
347
+ for (const cap of withVersions) {
348
+ switch (cap.capabilityType) {
349
+ case "SKILL":
350
+ skills.push(capabilityToSkillWithLatestVersion(cap));
351
+ break;
352
+ case "SUB_AGENT":
353
+ subAgents.push(capabilityToSubAgent(cap));
354
+ break;
355
+ case "RULE":
356
+ rules.push(capabilityToRule(cap));
357
+ break;
358
+ case "MCP":
359
+ mcps.push(capabilityToMcp(cap));
360
+ break;
361
+ }
362
+ }
378
363
  }
379
364
  const templateIds = [
380
365
  ...new Set(
@@ -4987,7 +4972,7 @@ async function runEvaluation(projectId2, evalRunId2) {
4987
4972
  hasAgent: !!agent,
4988
4973
  agentId: evalData.evalRun.agentId,
4989
4974
  presetId: evalData.evalRun.presetId,
4990
- skillIds: evalData.evalRun.skillIds
4975
+ capabilityIds: evalData.evalRun.capabilityIds
4991
4976
  };
4992
4977
  if (scenarioItems.length > 0 && !agent) {
4993
4978
  throw new Error(