@wix/evalforge-evaluator 0.142.0 → 0.144.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -163,6 +163,14 @@ function createApiClient(serverUrl, options = "") {
163
163
  getPreset(projectId2, id) {
164
164
  return fetchJson(`/projects/${projectId2}/presets/${id}`);
165
165
  },
166
+ getCapability(projectId2, id) {
167
+ return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
168
+ },
169
+ getCapabilityVersion(projectId2, capabilityId, versionId) {
170
+ return fetchJson(
171
+ `/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
172
+ );
173
+ },
166
174
  addResult(projectId2, evalRunId2, result) {
167
175
  return postJson(
168
176
  `/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
@@ -184,7 +192,11 @@ function createApiClient(serverUrl, options = "") {
184
192
  import {
185
193
  isSystemAssertionId,
186
194
  SYSTEM_ASSERTIONS,
187
- isAllowedBuildCommandString
195
+ isAllowedBuildCommandString,
196
+ capabilityToSkillWithLatestVersion,
197
+ capabilityToSubAgent,
198
+ capabilityToRule,
199
+ capabilityToMcp
188
200
  } from "@wix/evalforge-types";
189
201
 
190
202
  // src/resolve-placeholders.ts
@@ -326,55 +338,98 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
326
338
  agent = await api.getAgent(projectId2, evalRun.agentId);
327
339
  }
328
340
  let skills = [];
329
- const resolvedSkillIds = evalRun.skillIds ?? [];
330
- if (resolvedSkillIds.length > 0) {
341
+ let mcps = [];
342
+ let subAgents = [];
343
+ let rules = [];
344
+ if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
331
345
  const fetchResults = await Promise.allSettled(
332
- resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
346
+ evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
333
347
  );
334
- skills = fetchResults.filter(
348
+ const capabilities = fetchResults.filter(
335
349
  (r) => r.status === "fulfilled"
336
- ).map((r) => r.value).filter((s) => !s.deleted);
337
- if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
338
- skills = await Promise.all(
339
- skills.map(async (skill) => {
340
- const versionId = evalRun.skillVersions?.[skill.id];
341
- if (versionId) {
342
- const version = await api.getSkillVersion(
343
- projectId2,
344
- skill.id,
345
- versionId
346
- );
347
- return { ...skill, latestVersion: version };
348
- }
349
- return skill;
350
- })
351
- );
350
+ ).map((r) => r.value).filter((c) => !c.deleted);
351
+ const pinned = evalRun.capabilityVersions ?? {};
352
+ const withVersions = await Promise.all(
353
+ capabilities.map(async (cap) => {
354
+ const versionId = pinned[cap.id];
355
+ if (versionId) {
356
+ const version = await api.getCapabilityVersion(
357
+ projectId2,
358
+ cap.id,
359
+ versionId
360
+ );
361
+ return { ...cap, latestVersion: version };
362
+ }
363
+ if (cap.source) {
364
+ return { ...cap, latestVersion: void 0 };
365
+ }
366
+ return cap;
367
+ })
368
+ );
369
+ for (const cap of withVersions) {
370
+ switch (cap.capabilityType) {
371
+ case "SKILL":
372
+ skills.push(capabilityToSkillWithLatestVersion(cap));
373
+ break;
374
+ case "SUB_AGENT":
375
+ subAgents.push(capabilityToSubAgent(cap));
376
+ break;
377
+ case "RULE":
378
+ rules.push(capabilityToRule(cap));
379
+ break;
380
+ case "MCP":
381
+ mcps.push(capabilityToMcp(cap));
382
+ break;
383
+ }
352
384
  }
353
- skills = skills.map((skill) => {
354
- const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
355
- if (!hasPinnedVersion && skill.source) {
356
- return { ...skill, latestVersion: void 0 };
385
+ } else {
386
+ const resolvedSkillIds = evalRun.skillIds ?? [];
387
+ if (resolvedSkillIds.length > 0) {
388
+ const fetchResults = await Promise.allSettled(
389
+ resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
390
+ );
391
+ skills = fetchResults.filter(
392
+ (r) => r.status === "fulfilled"
393
+ ).map((r) => r.value).filter((s) => !s.deleted);
394
+ if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
395
+ skills = await Promise.all(
396
+ skills.map(async (skill) => {
397
+ const versionId = evalRun.skillVersions?.[skill.id];
398
+ if (versionId) {
399
+ const version = await api.getSkillVersion(
400
+ projectId2,
401
+ skill.id,
402
+ versionId
403
+ );
404
+ return { ...skill, latestVersion: version };
405
+ }
406
+ return skill;
407
+ })
408
+ );
357
409
  }
358
- return skill;
359
- });
360
- }
361
- let mcps = [];
362
- if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
363
- mcps = await Promise.all(
364
- evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
365
- );
366
- }
367
- let subAgents = [];
368
- if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
369
- subAgents = await Promise.all(
370
- evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
371
- );
372
- }
373
- let rules = [];
374
- if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
375
- rules = await Promise.all(
376
- evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
377
- );
410
+ skills = skills.map((skill) => {
411
+ const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
412
+ if (!hasPinnedVersion && skill.source) {
413
+ return { ...skill, latestVersion: void 0 };
414
+ }
415
+ return skill;
416
+ });
417
+ }
418
+ if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
419
+ mcps = await Promise.all(
420
+ evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
421
+ );
422
+ }
423
+ if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
424
+ subAgents = await Promise.all(
425
+ evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
426
+ );
427
+ }
428
+ if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
429
+ rules = await Promise.all(
430
+ evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
431
+ );
432
+ }
378
433
  }
379
434
  const templateIds = [
380
435
  ...new Set(