@wix/evalforge-evaluator 0.143.0 → 0.144.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +95 -44
- package/build/index.js.map +2 -2
- package/build/index.mjs +100 -45
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +3 -1
- package/package.json +5 -5
package/build/index.mjs
CHANGED
|
@@ -163,6 +163,14 @@ function createApiClient(serverUrl, options = "") {
|
|
|
163
163
|
getPreset(projectId2, id) {
|
|
164
164
|
return fetchJson(`/projects/${projectId2}/presets/${id}`);
|
|
165
165
|
},
|
|
166
|
+
getCapability(projectId2, id) {
|
|
167
|
+
return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
|
|
168
|
+
},
|
|
169
|
+
getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
170
|
+
return fetchJson(
|
|
171
|
+
`/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
|
|
172
|
+
);
|
|
173
|
+
},
|
|
166
174
|
addResult(projectId2, evalRunId2, result) {
|
|
167
175
|
return postJson(
|
|
168
176
|
`/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
|
|
@@ -184,7 +192,11 @@ function createApiClient(serverUrl, options = "") {
|
|
|
184
192
|
import {
|
|
185
193
|
isSystemAssertionId,
|
|
186
194
|
SYSTEM_ASSERTIONS,
|
|
187
|
-
isAllowedBuildCommandString
|
|
195
|
+
isAllowedBuildCommandString,
|
|
196
|
+
capabilityToSkillWithLatestVersion,
|
|
197
|
+
capabilityToSubAgent,
|
|
198
|
+
capabilityToRule,
|
|
199
|
+
capabilityToMcp
|
|
188
200
|
} from "@wix/evalforge-types";
|
|
189
201
|
|
|
190
202
|
// src/resolve-placeholders.ts
|
|
@@ -326,55 +338,98 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
326
338
|
agent = await api.getAgent(projectId2, evalRun.agentId);
|
|
327
339
|
}
|
|
328
340
|
let skills = [];
|
|
329
|
-
|
|
330
|
-
|
|
341
|
+
let mcps = [];
|
|
342
|
+
let subAgents = [];
|
|
343
|
+
let rules = [];
|
|
344
|
+
if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
|
|
331
345
|
const fetchResults = await Promise.allSettled(
|
|
332
|
-
|
|
346
|
+
evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
|
|
333
347
|
);
|
|
334
|
-
|
|
348
|
+
const capabilities = fetchResults.filter(
|
|
335
349
|
(r) => r.status === "fulfilled"
|
|
336
|
-
).map((r) => r.value).filter((
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
350
|
+
).map((r) => r.value).filter((c) => !c.deleted);
|
|
351
|
+
const pinned = evalRun.capabilityVersions ?? {};
|
|
352
|
+
const withVersions = await Promise.all(
|
|
353
|
+
capabilities.map(async (cap) => {
|
|
354
|
+
const versionId = pinned[cap.id];
|
|
355
|
+
if (versionId) {
|
|
356
|
+
const version = await api.getCapabilityVersion(
|
|
357
|
+
projectId2,
|
|
358
|
+
cap.id,
|
|
359
|
+
versionId
|
|
360
|
+
);
|
|
361
|
+
return { ...cap, latestVersion: version };
|
|
362
|
+
}
|
|
363
|
+
if (cap.source) {
|
|
364
|
+
return { ...cap, latestVersion: void 0 };
|
|
365
|
+
}
|
|
366
|
+
return cap;
|
|
367
|
+
})
|
|
368
|
+
);
|
|
369
|
+
for (const cap of withVersions) {
|
|
370
|
+
switch (cap.capabilityType) {
|
|
371
|
+
case "SKILL":
|
|
372
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
373
|
+
break;
|
|
374
|
+
case "SUB_AGENT":
|
|
375
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
376
|
+
break;
|
|
377
|
+
case "RULE":
|
|
378
|
+
rules.push(capabilityToRule(cap));
|
|
379
|
+
break;
|
|
380
|
+
case "MCP":
|
|
381
|
+
mcps.push(capabilityToMcp(cap));
|
|
382
|
+
break;
|
|
383
|
+
}
|
|
352
384
|
}
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
385
|
+
} else {
|
|
386
|
+
const resolvedSkillIds = evalRun.skillIds ?? [];
|
|
387
|
+
if (resolvedSkillIds.length > 0) {
|
|
388
|
+
const fetchResults = await Promise.allSettled(
|
|
389
|
+
resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
|
|
390
|
+
);
|
|
391
|
+
skills = fetchResults.filter(
|
|
392
|
+
(r) => r.status === "fulfilled"
|
|
393
|
+
).map((r) => r.value).filter((s) => !s.deleted);
|
|
394
|
+
if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
|
|
395
|
+
skills = await Promise.all(
|
|
396
|
+
skills.map(async (skill) => {
|
|
397
|
+
const versionId = evalRun.skillVersions?.[skill.id];
|
|
398
|
+
if (versionId) {
|
|
399
|
+
const version = await api.getSkillVersion(
|
|
400
|
+
projectId2,
|
|
401
|
+
skill.id,
|
|
402
|
+
versionId
|
|
403
|
+
);
|
|
404
|
+
return { ...skill, latestVersion: version };
|
|
405
|
+
}
|
|
406
|
+
return skill;
|
|
407
|
+
})
|
|
408
|
+
);
|
|
357
409
|
}
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
)
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
410
|
+
skills = skills.map((skill) => {
|
|
411
|
+
const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
|
|
412
|
+
if (!hasPinnedVersion && skill.source) {
|
|
413
|
+
return { ...skill, latestVersion: void 0 };
|
|
414
|
+
}
|
|
415
|
+
return skill;
|
|
416
|
+
});
|
|
417
|
+
}
|
|
418
|
+
if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
|
|
419
|
+
mcps = await Promise.all(
|
|
420
|
+
evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
|
|
421
|
+
);
|
|
422
|
+
}
|
|
423
|
+
if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
|
|
424
|
+
subAgents = await Promise.all(
|
|
425
|
+
evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
|
|
426
|
+
);
|
|
427
|
+
}
|
|
428
|
+
if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
|
|
429
|
+
rules = await Promise.all(
|
|
430
|
+
evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
|
|
431
|
+
);
|
|
432
|
+
}
|
|
378
433
|
}
|
|
379
434
|
const templateIds = [
|
|
380
435
|
...new Set(
|