@wix/evalforge-evaluator 0.143.0 → 0.145.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +49 -68
- package/build/index.js.map +2 -2
- package/build/index.mjs +54 -69
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +3 -7
- package/package.json +5 -5
package/build/index.mjs
CHANGED
|
@@ -132,37 +132,23 @@ function createApiClient(serverUrl, options = "") {
|
|
|
132
132
|
getScenario(projectId2, id) {
|
|
133
133
|
return fetchJson(`/projects/${projectId2}/test-scenarios/${id}`);
|
|
134
134
|
},
|
|
135
|
-
getSkill(projectId2, id) {
|
|
136
|
-
return fetchJson(`/projects/${projectId2}/skills/${id}`);
|
|
137
|
-
},
|
|
138
|
-
getSkillVersion(projectId2, skillId, versionId) {
|
|
139
|
-
return fetchJson(
|
|
140
|
-
`/projects/${projectId2}/skills/${skillId}/versions/${versionId}`
|
|
141
|
-
);
|
|
142
|
-
},
|
|
143
|
-
getLatestSkillVersion(projectId2, skillId) {
|
|
144
|
-
return fetchJson(
|
|
145
|
-
`/projects/${projectId2}/skills/${skillId}/versions/latest`
|
|
146
|
-
);
|
|
147
|
-
},
|
|
148
135
|
getAgent(projectId2, id) {
|
|
149
136
|
return fetchJson(`/projects/${projectId2}/agents/${id}`);
|
|
150
137
|
},
|
|
151
138
|
getTemplate(projectId2, id) {
|
|
152
139
|
return fetchJson(`/projects/${projectId2}/templates/${id}`);
|
|
153
140
|
},
|
|
154
|
-
getMcp(projectId2, id) {
|
|
155
|
-
return fetchJson(`/projects/${projectId2}/mcps/${id}`);
|
|
156
|
-
},
|
|
157
|
-
getSubAgent(projectId2, id) {
|
|
158
|
-
return fetchJson(`/projects/${projectId2}/sub-agents/${id}`);
|
|
159
|
-
},
|
|
160
|
-
getRule(projectId2, id) {
|
|
161
|
-
return fetchJson(`/projects/${projectId2}/rules/${id}`);
|
|
162
|
-
},
|
|
163
141
|
getPreset(projectId2, id) {
|
|
164
142
|
return fetchJson(`/projects/${projectId2}/presets/${id}`);
|
|
165
143
|
},
|
|
144
|
+
getCapability(projectId2, id) {
|
|
145
|
+
return fetchJson(`/projects/${projectId2}/capabilities/${id}`);
|
|
146
|
+
},
|
|
147
|
+
getCapabilityVersion(projectId2, capabilityId, versionId) {
|
|
148
|
+
return fetchJson(
|
|
149
|
+
`/projects/${projectId2}/capabilities/${capabilityId}/versions/${versionId}`
|
|
150
|
+
);
|
|
151
|
+
},
|
|
166
152
|
addResult(projectId2, evalRunId2, result) {
|
|
167
153
|
return postJson(
|
|
168
154
|
`/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
|
|
@@ -184,7 +170,11 @@ function createApiClient(serverUrl, options = "") {
|
|
|
184
170
|
import {
|
|
185
171
|
isSystemAssertionId,
|
|
186
172
|
SYSTEM_ASSERTIONS,
|
|
187
|
-
isAllowedBuildCommandString
|
|
173
|
+
isAllowedBuildCommandString,
|
|
174
|
+
capabilityToSkillWithLatestVersion,
|
|
175
|
+
capabilityToSubAgent,
|
|
176
|
+
capabilityToRule,
|
|
177
|
+
capabilityToMcp
|
|
188
178
|
} from "@wix/evalforge-types";
|
|
189
179
|
|
|
190
180
|
// src/resolve-placeholders.ts
|
|
@@ -326,55 +316,50 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
326
316
|
agent = await api.getAgent(projectId2, evalRun.agentId);
|
|
327
317
|
}
|
|
328
318
|
let skills = [];
|
|
329
|
-
const resolvedSkillIds = evalRun.skillIds ?? [];
|
|
330
|
-
if (resolvedSkillIds.length > 0) {
|
|
331
|
-
const fetchResults = await Promise.allSettled(
|
|
332
|
-
resolvedSkillIds.map((id) => api.getSkill(projectId2, id))
|
|
333
|
-
);
|
|
334
|
-
skills = fetchResults.filter(
|
|
335
|
-
(r) => r.status === "fulfilled"
|
|
336
|
-
).map((r) => r.value).filter((s) => !s.deleted);
|
|
337
|
-
if (evalRun.skillVersions && Object.keys(evalRun.skillVersions).length > 0) {
|
|
338
|
-
skills = await Promise.all(
|
|
339
|
-
skills.map(async (skill) => {
|
|
340
|
-
const versionId = evalRun.skillVersions?.[skill.id];
|
|
341
|
-
if (versionId) {
|
|
342
|
-
const version = await api.getSkillVersion(
|
|
343
|
-
projectId2,
|
|
344
|
-
skill.id,
|
|
345
|
-
versionId
|
|
346
|
-
);
|
|
347
|
-
return { ...skill, latestVersion: version };
|
|
348
|
-
}
|
|
349
|
-
return skill;
|
|
350
|
-
})
|
|
351
|
-
);
|
|
352
|
-
}
|
|
353
|
-
skills = skills.map((skill) => {
|
|
354
|
-
const hasPinnedVersion = evalRun.skillVersions?.[skill.id];
|
|
355
|
-
if (!hasPinnedVersion && skill.source) {
|
|
356
|
-
return { ...skill, latestVersion: void 0 };
|
|
357
|
-
}
|
|
358
|
-
return skill;
|
|
359
|
-
});
|
|
360
|
-
}
|
|
361
319
|
let mcps = [];
|
|
362
|
-
if (evalRun.mcpIds && evalRun.mcpIds.length > 0) {
|
|
363
|
-
mcps = await Promise.all(
|
|
364
|
-
evalRun.mcpIds.map((id) => api.getMcp(projectId2, id))
|
|
365
|
-
);
|
|
366
|
-
}
|
|
367
320
|
let subAgents = [];
|
|
368
|
-
if (evalRun.subAgentIds && evalRun.subAgentIds.length > 0) {
|
|
369
|
-
subAgents = await Promise.all(
|
|
370
|
-
evalRun.subAgentIds.map((id) => api.getSubAgent(projectId2, id))
|
|
371
|
-
);
|
|
372
|
-
}
|
|
373
321
|
let rules = [];
|
|
374
|
-
if (evalRun.ruleIds && evalRun.ruleIds.length > 0) {
|
|
375
|
-
rules = await Promise.all(
|
|
376
|
-
evalRun.ruleIds.map((id) => api.getRule(projectId2, id))
|
|
322
|
+
if (evalRun.capabilityIds && evalRun.capabilityIds.length > 0) {
|
|
323
|
+
const fetchResults = await Promise.allSettled(
|
|
324
|
+
evalRun.capabilityIds.map((id) => api.getCapability(projectId2, id))
|
|
325
|
+
);
|
|
326
|
+
const capabilities = fetchResults.filter(
|
|
327
|
+
(r) => r.status === "fulfilled"
|
|
328
|
+
).map((r) => r.value).filter((c) => !c.deleted);
|
|
329
|
+
const pinned = evalRun.capabilityVersions ?? {};
|
|
330
|
+
const withVersions = await Promise.all(
|
|
331
|
+
capabilities.map(async (cap) => {
|
|
332
|
+
const versionId = pinned[cap.id];
|
|
333
|
+
if (versionId) {
|
|
334
|
+
const version = await api.getCapabilityVersion(
|
|
335
|
+
projectId2,
|
|
336
|
+
cap.id,
|
|
337
|
+
versionId
|
|
338
|
+
);
|
|
339
|
+
return { ...cap, latestVersion: version };
|
|
340
|
+
}
|
|
341
|
+
if (cap.source) {
|
|
342
|
+
return { ...cap, latestVersion: void 0 };
|
|
343
|
+
}
|
|
344
|
+
return cap;
|
|
345
|
+
})
|
|
377
346
|
);
|
|
347
|
+
for (const cap of withVersions) {
|
|
348
|
+
switch (cap.capabilityType) {
|
|
349
|
+
case "SKILL":
|
|
350
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
351
|
+
break;
|
|
352
|
+
case "SUB_AGENT":
|
|
353
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
354
|
+
break;
|
|
355
|
+
case "RULE":
|
|
356
|
+
rules.push(capabilityToRule(cap));
|
|
357
|
+
break;
|
|
358
|
+
case "MCP":
|
|
359
|
+
mcps.push(capabilityToMcp(cap));
|
|
360
|
+
break;
|
|
361
|
+
}
|
|
362
|
+
}
|
|
378
363
|
}
|
|
379
364
|
const templateIds = [
|
|
380
365
|
...new Set(
|
|
@@ -4987,7 +4972,7 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
4987
4972
|
hasAgent: !!agent,
|
|
4988
4973
|
agentId: evalData.evalRun.agentId,
|
|
4989
4974
|
presetId: evalData.evalRun.presetId,
|
|
4990
|
-
|
|
4975
|
+
capabilityIds: evalData.evalRun.capabilityIds
|
|
4991
4976
|
};
|
|
4992
4977
|
if (scenarioItems.length > 0 && !agent) {
|
|
4993
4978
|
throw new Error(
|