@wix/evalforge-evaluator 0.120.0 → 0.121.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +3 -90
- package/build/index.js.map +2 -2
- package/build/index.mjs +3 -90
- package/build/index.mjs.map +2 -2
- package/build/types/api-client.d.ts +1 -2
- package/build/types/fetch-evaluation-data.d.ts +1 -5
- package/package.json +5 -5
package/build/index.js
CHANGED
|
@@ -186,9 +186,6 @@ function createApiClient(serverUrl, options = "") {
|
|
|
186
186
|
getPreset(projectId2, id) {
|
|
187
187
|
return fetchJson(`/projects/${projectId2}/presets/${id}`);
|
|
188
188
|
},
|
|
189
|
-
getAssertion(projectId2, id) {
|
|
190
|
-
return fetchJson(`/projects/${projectId2}/assertions/${id}`);
|
|
191
|
-
},
|
|
192
189
|
addResult(projectId2, evalRunId2, result) {
|
|
193
190
|
return postJson(
|
|
194
191
|
`/projects/${projectId2}/eval-runs/${evalRunId2}/results`,
|
|
@@ -250,9 +247,6 @@ function resolveValue(value, placeholders) {
|
|
|
250
247
|
}
|
|
251
248
|
return value;
|
|
252
249
|
}
|
|
253
|
-
function resolvePlaceholdersInString(text, placeholders) {
|
|
254
|
-
return resolveValue(text, placeholders);
|
|
255
|
-
}
|
|
256
250
|
|
|
257
251
|
// src/fetch-evaluation-data.ts
|
|
258
252
|
function parseSkillNamesFromParams(value) {
|
|
@@ -265,59 +259,6 @@ function parseSkillNamesFromParams(value) {
|
|
|
265
259
|
}
|
|
266
260
|
return [];
|
|
267
261
|
}
|
|
268
|
-
function applyParamsToAssertion(assertion, params) {
|
|
269
|
-
if (!params || Object.keys(params).length === 0) {
|
|
270
|
-
return assertion;
|
|
271
|
-
}
|
|
272
|
-
if (assertion.type === "llm_judge") {
|
|
273
|
-
const stringParams = {};
|
|
274
|
-
for (const [key, value] of Object.entries(params)) {
|
|
275
|
-
stringParams[key] = String(value ?? "");
|
|
276
|
-
}
|
|
277
|
-
const prompt = resolvePlaceholdersInString(assertion.prompt, stringParams);
|
|
278
|
-
return {
|
|
279
|
-
...assertion,
|
|
280
|
-
prompt,
|
|
281
|
-
...params.model !== void 0 && { model: params.model },
|
|
282
|
-
...params.maxTokens !== void 0 && {
|
|
283
|
-
maxTokens: params.maxTokens
|
|
284
|
-
},
|
|
285
|
-
...params.temperature !== void 0 && {
|
|
286
|
-
temperature: params.temperature
|
|
287
|
-
},
|
|
288
|
-
...params.minScore !== void 0 && {
|
|
289
|
-
minScore: params.minScore
|
|
290
|
-
}
|
|
291
|
-
};
|
|
292
|
-
}
|
|
293
|
-
if (assertion.type === "time_limit" && params.maxDurationMs !== void 0) {
|
|
294
|
-
return {
|
|
295
|
-
...assertion,
|
|
296
|
-
maxDurationMs: params.maxDurationMs
|
|
297
|
-
};
|
|
298
|
-
}
|
|
299
|
-
if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
|
|
300
|
-
return {
|
|
301
|
-
...assertion,
|
|
302
|
-
skillNames: parseSkillNamesFromParams(params.skillNames)
|
|
303
|
-
};
|
|
304
|
-
}
|
|
305
|
-
if (assertion.type === "tool_called_with_param") {
|
|
306
|
-
return {
|
|
307
|
-
...assertion,
|
|
308
|
-
...params.toolName !== void 0 && {
|
|
309
|
-
toolName: params.toolName
|
|
310
|
-
},
|
|
311
|
-
...params.expectedParams !== void 0 && {
|
|
312
|
-
expectedParams: params.expectedParams
|
|
313
|
-
},
|
|
314
|
-
...params.requireSuccess !== void 0 && {
|
|
315
|
-
requireSuccess: params.requireSuccess
|
|
316
|
-
}
|
|
317
|
-
};
|
|
318
|
-
}
|
|
319
|
-
return { ...assertion, ...params };
|
|
320
|
-
}
|
|
321
262
|
function resolveSystemAssertion(assertionId, params) {
|
|
322
263
|
const systemAssertion = import_evalforge_types.SYSTEM_ASSERTIONS[assertionId];
|
|
323
264
|
let baseAssertion;
|
|
@@ -372,18 +313,6 @@ function resolveSystemAssertion(assertionId, params) {
|
|
|
372
313
|
}
|
|
373
314
|
return baseAssertion;
|
|
374
315
|
}
|
|
375
|
-
function customAssertionToAssertion(ca, params) {
|
|
376
|
-
const config = ca.config;
|
|
377
|
-
const baseAssertion = {
|
|
378
|
-
type: "llm_judge",
|
|
379
|
-
prompt: config?.prompt ?? "",
|
|
380
|
-
minScore: config?.minScore,
|
|
381
|
-
model: config?.model,
|
|
382
|
-
maxTokens: config?.maxTokens,
|
|
383
|
-
temperature: config?.temperature
|
|
384
|
-
};
|
|
385
|
-
return applyParamsToAssertion(baseAssertion, params);
|
|
386
|
-
}
|
|
387
316
|
async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
388
317
|
const evalRun = await api.getEvalRun(projectId2, evalRunId2);
|
|
389
318
|
const scenarios = await Promise.all(
|
|
@@ -453,30 +382,14 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
453
382
|
templateIds.map((id) => api.getTemplate(projectId2, id))
|
|
454
383
|
) : [];
|
|
455
384
|
const templateMap = new Map(templates.map((t) => [t.id, t]));
|
|
456
|
-
const assertionIds = [
|
|
457
|
-
...new Set(
|
|
458
|
-
scenarios.flatMap((s) => s.assertionLinks ?? []).map((link) => link.assertionId).filter((id) => !(0, import_evalforge_types.isSystemAssertionId)(id))
|
|
459
|
-
)
|
|
460
|
-
];
|
|
461
|
-
const assertions = assertionIds.length > 0 ? await Promise.all(
|
|
462
|
-
assertionIds.map((id) => api.getAssertion(projectId2, id))
|
|
463
|
-
) : [];
|
|
464
|
-
const assertionMap = new Map(assertions.map((a) => [a.id, a]));
|
|
465
385
|
const scenarioItems = scenarios.map((scenario) => {
|
|
466
386
|
const resolvedAssertions = (scenario.assertionLinks ?? []).map((link) => {
|
|
467
387
|
const { assertionId, params } = link;
|
|
468
|
-
if ((0, import_evalforge_types.isSystemAssertionId)(assertionId)) {
|
|
469
|
-
return resolveSystemAssertion(
|
|
470
|
-
assertionId,
|
|
471
|
-
params
|
|
472
|
-
);
|
|
473
|
-
}
|
|
474
|
-
const customAssertion = assertionMap.get(assertionId);
|
|
475
|
-
if (!customAssertion) {
|
|
388
|
+
if (!(0, import_evalforge_types.isSystemAssertionId)(assertionId)) {
|
|
476
389
|
return null;
|
|
477
390
|
}
|
|
478
|
-
return customAssertionToAssertion(
|
|
479
|
-
customAssertion,
|
|
391
|
+
return resolveSystemAssertion(
|
|
392
|
+
assertionId,
|
|
480
393
|
params
|
|
481
394
|
);
|
|
482
395
|
}).filter((a) => a !== null);
|