@wix/evalforge-evaluator 0.87.0 → 0.88.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +20 -3
- package/build/index.js.map +2 -2
- package/build/index.mjs +21 -4
- package/build/index.mjs.map +2 -2
- package/package.json +4 -4
package/build/index.mjs
CHANGED
|
@@ -213,7 +213,21 @@ function applyParamsToAssertion(assertion, params) {
|
|
|
213
213
|
);
|
|
214
214
|
}
|
|
215
215
|
}
|
|
216
|
-
return {
|
|
216
|
+
return {
|
|
217
|
+
...assertion,
|
|
218
|
+
prompt,
|
|
219
|
+
systemPrompt,
|
|
220
|
+
...params.model !== void 0 && { model: params.model },
|
|
221
|
+
...params.maxTokens !== void 0 && {
|
|
222
|
+
maxTokens: params.maxTokens
|
|
223
|
+
},
|
|
224
|
+
...params.temperature !== void 0 && {
|
|
225
|
+
temperature: params.temperature
|
|
226
|
+
},
|
|
227
|
+
...params.minScore !== void 0 && {
|
|
228
|
+
minScore: params.minScore
|
|
229
|
+
}
|
|
230
|
+
};
|
|
217
231
|
}
|
|
218
232
|
if (assertion.type === "skill_was_called" && params.skillNames !== void 0) {
|
|
219
233
|
return {
|
|
@@ -245,7 +259,10 @@ function resolveSystemAssertion(assertionId, params) {
|
|
|
245
259
|
type: "llm_judge",
|
|
246
260
|
prompt: params?.prompt ?? "",
|
|
247
261
|
systemPrompt: params?.systemPrompt,
|
|
248
|
-
minScore: params?.minScore
|
|
262
|
+
minScore: params?.minScore,
|
|
263
|
+
model: params?.model,
|
|
264
|
+
maxTokens: params?.maxTokens,
|
|
265
|
+
temperature: params?.temperature
|
|
249
266
|
};
|
|
250
267
|
break;
|
|
251
268
|
default:
|
|
@@ -381,7 +398,7 @@ async function fetchEvaluationData(api, projectId2, evalRunId2) {
|
|
|
381
398
|
// src/run-scenario/index.ts
|
|
382
399
|
import {
|
|
383
400
|
AssertionResultStatus,
|
|
384
|
-
|
|
401
|
+
DEFAULT_JUDGE_MODEL
|
|
385
402
|
} from "@wix/evalforge-types";
|
|
386
403
|
import {
|
|
387
404
|
evaluateAssertions as evaluateAssertionsBase
|
|
@@ -2510,7 +2527,7 @@ async function runScenario(config, evalRunId2, scenario, evalData, template, res
|
|
|
2510
2527
|
}))
|
|
2511
2528
|
};
|
|
2512
2529
|
const { "x-wix-ai-gateway-stream": _stream, ...judgeHeaders } = config.aiGatewayHeaders;
|
|
2513
|
-
const defaultJudgeModel =
|
|
2530
|
+
const defaultJudgeModel = DEFAULT_JUDGE_MODEL;
|
|
2514
2531
|
const assertionContext = {
|
|
2515
2532
|
workDir,
|
|
2516
2533
|
defaultJudgeModel,
|