@wix/evalforge-evaluator 0.154.0 → 0.155.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +26 -3
- package/build/index.js.map +2 -2
- package/build/index.mjs +26 -3
- package/build/index.mjs.map +2 -2
- package/build/types/evaluation-loop.d.ts +6 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -4976,6 +4976,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
|
|
|
4976
4976
|
const runsPerScenario = evalData.evalRun.runsPerScenario ?? 1;
|
|
4977
4977
|
let completedExecutions = 0;
|
|
4978
4978
|
let erroredExecutions = 0;
|
|
4979
|
+
let firstErrorMessage;
|
|
4979
4980
|
const totalExecutions = scenarioItems.length * runsPerScenario;
|
|
4980
4981
|
const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
|
|
4981
4982
|
const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
|
|
@@ -5013,6 +5014,9 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
|
|
|
5013
5014
|
};
|
|
5014
5015
|
await callbacks.addResult(errorResult);
|
|
5015
5016
|
erroredExecutions++;
|
|
5017
|
+
if (firstErrorMessage === void 0) {
|
|
5018
|
+
firstErrorMessage = errorResult.outputText;
|
|
5019
|
+
}
|
|
5016
5020
|
}
|
|
5017
5021
|
if (scenarioResult !== null) {
|
|
5018
5022
|
await callbacks.addResult({ ...scenarioResult, iterationIndex });
|
|
@@ -5020,7 +5024,12 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
|
|
|
5020
5024
|
completedExecutions++;
|
|
5021
5025
|
}
|
|
5022
5026
|
}
|
|
5023
|
-
return {
|
|
5027
|
+
return {
|
|
5028
|
+
completedExecutions,
|
|
5029
|
+
totalExecutions,
|
|
5030
|
+
erroredExecutions,
|
|
5031
|
+
firstErrorMessage
|
|
5032
|
+
};
|
|
5024
5033
|
}
|
|
5025
5034
|
|
|
5026
5035
|
// src/error-reporter.ts
|
|
@@ -5204,7 +5213,12 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
5204
5213
|
);
|
|
5205
5214
|
}
|
|
5206
5215
|
const skillNames = evalData.skills.map((s) => s.name).join(", ");
|
|
5207
|
-
const {
|
|
5216
|
+
const {
|
|
5217
|
+
completedExecutions,
|
|
5218
|
+
totalExecutions,
|
|
5219
|
+
erroredExecutions,
|
|
5220
|
+
firstErrorMessage
|
|
5221
|
+
} = await runEvaluationLoop(scenarioItems, evalData, {
|
|
5208
5222
|
runScenario: (scenario, template, resolvedAssertions) => {
|
|
5209
5223
|
state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
|
|
5210
5224
|
state.currentContext = {
|
|
@@ -5253,10 +5267,14 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
5253
5267
|
};
|
|
5254
5268
|
const allFailed = totalExecutions > 0 && erroredExecutions === totalExecutions;
|
|
5255
5269
|
const finalStatus = allFailed ? import_evalforge_types15.EvalStatus.FAILED : import_evalforge_types15.EvalStatus.COMPLETED;
|
|
5270
|
+
const jobErrorOnAllFailed = allFailed ? truncateForJobError(
|
|
5271
|
+
firstErrorMessage ?? `All ${totalExecutions} executions errored without an error message`
|
|
5272
|
+
) : void 0;
|
|
5256
5273
|
try {
|
|
5257
5274
|
await api.updateEvalRun(projectId2, evalRunId2, {
|
|
5258
5275
|
status: finalStatus,
|
|
5259
|
-
completedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
5276
|
+
completedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
5277
|
+
...allFailed ? { jobError: jobErrorOnAllFailed, jobStatus: "FAILED" } : {}
|
|
5260
5278
|
});
|
|
5261
5279
|
} catch (updateErr) {
|
|
5262
5280
|
throw new Error(
|
|
@@ -5264,6 +5282,11 @@ async function runEvaluation(projectId2, evalRunId2) {
|
|
|
5264
5282
|
);
|
|
5265
5283
|
}
|
|
5266
5284
|
}
|
|
5285
|
+
/**
 * Maximum number of UTF-16 code units of the original message that are kept
 * when a message is shortened by truncateForJobError.
 */
var JOB_ERROR_MAX_LENGTH = 1e3;

/**
 * Shortens an error message so it can be stored as a job error.
 *
 * Messages of at most JOB_ERROR_MAX_LENGTH code units are returned unchanged.
 * Longer messages are cut and suffixed with "… [truncated]" (so the returned
 * string may be longer than JOB_ERROR_MAX_LENGTH by the size of that marker).
 *
 * @param {string} message - The error message to shorten.
 * @returns {string} The original message, or its truncated form with marker.
 */
function truncateForJobError(message) {
  if (message.length <= JOB_ERROR_MAX_LENGTH) return message;
  let cutIndex = JOB_ERROR_MAX_LENGTH;
  // String indices count UTF-16 code units. If the cut lands between the two
  // halves of a surrogate pair, keeping the high half alone would produce an
  // ill-formed string, so drop the dangling half as well.
  const lastKept = message.charCodeAt(cutIndex - 1);
  const firstDropped = message.charCodeAt(cutIndex);
  const splitsSurrogatePair =
    lastKept >= 0xd800 &&
    lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 &&
    firstDropped <= 0xdfff;
  if (splitsSurrogatePair) {
    cutIndex -= 1;
  }
  return `${message.slice(0, cutIndex)}\u2026 [truncated]`;
}
|
|
5267
5290
|
var projectId = process.argv[2];
|
|
5268
5291
|
var evalRunId = process.argv[3];
|
|
5269
5292
|
console.error(
|