@wix/evalforge-evaluator 0.141.0 → 0.142.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -4766,6 +4766,7 @@ import { randomUUID as randomUUID5 } from "crypto";
4766
4766
  async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4767
4767
  const runsPerScenario = evalData.evalRun.runsPerScenario ?? 1;
4768
4768
  let completedExecutions = 0;
4769
+ let erroredExecutions = 0;
4769
4770
  const totalExecutions = scenarioItems.length * runsPerScenario;
4770
4771
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
4771
4772
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
@@ -4802,6 +4803,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4802
4803
  iterationIndex
4803
4804
  };
4804
4805
  await callbacks.addResult(errorResult);
4806
+ erroredExecutions++;
4805
4807
  }
4806
4808
  if (scenarioResult !== null) {
4807
4809
  await callbacks.addResult({ ...scenarioResult, iterationIndex });
@@ -4809,7 +4811,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4809
4811
  completedExecutions++;
4810
4812
  }
4811
4813
  }
4812
- return { completedExecutions, totalExecutions };
4814
+ return { completedExecutions, totalExecutions, erroredExecutions };
4813
4815
  }
4814
4816
 
4815
4817
  // src/error-reporter.ts
@@ -4993,50 +4995,46 @@ async function runEvaluation(projectId2, evalRunId2) {
4993
4995
  );
4994
4996
  }
4995
4997
  const skillNames = evalData.skills.map((s) => s.name).join(", ");
4996
- const { completedExecutions, totalExecutions } = await runEvaluationLoop(
4997
- scenarioItems,
4998
- evalData,
4999
- {
5000
- runScenario: (scenario, template, resolvedAssertions) => {
5001
- state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
5002
- state.currentContext = {
5003
- projectId: projectId2,
5004
- evalRunId: evalRunId2,
5005
- scenarioId: scenario.id,
5006
- scenarioName: scenario.name,
5007
- presetId: evalData.evalRun.presetId,
5008
- presetName: evalData.presetName,
5009
- agentId: agent?.id,
5010
- agentName: agent?.name
5011
- };
5012
- return runScenario(
5013
- config,
5014
- evalRunId2,
5015
- scenario,
5016
- evalData,
5017
- template,
5018
- resolvedAssertions
5019
- );
5020
- },
5021
- addResult: async (result) => {
5022
- state.currentPhase = ExecutionPhase.ADD_RESULT;
5023
- state.currentContext = {
5024
- ...state.currentContext,
5025
- resultId: result.id
5026
- };
5027
- await api.addResult(projectId2, evalRunId2, result);
5028
- },
5029
- onProgress: (completed, total, iterLabel) => {
5030
- console.log(
5031
- "[Evaluator] Running scenario with preset:",
5032
- evalData.presetName,
5033
- skillNames ? `(${skillNames})` : "",
5034
- agent ? `with agent: ${agent.name}` : "",
5035
- `(${completed}/${total})${iterLabel}`
5036
- );
5037
- }
4998
+ const { completedExecutions, totalExecutions, erroredExecutions } = await runEvaluationLoop(scenarioItems, evalData, {
4999
+ runScenario: (scenario, template, resolvedAssertions) => {
5000
+ state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
5001
+ state.currentContext = {
5002
+ projectId: projectId2,
5003
+ evalRunId: evalRunId2,
5004
+ scenarioId: scenario.id,
5005
+ scenarioName: scenario.name,
5006
+ presetId: evalData.evalRun.presetId,
5007
+ presetName: evalData.presetName,
5008
+ agentId: agent?.id,
5009
+ agentName: agent?.name
5010
+ };
5011
+ return runScenario(
5012
+ config,
5013
+ evalRunId2,
5014
+ scenario,
5015
+ evalData,
5016
+ template,
5017
+ resolvedAssertions
5018
+ );
5019
+ },
5020
+ addResult: async (result) => {
5021
+ state.currentPhase = ExecutionPhase.ADD_RESULT;
5022
+ state.currentContext = {
5023
+ ...state.currentContext,
5024
+ resultId: result.id
5025
+ };
5026
+ await api.addResult(projectId2, evalRunId2, result);
5027
+ },
5028
+ onProgress: (completed, total, iterLabel) => {
5029
+ console.log(
5030
+ "[Evaluator] Running scenario with preset:",
5031
+ evalData.presetName,
5032
+ skillNames ? `(${skillNames})` : "",
5033
+ agent ? `with agent: ${agent.name}` : "",
5034
+ `(${completed}/${total})${iterLabel}`
5035
+ );
5038
5036
  }
5039
- );
5037
+ });
5040
5038
  state.currentPhase = ExecutionPhase.UPDATE_STATUS;
5041
5039
  state.currentContext = {
5042
5040
  projectId: projectId2,
@@ -5044,14 +5042,16 @@ async function runEvaluation(projectId2, evalRunId2) {
5044
5042
  completedExecutions,
5045
5043
  totalExecutions
5046
5044
  };
5045
+ const allFailed = totalExecutions > 0 && erroredExecutions === totalExecutions;
5046
+ const finalStatus = allFailed ? EvalStatus2.FAILED : EvalStatus2.COMPLETED;
5047
5047
  try {
5048
5048
  await api.updateEvalRun(projectId2, evalRunId2, {
5049
- status: EvalStatus2.COMPLETED,
5049
+ status: finalStatus,
5050
5050
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
5051
5051
  });
5052
5052
  } catch (updateErr) {
5053
5053
  throw new Error(
5054
- `[${ExecutionPhase.UPDATE_STATUS}] Failed to update eval run status to COMPLETED: ${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
5054
+ `[${ExecutionPhase.UPDATE_STATUS}] Failed to update eval run status to ${finalStatus}: ${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
5055
5055
  );
5056
5056
  }
5057
5057
  }