@wix/evalforge-evaluator 0.141.0 → 0.142.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -4757,6 +4757,7 @@ var import_crypto5 = require("crypto");
4757
4757
  async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4758
4758
  const runsPerScenario = evalData.evalRun.runsPerScenario ?? 1;
4759
4759
  let completedExecutions = 0;
4760
+ let erroredExecutions = 0;
4760
4761
  const totalExecutions = scenarioItems.length * runsPerScenario;
4761
4762
  const targetId = evalData.evalRun.presetId ?? evalData.agent?.id ?? evalData.evalRun.id;
4762
4763
  const targetName = evalData.presetName ?? evalData.agent?.name ?? "";
@@ -4793,6 +4794,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4793
4794
  iterationIndex
4794
4795
  };
4795
4796
  await callbacks.addResult(errorResult);
4797
+ erroredExecutions++;
4796
4798
  }
4797
4799
  if (scenarioResult !== null) {
4798
4800
  await callbacks.addResult({ ...scenarioResult, iterationIndex });
@@ -4800,7 +4802,7 @@ async function runEvaluationLoop(scenarioItems, evalData, callbacks) {
4800
4802
  completedExecutions++;
4801
4803
  }
4802
4804
  }
4803
- return { completedExecutions, totalExecutions };
4805
+ return { completedExecutions, totalExecutions, erroredExecutions };
4804
4806
  }
4805
4807
 
4806
4808
  // src/error-reporter.ts
@@ -4984,50 +4986,46 @@ async function runEvaluation(projectId2, evalRunId2) {
4984
4986
  );
4985
4987
  }
4986
4988
  const skillNames = evalData.skills.map((s) => s.name).join(", ");
4987
- const { completedExecutions, totalExecutions } = await runEvaluationLoop(
4988
- scenarioItems,
4989
- evalData,
4990
- {
4991
- runScenario: (scenario, template, resolvedAssertions) => {
4992
- state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
4993
- state.currentContext = {
4994
- projectId: projectId2,
4995
- evalRunId: evalRunId2,
4996
- scenarioId: scenario.id,
4997
- scenarioName: scenario.name,
4998
- presetId: evalData.evalRun.presetId,
4999
- presetName: evalData.presetName,
5000
- agentId: agent?.id,
5001
- agentName: agent?.name
5002
- };
5003
- return runScenario(
5004
- config,
5005
- evalRunId2,
5006
- scenario,
5007
- evalData,
5008
- template,
5009
- resolvedAssertions
5010
- );
5011
- },
5012
- addResult: async (result) => {
5013
- state.currentPhase = ExecutionPhase.ADD_RESULT;
5014
- state.currentContext = {
5015
- ...state.currentContext,
5016
- resultId: result.id
5017
- };
5018
- await api.addResult(projectId2, evalRunId2, result);
5019
- },
5020
- onProgress: (completed, total, iterLabel) => {
5021
- console.log(
5022
- "[Evaluator] Running scenario with preset:",
5023
- evalData.presetName,
5024
- skillNames ? `(${skillNames})` : "",
5025
- agent ? `with agent: ${agent.name}` : "",
5026
- `(${completed}/${total})${iterLabel}`
5027
- );
5028
- }
4989
+ const { completedExecutions, totalExecutions, erroredExecutions } = await runEvaluationLoop(scenarioItems, evalData, {
4990
+ runScenario: (scenario, template, resolvedAssertions) => {
4991
+ state.currentPhase = ExecutionPhase.EXECUTE_SKILL;
4992
+ state.currentContext = {
4993
+ projectId: projectId2,
4994
+ evalRunId: evalRunId2,
4995
+ scenarioId: scenario.id,
4996
+ scenarioName: scenario.name,
4997
+ presetId: evalData.evalRun.presetId,
4998
+ presetName: evalData.presetName,
4999
+ agentId: agent?.id,
5000
+ agentName: agent?.name
5001
+ };
5002
+ return runScenario(
5003
+ config,
5004
+ evalRunId2,
5005
+ scenario,
5006
+ evalData,
5007
+ template,
5008
+ resolvedAssertions
5009
+ );
5010
+ },
5011
+ addResult: async (result) => {
5012
+ state.currentPhase = ExecutionPhase.ADD_RESULT;
5013
+ state.currentContext = {
5014
+ ...state.currentContext,
5015
+ resultId: result.id
5016
+ };
5017
+ await api.addResult(projectId2, evalRunId2, result);
5018
+ },
5019
+ onProgress: (completed, total, iterLabel) => {
5020
+ console.log(
5021
+ "[Evaluator] Running scenario with preset:",
5022
+ evalData.presetName,
5023
+ skillNames ? `(${skillNames})` : "",
5024
+ agent ? `with agent: ${agent.name}` : "",
5025
+ `(${completed}/${total})${iterLabel}`
5026
+ );
5029
5027
  }
5030
- );
5028
+ });
5031
5029
  state.currentPhase = ExecutionPhase.UPDATE_STATUS;
5032
5030
  state.currentContext = {
5033
5031
  projectId: projectId2,
@@ -5035,14 +5033,16 @@ async function runEvaluation(projectId2, evalRunId2) {
5035
5033
  completedExecutions,
5036
5034
  totalExecutions
5037
5035
  };
5036
+ const allFailed = totalExecutions > 0 && erroredExecutions === totalExecutions;
5037
+ const finalStatus = allFailed ? import_evalforge_types15.EvalStatus.FAILED : import_evalforge_types15.EvalStatus.COMPLETED;
5038
5038
  try {
5039
5039
  await api.updateEvalRun(projectId2, evalRunId2, {
5040
- status: import_evalforge_types15.EvalStatus.COMPLETED,
5040
+ status: finalStatus,
5041
5041
  completedAt: (/* @__PURE__ */ new Date()).toISOString()
5042
5042
  });
5043
5043
  } catch (updateErr) {
5044
5044
  throw new Error(
5045
- `[${ExecutionPhase.UPDATE_STATUS}] Failed to update eval run status to COMPLETED: ${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
5045
+ `[${ExecutionPhase.UPDATE_STATUS}] Failed to update eval run status to ${finalStatus}: ${updateErr instanceof Error ? updateErr.message : String(updateErr)}`
5046
5046
  );
5047
5047
  }
5048
5048
  }