@wix/evalforge-evaluator 0.183.0 → 0.184.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -8670,9 +8670,12 @@ function processMessages(timestampedMessages, startTime, endTime) {
8670
8670
  if (!step.toolCalls) continue;
8671
8671
  for (const tc of step.toolCalls) {
8672
8672
  if (tc.toolUseId && toolResultErrors.has(tc.toolUseId)) {
8673
- step.hasToolError = true;
8674
- step.toolErrorContent = toolResultErrors.get(tc.toolUseId);
8675
- break;
8673
+ tc.isError = true;
8674
+ tc.errorContent = toolResultErrors.get(tc.toolUseId);
8675
+ if (!step.hasToolError) {
8676
+ step.hasToolError = true;
8677
+ step.toolErrorContent = tc.errorContent;
8678
+ }
8676
8679
  }
8677
8680
  }
8678
8681
  }
@@ -8776,8 +8779,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8776
8779
  },
8777
8780
  costUsd: stepCost / totalSubSteps,
8778
8781
  outputPreview: step.thinking?.slice(0, 200),
8779
- success: isSuccess,
8780
- error: errorMsg
8782
+ success: true,
8783
+ error: void 0
8781
8784
  });
8782
8785
  }
8783
8786
  if (toolCallCount > 0) {
@@ -8787,6 +8790,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8787
8790
  const toolBudgetSteps = toolSubSteps + textSubSteps;
8788
8791
  const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
8789
8792
  const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
8793
+ const toolSuccess = !tc.isError;
8794
+ const toolError = tc.isError ? tc.errorContent ?? "Tool call failed" : void 0;
8790
8795
  subSteps.push({
8791
8796
  id: (0, import_crypto2.randomUUID)(),
8792
8797
  stepNumber: 0,
@@ -8811,8 +8816,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8811
8816
  toolName: tc.toolName,
8812
8817
  toolArguments: JSON.stringify(tc.args),
8813
8818
  outputPreview: tcIdx === 0 && !hasText ? (step.text || step.thinking)?.slice(0, 200) : void 0,
8814
- success: isSuccess,
8815
- error: errorMsg
8819
+ success: toolSuccess,
8820
+ error: toolError
8816
8821
  });
8817
8822
  }
8818
8823
  }
@@ -8833,8 +8838,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8833
8838
  },
8834
8839
  costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
8835
8840
  outputPreview: step.text?.slice(0, 200),
8836
- success: isSuccess,
8837
- error: errorMsg
8841
+ success: true,
8842
+ error: void 0
8838
8843
  });
8839
8844
  }
8840
8845
  if (subSteps.length === 0) {