@wix/evalforge-evaluator 0.183.0 → 0.184.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -8701,9 +8701,12 @@ function processMessages(timestampedMessages, startTime, endTime) {
8701
8701
  if (!step.toolCalls) continue;
8702
8702
  for (const tc of step.toolCalls) {
8703
8703
  if (tc.toolUseId && toolResultErrors.has(tc.toolUseId)) {
8704
- step.hasToolError = true;
8705
- step.toolErrorContent = toolResultErrors.get(tc.toolUseId);
8706
- break;
8704
+ tc.isError = true;
8705
+ tc.errorContent = toolResultErrors.get(tc.toolUseId);
8706
+ if (!step.hasToolError) {
8707
+ step.hasToolError = true;
8708
+ step.toolErrorContent = tc.errorContent;
8709
+ }
8707
8710
  }
8708
8711
  }
8709
8712
  }
@@ -8807,8 +8810,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8807
8810
  },
8808
8811
  costUsd: stepCost / totalSubSteps,
8809
8812
  outputPreview: step.thinking?.slice(0, 200),
8810
- success: isSuccess,
8811
- error: errorMsg
8813
+ success: true,
8814
+ error: void 0
8812
8815
  });
8813
8816
  }
8814
8817
  if (toolCallCount > 0) {
@@ -8818,6 +8821,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8818
8821
  const toolBudgetSteps = toolSubSteps + textSubSteps;
8819
8822
  const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
8820
8823
  const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
8824
+ const toolSuccess = !tc.isError;
8825
+ const toolError = tc.isError ? tc.errorContent ?? "Tool call failed" : void 0;
8821
8826
  subSteps.push({
8822
8827
  id: randomUUID(),
8823
8828
  stepNumber: 0,
@@ -8842,8 +8847,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8842
8847
  toolName: tc.toolName,
8843
8848
  toolArguments: JSON.stringify(tc.args),
8844
8849
  outputPreview: tcIdx === 0 && !hasText ? (step.text || step.thinking)?.slice(0, 200) : void 0,
8845
- success: isSuccess,
8846
- error: errorMsg
8850
+ success: toolSuccess,
8851
+ error: toolError
8847
8852
  });
8848
8853
  }
8849
8854
  }
@@ -8864,8 +8869,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
8864
8869
  },
8865
8870
  costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
8866
8871
  outputPreview: step.text?.slice(0, 200),
8867
- success: isSuccess,
8868
- error: errorMsg
8872
+ success: true,
8873
+ error: void 0
8869
8874
  });
8870
8875
  }
8871
8876
  if (subSteps.length === 0) {