@wix/evalforge-evaluator 0.183.0 → 0.184.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/build/index.js
CHANGED
|
@@ -8670,9 +8670,12 @@ function processMessages(timestampedMessages, startTime, endTime) {
|
|
|
8670
8670
|
if (!step.toolCalls) continue;
|
|
8671
8671
|
for (const tc of step.toolCalls) {
|
|
8672
8672
|
if (tc.toolUseId && toolResultErrors.has(tc.toolUseId)) {
|
|
8673
|
-
|
|
8674
|
-
|
|
8675
|
-
|
|
8673
|
+
tc.isError = true;
|
|
8674
|
+
tc.errorContent = toolResultErrors.get(tc.toolUseId);
|
|
8675
|
+
if (!step.hasToolError) {
|
|
8676
|
+
step.hasToolError = true;
|
|
8677
|
+
step.toolErrorContent = tc.errorContent;
|
|
8678
|
+
}
|
|
8676
8679
|
}
|
|
8677
8680
|
}
|
|
8678
8681
|
}
|
|
@@ -8776,8 +8779,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8776
8779
|
},
|
|
8777
8780
|
costUsd: stepCost / totalSubSteps,
|
|
8778
8781
|
outputPreview: step.thinking?.slice(0, 200),
|
|
8779
|
-
success:
|
|
8780
|
-
error:
|
|
8782
|
+
success: true,
|
|
8783
|
+
error: void 0
|
|
8781
8784
|
});
|
|
8782
8785
|
}
|
|
8783
8786
|
if (toolCallCount > 0) {
|
|
@@ -8787,6 +8790,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8787
8790
|
const toolBudgetSteps = toolSubSteps + textSubSteps;
|
|
8788
8791
|
const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
|
|
8789
8792
|
const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
|
|
8793
|
+
const toolSuccess = !tc.isError;
|
|
8794
|
+
const toolError = tc.isError ? tc.errorContent ?? "Tool call failed" : void 0;
|
|
8790
8795
|
subSteps.push({
|
|
8791
8796
|
id: (0, import_crypto2.randomUUID)(),
|
|
8792
8797
|
stepNumber: 0,
|
|
@@ -8811,8 +8816,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8811
8816
|
toolName: tc.toolName,
|
|
8812
8817
|
toolArguments: JSON.stringify(tc.args),
|
|
8813
8818
|
outputPreview: tcIdx === 0 && !hasText ? (step.text || step.thinking)?.slice(0, 200) : void 0,
|
|
8814
|
-
success:
|
|
8815
|
-
error:
|
|
8819
|
+
success: toolSuccess,
|
|
8820
|
+
error: toolError
|
|
8816
8821
|
});
|
|
8817
8822
|
}
|
|
8818
8823
|
}
|
|
@@ -8833,8 +8838,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8833
8838
|
},
|
|
8834
8839
|
costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
|
|
8835
8840
|
outputPreview: step.text?.slice(0, 200),
|
|
8836
|
-
success:
|
|
8837
|
-
error:
|
|
8841
|
+
success: true,
|
|
8842
|
+
error: void 0
|
|
8838
8843
|
});
|
|
8839
8844
|
}
|
|
8840
8845
|
if (subSteps.length === 0) {
|