@wix/evalforge-evaluator 0.183.0 → 0.184.0
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs
CHANGED
|
@@ -8701,9 +8701,12 @@ function processMessages(timestampedMessages, startTime, endTime) {
|
|
|
8701
8701
|
if (!step.toolCalls) continue;
|
|
8702
8702
|
for (const tc of step.toolCalls) {
|
|
8703
8703
|
if (tc.toolUseId && toolResultErrors.has(tc.toolUseId)) {
|
|
8704
|
-
|
|
8705
|
-
|
|
8706
|
-
|
|
8704
|
+
tc.isError = true;
|
|
8705
|
+
tc.errorContent = toolResultErrors.get(tc.toolUseId);
|
|
8706
|
+
if (!step.hasToolError) {
|
|
8707
|
+
step.hasToolError = true;
|
|
8708
|
+
step.toolErrorContent = tc.errorContent;
|
|
8709
|
+
}
|
|
8707
8710
|
}
|
|
8708
8711
|
}
|
|
8709
8712
|
}
|
|
@@ -8807,8 +8810,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8807
8810
|
},
|
|
8808
8811
|
costUsd: stepCost / totalSubSteps,
|
|
8809
8812
|
outputPreview: step.thinking?.slice(0, 200),
|
|
8810
|
-
success:
|
|
8811
|
-
error:
|
|
8813
|
+
success: true,
|
|
8814
|
+
error: void 0
|
|
8812
8815
|
});
|
|
8813
8816
|
}
|
|
8814
8817
|
if (toolCallCount > 0) {
|
|
@@ -8818,6 +8821,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8818
8821
|
const toolBudgetSteps = toolSubSteps + textSubSteps;
|
|
8819
8822
|
const toolFraction = toolBudgetSteps > 0 ? 1 / toolBudgetSteps : 1;
|
|
8820
8823
|
const remainingFraction = (totalSubSteps - thinkingSubSteps) / totalSubSteps;
|
|
8824
|
+
const toolSuccess = !tc.isError;
|
|
8825
|
+
const toolError = tc.isError ? tc.errorContent ?? "Tool call failed" : void 0;
|
|
8821
8826
|
subSteps.push({
|
|
8822
8827
|
id: randomUUID(),
|
|
8823
8828
|
stepNumber: 0,
|
|
@@ -8842,8 +8847,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8842
8847
|
toolName: tc.toolName,
|
|
8843
8848
|
toolArguments: JSON.stringify(tc.args),
|
|
8844
8849
|
outputPreview: tcIdx === 0 && !hasText ? (step.text || step.thinking)?.slice(0, 200) : void 0,
|
|
8845
|
-
success:
|
|
8846
|
-
error:
|
|
8850
|
+
success: toolSuccess,
|
|
8851
|
+
error: toolError
|
|
8847
8852
|
});
|
|
8848
8853
|
}
|
|
8849
8854
|
}
|
|
@@ -8864,8 +8869,8 @@ function buildLLMTraceFromSteps(steps, totalDurationMs, usage, model) {
|
|
|
8864
8869
|
},
|
|
8865
8870
|
costUsd: stepCost - subSteps.reduce((s, ss) => s + ss.costUsd, 0),
|
|
8866
8871
|
outputPreview: step.text?.slice(0, 200),
|
|
8867
|
-
success:
|
|
8868
|
-
error:
|
|
8872
|
+
success: true,
|
|
8873
|
+
error: void 0
|
|
8869
8874
|
});
|
|
8870
8875
|
}
|
|
8871
8876
|
if (subSteps.length === 0) {
|