@wix/evalforge-evaluator 0.30.0 → 0.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +62 -4
- package/build/index.js.map +3 -3
- package/build/index.mjs +62 -4
- package/build/index.mjs.map +3 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6599,6 +6599,49 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
|
6599
6599
|
});
|
|
6600
6600
|
}
|
|
6601
6601
|
}
|
|
6602
|
+
/**
 * Build a short, human-readable progress label for a tool invocation.
 *
 * The label depends on the tool name and, when available, on one argument
 * of the call (task description, shell command, search query, directory,
 * or file path). Argument values are clipped to a fixed length and an
 * ellipsis is appended whenever clipping actually removed characters.
 *
 * This function never throws: any problem while reading the arguments
 * results in the generic `Using <toolName>...` label.
 *
 * @param {string} [toolName] - Name of the tool being invoked.
 * @param {string|object} [toolArgs] - Tool arguments, either as a JSON
 *   string or as an already-parsed object.
 * @returns {string} A one-line description of the current action.
 */
function extractToolActionDescription(toolName, toolArgs) {
  if (!toolName) {
    return "Using tool...";
  }
  const fallback = `Using ${toolName}...`;
  if (!toolArgs) {
    return fallback;
  }

  // Clip a value for display; the ellipsis signals that text was cut off.
  const clip = (value, maxLength) => {
    const text = String(value);
    return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
  };

  try {
    // Accept both a JSON string and an already-parsed arguments object.
    const args = typeof toolArgs === "string" ? JSON.parse(toolArgs) : toolArgs;
    // Valid JSON can still be null or a primitive; there is nothing to read then.
    if (args === null || typeof args !== "object") {
      return fallback;
    }

    if ((toolName === "Task" || toolName === "dispatch_agent") && args.description) {
      return `Task: ${clip(args.description, 55)}`;
    }
    if ((toolName === "Bash" || toolName === "bash" || toolName === "execute") && args.command) {
      return `Running: ${clip(args.command, 50)}`;
    }
    if (toolName === "Search" || toolName === "search" || toolName === "Grep") {
      const query = args.query || args.pattern || args.search;
      if (query) {
        return `Searching: ${clip(query, 40)}`;
      }
    }
    if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
      // With no explicit target the label shows "." as the directory.
      return `Listing: ${clip(args.path || args.directory || ".", 50)}`;
    }

    const filePath = args.file_path || args.path || args.target_file;
    if ((toolName === "Read" || toolName === "read" || toolName === "View") && filePath) {
      return `Reading: ${clip(filePath, 50)}`;
    }
    if ((toolName === "Write" || toolName === "Edit" || toolName === "write") && filePath) {
      return `Writing: ${clip(filePath, 50)}`;
    }
  } catch {
    // Deliberate best effort: malformed JSON or an unreadable argument value
    // must not break progress reporting, so use the generic label instead.
  }
  return fallback;
}
|
|
6602
6645
|
async function pushTraceEvent(url2, event, routeHeader, authToken) {
|
|
6603
6646
|
try {
|
|
6604
6647
|
const headers = {
|
|
@@ -6932,15 +6975,27 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6932
6975
|
}, SDK_TIMEOUT_MS);
|
|
6933
6976
|
});
|
|
6934
6977
|
if (traceContext) {
|
|
6978
|
+
let lastReportedAction = "";
|
|
6979
|
+
let sameActionCount = 0;
|
|
6935
6980
|
heartbeatHandle = setInterval(() => {
|
|
6936
6981
|
const elapsedMs = Date.now() - executionStartTime;
|
|
6937
6982
|
let progressMessage = lastAction;
|
|
6938
|
-
if (
|
|
6983
|
+
if (lastAction === lastReportedAction) {
|
|
6984
|
+
sameActionCount++;
|
|
6985
|
+
} else {
|
|
6986
|
+
sameActionCount = 1;
|
|
6987
|
+
lastReportedAction = lastAction;
|
|
6988
|
+
}
|
|
6989
|
+
const isTaskTool = lastToolName === "Task" || lastToolName === "dispatch_agent";
|
|
6990
|
+
if (isTaskTool && sameActionCount > 1) {
|
|
6991
|
+
progressMessage = `Waiting for ${lastAction}`;
|
|
6992
|
+
} else if (lastToolName && lastFilePath) {
|
|
6939
6993
|
progressMessage = `${lastToolName}: ${lastFilePath}`;
|
|
6940
|
-
} else if (lastToolName) {
|
|
6994
|
+
} else if (lastToolName && !isTaskTool) {
|
|
6941
6995
|
progressMessage = `Using ${lastToolName}...`;
|
|
6942
6996
|
}
|
|
6943
|
-
|
|
6997
|
+
const elapsedSec = Math.round(elapsedMs / 1e3);
|
|
6998
|
+
progressMessage += ` (${elapsedSec}s, step ${traceStepNumber})`;
|
|
6944
6999
|
const progressEvent = {
|
|
6945
7000
|
evalRunId: traceContext.evalRunId,
|
|
6946
7001
|
scenarioId: traceContext.scenarioId,
|
|
@@ -7001,7 +7056,10 @@ IMPORTANT: This is an automated evaluation run. Execute the requested changes im
|
|
|
7001
7056
|
if (traceEvent.type === import_evalforge_types.LiveTraceEventType.THINKING) {
|
|
7002
7057
|
lastAction = "Thinking...";
|
|
7003
7058
|
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.TOOL_USE) {
|
|
7004
|
-
lastAction =
|
|
7059
|
+
lastAction = extractToolActionDescription(
|
|
7060
|
+
traceEvent.toolName,
|
|
7061
|
+
traceEvent.toolArgs
|
|
7062
|
+
);
|
|
7005
7063
|
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_WRITE) {
|
|
7006
7064
|
lastAction = `Writing: ${traceEvent.filePath || "file"}`;
|
|
7007
7065
|
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_READ) {
|