@wix/evalforge-evaluator 0.29.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +71 -1
- package/build/index.js.map +3 -3
- package/build/index.mjs +71 -1
- package/build/index.mjs.map +3 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -6599,6 +6599,48 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
|
|
|
6599
6599
|
});
|
|
6600
6600
|
}
|
|
6601
6601
|
}
|
|
6602
|
+
/**
 * Builds a short, human-readable description of a tool invocation for use in
 * progress messages.
 *
 * The description is purely cosmetic, so this function never throws: any
 * failure to parse or interpret the arguments falls back to the generic
 * `Using <toolName>...` message.
 *
 * @param {string} [toolName] - Name of the tool being invoked. When falsy the
 *   generic "Using tool..." message is returned.
 * @param {string|object} [toolArgs] - Tool arguments, either as a JSON-encoded
 *   string or as an already-parsed object.
 * @returns {string} A one-line description; long values are truncated and
 *   marked with a trailing "...".
 */
function extractToolActionDescription(toolName, toolArgs) {
  if (!toolName) {
    return "Using tool...";
  }

  const fallback = `Using ${toolName}...`;
  if (!toolArgs) {
    return fallback;
  }

  // Truncate to `max` characters, always marking truncation so a clipped
  // value is never mistaken for the complete one.
  const clip = (value, max) => {
    const text = String(value);
    return text.length > max ? `${text.slice(0, max)}...` : text;
  };

  try {
    // Only strings need decoding; JSON.parse on an object would coerce it to
    // "[object Object]" and throw, discarding perfectly usable arguments.
    const args = typeof toolArgs === "string" ? JSON.parse(toolArgs) : toolArgs;
    if (args == null) {
      // e.g. the JSON literal `null` - nothing to describe.
      return fallback;
    }

    if ((toolName === "Task" || toolName === "dispatch_agent") && args.description) {
      return clip(args.description, 60);
    }

    if ((toolName === "Bash" || toolName === "bash" || toolName === "execute") && args.command) {
      return `Running: ${clip(args.command, 50)}`;
    }

    if (toolName === "Search" || toolName === "search" || toolName === "Grep") {
      const query = args.query || args.pattern || args.search;
      if (query) {
        return `Searching: ${clip(query, 40)}`;
      }
    }

    if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
      // A listing without an explicit target is reported as the current directory.
      return `Listing: ${clip(args.path || args.directory || ".", 50)}`;
    }

    const targetPath = args.file_path || args.path || args.target_file;

    if ((toolName === "Read" || toolName === "read" || toolName === "View") && targetPath) {
      return `Reading: ${clip(targetPath, 50)}`;
    }

    if ((toolName === "Write" || toolName === "Edit" || toolName === "write") && targetPath) {
      return `Writing: ${clip(targetPath, 50)}`;
    }
  } catch {
    // Deliberate best-effort: malformed JSON or unexpected argument shapes
    // must not break progress reporting; fall through to the generic message.
  }

  return fallback;
}
|
|
6602
6644
|
async function pushTraceEvent(url2, event, routeHeader, authToken) {
|
|
6603
6645
|
try {
|
|
6604
6646
|
const headers = {
|
|
@@ -6825,6 +6867,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6825
6867
|
console.log("[SDK-DEBUG] ============================================");
|
|
6826
6868
|
let traceStepNumber = 0;
|
|
6827
6869
|
const traceContext = options.traceContext;
|
|
6870
|
+
let lastAction = "Starting...";
|
|
6871
|
+
let lastToolName;
|
|
6872
|
+
let lastFilePath;
|
|
6828
6873
|
const maxTurns = options.maxTurns ?? 10;
|
|
6829
6874
|
console.error(
|
|
6830
6875
|
"[DEBUG-H5] Claude SDK query START",
|
|
@@ -6931,6 +6976,13 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6931
6976
|
if (traceContext) {
|
|
6932
6977
|
heartbeatHandle = setInterval(() => {
|
|
6933
6978
|
const elapsedMs = Date.now() - executionStartTime;
|
|
6979
|
+
let progressMessage = lastAction;
|
|
6980
|
+
if (lastToolName && lastFilePath) {
|
|
6981
|
+
progressMessage = `${lastToolName}: ${lastFilePath}`;
|
|
6982
|
+
} else if (lastToolName) {
|
|
6983
|
+
progressMessage = `Using ${lastToolName}...`;
|
|
6984
|
+
}
|
|
6985
|
+
progressMessage += ` (${Math.round(elapsedMs / 1e3)}s)`;
|
|
6934
6986
|
const progressEvent = {
|
|
6935
6987
|
evalRunId: traceContext.evalRunId,
|
|
6936
6988
|
scenarioId: traceContext.scenarioId,
|
|
@@ -6939,7 +6991,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
|
|
|
6939
6991
|
targetName: traceContext.targetName,
|
|
6940
6992
|
stepNumber: traceStepNumber,
|
|
6941
6993
|
type: import_evalforge_types.LiveTraceEventType.PROGRESS,
|
|
6942
|
-
outputPreview:
|
|
6994
|
+
outputPreview: progressMessage,
|
|
6995
|
+
toolName: lastToolName,
|
|
6996
|
+
filePath: lastFilePath,
|
|
6943
6997
|
elapsedMs,
|
|
6944
6998
|
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
6945
6999
|
isComplete: false
|
|
@@ -6984,6 +7038,22 @@ IMPORTANT: This is an automated evaluation run. Execute the requested changes im
|
|
|
6984
7038
|
// Not complete yet
|
|
6985
7039
|
);
|
|
6986
7040
|
if (traceEvent) {
|
|
7041
|
+
lastToolName = traceEvent.toolName;
|
|
7042
|
+
lastFilePath = traceEvent.filePath;
|
|
7043
|
+
if (traceEvent.type === import_evalforge_types.LiveTraceEventType.THINKING) {
|
|
7044
|
+
lastAction = "Thinking...";
|
|
7045
|
+
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.TOOL_USE) {
|
|
7046
|
+
lastAction = extractToolActionDescription(
|
|
7047
|
+
traceEvent.toolName,
|
|
7048
|
+
traceEvent.toolArgs
|
|
7049
|
+
);
|
|
7050
|
+
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_WRITE) {
|
|
7051
|
+
lastAction = `Writing: ${traceEvent.filePath || "file"}`;
|
|
7052
|
+
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_READ) {
|
|
7053
|
+
lastAction = `Reading: ${traceEvent.filePath || "file"}`;
|
|
7054
|
+
} else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.COMPLETION) {
|
|
7055
|
+
lastAction = "Processing response...";
|
|
7056
|
+
}
|
|
6987
7057
|
emitTraceEvent(
|
|
6988
7058
|
traceEvent,
|
|
6989
7059
|
traceContext.tracePushUrl,
|