@wix/evalforge-evaluator 0.30.0 → 0.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6599,6 +6599,49 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
6599
6599
  });
6600
6600
  }
6601
6601
  }
6602
/**
 * Builds a short, human-readable progress label for a tool invocation.
 *
 * Recognised tool families (task dispatch, shell, search, directory listing,
 * file read, file write) get a label derived from their arguments; anything
 * else falls back to a generic "Using <toolName>..." label.
 *
 * @param {string} [toolName] - Name of the tool being invoked.
 * @param {string|object} [toolArgs] - Tool arguments, either as a JSON string
 *   or as an already-parsed object.
 * @returns {string} The label to display, e.g. "Running: npm test".
 */
function extractToolActionDescription(toolName, toolArgs) {
  if (!toolName) {
    return "Using tool...";
  }
  const fallback = `Using ${toolName}...`;
  if (!toolArgs) {
    return fallback;
  }

  // Clip a value to maxLength characters, always marking the cut with "..."
  // so a shortened path or query is never mistaken for the complete one.
  const clip = (value, maxLength) => {
    const text = String(value);
    return text.length > maxLength ? `${text.slice(0, maxLength)}...` : text;
  };

  try {
    // Accept pre-parsed arguments as well as their JSON-string form.
    const args = typeof toolArgs === "string" ? JSON.parse(toolArgs) : toolArgs;
    if (args === null) {
      return fallback;
    }

    if ((toolName === "Task" || toolName === "dispatch_agent") && args.description) {
      return `Task: ${clip(args.description, 55)}`;
    }

    if ((toolName === "Bash" || toolName === "bash" || toolName === "execute") && args.command) {
      return `Running: ${clip(args.command, 50)}`;
    }

    if (toolName === "Search" || toolName === "search" || toolName === "Grep") {
      const query = args.query || args.pattern || args.search;
      if (query) {
        return `Searching: ${clip(query, 40)}`;
      }
    }

    if (toolName === "LS" || toolName === "ls" || toolName === "ListFiles") {
      // With no explicit target, the label shows "." as the listed directory.
      const directory = args.path || args.directory || ".";
      return `Listing: ${clip(directory, 50)}`;
    }

    const filePath = args.file_path || args.path || args.target_file;
    if (filePath) {
      if (toolName === "Read" || toolName === "read" || toolName === "View") {
        return `Reading: ${clip(filePath, 50)}`;
      }
      if (toolName === "Write" || toolName === "Edit" || toolName === "write") {
        return `Writing: ${clip(filePath, 50)}`;
      }
    }
  } catch {
    // Deliberately best-effort: this label is cosmetic progress output, so
    // malformed or unexpected arguments must never surface as an error.
    // Fall through to the generic label below.
  }
  return fallback;
}
6602
6645
  async function pushTraceEvent(url2, event, routeHeader, authToken) {
6603
6646
  try {
6604
6647
  const headers = {
@@ -6932,15 +6975,27 @@ async function executeWithClaudeCode(skill, scenario, options) {
6932
6975
  }, SDK_TIMEOUT_MS);
6933
6976
  });
6934
6977
  if (traceContext) {
6978
+ let lastReportedAction = "";
6979
+ let sameActionCount = 0;
6935
6980
  heartbeatHandle = setInterval(() => {
6936
6981
  const elapsedMs = Date.now() - executionStartTime;
6937
6982
  let progressMessage = lastAction;
6938
- if (lastToolName && lastFilePath) {
6983
+ if (lastAction === lastReportedAction) {
6984
+ sameActionCount++;
6985
+ } else {
6986
+ sameActionCount = 1;
6987
+ lastReportedAction = lastAction;
6988
+ }
6989
+ const isTaskTool = lastToolName === "Task" || lastToolName === "dispatch_agent";
6990
+ if (isTaskTool && sameActionCount > 1) {
6991
+ progressMessage = `Waiting for ${lastAction}`;
6992
+ } else if (lastToolName && lastFilePath) {
6939
6993
  progressMessage = `${lastToolName}: ${lastFilePath}`;
6940
- } else if (lastToolName) {
6994
+ } else if (lastToolName && !isTaskTool) {
6941
6995
  progressMessage = `Using ${lastToolName}...`;
6942
6996
  }
6943
- progressMessage += ` (${Math.round(elapsedMs / 1e3)}s)`;
6997
+ const elapsedSec = Math.round(elapsedMs / 1e3);
6998
+ progressMessage += ` (${elapsedSec}s, step ${traceStepNumber})`;
6944
6999
  const progressEvent = {
6945
7000
  evalRunId: traceContext.evalRunId,
6946
7001
  scenarioId: traceContext.scenarioId,
@@ -7001,7 +7056,10 @@ IMPORTANT: This is an automated evaluation run. Execute the requested changes im
7001
7056
  if (traceEvent.type === import_evalforge_types.LiveTraceEventType.THINKING) {
7002
7057
  lastAction = "Thinking...";
7003
7058
  } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.TOOL_USE) {
7004
- lastAction = `Using ${traceEvent.toolName || "tool"}...`;
7059
+ lastAction = extractToolActionDescription(
7060
+ traceEvent.toolName,
7061
+ traceEvent.toolArgs
7062
+ );
7005
7063
  } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_WRITE) {
7006
7064
  lastAction = `Writing: ${traceEvent.filePath || "file"}`;
7007
7065
  } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_READ) {