@wix/evalforge-evaluator 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6599,6 +6599,48 @@ function emitTraceEvent(event, tracePushUrl, routeHeader, authToken) {
6599
6599
  });
6600
6600
  }
6601
6601
  }
6602
/**
 * Build a short, human-readable progress label for a tool invocation.
 *
 * The label is best-effort: any problem reading the arguments (invalid JSON,
 * a non-object payload, values that cannot be stringified) degrades to the
 * generic `Using <toolName>...` label instead of throwing.
 *
 * @param {string} [toolName] - Name of the tool being invoked.
 * @param {string|object} [toolArgs] - Tool arguments, either as a JSON string
 *   or as an already-parsed object.
 * @returns {string} A label such as `Running: ls -la` or `Reading: src/a.js`.
 *   Values longer than the per-tool limit are cut and suffixed with `...`.
 */
function extractToolActionDescription(toolName, toolArgs) {
  if (!toolName) {
    return "Using tool...";
  }
  const fallback = `Using ${toolName}...`;

  // Cut `value` to at most `maxLength` UTF-16 units, marking the cut with
  // "..." so a shortened path or query is never mistaken for the full value.
  const truncate = (value, maxLength) => {
    const text = String(value);
    if (text.length <= maxLength) {
      return text;
    }
    let head = text.slice(0, maxLength);
    // Do not leave half of a surrogate pair at the end of the label.
    const lastUnit = head.charCodeAt(head.length - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      head = head.slice(0, -1);
    }
    return `${head}...`;
  };

  try {
    // Accept both serialized and already-parsed arguments.
    const args = typeof toolArgs === "string" ? JSON.parse(toolArgs) : toolArgs;
    if (args === null || typeof args !== "object") {
      return fallback;
    }

    if (["Task", "dispatch_agent"].includes(toolName) && args.description) {
      return truncate(args.description, 60);
    }

    if (["Bash", "bash", "execute"].includes(toolName) && args.command) {
      return `Running: ${truncate(args.command, 50)}`;
    }

    if (["Search", "search", "Grep"].includes(toolName)) {
      const query = args.query || args.pattern || args.search;
      if (query) {
        return `Searching: ${truncate(query, 40)}`;
      }
    }

    if (["LS", "ls", "ListFiles"].includes(toolName)) {
      // Listing without an explicit target means the current directory.
      return `Listing: ${truncate(args.path || args.directory || ".", 50)}`;
    }

    const targetPath = args.file_path || args.path || args.target_file;
    if (targetPath) {
      if (["Read", "read", "View"].includes(toolName)) {
        return `Reading: ${truncate(targetPath, 50)}`;
      }
      if (["Write", "Edit", "write"].includes(toolName)) {
        return `Writing: ${truncate(targetPath, 50)}`;
      }
    }
  } catch {
    // Deliberate best-effort: a progress label must never break the run,
    // so malformed arguments simply fall through to the generic label.
  }
  return fallback;
}
6602
6644
  async function pushTraceEvent(url2, event, routeHeader, authToken) {
6603
6645
  try {
6604
6646
  const headers = {
@@ -6825,6 +6867,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
6825
6867
  console.log("[SDK-DEBUG] ============================================");
6826
6868
  let traceStepNumber = 0;
6827
6869
  const traceContext = options.traceContext;
6870
+ let lastAction = "Starting...";
6871
+ let lastToolName;
6872
+ let lastFilePath;
6828
6873
  const maxTurns = options.maxTurns ?? 10;
6829
6874
  console.error(
6830
6875
  "[DEBUG-H5] Claude SDK query START",
@@ -6931,6 +6976,13 @@ async function executeWithClaudeCode(skill, scenario, options) {
6931
6976
  if (traceContext) {
6932
6977
  heartbeatHandle = setInterval(() => {
6933
6978
  const elapsedMs = Date.now() - executionStartTime;
6979
+ let progressMessage = lastAction;
6980
+ if (lastToolName && lastFilePath) {
6981
+ progressMessage = `${lastToolName}: ${lastFilePath}`;
6982
+ } else if (lastToolName) {
6983
+ progressMessage = `Using ${lastToolName}...`;
6984
+ }
6985
+ progressMessage += ` (${Math.round(elapsedMs / 1e3)}s)`;
6934
6986
  const progressEvent = {
6935
6987
  evalRunId: traceContext.evalRunId,
6936
6988
  scenarioId: traceContext.scenarioId,
@@ -6939,7 +6991,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
6939
6991
  targetName: traceContext.targetName,
6940
6992
  stepNumber: traceStepNumber,
6941
6993
  type: import_evalforge_types.LiveTraceEventType.PROGRESS,
6942
- outputPreview: `Executing... (${Math.round(elapsedMs / 1e3)}s elapsed, ${messageCount} messages)`,
6994
+ outputPreview: progressMessage,
6995
+ toolName: lastToolName,
6996
+ filePath: lastFilePath,
6943
6997
  elapsedMs,
6944
6998
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6945
6999
  isComplete: false
@@ -6984,6 +7038,22 @@ IMPORTANT: This is an automated evaluation run. Execute the requested changes im
6984
7038
  // Not complete yet
6985
7039
  );
6986
7040
  if (traceEvent) {
7041
+ lastToolName = traceEvent.toolName;
7042
+ lastFilePath = traceEvent.filePath;
7043
+ if (traceEvent.type === import_evalforge_types.LiveTraceEventType.THINKING) {
7044
+ lastAction = "Thinking...";
7045
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.TOOL_USE) {
7046
+ lastAction = extractToolActionDescription(
7047
+ traceEvent.toolName,
7048
+ traceEvent.toolArgs
7049
+ );
7050
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_WRITE) {
7051
+ lastAction = `Writing: ${traceEvent.filePath || "file"}`;
7052
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_READ) {
7053
+ lastAction = `Reading: ${traceEvent.filePath || "file"}`;
7054
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.COMPLETION) {
7055
+ lastAction = "Processing response...";
7056
+ }
6987
7057
  emitTraceEvent(
6988
7058
  traceEvent,
6989
7059
  traceContext.tracePushUrl,