@wix/evalforge-evaluator 0.29.0 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -6825,6 +6825,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
6825
6825
  console.log("[SDK-DEBUG] ============================================");
6826
6826
  let traceStepNumber = 0;
6827
6827
  const traceContext = options.traceContext;
6828
+ let lastAction = "Starting...";
6829
+ let lastToolName;
6830
+ let lastFilePath;
6828
6831
  const maxTurns = options.maxTurns ?? 10;
6829
6832
  console.error(
6830
6833
  "[DEBUG-H5] Claude SDK query START",
@@ -6931,6 +6934,13 @@ async function executeWithClaudeCode(skill, scenario, options) {
6931
6934
  if (traceContext) {
6932
6935
  heartbeatHandle = setInterval(() => {
6933
6936
  const elapsedMs = Date.now() - executionStartTime;
6937
+ let progressMessage = lastAction;
6938
+ if (lastToolName && lastFilePath) {
6939
+ progressMessage = `${lastToolName}: ${lastFilePath}`;
6940
+ } else if (lastToolName) {
6941
+ progressMessage = `Using ${lastToolName}...`;
6942
+ }
6943
+ progressMessage += ` (${Math.round(elapsedMs / 1e3)}s)`;
6934
6944
  const progressEvent = {
6935
6945
  evalRunId: traceContext.evalRunId,
6936
6946
  scenarioId: traceContext.scenarioId,
@@ -6939,7 +6949,9 @@ async function executeWithClaudeCode(skill, scenario, options) {
6939
6949
  targetName: traceContext.targetName,
6940
6950
  stepNumber: traceStepNumber,
6941
6951
  type: import_evalforge_types.LiveTraceEventType.PROGRESS,
6942
- outputPreview: `Executing... (${Math.round(elapsedMs / 1e3)}s elapsed, ${messageCount} messages)`,
6952
+ outputPreview: progressMessage,
6953
+ toolName: lastToolName,
6954
+ filePath: lastFilePath,
6943
6955
  elapsedMs,
6944
6956
  timestamp: (/* @__PURE__ */ new Date()).toISOString(),
6945
6957
  isComplete: false
@@ -6984,6 +6996,19 @@ IMPORTANT: This is an automated evaluation run. Execute the requested changes im
6984
6996
  // Not complete yet
6985
6997
  );
6986
6998
  if (traceEvent) {
6999
+ lastToolName = traceEvent.toolName;
7000
+ lastFilePath = traceEvent.filePath;
7001
+ if (traceEvent.type === import_evalforge_types.LiveTraceEventType.THINKING) {
7002
+ lastAction = "Thinking...";
7003
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.TOOL_USE) {
7004
+ lastAction = `Using ${traceEvent.toolName || "tool"}...`;
7005
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_WRITE) {
7006
+ lastAction = `Writing: ${traceEvent.filePath || "file"}`;
7007
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.FILE_READ) {
7008
+ lastAction = `Reading: ${traceEvent.filePath || "file"}`;
7009
+ } else if (traceEvent.type === import_evalforge_types.LiveTraceEventType.COMPLETION) {
7010
+ lastAction = "Processing response...";
7011
+ }
6987
7012
  emitTraceEvent(
6988
7013
  traceEvent,
6989
7014
  traceContext.tracePushUrl,