npm - testchimp-runner-core - Versions diffs - 0.0.34 → 0.0.35 - Mend

testchimp-runner-core 0.0.34 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (114)

package/dist/execution-service.d.ts +1 -4
package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +155 -468
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +11 -1
package/dist/index.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts +18 -0
package/dist/orchestrator/decision-parser.d.ts.map +1 -0
package/dist/orchestrator/decision-parser.js +127 -0
package/dist/orchestrator/decision-parser.js.map +1 -0
package/dist/orchestrator/index.d.ts +4 -2
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +14 -2
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +534 -204
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-prompts.js +529 -247
package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
package/dist/orchestrator/page-som-handler.d.ts +106 -0
package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
package/dist/orchestrator/page-som-handler.js +1353 -0
package/dist/orchestrator/page-som-handler.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +149 -0
package/dist/orchestrator/som-types.d.ts.map +1 -0
package/dist/orchestrator/som-types.js +87 -0
package/dist/orchestrator/som-types.js.map +1 -0
package/dist/orchestrator/tool-registry.d.ts +2 -0
package/dist/orchestrator/tool-registry.d.ts.map +1 -1
package/dist/orchestrator/tool-registry.js.map +1 -1
package/dist/orchestrator/tools/index.d.ts +4 -1
package/dist/orchestrator/tools/index.d.ts.map +1 -1
package/dist/orchestrator/tools/index.js +7 -2
package/dist/orchestrator/tools/index.js.map +1 -1
package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
package/dist/orchestrator/types.d.ts +23 -1
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js +11 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/scenario-service.d.ts +5 -0
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +17 -0
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +4 -0
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +18 -3
package/dist/scenario-worker-class.js.map +1 -1
package/dist/testing/agent-tester.d.ts +35 -0
package/dist/testing/agent-tester.d.ts.map +1 -0
package/dist/testing/agent-tester.js +84 -0
package/dist/testing/agent-tester.js.map +1 -0
package/dist/testing/ref-translator-tester.d.ts +44 -0
package/dist/testing/ref-translator-tester.d.ts.map +1 -0
package/dist/testing/ref-translator-tester.js +104 -0
package/dist/testing/ref-translator-tester.js.map +1 -0
package/dist/utils/hierarchical-selector.d.ts +47 -0
package/dist/utils/hierarchical-selector.d.ts.map +1 -0
package/dist/utils/hierarchical-selector.js +212 -0
package/dist/utils/hierarchical-selector.js.map +1 -0
package/dist/utils/page-info-retry.d.ts +14 -0
package/dist/utils/page-info-retry.d.ts.map +1 -0
package/dist/utils/page-info-retry.js +60 -0
package/dist/utils/page-info-retry.js.map +1 -0
package/dist/utils/page-info-utils.d.ts +1 -0
package/dist/utils/page-info-utils.d.ts.map +1 -1
package/dist/utils/page-info-utils.js +46 -18
package/dist/utils/page-info-utils.js.map +1 -1
package/dist/utils/ref-attacher.d.ts +21 -0
package/dist/utils/ref-attacher.d.ts.map +1 -0
package/dist/utils/ref-attacher.js +149 -0
package/dist/utils/ref-attacher.js.map +1 -0
package/dist/utils/ref-translator.d.ts +49 -0
package/dist/utils/ref-translator.d.ts.map +1 -0
package/dist/utils/ref-translator.js +276 -0
package/dist/utils/ref-translator.js.map +1 -0
package/package.json +1 -1
package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
package/plandocs/exploratory-mode-support.plan.md +928 -0
package/plandocs/journey-id-tracking-addendum.md +227 -0
package/src/execution-service.ts +179 -596
package/src/index.ts +10 -0
package/src/orchestrator/decision-parser.ts +139 -0
package/src/orchestrator/index.ts +25 -1
package/src/orchestrator/orchestrator-agent.ts +656 -236
package/src/orchestrator/orchestrator-prompts.ts +559 -247
package/src/orchestrator/page-som-handler.ts +1565 -0
package/src/orchestrator/som-types.ts +188 -0
package/src/orchestrator/tool-registry.ts +2 -0
package/src/orchestrator/tools/index.ts +4 -1
package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
package/src/orchestrator/types.ts +49 -6
package/src/scenario-service.ts +20 -0
package/src/scenario-worker-class.ts +24 -3
package/src/utils/page-info-retry.ts +65 -0
package/src/utils/page-info-utils.ts +53 -18
package/testchimp-runner-core-0.0.35.tgz +0 -0
package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
package/testchimp-runner-core-0.0.33.tgz +0 -0
/package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
/package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
/package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0

package/src/orchestrator/orchestrator-agent.ts CHANGED

@@ -6,7 +6,7 @@
 import { expect } from '@playwright/test';
 import { LLMFacade } from '../llm-facade';
 import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
-import { getEnhancedPageInfo } from '../utils/page-info-utils';
+import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
 import { CoordinateConverter } from '../utils/coordinate-converter';
 import { ToolRegistry, ToolExecutionContext } from './tool-registry';
 import { DEFAULT_MODEL } from '../model-constants';
@@ -20,9 +20,14 @@ import {
   SelfReflection,
   NoteToFutureSelf,
   CoordinateAction,
+  ExplorationMode,
   DEFAULT_AGENT_CONFIG
 } from './types';
 import { OrchestratorPrompts } from './orchestrator-prompts';
+import { PageInfoRetry } from '../utils/page-info-retry';
+import { DecisionParser } from './decision-parser';
+import { PageSoMHandler } from './page-som-handler';
+import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
 /**
  * Orchestrator Agent - manages step execution with tool use and memory
@@ -34,6 +39,9 @@ export class OrchestratorAgent {
   private config: Required<AgentConfig>;
   private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
   private debugMode: boolean = false;
+  private decisionParser: DecisionParser;
+  private somHandler?: PageSoMHandler;
+  private previousSomScreenshot?: string;  // Track previous iteration's screenshot
   constructor(
     llmFacade: LLMFacade,
@@ -49,6 +57,12 @@ export class OrchestratorAgent {
     this.progressReporter = progressReporter;
     this.logger = logger;
     this.debugMode = debugMode || false;
+    this.decisionParser = new DecisionParser(logger);
+    // Initialize SoM handler if enabled
+    if (this.config.useSoM) {
+      this.somHandler = new PageSoMHandler(null as any, this.logger);
+    }
   }
   setDebugMode(enabled: boolean): void {
@@ -65,13 +79,14 @@ export class OrchestratorAgent {
     totalSteps: number,
     scenarioSteps: string[],
     memory: JourneyMemory,
-    jobId: string
+    jobId: string,
+    priorSteps?: string[],    // NEW: For repair mode (undefined for script gen)
+    nextSteps?: string[]      // NEW: For repair mode (undefined for script gen)
   ): Promise<OrchestratorStepResult> {
     this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
     this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
     let iteration = 0;
-    let previousReflection: SelfReflection | undefined = undefined;
     let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote;  // Start with note from previous step
     const commandsExecuted: string[] = [];
     let consecutiveFailures = 0;  // Track consecutive iterations with failed commands
@@ -90,9 +105,10 @@ export class OrchestratorAgent {
         totalSteps,
         scenarioSteps,
         memory,
-        previousReflection,
         consecutiveFailures,
-        noteToSelf  // NEW: Pass note from previous iteration
+        noteToSelf,  // Pass note from previous iteration
+        priorSteps,  // NEW: Pass repair context
+        nextSteps    // NEW: Pass repair context
       );
       // Call agent to make decision
@@ -105,7 +121,7 @@ export class OrchestratorAgent {
       );
       // Log agent's reasoning
-      this.logAgentDecision(decision, iteration);
+      this.decisionParser.log(decision, iteration);
       // Report progress
       await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
@@ -113,17 +129,39 @@ export class OrchestratorAgent {
       // Execute tools if requested (tools are READ-ONLY, they don't change state)
       let toolResults: Record<string, any> = {};
-      // ANTI-LOOP: Detect if agent is taking screenshots repeatedly without acting
+      // ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
+      const screenshotsThisStep = memory.history.filter(s =>
+        s.stepNumber === stepNumber &&
+        (s.code.includes('take_screenshot') || s.action.includes('Screenshot'))
+      );
       const recentScreenshots = memory.history.slice(-3).filter(s =>
         s.code.includes('take_screenshot') || s.action.includes('Screenshot')
       );
-      if (recentScreenshots.length >= 2 && iteration >= 3) {
+      if (screenshotsThisStep.length >= 3) {
+        this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
+      } else if (recentScreenshots.length >= 2 && iteration >= 3) {
         this.logger?.(`[Orchestrator] ⚠️  WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
-        this.logger?.(`[Orchestrator] 💭 System: Stop gathering info, START ACTING with available selectors`);
       }
       if (decision.toolCalls && decision.toolCalls.length > 0) {
-        toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
+        // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
+        if (screenshotsThisStep.length >= 3) {
+          decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
+          if (decision.toolCalls.length === 0) {
+            this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
+            toolResults = [{
+              toolName: 'take_screenshot',
+              success: false,
+              error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
+              data: null
+            }];
+          }
+        }
+        if (decision.toolCalls.length > 0) {
+          toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
+        }
         // If agent wants to wait for tool results before proceeding, call agent again
         if (decision.needsToolResults) {
@@ -147,7 +185,7 @@ export class OrchestratorAgent {
         this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
         this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
-        const blockerResult = await this.executeCommandsSequentially(
+        const blockerResult = await this.executeCommands(
           decision.blockerDetected.clearingCommands,
           page,
           memory,
@@ -171,8 +209,8 @@ export class OrchestratorAgent {
       }
       // Execute main commands (only if no blocker failure)
-      if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
-        const executeResult = await this.executeCommandsSequentially(
+      if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
+        const executeResult = await this.executeCommands(
           decision.commands,
           page,
           memory,
@@ -193,8 +231,8 @@ export class OrchestratorAgent {
         }
       }
-      // Handle coordinate-based actions (NEW - fallback when selectors fail)
-      if (decision.coordinateAction && !iterationHadFailure) {
+      // Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
+      if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
         coordinateAttempts++;
         this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
@@ -211,7 +249,7 @@ export class OrchestratorAgent {
           coordCommands.forEach(cmd => this.logger?.(`  ${cmd}`));
           // Execute coordinate commands
-          const coordResult = await this.executeCommandsSequentially(
+          const coordResult = await this.executeCommands(
             coordCommands,
             page,
             memory,
@@ -380,10 +418,7 @@ export class OrchestratorAgent {
         }
       }
-      // Store self-reflection for next iteration
-      previousReflection = decision.selfReflection;
-      // Store note to future self (NEW - tactical memory across iterations AND steps)
+      // Store note to future self (tactical memory across iterations AND steps)
       if (decision.noteToFutureSelf) {
         noteToSelf = {
           fromIteration: iteration,
@@ -441,9 +476,10 @@ export class OrchestratorAgent {
     totalSteps: number,
     scenarioSteps: string[],
     memory: JourneyMemory,
-    previousReflection?: SelfReflection,
     consecutiveFailures?: number,
-    noteFromPreviousIteration?: NoteToFutureSelf  // NEW
+    noteFromPreviousIteration?: NoteToFutureSelf,
+    priorSteps?: string[],    // NEW: For repair mode
+    nextSteps?: string[]      // NEW: For repair mode
   ): Promise<AgentContext> {
     // Get fresh DOM
     const currentPageInfo = await getEnhancedPageInfo(page);
@@ -452,8 +488,41 @@ export class OrchestratorAgent {
     // Get recent steps
     const recentSteps = memory.history.slice(-this.config.recentStepsCount);
+    // SoM integration: Update markers and capture screenshot with visual IDs
+    let somScreenshot: string | undefined = undefined;
+    let somElementMap: string | undefined = undefined;
+    if (this.config.useSoM && this.somHandler) {
+      try {
+        if (!this.somHandler) {
+          this.somHandler = new PageSoMHandler(page, this.logger);
+        } else {
+          this.somHandler.setPage(page);
+        }
+        // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
+        try {
+          await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
+        } catch (error: any) {
+          // Page already loaded or timeout - continue
+        }
+        // Update SoM markers
+        await this.somHandler.updateSom();
+        // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
+        somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
+        // Get element map for disambiguation
+        somElementMap = this.somHandler.getSomElementMap();
+        this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
+      } catch (error: any) {
+        this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
+      }
+    }
     // Build context
-    return {
+    const context = {
       overallGoal: scenarioSteps.join('\n'),
       currentStepGoal,
       stepNumber,
@@ -465,9 +534,19 @@ export class OrchestratorAgent {
       recentSteps,
       experiences: memory.experiences,
       extractedData: memory.extractedData,
-      previousIterationGuidance: previousReflection,
-      noteFromPreviousIteration  // NEW: Pass tactical note from previous iteration
+      noteFromPreviousIteration,  // Pass tactical note from previous iteration
+      somScreenshot,  // SoM screenshot with visual markers (current)
+      somElementMap,  // SoM element details for disambiguation
+      priorSteps,     // NEW: Repair context (undefined for script gen)
+      nextSteps       // NEW: Repair context (undefined for script gen)
     };
+    // Save current screenshot as previous for next iteration (for tool access)
+    if (somScreenshot) {
+      this.previousSomScreenshot = somScreenshot;
+    }
+    return context;
   }
   /**
@@ -487,10 +566,20 @@ export class OrchestratorAgent {
     // Build appropriate system prompt based on mode
     const toolDescriptions = this.toolRegistry.generateToolDescriptions();
-    const systemPrompt = useCoordinateMode
-      ? OrchestratorPrompts.buildCoordinateSystemPrompt()
-      : OrchestratorPrompts.buildSystemPrompt(toolDescriptions);
-    const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures);
+    let systemPrompt: string;
+    if (this.config.useSoM) {
+      // SoM mode: Use visual element identification
+      systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
+    } else if (useCoordinateMode) {
+      // Coordinate mode: Fallback when selectors fail
+      systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
+    } else {
+      // Standard mode: DOM-based selectors
+      systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
+    }
+    const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
     // Log prompt lengths for monitoring
     const systemLength = systemPrompt.length;
@@ -502,12 +591,18 @@ export class OrchestratorAgent {
     try {
       // Call LLM directly via provider
-      const llmRequest = {
+      const llmRequest: any = {
         model: DEFAULT_MODEL,
         systemPrompt,
         userPrompt
       };
+      // Include current SoM screenshot as image
+      if (context.somScreenshot) {
+        llmRequest.imageUrl = context.somScreenshot;
+        this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
+      }
       const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
       // Report token usage
@@ -529,7 +624,7 @@ export class OrchestratorAgent {
       }
       // Parse response
-      return this.parseAgentDecision(response.answer);
+      return this.decisionParser.parse(response.answer);
     } catch (error: any) {
       this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
@@ -543,57 +638,6 @@ export class OrchestratorAgent {
     }
   }
-  /**
-   * Parse agent decision from LLM response
-   */
-  private parseAgentDecision(response: string): AgentDecision {
-    try {
-      // Extract JSON from response
-      const jsonMatch = response.match(/\{[\s\S]*\}/);
-      if (!jsonMatch) {
-        this.logger?.(`[Orchestrator] ✗ No JSON found in LLM response`, 'error');
-        this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
-        throw new Error('No JSON found in response');
-      }
-      const parsed = JSON.parse(jsonMatch[0]);
-      // Validate required fields
-      // Accept either "reasoning" or "statusReasoning" (LLMs sometimes only provide one)
-      if (!parsed.status || (!parsed.reasoning && !parsed.statusReasoning)) {
-        this.logger?.(`[Orchestrator] ✗ Missing required fields in parsed JSON`, 'error');
-        this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
-        this.logger?.(`[Orchestrator] 📄 PARSED JSON:\n${JSON.stringify(parsed, null, 2)}`, 'error');
-        this.logger?.(`[Orchestrator] ❌ Has status: ${!!parsed.status}, Has reasoning: ${!!parsed.reasoning}, Has statusReasoning: ${!!parsed.statusReasoning}`, 'error');
-        throw new Error('Missing required fields: status and (reasoning or statusReasoning)');
-      }
-      // Normalize: if reasoning is missing but statusReasoning exists, use statusReasoning as reasoning
-      if (!parsed.reasoning && parsed.statusReasoning) {
-        parsed.reasoning = parsed.statusReasoning;
-      }
-      return parsed as AgentDecision;
-    } catch (error: any) {
-      this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
-      // Only log full response if not already logged above
-      if (!error.message.includes('Missing required fields') && !error.message.includes('No JSON found')) {
-        this.logger?.(`[Orchestrator] 📄 FULL LLM RESPONSE:\n${response}`, 'error');
-      }
-      // Return fallback
-      return {
-        status: 'stuck',
-        statusReasoning: 'Failed to parse agent response',
-        reasoning: `Parse error: ${error.message}`
-      };
-    }
-  }
   /**
    * Execute tools
    */
@@ -601,16 +645,20 @@ export class OrchestratorAgent {
     toolCalls: any[],
     page: any,
     memory: JourneyMemory,
-    stepNumber: number
+    stepNumber: number,
+    refMap?: Map<string, any>
   ): Promise<Record<string, any>> {
     this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
     const results: Record<string, any> = {};
-    const toolContext: ToolExecutionContext = {
+    const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
       page,
       memory,
       stepNumber,
-      logger: this.logger
+      logger: this.logger,
+      refMap,  // Pass refMap for interact_with_ref tool
+      previousSomScreenshot: this.previousSomScreenshot,  // For view_previous_screenshot tool
+      somHandler: this.somHandler  // For refresh_som_markers tool
     };
     for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
@@ -630,71 +678,259 @@ export class OrchestratorAgent {
   }
   /**
-   * Execute commands sequentially with SHARED context (variables persist across commands)
+   * Parse SomCommand from command object
    */
-  private async executeCommandsSequentially(
-    commands: string[],
+  private parseSomCommand(cmd: any): SomCommand | null {
+    if (typeof cmd === 'object' && cmd.action) {
+      // Valid if: has elementRef, OR has coord, OR is navigation action
+      const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
+      const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
+      if (hasValidTarget) {
+        return {
+          elementRef: cmd.elementRef,
+          coord: cmd.coord,
+          action: cmd.action,
+          value: cmd.value,
+          fromCoord: cmd.fromCoord,
+          toCoord: cmd.toCoord,
+          force: cmd.force,
+          scrollAmount: cmd.scrollAmount,
+          scrollDirection: cmd.scrollDirection,
+          button: cmd.button,
+          clickCount: cmd.clickCount,
+          modifiers: cmd.modifiers,
+          delay: cmd.delay,
+          timeout: cmd.timeout
+        };
+      }
+    }
+    return null;
+  }
+  /**
+   * Execute commands (mix of ref and playwright commands)
+   */
+  private async executeCommands(
+    commands: string[] | any[],
     page: any,
     memory: JourneyMemory,
     stepNumber: number,
     iteration: number,
     jobId: string
   ): Promise<{ executed: string[]; allSucceeded: boolean }> {
-    this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);
+    this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
     const executed: string[] = [];
-    const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
-    // Build execution with shared context (all commands share scope - variables persist)
-    const commandsWithTracking = limitedCommands.map((cmd, i) => {
-      return `
-// Command ${i + 1}/${limitedCommands.length}
+    if (commands.length === 0) {
+      return { executed: [], allSucceeded: true };
+    }
+    // SoM mode: Execute commands through PageSoMHandler
+    if (this.config.useSoM && this.somHandler) {
+      this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
+      for (let i = 0; i < commands.length; i++) {
+        const cmd = commands[i];
+        // Check if verification or action command
+        if (isSomVerification(cmd)) {
+          // Handle verification command
+          try {
+            const result = await this.somHandler.executeVerification(cmd);
+            // Always add command to executed array (even if verification failed)
+            // Scripts should contain the expect even if it fails during generation
+            if (result.playwrightCommand) {
+              executed.push(result.playwrightCommand);
+            }
+            if (result.success) {
+              this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
+                code: result.playwrightCommand,
+                result: 'success',
+                observation: `Verified: ${cmd.description || cmd.expected}`,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+            } else {
+              this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `Verification ${i + 1}/${commands.length} - FAILED`,
+                code: result.playwrightCommand || JSON.stringify(cmd),
+                result: 'failure',
+                observation: `Failed: ${result.error}`,
+                error: result.error,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Continue anyway - verification failures are non-blocking for script generation
+            }
+            // Small delay between commands
+            if (i < commands.length - 1) {
+              await page.waitForTimeout(300);
+            }
+          } catch (error: any) {
+            this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
+          }
+        } else if (isSomCommand(cmd)) {
+          // Handle action command (existing logic)
+          const somCommand = cmd as SomCommand;
+          try {
+            const result = await this.somHandler.runCommand(
+              somCommand,
+              this.config.somUseSomIdBasedCommands || false
+            );
+            if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
+              this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
+              executed.push(result.successAttempt.command!);
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
+                code: result.successAttempt.command!,
+                result: 'success',
+                observation: 'Executed successfully',
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Small delay for form validation/animations
+              if (i < commands.length - 1) {
+                await page.waitForTimeout(300);
+              }
+            } else {
+              this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
+                code: JSON.stringify(somCommand),
+                result: 'failure',
+                observation: `Failed: ${result.error}`,
+                error: result.error,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Refresh SoM after batch (DOM may have changed)
+              if (this.somHandler && page) {
+                this.somHandler.setPage(page);
+                await this.somHandler.updateSom();
+              }
+              return { executed, allSucceeded: false };
+            }
+          } catch (error: any) {
+            this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
+            memory.history.push({
+              stepNumber,
+              iteration,
+              action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
+              code: JSON.stringify(somCommand),
+              result: 'failure',
+              observation: `Exception: ${error.message}`,
+              error: error.message,
+              url: page.url(),
+              timestamp: Date.now()
+            });
+            // Refresh SoM after batch (DOM may have changed)
+            if (this.somHandler && page) {
+              this.somHandler.setPage(page);
+              await this.somHandler.updateSom();
+            }
+            return { executed, allSucceeded: false };
+          }
+        } else {
+          this.logger?.(`[Orchestrator]   ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
+        }
+      }
+      // Always wait for page to stabilize after command batch
+      // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
+      try {
+        this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
+        // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
+        await page.waitForLoadState('networkidle', { timeout: 3000 });
+        this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
+      } catch (error: any) {
+        // If networkidle times out, fall back to domcontentloaded
+        try {
+          await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
+          this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
+        } catch (error2: any) {
+          this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
+        }
+      }
+      // Refresh SoM after batch (DOM may have changed and page is now stable)
+      if (this.somHandler && page) {
+        this.somHandler.setPage(page);
+        await this.somHandler.updateSom();
+      }
+      return { executed, allSucceeded: true };
+    }
+    // Standard mode: Execute all commands in sequence with small delay between them
+    // Delay helps with form validation, button enabling, and animations
+    const wrappedCode = (commands as string[]).map((cmd, i) => `
+// Command ${i + 1}/${commands.length}
 try {
   ${cmd}
   __results.push({ index: ${i}, success: true });
+  ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
 } catch (error) {
   __results.push({ index: ${i}, success: false, error: error.message });
-  throw error; // Stop on first failure
-}`;
-    }).join('\n');
+  throw error;
+}`).join('\n');
-    const wrappedCode = `
-const __results = [];
-${commandsWithTracking}
-return __results;
-`;
+    const fullCode = `const __results = []; ${wrappedCode} return __results;`;
     try {
-      // Execute in shared context - variables declared here persist for entire scenario
-      // Pass both page and expect to make Playwright assertions available
-      const func = new Function('page', 'expect', 'return (async () => { ' + wrappedCode + ' })()');
-      const results = await func(page, expect);
-      // Record results for each command
-      for (let i = 0; i < limitedCommands.length; i++) {
-        const cmd = limitedCommands[i];
+      const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
+      const results = await func(page, (global as any).expect);
+      for (let i = 0; i < commands.length; i++) {
+        const cmd = commands[i];
         const result = results[i];
         if (result && result.success) {
-          this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${limitedCommands.length}] Success`);
-          // Record in history
+          this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] Success`);
           memory.history.push({
             stepNumber,
             iteration,
-            action: `Command ${i + 1}/${limitedCommands.length}`,
+            action: `Command ${i + 1}/${commands.length}`,
             code: cmd,
             result: 'success',
             observation: 'Executed successfully',
             url: page.url(),
             timestamp: Date.now()
           });
           executed.push(cmd);
         }
       }
-      // Cap history
       if (memory.history.length > this.config.maxHistorySize) {
         memory.history = memory.history.slice(-this.config.maxHistorySize);
       }
@@ -702,132 +938,25 @@ return __results;
       return { executed, allSucceeded: true };
     } catch (error: any) {
-      // One of the commands failed - find which one
       const errorMessage = error.message || String(error);
+      this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
-      // Capture page state for debug logging
-      let pageStateDebug = '';
-      if (this.debugMode) {
-        try {
-          const pageInfo = await getEnhancedPageInfo(page);
-          pageStateDebug = `
-=== DEBUG: PAGE STATE AT FAILURE ===
-URL: ${page.url()}
-Title: ${pageInfo.title}
-INTERACTIVE ELEMENTS:
-${pageInfo.formattedElements}
-ARIA SNAPSHOT:
-${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
-====================================`;
-        } catch (debugError: any) {
-          pageStateDebug = `Failed to capture page state: ${debugError.message}`;
-        }
-      }
-      // Record all that succeeded, then the failure
-      for (let i = 0; i < limitedCommands.length; i++) {
-        const cmd = limitedCommands[i];
-        // This is a failed command (error happened here or earlier)
-        if (executed.length <= i) {
-          this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
-          // Log detailed debug info
-          if (this.debugMode && pageStateDebug) {
-            this.logger?.(pageStateDebug, 'debug');
-          }
-          memory.history.push({
-            stepNumber,
-            iteration,
-            action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
-            code: cmd,
-            result: 'failure',
-            observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
-            error: errorMessage,
-            url: page.url(),
-            timestamp: Date.now()
-          });
-          if (i < limitedCommands.length - 1) {
-            this.logger?.(`[Orchestrator]   ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
-          }
-          break;
-        }
-      }
+      memory.history.push({
+        stepNumber,
+        iteration,
+        action: `Command - FAILED`,
+        code: commands[executed.length] || '',
+        result: 'failure',
+        observation: `Failed: ${errorMessage}`,
+        error: errorMessage,
+        url: page.url(),
+        timestamp: Date.now()
+      });
       return { executed, allSucceeded: false };
     }
   }
-  /**
-   * Execute a single command string.
-   *
-   * The command is first run through `eval`, wrapped in an async IIFE. If that
-   * throws or rejects for any reason, the same command text is run again through
-   * a `new Function` whose parameters are named `page` and `expect`.
-   *
-   * NOTE(review): the catch is unconditional, so a command that fails partway
-   * under `eval` is executed again from the start by the fallback.
-   *
-   * @param cmd - Source text of the command; interpolated verbatim into the wrapper.
-   * @param page - Value passed to the fallback function as its `page` argument.
-   * @returns Resolves when the command finishes; an error raised by the fallback propagates to the caller.
-   */
-  private async executeCommand(cmd: string, page: any): Promise<void> {
-    // First attempt: wrap the command in an async IIFE and evaluate it
-    const wrapped = `(async () => { ${cmd} })()`;
-    try {
-      await eval(wrapped);
-    } catch (error: any) {
-      // Any eval failure lands here (the caught error itself is not inspected or logged)
-      // Second attempt: build a function that receives page and expect as named parameters
-      const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
-      await func(page, expect);
-    }
-  }
-  /**
-   * Log agent decision.
-   *
-   * Emits the decision's reasoning, then one group of log lines for each optional
-   * part that is present: self-reflection, tool calls, detected blocker, step
-   * re-evaluation and commands. Loop detection, blockers and step re-evaluation
-   * issues are logged at 'warn' level; everything else uses the logger's default.
-   * Nothing is logged when no logger is configured (every call is optional-chained).
-   *
-   * @param decision - Parsed agent decision to describe.
-   * @param iteration - Not referenced anywhere in the body.
-   */
-  private logAgentDecision(decision: AgentDecision, iteration: number): void {
-    this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
-    if (decision.selfReflection) {
-      this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
-      this.logger?.(`[Orchestrator]    Next: ${decision.selfReflection.guidanceForNext}`);
-      if (decision.selfReflection.detectingLoop) {
-        this.logger?.(`[Orchestrator]    🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
-      }
-    }
-    if (decision.toolCalls && decision.toolCalls.length > 0) {
-      this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
-      if (decision.toolReasoning) {
-        this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
-      }
-    }
-    if (decision.blockerDetected) {
-      this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
-      this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
-    }
-    if (decision.stepReEvaluation?.detected) {
-      this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
-      this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
-    }
-    if (decision.commands && decision.commands.length > 0) {
-      this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
-      decision.commands.slice(0, 3).forEach((cmd, i) => { // preview at most the first three commands
-        this.logger?.(`[Orchestrator]    ${i + 1}. ${cmd.substring(0, 80)}...`); // "..." is appended even when the command is shorter than 80 characters
-      });
-      if (decision.commands.length > 3) {
-        this.logger?.(`[Orchestrator]    ... and ${decision.commands.length - 3} more`);
-      }
-      if (decision.commandReasoning) {
-        this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
-      }
-    }
-    // Experiences are not logged here; per the original note they are logged when added to memory
-  }
   /**
    * Report step progress
    */
@@ -857,6 +986,297 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
       agentStatus: decision.status
     });
   }
+  /**
+   * Execute exploration mode - agent autonomously explores to achieve journey goal.
+   * Fires onStepProgress callbacks for each autonomous action (transparent to caller).
+   *
+   * Each loop pass builds a fresh context, asks the exploratory agent for a decision,
+   * runs any requested tools, runs blocker-clearing commands, runs the decision's
+   * commands, and then records experiences and the note for the next pass. The loop
+   * ends when the agent reports 'complete' or 'stuck', or when the step budget
+   * (`maxExplorationSteps`, defaulting to 50 when unset or 0) is used up.
+   *
+   * The start and completion progress events of one step share a single stepId.
+   *
+   * @param page - Page object handed to context building, tools and command execution.
+   * @param explorationConfig - Exploration goal, optional test data prompt and optional step budget.
+   * @param jobId - Job identifier forwarded to progress reports, the agent call and command execution.
+   * @returns Result with the commands that executed, the number of steps taken, the journey
+   *   memory and a terminationReason of 'complete', 'agent_stuck' or 'system_limit'.
+   *   Only 'agent_stuck' is reported with success=false.
+   */
+  async executeExploration(
+    page: any,
+    explorationConfig: ExplorationMode,
+    jobId: string
+  ): Promise<OrchestratorStepResult> {
+    this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
+    this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
+    if (explorationConfig.testDataPrompt) {
+      this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
+    }
+    const memory: JourneyMemory = {
+      history: [],
+      experiences: [],
+      extractedData: {}
+    };
+    const maxSteps = explorationConfig.maxExplorationSteps || 50;
+    let stepNumber = 0;
+    const commandsExecuted: string[] = [];
+    while (stepNumber < maxSteps) {
+      stepNumber++;
+      this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
+      // Build exploratory context
+      const context = await this.buildExploratoryContext(
+        page,
+        explorationConfig.explorationPrompt,
+        explorationConfig.testDataPrompt,
+        memory,
+        stepNumber,
+        maxSteps
+      );
+      // Call agent with exploratory prompt
+      const decision = await this.callExploratoryAgent(
+        context,
+        jobId,
+        stepNumber
+      );
+      this.decisionParser.log(decision, stepNumber);
+      // One identifier per step, so the IN_PROGRESS event and the completion event
+      // below can be correlated by stepId
+      const stepId = `exploration-${stepNumber}-${Date.now()}`;
+      // Report step start (fires JourneyRunner's beforeStepStart callback)
+      if (this.progressReporter?.onStepProgress) {
+        const stepInfo = {
+          jobId,
+          stepNumber,
+          stepId,
+          description: decision.reasoning,
+          code: '',  // Will be filled after commands execute
+          status: StepExecutionStatus.IN_PROGRESS,
+          wasRepaired: false
+        };
+        await this.progressReporter.onStepProgress(stepInfo);
+      }
+      // Execute tools if requested
+      if (decision.toolCalls && decision.toolCalls.length > 0) {
+        const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
+        // If needs tool results, call agent again
+        if (decision.needsToolResults) {
+          const updatedContext = { ...context, toolResults };
+          const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
+          decision.commands = continuedDecision.commands || decision.commands;
+          decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
+          decision.status = continuedDecision.status;
+          // Keep the reasoning in step with the status it explains; otherwise the
+          // termination log and returned error would quote the first decision
+          decision.statusReasoning = continuedDecision.statusReasoning || decision.statusReasoning;
+        }
+      }
+      // Handle blocker clearing (its outcome does not affect the step's reported status)
+      if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
+        this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
+        const blockerResult = await this.executeCommands(
+          decision.blockerDetected.clearingCommands,
+          page,
+          memory,
+          stepNumber,
+          1,
+          jobId
+        );
+        commandsExecuted.push(...blockerResult.executed);
+      }
+      // Execute exploration commands
+      let commandsSucceeded = true;
+      if (decision.commands && decision.commands.length > 0) {
+        const executeResult = await this.executeCommands(
+          decision.commands,
+          page,
+          memory,
+          stepNumber,
+          1,
+          jobId
+        );
+        commandsExecuted.push(...executeResult.executed);
+        commandsSucceeded = executeResult.allSucceeded;
+      }
+      // Report step completion (fires JourneyRunner's onStepComplete callback)
+      if (this.progressReporter?.onStepProgress) {
+        const stepInfo = {
+          jobId,
+          stepNumber,
+          stepId,
+          description: decision.reasoning,
+          code: decision.commands?.join('\n') || '',
+          status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
+          error: commandsSucceeded ? undefined : 'Command execution failed',
+          wasRepaired: false
+        };
+        await this.progressReporter.onStepProgress(stepInfo);
+      }
+      // Add experiences (both app patterns AND exploration progress), keeping only the newest
+      if (decision.experiences) {
+        memory.experiences.push(...decision.experiences);
+        if (memory.experiences.length > this.config.maxExperiences) {
+          memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
+        }
+      }
+      // Store note for next iteration
+      if (decision.noteToFutureSelf) {
+        memory.latestNote = {
+          fromIteration: stepNumber,
+          content: decision.noteToFutureSelf
+        };
+      }
+      // Check termination
+      if (decision.status === 'complete') {
+        this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
+        return {
+          success: true,
+          commands: commandsExecuted,
+          iterations: stepNumber,
+          terminationReason: 'complete',
+          memory
+        };
+      } else if (decision.status === 'stuck') {
+        this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
+        return {
+          success: false,
+          commands: commandsExecuted,
+          iterations: stepNumber,
+          terminationReason: 'agent_stuck',
+          memory,
+          error: decision.statusReasoning
+        };
+      }
+    }
+    // Hit max steps - not necessarily a failure
+    this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
+    return {
+      success: true,  // Not a failure - just budget limit
+      commands: commandsExecuted,
+      iterations: stepNumber,
+      terminationReason: 'system_limit',
+      memory
+    };
+  }
+  /**
+   * Assemble the agent context for one exploration step.
+   *
+   * Reads page info through the retry helper, the current URL and the most recent
+   * history entries. When SoM is enabled and a handler exists, it also refreshes the
+   * SoM markers and captures a screenshot plus element map; a failure there is logged
+   * and the context is still returned with whatever SoM values were obtained before
+   * the failure. A captured screenshot is remembered on the instance as the previous one.
+   *
+   * @param page - Page object to inspect.
+   * @param explorationPrompt - Journey goal; used as both the overall and the current step goal.
+   * @param testDataPrompt - Optional test data prompt carried through to the context.
+   * @param memory - Journey memory supplying history, experiences, extracted data and the latest note.
+   * @param stepNumber - Current exploration step.
+   * @param maxSteps - Step budget, reported in the context as totalSteps.
+   * @returns The context for the exploratory agent call.
+   */
+  private async buildExploratoryContext(
+    page: any,
+    explorationPrompt: string,
+    testDataPrompt: string | undefined,
+    memory: JourneyMemory,
+    stepNumber: number,
+    maxSteps: number
+  ): Promise<AgentContext> {
+    // Page info comes first (especially important after navigation), then URL and recent history
+    const pageInfo = await PageInfoRetry.getWithRetry(page);
+    const url = page.url();
+    const latestHistory = memory.history.slice(-this.config.recentStepsCount);
+    // SoM artifacts remain undefined unless SoM is on and capture gets that far
+    let markedScreenshot: string | undefined;
+    let elementMap: string | undefined;
+    if (this.config.useSoM && this.somHandler) {
+      try {
+        this.somHandler.setPage(page);
+        try {
+          // Short stability wait (first iteration + safety net for fast SPAs)
+          await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
+        } catch {
+          // Already loaded or timed out - carry on either way
+        }
+        // Refresh markers before taking the picture
+        await this.somHandler.updateSom();
+        // Viewport only - agent can scroll or request full page
+        markedScreenshot = await this.somHandler.getScreenshot(true, false, 60);
+        // Element details used for disambiguation
+        elementMap = this.somHandler.getSomElementMap();
+        this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
+      } catch (error: any) {
+        this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
+      }
+    }
+    const context = {
+      overallGoal: explorationPrompt,
+      currentStepGoal: explorationPrompt,  // Single journey: step goal equals overall goal
+      stepNumber,
+      totalSteps: maxSteps,
+      completedSteps: [],
+      remainingSteps: [],
+      currentPageInfo: pageInfo,
+      currentURL: url,
+      recentSteps: latestHistory,
+      experiences: memory.experiences,
+      extractedData: memory.extractedData,
+      noteFromPreviousIteration: memory.latestNote,
+      testDataPrompt,  // CRITICAL: testDataPrompt must travel with the context
+      somScreenshot: markedScreenshot,  // Current SoM screenshot
+      somElementMap: elementMap   // SoM element details for disambiguation
+    };
+    // Remember this screenshot as the previous one for the next iteration (for tool access)
+    if (markedScreenshot) {
+      this.previousSomScreenshot = markedScreenshot;
+    }
+    return context;
+  }
+  /**
+   * Ask the LLM for the next exploratory decision.
+   *
+   * Builds the system prompt (SoM variant when `config.useSoM` is set, otherwise the
+   * standard exploratory prompt with tool descriptions) and the exploratory user
+   * prompt, attaches the SoM screenshot from the context when there is one, calls the
+   * LLM provider, reports token usage if the response carries it and a reporter is
+   * registered, and parses the answer into a decision.
+   *
+   * @param context - Agent context for this step; supplies the goal, test data prompt,
+   *   total step count and the optional SoM screenshot.
+   * @param jobId - Job identifier included in the token usage report.
+   * @param stepNumber - Current exploration step, used in the prompt and the usage report.
+   * @returns The decision parsed from the LLM answer.
+   */
+  private async callExploratoryAgent(
+    context: AgentContext,
+    jobId: string,
+    stepNumber: number
+  ): Promise<AgentDecision> {
+    const toolDescriptions = this.toolRegistry.generateToolDescriptions();
+    // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
+    const systemPrompt = this.config.useSoM
+      ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
+      : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
+    const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
+      context,
+      context.overallGoal,
+      context.testDataPrompt,  // Pass testDataPrompt from context
+      stepNumber,
+      context.totalSteps
+    );
+    const llmRequest: any = {
+      model: DEFAULT_MODEL,
+      systemPrompt,
+      userPrompt
+    };
+    // Include current SoM screenshot as image
+    const includesImage = Boolean(context.somScreenshot);
+    if (context.somScreenshot) {
+      llmRequest.imageUrl = context.somScreenshot;
+      this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
+    }
+    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
+    // Report token usage
+    if (response.usage && this.progressReporter?.onTokensUsed) {
+      await this.progressReporter.onTokensUsed({
+        jobId,
+        stepNumber,
+        iteration: 1,
+        inputTokens: response.usage.inputTokens,
+        outputTokens: response.usage.outputTokens,
+        includesImage,  // True exactly when the screenshot was attached to the request above
+        model: DEFAULT_MODEL,
+        timestamp: Date.now()
+      });
+    }
+    // Parse response (same JSON format as regular mode)
+    const decision = this.decisionParser.parse(response.answer);
+    return decision;
+  }
 }