npm - testchimp-runner-core - Versions diffs - 0.0.33 → 0.0.35 - Mend

testchimp-runner-core 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152)

package/dist/execution-service.d.ts +1 -4
package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +155 -468
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +11 -1
package/dist/index.js.map +1 -1
package/dist/llm-facade.d.ts.map +1 -1
package/dist/llm-facade.js +7 -7
package/dist/llm-facade.js.map +1 -1
package/dist/llm-provider.d.ts +9 -0
package/dist/llm-provider.d.ts.map +1 -1
package/dist/model-constants.d.ts +16 -5
package/dist/model-constants.d.ts.map +1 -1
package/dist/model-constants.js +17 -6
package/dist/model-constants.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts +18 -0
package/dist/orchestrator/decision-parser.d.ts.map +1 -0
package/dist/orchestrator/decision-parser.js +127 -0
package/dist/orchestrator/decision-parser.js.map +1 -0
package/dist/orchestrator/index.d.ts +4 -2
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +15 -2
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +17 -22
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +708 -577
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts +32 -0
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
package/dist/orchestrator/orchestrator-prompts.js +737 -0
package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
package/dist/orchestrator/page-som-handler.d.ts +106 -0
package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
package/dist/orchestrator/page-som-handler.js +1353 -0
package/dist/orchestrator/page-som-handler.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +149 -0
package/dist/orchestrator/som-types.d.ts.map +1 -0
package/dist/orchestrator/som-types.js +87 -0
package/dist/orchestrator/som-types.js.map +1 -0
package/dist/orchestrator/tool-registry.d.ts +2 -0
package/dist/orchestrator/tool-registry.d.ts.map +1 -1
package/dist/orchestrator/tool-registry.js.map +1 -1
package/dist/orchestrator/tools/index.d.ts +5 -1
package/dist/orchestrator/tools/index.d.ts.map +1 -1
package/dist/orchestrator/tools/index.js +9 -2
package/dist/orchestrator/tools/index.js.map +1 -1
package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
package/dist/orchestrator/tools/verify-action-result.js +140 -0
package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
package/dist/orchestrator/types.d.ts +49 -1
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js +11 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/prompts.d.ts.map +1 -1
package/dist/prompts.js +40 -34
package/dist/prompts.js.map +1 -1
package/dist/scenario-service.d.ts +5 -0
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +17 -0
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +4 -0
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +21 -3
package/dist/scenario-worker-class.js.map +1 -1
package/dist/testing/agent-tester.d.ts +35 -0
package/dist/testing/agent-tester.d.ts.map +1 -0
package/dist/testing/agent-tester.js +84 -0
package/dist/testing/agent-tester.js.map +1 -0
package/dist/testing/ref-translator-tester.d.ts +44 -0
package/dist/testing/ref-translator-tester.d.ts.map +1 -0
package/dist/testing/ref-translator-tester.js +104 -0
package/dist/testing/ref-translator-tester.js.map +1 -0
package/dist/utils/coordinate-converter.d.ts +32 -0
package/dist/utils/coordinate-converter.d.ts.map +1 -0
package/dist/utils/coordinate-converter.js +130 -0
package/dist/utils/coordinate-converter.js.map +1 -0
package/dist/utils/hierarchical-selector.d.ts +47 -0
package/dist/utils/hierarchical-selector.d.ts.map +1 -0
package/dist/utils/hierarchical-selector.js +212 -0
package/dist/utils/hierarchical-selector.js.map +1 -0
package/dist/utils/page-info-retry.d.ts +14 -0
package/dist/utils/page-info-retry.d.ts.map +1 -0
package/dist/utils/page-info-retry.js +60 -0
package/dist/utils/page-info-retry.js.map +1 -0
package/dist/utils/page-info-utils.d.ts +1 -0
package/dist/utils/page-info-utils.d.ts.map +1 -1
package/dist/utils/page-info-utils.js +46 -18
package/dist/utils/page-info-utils.js.map +1 -1
package/dist/utils/ref-attacher.d.ts +21 -0
package/dist/utils/ref-attacher.d.ts.map +1 -0
package/dist/utils/ref-attacher.js +149 -0
package/dist/utils/ref-attacher.js.map +1 -0
package/dist/utils/ref-translator.d.ts +49 -0
package/dist/utils/ref-translator.d.ts.map +1 -0
package/dist/utils/ref-translator.js +276 -0
package/dist/utils/ref-translator.js.map +1 -0
package/package.json +1 -1
package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
package/plandocs/PHASE_1_COMPLETE.md +165 -0
package/plandocs/PHASE_1_SUMMARY.md +184 -0
package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
package/plandocs/exploratory-mode-support.plan.md +928 -0
package/plandocs/journey-id-tracking-addendum.md +227 -0
package/src/execution-service.ts +179 -596
package/src/index.ts +10 -0
package/src/llm-facade.ts +8 -8
package/src/llm-provider.ts +11 -1
package/src/model-constants.ts +17 -5
package/src/orchestrator/decision-parser.ts +139 -0
package/src/orchestrator/index.ts +27 -2
package/src/orchestrator/orchestrator-agent.ts +868 -623
package/src/orchestrator/orchestrator-prompts.ts +786 -0
package/src/orchestrator/page-som-handler.ts +1565 -0
package/src/orchestrator/som-types.ts +188 -0
package/src/orchestrator/tool-registry.ts +2 -0
package/src/orchestrator/tools/index.ts +5 -1
package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
package/src/orchestrator/tools/verify-action-result.ts +159 -0
package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
package/src/orchestrator/types.ts +95 -4
package/src/prompts.ts +40 -34
package/src/scenario-service.ts +20 -0
package/src/scenario-worker-class.ts +30 -4
package/src/utils/coordinate-converter.ts +162 -0
package/src/utils/page-info-retry.ts +65 -0
package/src/utils/page-info-utils.ts +53 -18
package/testchimp-runner-core-0.0.35.tgz +0 -0
/package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
/package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
/package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
/package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
/package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
/package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0

package/src/orchestrator/orchestrator-agent.ts CHANGED

@@ -6,7 +6,8 @@
 import { expect } from '@playwright/test';
 import { LLMFacade } from '../llm-facade';
 import { ProgressReporter, TokenUsage, StepExecutionStatus } from '../progress-reporter';
-import { getEnhancedPageInfo } from '../utils/page-info-utils';
+import { getEnhancedPageInfo, PageInfo } from '../utils/page-info-utils';
+import { CoordinateConverter } from '../utils/coordinate-converter';
 import { ToolRegistry, ToolExecutionContext } from './tool-registry';
 import { DEFAULT_MODEL } from '../model-constants';
 import {
@@ -17,8 +18,16 @@ import {
   MemoryStep,
   OrchestratorStepResult,
   SelfReflection,
+  NoteToFutureSelf,
+  CoordinateAction,
+  ExplorationMode,
   DEFAULT_AGENT_CONFIG
 } from './types';
+import { OrchestratorPrompts } from './orchestrator-prompts';
+import { PageInfoRetry } from '../utils/page-info-retry';
+import { DecisionParser } from './decision-parser';
+import { PageSoMHandler } from './page-som-handler';
+import { SomCommand, CommandRunStatus, InteractionAction, isSomVerification, isSomCommand, SomVerification } from './som-types';
 /**
  * Orchestrator Agent - manages step execution with tool use and memory
@@ -30,6 +39,9 @@ export class OrchestratorAgent {
   private config: Required<AgentConfig>;
   private logger?: (message: string, level?: 'log' | 'error' | 'warn' | 'debug') => void;
   private debugMode: boolean = false;
+  private decisionParser: DecisionParser;
+  private somHandler?: PageSoMHandler;
+  private previousSomScreenshot?: string;  // Track previous iteration's screenshot
   constructor(
     llmFacade: LLMFacade,
@@ -45,6 +57,12 @@ export class OrchestratorAgent {
     this.progressReporter = progressReporter;
     this.logger = logger;
     this.debugMode = debugMode || false;
+    this.decisionParser = new DecisionParser(logger);
+    // Initialize SoM handler if enabled
+    if (this.config.useSoM) {
+      this.somHandler = new PageSoMHandler(null as any, this.logger);
+    }
   }
   setDebugMode(enabled: boolean): void {
@@ -61,15 +79,18 @@ export class OrchestratorAgent {
     totalSteps: number,
     scenarioSteps: string[],
     memory: JourneyMemory,
-    jobId: string
+    jobId: string,
+    priorSteps?: string[],    // NEW: For repair mode (undefined for script gen)
+    nextSteps?: string[]      // NEW: For repair mode (undefined for script gen)
   ): Promise<OrchestratorStepResult> {
     this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
     this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
     let iteration = 0;
-    let previousReflection: SelfReflection | undefined = undefined;
+    let noteToSelf: NoteToFutureSelf | undefined = memory.latestNote;  // Start with note from previous step
     const commandsExecuted: string[] = [];
     let consecutiveFailures = 0;  // Track consecutive iterations with failed commands
+    let coordinateAttempts = 0;  // Track coordinate mode attempts (max 2)
     while (iteration < this.config.maxIterationsPerStep) {
       iteration++;
@@ -84,15 +105,23 @@ export class OrchestratorAgent {
         totalSteps,
         scenarioSteps,
         memory,
-        previousReflection,
-        consecutiveFailures
+        consecutiveFailures,
+        noteToSelf,  // Pass note from previous iteration
+        priorSteps,  // NEW: Pass repair context
+        nextSteps    // NEW: Pass repair context
       );
       // Call agent to make decision
-      const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
+      const decision = await this.callAgent(
+        context,
+        jobId,
+        stepNumber,
+        iteration,
+        consecutiveFailures
+      );
       // Log agent's reasoning
-      this.logAgentDecision(decision, iteration);
+      this.decisionParser.log(decision, iteration);
       // Report progress
       await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
@@ -100,8 +129,39 @@ export class OrchestratorAgent {
       // Execute tools if requested (tools are READ-ONLY, they don't change state)
       let toolResults: Record<string, any> = {};
+      // ANTI-LOOP: Detect and BLOCK screenshot loops (PER STEP)
+      const screenshotsThisStep = memory.history.filter(s =>
+        s.stepNumber === stepNumber &&
+        (s.code.includes('take_screenshot') || s.action.includes('Screenshot'))
+      );
+      const recentScreenshots = memory.history.slice(-3).filter(s =>
+        s.code.includes('take_screenshot') || s.action.includes('Screenshot')
+      );
+      if (screenshotsThisStep.length >= 3) {
+        this.logger?.(`[Orchestrator] 🚨 SCREENSHOT LOOP - ${screenshotsThisStep.length} screenshots THIS STEP! BLOCKING further screenshots`, 'error');
+      } else if (recentScreenshots.length >= 2 && iteration >= 3) {
+        this.logger?.(`[Orchestrator] ⚠️  WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
+      }
       if (decision.toolCalls && decision.toolCalls.length > 0) {
-        toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
+        // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
+        if (screenshotsThisStep.length >= 3) {
+          decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
+          if (decision.toolCalls.length === 0) {
+            this.logger?.(`[Orchestrator] 🚫 REJECTED screenshot tool call - loop detected. Agent must ACT.`, 'warn');
+            toolResults = [{
+              toolName: 'take_screenshot',
+              success: false,
+              error: 'SYSTEM BLOCKED: Too many screenshots taken. You must use existing DOM snapshots and execute commands now. Analysis paralysis detected.',
+              data: null
+            }];
+          }
+        }
+        if (decision.toolCalls.length > 0) {
+          toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
+        }
         // If agent wants to wait for tool results before proceeding, call agent again
         if (decision.needsToolResults) {
@@ -125,7 +185,7 @@ export class OrchestratorAgent {
         this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
         this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
-        const blockerResult = await this.executeCommandsSequentially(
+        const blockerResult = await this.executeCommands(
           decision.blockerDetected.clearingCommands,
           page,
           memory,
@@ -148,9 +208,9 @@ export class OrchestratorAgent {
         }
       }
-      // Execute main commands (only if no blocker failure or continueOnFailure is true)
-      if (decision.commands && decision.commands.length > 0 && !iterationHadFailure) {
-        const executeResult = await this.executeCommandsSequentially(
+      // Execute main commands (only if no blocker failure)
+      if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
+        const executeResult = await this.executeCommands(
           decision.commands,
           page,
           memory,
@@ -171,8 +231,161 @@ export class OrchestratorAgent {
         }
       }
+      // Handle coordinate-based actions (NEW - fallback when selectors fail) - ONLY if enabled
+      if (this.config.enableCoordinateMode && decision.coordinateAction && !iterationHadFailure) {
+        coordinateAttempts++;
+        this.logger?.(`[Orchestrator] 🎯 Coordinate Action (attempt ${coordinateAttempts}/2): ${decision.coordinateAction.action} at (${decision.coordinateAction.xPercent}%, ${decision.coordinateAction.yPercent}%)`);
+        try {
+          // BEFORE screenshot for visual verification
+          const beforeScreenshot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
+          const beforeDataUrl = `data:image/jpeg;base64,${beforeScreenshot}`;
+          // Generate Playwright commands from coordinate action
+          const coordCommands = await CoordinateConverter.generateCommands(decision.coordinateAction, page);
+          this.logger?.(`[Orchestrator] Generated commands:`);
+          coordCommands.forEach(cmd => this.logger?.(`  ${cmd}`));
+          // Execute coordinate commands
+          const coordResult = await this.executeCommands(
+            coordCommands,
+            page,
+            memory,
+            stepNumber,
+            iteration,
+            jobId
+          );
+          commandsExecuted.push(...coordResult.executed);
+          if (!coordResult.allSucceeded) {
+            this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
+            consecutiveFailures++;
+            iterationHadFailure = true;
+            // Give up after 2 coordinate attempts
+            if (coordinateAttempts >= 2) {
+              this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
+              return {
+                success: false,
+                commands: commandsExecuted,
+                iterations: iteration,
+                terminationReason: 'agent_stuck',
+                memory,
+                error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
+              };
+            }
+          } else {
+            this.logger?.(`[Orchestrator] ✅ Coordinate action succeeded (no Playwright error)`);
+            // CRITICAL: Verify visually if goal was achieved (coordinates might have clicked wrong place)
+            // Wait for network idle (smarter than fixed timeout - waits as needed, max 10s)
+            try {
+              await page.waitForLoadState('networkidle', { timeout: 10000 });
+            } catch (e) {
+              // Network idle timeout - page may still be loading, but proceed with verification
+              this.logger?.(`[Orchestrator] ⚠️  Network idle timeout after 10s, proceeding with verification`, 'warn');
+            }
+            const afterScreenshot = await page.screenshot({ encoding: 'base64', fullPage: false, type: 'jpeg', quality: 60 });
+            const afterDataUrl = `data:image/jpeg;base64,${afterScreenshot}`;
+            this.logger?.(`[Orchestrator] 📸 Verifying coordinate action visually...`);
+            const verificationRequest = {
+              model: 'gpt-5-mini',
+              systemPrompt: 'You are a visual verification expert for web automation. Compare before/after screenshots to determine if an action achieved its goal.',
+              userPrompt: `Goal: ${scenarioSteps[stepNumber - 1]}\n\nA coordinate-based action was just executed. Compare the BEFORE and AFTER screenshots.\n\nDid the action achieve the goal? Respond with JSON:\n{\n  "verified": boolean,\n  "reasoning": "What changed (or didn't change) between screenshots",\n  "visibleChanges": ["List of UI changes observed"],\n  "clickedWrongPlace": boolean\n}\n\nBe strict: Only return verified=true if you clearly see the expected change matching the goal.`,
+              images: [
+                { label: 'BEFORE', dataUrl: beforeDataUrl },
+                { label: 'AFTER', dataUrl: afterDataUrl }
+              ]
+            };
+            const verificationResponse = await this.llmFacade.llmProvider.callLLM(verificationRequest);
+            const jsonMatch = verificationResponse.answer.match(/\{[\s\S]*\}/);
+            if (jsonMatch) {
+              const verificationResult = JSON.parse(jsonMatch[0]);
+              const verified = verificationResult.verified === true;
+              const reasoning = verificationResult.reasoning || 'No reasoning provided';
+              const clickedWrongPlace = verificationResult.clickedWrongPlace === true;
+              this.logger?.(`[Orchestrator] 📊 Visual verification: ${verified ? '✅ VERIFIED' : '❌ NOT VERIFIED'}`);
+              this.logger?.(`[Orchestrator] 💭 Reasoning: ${reasoning}`);
+              if (verified) {
+                // Goal achieved! Can mark complete
+                consecutiveFailures = 0;
+                // Store note for context
+                noteToSelf = {
+                  fromIteration: iteration,
+                  content: `Coordinate action verified successful: ${reasoning}`
+                };
+                memory.latestNote = noteToSelf;
+              } else {
+                // Goal NOT achieved despite no error
+                this.logger?.(`[Orchestrator] ⚠️  Coordinate click succeeded but goal NOT achieved`, 'warn');
+                consecutiveFailures++;
+                iterationHadFailure = true;
+                // Store diagnostic note for next attempt
+                const diagnostic = clickedWrongPlace
+                  ? `Clicked wrong place. ${reasoning}. Try different coordinates.`
+                  : `Action executed but goal not achieved. ${reasoning}. May need different approach.`;
+                noteToSelf = {
+                  fromIteration: iteration,
+                  content: diagnostic
+                };
+                memory.latestNote = noteToSelf;
+                // Give up after 2 coordinate attempts
+                if (coordinateAttempts >= 2) {
+                  this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts, none achieved goal) - marking stuck`);
+                  return {
+                    success: false,
+                    commands: commandsExecuted,
+                    iterations: iteration,
+                    terminationReason: 'agent_stuck',
+                    memory,
+                    error: `Coordinate actions clicked but didn't achieve goal: ${reasoning}`
+                  };
+                }
+              }
+            } else {
+              this.logger?.(`[Orchestrator] ⚠️  Could not parse verification response - treating as unverified`, 'warn');
+              consecutiveFailures++;
+              iterationHadFailure = true;
+            }
+          }
+        } catch (error: any) {
+          this.logger?.(`[Orchestrator] ❌ Coordinate action error: ${error.message}`, 'error');
+          consecutiveFailures++;
+          iterationHadFailure = true;
+          // Give up after 2 coordinate attempts
+          if (coordinateAttempts >= 2) {
+            this.logger?.(`[Orchestrator] 🛑 Coordinate mode exhausted (2 attempts) - marking stuck`);
+            return {
+              success: false,
+              commands: commandsExecuted,
+              iterations: iteration,
+              terminationReason: 'agent_stuck',
+              memory,
+              error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
+            };
+          }
+        }
+      }
       // System-enforced stuck detection (agent might not detect it)
-      if (consecutiveFailures >= 3) {
+      // Allow 5 failures: 3 selector attempts + 2 coordinate attempts
+      if (consecutiveFailures >= 5) {
         this.logger?.(`[Orchestrator] 🛑 SYSTEM: ${consecutiveFailures} consecutive failures detected - forcing stuck`, 'warn');
         return {
           success: false,
@@ -205,23 +418,38 @@ export class OrchestratorAgent {
         }
       }
-      // Store self-reflection for next iteration
-      previousReflection = decision.selfReflection;
+      // Store note to future self (tactical memory across iterations AND steps)
+      if (decision.noteToFutureSelf) {
+        noteToSelf = {
+          fromIteration: iteration,
+          content: decision.noteToFutureSelf
+        };
+        memory.latestNote = noteToSelf;  // Persist in journey memory across steps
+        this.logger?.(`[Orchestrator] 📝 Note to self: ${decision.noteToFutureSelf}`);
+      }
       // Check termination
       if (decision.status !== 'continue') {
         this.logger?.(`[Orchestrator] 🎯 Status: ${decision.status}`);
         this.logger?.(`[Orchestrator] 💭 Reason: ${decision.statusReasoning}`);
-        return {
-          success: decision.status === 'complete',
-          commands: commandsExecuted,
-          iterations: iteration,
-          terminationReason: decision.status === 'complete' ? 'complete' :
-                           decision.status === 'stuck' ? 'agent_stuck' :
-                           'infeasible',
-          memory
-        };
+        // SAFETY CHECK: Don't allow "complete" if commands failed this iteration
+        if (decision.status === 'complete' && iterationHadFailure) {
+          this.logger?.(`[Orchestrator] ⚠️  OVERRIDE: Agent said "complete" but commands FAILED - forcing "continue"`, 'warn');
+          this.logger?.(`[Orchestrator] 💭 System: Commands must succeed before marking complete`);
+          // Don't return - continue to next iteration
+        } else {
+          // Valid termination
+          return {
+            success: decision.status === 'complete',
+            commands: commandsExecuted,
+            iterations: iteration,
+            terminationReason: decision.status === 'complete' ? 'complete' :
+                             decision.status === 'stuck' ? 'agent_stuck' :
+                             'infeasible',
+            memory
+          };
+        }
       }
     }
@@ -248,8 +476,10 @@ export class OrchestratorAgent {
     totalSteps: number,
     scenarioSteps: string[],
     memory: JourneyMemory,
-    previousReflection?: SelfReflection,
-    consecutiveFailures?: number
+    consecutiveFailures?: number,
+    noteFromPreviousIteration?: NoteToFutureSelf,
+    priorSteps?: string[],    // NEW: For repair mode
+    nextSteps?: string[]      // NEW: For repair mode
   ): Promise<AgentContext> {
     // Get fresh DOM
     const currentPageInfo = await getEnhancedPageInfo(page);
@@ -258,8 +488,41 @@ export class OrchestratorAgent {
     // Get recent steps
     const recentSteps = memory.history.slice(-this.config.recentStepsCount);
+    // SoM integration: Update markers and capture screenshot with visual IDs
+    let somScreenshot: string | undefined = undefined;
+    let somElementMap: string | undefined = undefined;
+    if (this.config.useSoM && this.somHandler) {
+      try {
+        if (!this.somHandler) {
+          this.somHandler = new PageSoMHandler(page, this.logger);
+        } else {
+          this.somHandler.setPage(page);
+        }
+        // Wait briefly for page stability (handles first iteration + safety net for fast SPAs)
+        try {
+          await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
+        } catch (error: any) {
+          // Page already loaded or timeout - continue
+        }
+        // Update SoM markers
+        await this.somHandler.updateSom();
+        // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
+        somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
+        // Get element map for disambiguation
+        somElementMap = this.somHandler.getSomElementMap();
+        this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
+      } catch (error: any) {
+        this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${error.message}`, 'error');
+      }
+    }
     // Build context
-    return {
+    const context = {
       overallGoal: scenarioSteps.join('\n'),
       currentStepGoal,
       stepNumber,
@@ -271,8 +534,19 @@ export class OrchestratorAgent {
       recentSteps,
       experiences: memory.experiences,
       extractedData: memory.extractedData,
-      previousIterationGuidance: previousReflection
+      noteFromPreviousIteration,  // Pass tactical note from previous iteration
+      somScreenshot,  // SoM screenshot with visual markers (current)
+      somElementMap,  // SoM element details for disambiguation
+      priorSteps,     // NEW: Repair context (undefined for script gen)
+      nextSteps       // NEW: Repair context (undefined for script gen)
     };
+    // Save current screenshot as previous for next iteration (for tool access)
+    if (somScreenshot) {
+      this.previousSomScreenshot = somScreenshot;
+    }
+    return context;
   }
   /**
@@ -285,18 +559,50 @@ export class OrchestratorAgent {
     iteration: number,
     consecutiveFailures?: number
   ): Promise<AgentDecision> {
-    // Build prompt
-    const systemPrompt = this.buildSystemPrompt();
-    const userPrompt = this.buildUserPrompt(context, consecutiveFailures);
+    // Detect if coordinate mode should be activated
+    // Phase 1: Only 2 tiers (selectors → coordinates), so activate after 3 failures
+    // Phase 2: Will have 3 tiers (selectors → index → coordinates), threshold will be 5
+    const useCoordinateMode = consecutiveFailures !== undefined && consecutiveFailures >= 3;
+    // Build appropriate system prompt based on mode
+    const toolDescriptions = this.toolRegistry.generateToolDescriptions();
+    let systemPrompt: string;
+    if (this.config.useSoM) {
+      // SoM mode: Use visual element identification
+      systemPrompt = OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates);
+    } else if (useCoordinateMode) {
+      // Coordinate mode: Fallback when selectors fail
+      systemPrompt = OrchestratorPrompts.buildCoordinateSystemPrompt();
+    } else {
+      // Standard mode: DOM-based selectors
+      systemPrompt = OrchestratorPrompts.buildSystemPrompt(toolDescriptions, this.config.enableCoordinateMode);
+    }
+    const userPrompt = OrchestratorPrompts.buildUserPrompt(context, consecutiveFailures, this.config.enableCoordinateMode);
+    // Log prompt lengths for monitoring
+    const systemLength = systemPrompt.length;
+    const userLength = userPrompt.length;
+    const totalLength = systemLength + userLength;
+    const estimatedTokens = Math.ceil(totalLength / 4); // Rough estimate: 4 chars per token
+    this.logger?.(`[Orchestrator] 📊 Prompt lengths: system=${systemLength} chars, user=${userLength} chars, total=${totalLength} chars (~${estimatedTokens} tokens)`, 'log');
     try {
       // Call LLM directly via provider
-      const llmRequest = {
+      const llmRequest: any = {
         model: DEFAULT_MODEL,
         systemPrompt,
         userPrompt
       };
+      // Include current SoM screenshot as image
+      if (context.somScreenshot) {
+        llmRequest.imageUrl = context.somScreenshot;
+        this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
+      }
       const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
       // Report token usage
@@ -318,7 +624,7 @@ export class OrchestratorAgent {
       }
       // Parse response
-      return this.parseAgentDecision(response.answer);
+      return this.decisionParser.parse(response.answer);
     } catch (error: any) {
       this.logger?.(`[Orchestrator] ✗ Agent call failed: ${error.message}`, 'error');
@@ -332,443 +638,6 @@ export class OrchestratorAgent {
     }
   }
-  /**
-   * Build system prompt with tool descriptions
-   */
-  private buildSystemPrompt(): string {
-    const toolDescriptions = this.toolRegistry.generateToolDescriptions();
-    return `You are an intelligent test automation agent that executes web scenarios using Playwright.
-${toolDescriptions}
-YOUR RESPONSE FORMAT (JSON):
-{
-  "toolCalls": [{"name": "tool_name", "params": {...}}],  // Optional: Tools to use
-  "toolReasoning": "Why I need these tools",              // If using tools
-  "needsToolResults": true/false,                         // Wait for tool results before commands?
-  "commands": ["await page.click(...)", ...],             // Playwright commands to execute
-  "commandReasoning": "Why these commands",               // If executing commands
-  "selfReflection": {                                     // Guidance for your next iteration
-    "guidanceForNext": "What to focus on next",
-    "detectingLoop": false,                               // Set true if repeating same approach
-    "loopReasoning": "Why I think I'm looping"           // If detecting loop
-  },
-  "experiences": ["App-specific learning"],              // APP-SPECIFIC patterns (see rules below)
-  "status": "continue|complete|stuck|infeasible",        // Your decision
-  "statusReasoning": "Why this status",
-  "reasoning": "Overall reasoning for this iteration",
-  "blockerDetected": {                                   // OPTIONAL: If something blocks the current goal
-    "description": "What's blocking",                    // E.g., "Cookie consent modal"
-    "clearingCommands": ["await page.click(...)"]       // Commands to clear blocker FIRST
-  },
-  "stepReEvaluation": {                                  // OPTIONAL: If questioning step order (last resort)
-    "detected": true,                                    // Set true if you think step order is wrong
-    "issue": "prior_incomplete|already_done|wrong_order", // What's the issue?
-    "explanation": "Why step order seems wrong"          // Your reasoning
-  }
-}
-STATUS DECISION RULES (CRITICAL!):
-- "complete": ALL commands succeeded AND step goal achieved
-  ❌ NEVER mark complete if ANY command failed!
-- "continue": Command failed OR need more actions
-  Use when: Need to retry with different selector, wait for something, or continue execution
-- "stuck": Tried 3+ iterations, all failed
-- "infeasible": Goal truly impossible (element doesn't exist anywhere)
-IF COMMAND FAILED → status must be "continue" (to retry) or "stuck" (if tried many times)
-IF COMMAND SUCCEEDED → status can be "complete" (if goal done) or "continue" (if more needed)
-STEP RE-EVALUATION (LAST RESORT - After 2+ Failures):
-When you've failed multiple times on current step, QUESTION YOUR ASSUMPTIONS:
-1. **LOOK BACK**: Did prior steps really complete?
-   - Check COMPLETED steps list vs current page state
-   - Example: Says "Login complete" but you see login form → PRIOR STEP FAILED
-   - If prior step incomplete: Execute commands to complete it, explain in reasoning
-2. **LOOK FORWARD**: Is current step already done or no longer relevant?
-   - Check if current step goal already achieved (element exists, state correct)
-   - Example: Step says "Click Submit" but form already submitted → STEP ALREADY DONE
-   - If already done: Mark "complete" and explain why
-3. **LOOK AHEAD**: Is next step more feasible?
-   - Sometimes UI flows differently than expected
-   - Example: Current step "Click Edit" fails, but next step "Fill Name" elements are visible
-   - If can do future step: Note in reasoning, but PREFER sticking to order
-STICK TO ORIGINAL PLAN unless clear evidence suggests otherwise.
-Only re-evaluate after repeated failures on same step.
-BLOCKER DETECTION (IMPORTANT!):
-Use "blockerDetected" when the CURRENT STEP GOAL is not immediately feasible because something unexpected is in the way:
-Examples of BLOCKERS:
-- Cookie consent modal appears before login
-- "What's New" tour overlay prevents clicking navigation
-- Email verification prompt blocks form submission
-- Session timeout modal requires re-login before continuing
-When you detect a blocker:
-1. Set "blockerDetected.description" to describe what's blocking
-2. Provide "clearingCommands" to dismiss/handle the blocker
-3. Set your main "commands" to execute AFTER blocker is cleared
-4. Agent will execute: clearingCommands FIRST, then your commands
-Example:
-{
-  "blockerDetected": {
-    "description": "Cookie consent modal blocking page interaction",
-    "clearingCommands": ["await page.getByRole('button', {name: 'Accept'}).click()"]
-  },
-  "commands": ["await page.getByRole('textbox', {name: 'Email'}).fill('user@example.com')"],
-  ...
-}
-DON'T use blockerDetected for:
-- Normal step execution (just use commands)
-- Wrong selectors (that's not a blocker, just fix the selector)
-- Missing elements (that's infeasible, not blocked)
-EXPERIENCES - APP-SPECIFIC PATTERNS ONLY:
-Guidelines:
-- Concise - combine related info into single learning
-- Focus on app quirks, not obvious practices
-- Max 1-2 experiences per iteration
-- Capture SEMANTIC SELECTOR patterns when discovered
-✅ GOOD - App-specific patterns:
-- "Login form fields accessible via getByLabel: 'Email' and 'Password'"
-- "Submit buttons consistently use role=button with text matching action"
-- "Form uses custom dropdowns (role=combobox) instead of native selects"
-- "Navigation items accessible via getByRole('link') with visible text"
-- "Site uses data-testid attributes consistently (pattern: action-element-name)"
-- "Input fields have clear placeholders - prefer getByPlaceholder over IDs"
-❌ BAD - Too specific or obvious:
-- Listing individual field selectors (combine them)
-- "Button exists with role=button" (obvious)
-- Generic automation advice
-- Noting auto-generated IDs like #«r3»-form-item (these are unreliable)
-COMBINE related learnings instead of listing each element separately.
-PRIORITIZE learning about semantic selector availability over CSS selectors.
-CRITICAL RULES FOR DECISION MAKING:
-1. **BE FOCUSED AND CONCISE** (CRITICAL!):
-   - Read the CURRENT STEP GOAL carefully - ONLY do what's explicitly asked for in THAT step
-   - DON'T add extra actions, verifications, or defensive checks not mentioned in the step goal
-   - If step says "Send a message" → ONLY send the message, DON'T verify it appeared
-   - If step says "Click Submit" → ONLY click, DON'T wait or verify submission success
-   - ONLY add expect() assertions if the step explicitly mentions "verify", "check", "ensure", or "confirm"
-   - Keep commands minimal - no unnecessary preparatory or validation actions
-2. **COMMAND EFFICIENCY**:
-   - DON'T click on input field before filling it - just use .fill() directly
-   - DON'T do preparatory actions unless necessary
-   - Example: page.fill works without clicking the input first
-2b. **VARIABLE SCOPE & AVAILABLE FUNCTIONS** (CRITICAL!):
-   - Commands in SAME BATCH share scope - variables persist within iteration
-   - To save data for later steps: Use extract_data tool, then access via extractedData in context
-   - Within same batch: Variables work across commands
-   - AVAILABLE IN CONTEXT: page (Playwright Page object) and expect (Playwright assertions)
-   - You can use expect() directly - it is already imported and available
-3. **WHEN COMMAND FAILS** (most important!):
-   - DON'T repeat the same command/selector
-   - DO analyze the error message
-   - DO look at DOM snapshot for SEMANTIC selectors (getByRole, getByLabel, getByText)
-   - AVOID falling back to auto-generated IDs with unicode characters (e.g., #«r3»-form-item)
-   - DO take screenshot if DOM doesn't help
-   - "Element is not a <select>" → It's a custom dropdown, use .click() then select from menu
-   - "Timeout waiting for locator" → Try semantic selectors from ARIA tree instead of CSS IDs
-4. **CHECK YOUR RECENT STEPS FOR PATTERNS**:
-   - If you see same selector failed 2+ times → YOU ARE LOOPING!
-   - Set detectingLoop: true and explain why
-   - Take screenshot to see what's actually on the page
-   - Use different SEMANTIC selector from DOM/ARIA tree (getByRole, getByLabel, getByText, data-testid)
-   - AVOID repeating with similar auto-generated ID patterns (e.g., #«r3»-form-item → #«r4»-form-item)
-5. **USE DOM SNAPSHOT** - ONLY USE WHAT'S ACTUALLY THERE (CRITICAL!):
-   - You get INTERACTIVE ELEMENTS list showing: position, tag, id, text, SUGGESTED SELECTORS
-   - ONLY use selectors from this list - DO NOT invent selectors with text/names not in the list!
-   - ❌ WRONG: List shows button "Submit" but you use getByRole('button', {name: 'SomeOtherText'})
-   - ✅ RIGHT: Use EXACT selector from the provided list
-   - ✅ RIGHT: If list shows "#submit-btn" → use that
-   - ✅ RIGHT: If list shows "getByRole('button', {name: 'X'})" → use exactly that
-   - If element you need is NOT in list → take_screenshot to find it
-   - ARIA tree shows roles/names that exist - inventing roles/names not in tree will fail!
-5b. **SELECTOR PREFERENCE** (CRITICAL - Playwright Best Practices):
-   ALWAYS prefer user-friendly, semantic selectors in this order:
-   🏆 BEST (use these first):
-   1. page.getByRole('role', {name: 'text'})     → ✅ Accessible, semantic, resilient
-   2. page.getByLabel('label text')              → ✅ Great for form inputs
-   3. page.getByPlaceholder('placeholder')       → ✅ Good for inputs without labels
-   4. page.getByText('visible text')             → ✅ Clear and readable
-   5. page.getByTestId('test-id')                → ✅ Stable if available
-   ⚠️ AVOID (last resort only):
-   - CSS selectors with auto-generated IDs: #r3-form-item, #«r3»-form-item
-   - CSS selectors with unicode: #«r4»-form-item
-   - Complex CSS paths: div.ant-form > div:nth-child(2)
-   Examples:
-   ❌ BAD:  await page.fill('#«r3»-form-item', 'alice@example.com')
-   ✅ GOOD: await page.getByLabel('Email').fill('alice@example.com')
-   ✅ GOOD: await page.getByRole('textbox', {name: 'Email'}).fill('alice@example.com')
-   ✅ GOOD: await page.getByPlaceholder('Enter your email').fill('alice@example.com')
-   ❌ BAD:  await page.click('#submit-btn-23847')
-   ✅ GOOD: await page.getByRole('button', {name: 'Submit'}).click()
-   ✅ GOOD: await page.getByText('Submit').click()
-   WHY: Semantic selectors are more maintainable, self-documenting, and resilient to UI changes.
-   Auto-generated IDs (especially with unicode like «r3») break when component instances change.
-5c. **ASSERTIONS AND VERIFICATION** (CRITICAL - Use expect):
-   Use expect() for verification ONLY when the step explicitly asks for verification.
-   🏆 WHEN TO USE expect():
-   - Step says "Verify message appears" → await expect(locator).toBeVisible()
-   - Step says "Check field is empty" → await expect(locator).toBeEmpty()
-   - Step says "Confirm URL changed" → await expect(page).toHaveURL('...')
-   - Step explicitly mentions: "verify", "check", "ensure", "confirm", "validate"
-   ❌ WHEN NOT TO USE expect():
-   - Step says "Send a message" → DON'T add expect() for message visibility
-   - Step says "Click Submit" → DON'T add expect() for button state or result
-   - Step says "Fill Email field" → DON'T add expect() to verify it's filled
-   - Step says "Navigate to Dashboard" → DON'T add expect() for URL (unless step asks)
-   🏆 expect() syntax (when needed):
-   - await expect(locator).toBeVisible()
-   - await expect(locator).toHaveText('expected text')
-   - await expect(locator).toHaveValue('expected value')
-   - await expect(locator).toBeEmpty()
-   - await expect(page).toHaveURL('expected-url')
-   ❌ BAD - Manual checks:
-   const value = await page.getByPlaceholder('Message...').inputValue();
-   if (value !== '') throw new Error(\`Expected empty but found: '\${value}'\`);
-   ✅ GOOD - Use expect (but ONLY if step asks to verify):
-   await expect(page.getByPlaceholder('Message...')).toBeEmpty();
-   WHY: expect() has built-in retries and better error messages.
-   BUT: Only use it when verification is explicitly requested in the step goal.
-6. **TOOLS ARE READ-ONLY (Information Gathering)**:
-   AVAILABLE TOOLS (for information only):
-   - take_screenshot: Get visual context when DOM isn't enough or stuck 2+ times
-     * DEFAULT: Captures FULL PAGE (elements below fold included)
-     * Set isFullPage=false ONLY if you need just viewport
-     * Use for: Planning, finding hidden elements, verifying layout
-   - recall_history: Remember earlier steps in journey
-   - inspect_page: Force fresh DOM snapshot (rarely needed, already in context)
-   - check_page_ready: Verify page loaded (use sparingly!)
-   - extract_data: Save data from page to memory for later steps
-   ALL STATE CHANGES USE PLAYWRIGHT COMMANDS:
-   - Navigation: await page.goto('url')
-   - Go back: await page.goBack()
-   - Refresh: await page.reload()
-   - Fill inputs: await page.fill() or page.getByRole().fill()
-   - Click buttons: await page.click() or page.getByRole().click()
-   - Wait: await page.waitForURL(), page.waitForLoadState()
-   Tools gather info → Commands change state
-7. **ERROR-SPECIFIC RESPONSES**:
-   - "Timeout waiting for locator" → Selector doesn't exist, try different one from DOM
-   - "Element is not a <select>" → It's a custom dropdown (button with role=combobox), use .click()
-   - "Multiple elements" → Add more specific selector from DOM (use #id)
-8. **WHEN TO RUN COMMANDS vs TOOLS**:
-   - Confident about selectors from DOM → Run commands directly
-   - Unsure or failed 2+ times → Take screenshot first
-   - First iteration of a step → Usually can run commands from DOM`;
-  }
-  /**
-   * Build user prompt with context
-   */
-  private buildUserPrompt(context: AgentContext, consecutiveFailures?: number): string {
-    const parts: string[] = [];
-    parts.push('=== YOUR CURRENT CONTEXT ===\n');
-    // System warnings for accumulated failures
-    if (consecutiveFailures && consecutiveFailures >= 2) {
-      parts.push(`⚠️⚠️⚠️ SYSTEM WARNING ⚠️⚠️⚠️`);
-      parts.push(`You've failed ${consecutiveFailures} iterations in a row!`);
-      parts.push(`Next failure will force STUCK. You MUST:`);
-      parts.push(`1. Take screenshot if you haven't already`);
-      parts.push(`2. Try completely different selector strategy`);
-      parts.push(`3. QUESTION ASSUMPTIONS: Am I at the right step?`);
-      parts.push(`   - LOOK BACK: Did prior steps really complete? Check page state vs COMPLETED list`);
-      parts.push(`   - LOOK FORWARD: Is current step already done? Check if goal already achieved`);
-      parts.push(`4. OR mark status="stuck" if truly can't proceed`);
-      parts.push(`⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️⚠️\n`);
-    }
-    // Goals - make current step very prominent
-    parts.push(`🎯 CURRENT STEP GOAL (${context.stepNumber}/${context.totalSteps}):`);
-    parts.push(`${context.currentStepGoal}`);
-    parts.push(`^^ DO ONLY WHAT THIS STEP ASKS - NO EXTRA ACTIONS OR VERIFICATIONS ^^`);
-    parts.push(`^^ If step doesn't say "verify/check/confirm" → DON'T add expect() assertions ^^`);
-    parts.push(`^^ Mark "complete" ONLY if commands succeeded ^^`);
-    parts.push(``);
-    parts.push(`OVERALL SCENARIO: ${context.overallGoal}\n`);
-    if (context.completedSteps.length > 0) {
-      parts.push(`COMPLETED: ${context.completedSteps.join(', ')}`);
-    }
-    if (context.remainingSteps.length > 0) {
-      parts.push(`REMAINING: ${context.remainingSteps.join(', ')}\n`);
-    }
-    // Current page state
-    parts.push(`\nCURRENT PAGE:`);
-    parts.push(`URL: ${context.currentURL}`);
-    parts.push(`Title: ${context.currentPageInfo.title}`);
-    parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
-    parts.push(context.currentPageInfo.formattedElements);
-    parts.push(`\nARIA TREE (hierarchical structure):`);
-    parts.push(JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2).substring(0, 5000));
-    if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
-      parts.push('... (truncated)');
-    }
-    parts.push('')
-    // Recent steps
-    if (context.recentSteps.length > 0) {
-      parts.push(`\nRECENT STEPS (last ${context.recentSteps.length}):`);
-      for (const step of context.recentSteps) {
-        const status = step.result === 'success' ? '✓' : '✗';
-        parts.push(`  ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}`);
-        parts.push(`     Code: ${step.code}`);
-        if (step.result === 'failure' && step.error) {
-          parts.push(`     ❌ ERROR: ${step.error}`);
-          parts.push(`     ^^ THIS SELECTOR FAILED - TRY DIFFERENT APPROACH ^^`);
-        } else {
-          parts.push(`     Result: ${step.observation}`);
-        }
-      }
-      parts.push('');
-      // Detect repeated failures
-      const recentFailures = context.recentSteps.filter(s => s.result === 'failure');
-      if (recentFailures.length >= 2) {
-        const sameSelector = recentFailures.slice(-2).every((s, i, arr) =>
-          i === 0 || s.code === arr[i-1].code
-        );
-        if (sameSelector) {
-          parts.push(`⚠️ WARNING: You've tried the same selector multiple times and it failed!`);
-          parts.push(`   Last failed selector: ${recentFailures[recentFailures.length - 1].code}`);
-          parts.push(`   YOU MUST try a completely different selector this time!\n`);
-        }
-      }
-    }
-    // Experiences
-    if (context.experiences.length > 0) {
-      parts.push(`\nLEARNINGS (${context.experiences.length}):`);
-      for (const exp of context.experiences) {
-        parts.push(`  - ${exp}`);
-      }
-      parts.push('');
-    }
-    // Extracted data
-    if (Object.keys(context.extractedData).length > 0) {
-      parts.push(`\nEXTRACTED DATA:`);
-      for (const [key, value] of Object.entries(context.extractedData)) {
-        parts.push(`  ${key}: ${value}`);
-      }
-      parts.push('');
-    }
-    // Previous reflection
-    if (context.previousIterationGuidance) {
-      parts.push(`\nYOUR PREVIOUS GUIDANCE TO YOURSELF:`);
-      parts.push(`Focus: ${context.previousIterationGuidance.guidanceForNext}`);
-      if (context.previousIterationGuidance.detectingLoop) {
-        parts.push(`⚠ YOU DETECTED A LOOP: ${context.previousIterationGuidance.loopReasoning}`);
-      }
-      parts.push('');
-    }
-    // Tool results (if any)
-    if (context.toolResults) {
-      parts.push(`\n========== TOOL RESULTS ==========`);
-      for (const [toolName, result] of Object.entries(context.toolResults)) {
-        parts.push(`\n${toolName}: ${result.success ? '✓ SUCCESS' : '✗ FAILED'}`);
-        if (result.learning) {
-          parts.push(`\n📊 ANALYSIS FROM ${toolName.toUpperCase()}:`);
-          parts.push(result.learning);
-          parts.push(`^^ USE THIS INFORMATION TO DECIDE YOUR NEXT COMMANDS ^^`);
-        }
-        if (result.error) {
-          parts.push(`  Error: ${result.error}`);
-        }
-      }
-      parts.push(`\n===================================\n`);
-    }
-    parts.push('=== WHAT DO YOU WANT TO DO? ===');
-    parts.push('Respond with JSON following the format specified in the system prompt.');
-    return parts.join('\n');
-  }
-  /**
-   * Parse agent decision from LLM response
-   */
-  private parseAgentDecision(response: string): AgentDecision {
-    try {
-      // Extract JSON from response
-      const jsonMatch = response.match(/\{[\s\S]*\}/);
-      if (!jsonMatch) {
-        throw new Error('No JSON found in response');
-      }
-      const parsed = JSON.parse(jsonMatch[0]);
-      // Validate required fields
-      if (!parsed.status || !parsed.reasoning) {
-        throw new Error('Missing required fields: status and reasoning');
-      }
-      return parsed as AgentDecision;
-    } catch (error: any) {
-      this.logger?.(`[Orchestrator] ✗ Failed to parse agent decision: ${error.message}`, 'error');
-      this.logger?.(`[Orchestrator] Raw response: ${response.substring(0, 200)}...`, 'log');
-      // Return fallback
-      return {
-        status: 'stuck',
-        statusReasoning: 'Failed to parse agent response',
-        reasoning: `Parse error: ${error.message}`
-      };
-    }
-  }
   /**
    * Execute tools
    */
@@ -776,16 +645,20 @@ CRITICAL RULES FOR DECISION MAKING:
     toolCalls: any[],
     page: any,
     memory: JourneyMemory,
-    stepNumber: number
+    stepNumber: number,
+    refMap?: Map<string, any>
   ): Promise<Record<string, any>> {
     this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
     const results: Record<string, any> = {};
-    const toolContext: ToolExecutionContext = {
+    const toolContext: ToolExecutionContext & { refMap?: Map<string, any>; previousSomScreenshot?: string; somHandler?: any } = {
       page,
       memory,
       stepNumber,
-      logger: this.logger
+      logger: this.logger,
+      refMap,  // Pass refMap for interact_with_ref tool
+      previousSomScreenshot: this.previousSomScreenshot,  // For view_previous_screenshot tool
+      somHandler: this.somHandler  // For refresh_som_markers tool
     };
     for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
@@ -805,71 +678,259 @@ CRITICAL RULES FOR DECISION MAKING:
   }
   /**
-   * Execute commands sequentially with SHARED context (variables persist across commands)
+   * Parse SomCommand from command object
    */
-  private async executeCommandsSequentially(
-    commands: string[],
+  private parseSomCommand(cmd: any): SomCommand | null {
+    if (typeof cmd === 'object' && cmd.action) {
+      // Valid if: has elementRef, OR has coord, OR is navigation action
+      const isNavigationAction = ['navigate', 'goBack', 'goForward', 'reload'].includes(cmd.action);
+      const hasValidTarget = cmd.elementRef || cmd.coord || isNavigationAction;
+      if (hasValidTarget) {
+        return {
+          elementRef: cmd.elementRef,
+          coord: cmd.coord,
+          action: cmd.action,
+          value: cmd.value,
+          fromCoord: cmd.fromCoord,
+          toCoord: cmd.toCoord,
+          force: cmd.force,
+          scrollAmount: cmd.scrollAmount,
+          scrollDirection: cmd.scrollDirection,
+          button: cmd.button,
+          clickCount: cmd.clickCount,
+          modifiers: cmd.modifiers,
+          delay: cmd.delay,
+          timeout: cmd.timeout
+        };
+      }
+    }
+    return null;
+  }
+  /**
+   * Execute commands (mix of ref and playwright commands)
+   */
+  private async executeCommands(
+    commands: string[] | any[],
     page: any,
     memory: JourneyMemory,
     stepNumber: number,
     iteration: number,
     jobId: string
   ): Promise<{ executed: string[]; allSucceeded: boolean }> {
-    this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s) in shared context`);
+    this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
     const executed: string[] = [];
-    const limitedCommands = commands.slice(0, this.config.maxCommandsPerIteration);
-    // Build execution with shared context (all commands share scope - variables persist)
-    const commandsWithTracking = limitedCommands.map((cmd, i) => {
-      return `
-// Command ${i + 1}/${limitedCommands.length}
+    if (commands.length === 0) {
+      return { executed: [], allSucceeded: true };
+    }
+    // SoM mode: Execute commands through PageSoMHandler
+    if (this.config.useSoM && this.somHandler) {
+      this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
+      for (let i = 0; i < commands.length; i++) {
+        const cmd = commands[i];
+        // Check if verification or action command
+        if (isSomVerification(cmd)) {
+          // Handle verification command
+          try {
+            const result = await this.somHandler.executeVerification(cmd);
+            // Always add command to executed array (even if verification failed)
+            // Scripts should contain the expect even if it fails during generation
+            if (result.playwrightCommand) {
+              executed.push(result.playwrightCommand);
+            }
+            if (result.success) {
+              this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] Verification passed`, 'log');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `Verification ${i + 1}/${commands.length}: ${cmd.verificationType}`,
+                code: result.playwrightCommand,
+                result: 'success',
+                observation: `Verified: ${cmd.description || cmd.expected}`,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+            } else {
+              this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] Verification failed (non-fatal): ${result.error}`, 'warn');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `Verification ${i + 1}/${commands.length} - FAILED`,
+                code: result.playwrightCommand || JSON.stringify(cmd),
+                result: 'failure',
+                observation: `Failed: ${result.error}`,
+                error: result.error,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Continue anyway - verification failures are non-blocking for script generation
+            }
+            // Small delay between commands
+            if (i < commands.length - 1) {
+              await page.waitForTimeout(300);
+            }
+          } catch (error: any) {
+            this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] Verification exception: ${error.message}`, 'error');
+          }
+        } else if (isSomCommand(cmd)) {
+          // Handle action command (existing logic)
+          const somCommand = cmd as SomCommand;
+          try {
+            const result = await this.somHandler.runCommand(
+              somCommand,
+              this.config.somUseSomIdBasedCommands || false
+            );
+            if (result.status === CommandRunStatus.SUCCESS && result.successAttempt) {
+              this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] SoM action succeeded`, 'log');
+              executed.push(result.successAttempt.command!);
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action}`,
+                code: result.successAttempt.command!,
+                result: 'success',
+                observation: 'Executed successfully',
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Small delay for form validation/animations
+              if (i < commands.length - 1) {
+                await page.waitForTimeout(300);
+              }
+            } else {
+              this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] SoM action failed: ${result.error}`, 'error');
+              memory.history.push({
+                stepNumber,
+                iteration,
+                action: `SoM Action ${i + 1}/${commands.length}: ${somCommand.action} - FAILED`,
+                code: JSON.stringify(somCommand),
+                result: 'failure',
+                observation: `Failed: ${result.error}`,
+                error: result.error,
+                url: page.url(),
+                timestamp: Date.now()
+              });
+              // Refresh SoM after batch (DOM may have changed)
+              if (this.somHandler && page) {
+                this.somHandler.setPage(page);
+                await this.somHandler.updateSom();
+              }
+              return { executed, allSucceeded: false };
+            }
+          } catch (error: any) {
+            this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${commands.length}] SoM action exception: ${error.message}`, 'error');
+            memory.history.push({
+              stepNumber,
+              iteration,
+              action: `SoM Action ${i + 1}/${commands.length} - EXCEPTION`,
+              code: JSON.stringify(somCommand),
+              result: 'failure',
+              observation: `Exception: ${error.message}`,
+              error: error.message,
+              url: page.url(),
+              timestamp: Date.now()
+            });
+            // Refresh SoM after batch (DOM may have changed)
+            if (this.somHandler && page) {
+              this.somHandler.setPage(page);
+              await this.somHandler.updateSom();
+            }
+            return { executed, allSucceeded: false };
+          }
+        } else {
+          this.logger?.(`[Orchestrator]   ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
+        }
+      }
+      // Always wait for page to stabilize after command batch
+      // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
+      try {
+        this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
+        // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
+        await page.waitForLoadState('networkidle', { timeout: 3000 });
+        this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
+      } catch (error: any) {
+        // If networkidle times out, fall back to domcontentloaded
+        try {
+          await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
+          this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
+        } catch (error2: any) {
+          this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
+        }
+      }
+      // Refresh SoM after batch (DOM may have changed and page is now stable)
+      if (this.somHandler && page) {
+        this.somHandler.setPage(page);
+        await this.somHandler.updateSom();
+      }
+      return { executed, allSucceeded: true };
+    }
+    // Standard mode: Execute all commands in sequence with small delay between them
+    // Delay helps with form validation, button enabling, and animations
+    const wrappedCode = (commands as string[]).map((cmd, i) => `
+// Command ${i + 1}/${commands.length}
 try {
   ${cmd}
   __results.push({ index: ${i}, success: true });
+  ${i < commands.length - 1 ? 'await page.waitForTimeout(300);' : ''} // Small delay for form validation/animations
 } catch (error) {
   __results.push({ index: ${i}, success: false, error: error.message });
-  throw error; // Stop on first failure
-}`;
-    }).join('\n');
+  throw error;
+}`).join('\n');
-    const wrappedCode = `
-const __results = [];
-${commandsWithTracking}
-return __results;
-`;
+    const fullCode = `const __results = []; ${wrappedCode} return __results;`;
     try {
-      // Execute in shared context - variables declared here persist for entire scenario
-      // Pass both page and expect to make Playwright assertions available
-      const func = new Function('page', 'expect', 'return (async () => { ' + wrappedCode + ' })()');
-      const results = await func(page, expect);
+      const func = new Function('page', 'expect', 'return (async () => { ' + fullCode + ' })()');
+      const results = await func(page, (global as any).expect);
-      // Record results for each command
-      for (let i = 0; i < limitedCommands.length; i++) {
-        const cmd = limitedCommands[i];
+      for (let i = 0; i < commands.length; i++) {
+        const cmd = commands[i];
         const result = results[i];
         if (result && result.success) {
-          this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${limitedCommands.length}] Success`);
-          // Record in history
+          this.logger?.(`[Orchestrator]   ✓ [${i + 1}/${commands.length}] Success`);
           memory.history.push({
             stepNumber,
             iteration,
-            action: `Command ${i + 1}/${limitedCommands.length}`,
+            action: `Command ${i + 1}/${commands.length}`,
             code: cmd,
             result: 'success',
             observation: 'Executed successfully',
             url: page.url(),
             timestamp: Date.now()
           });
           executed.push(cmd);
         }
       }
-      // Cap history
       if (memory.history.length > this.config.maxHistorySize) {
         memory.history = memory.history.slice(-this.config.maxHistorySize);
       }
@@ -877,132 +938,25 @@ return __results;
       return { executed, allSucceeded: true };
     } catch (error: any) {
-      // One of the commands failed - find which one
       const errorMessage = error.message || String(error);
+      this.logger?.(`[Orchestrator] ❌ Command execution failed: ${errorMessage}`, 'error');
-      // Capture page state for debug logging
-      let pageStateDebug = '';
-      if (this.debugMode) {
-        try {
-          const pageInfo = await getEnhancedPageInfo(page);
-          pageStateDebug = `
-=== DEBUG: PAGE STATE AT FAILURE ===
-URL: ${page.url()}
-Title: ${pageInfo.title}
-INTERACTIVE ELEMENTS:
-${pageInfo.formattedElements}
-ARIA SNAPSHOT:
-${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
-====================================`;
-        } catch (debugError: any) {
-          pageStateDebug = `Failed to capture page state: ${debugError.message}`;
-        }
-      }
-      // Record all that succeeded, then the failure
-      for (let i = 0; i < limitedCommands.length; i++) {
-        const cmd = limitedCommands[i];
-        // This is a failed command (error happened here or earlier)
-        if (executed.length <= i) {
-          this.logger?.(`[Orchestrator]   ✗ [${i + 1}/${limitedCommands.length}] Failed: ${errorMessage}`, 'error');
-          // Log detailed debug info
-          if (this.debugMode && pageStateDebug) {
-            this.logger?.(pageStateDebug, 'debug');
-          }
-          memory.history.push({
-            stepNumber,
-            iteration,
-            action: `Command ${i + 1}/${limitedCommands.length} - FAILED`,
-            code: cmd,
-            result: 'failure',
-            observation: `Failed with error: ${errorMessage}. This selector likely doesn't exist or is incorrect.`,
-            error: errorMessage,
-            url: page.url(),
-            timestamp: Date.now()
-          });
-          if (i < limitedCommands.length - 1) {
-            this.logger?.(`[Orchestrator]   ⚠ Skipping remaining ${limitedCommands.length - i - 1} command(s)`, 'warn');
-          }
-          break;
-        }
-      }
+      memory.history.push({
+        stepNumber,
+        iteration,
+        action: `Command - FAILED`,
+        code: commands[executed.length] || '',
+        result: 'failure',
+        observation: `Failed: ${errorMessage}`,
+        error: errorMessage,
+        url: page.url(),
+        timestamp: Date.now()
+      });
       return { executed, allSucceeded: false };
     }
   }
-  /**
-   * Execute a single command
-   */
-  private async executeCommand(cmd: string, page: any): Promise<void> {
-    // Wrap in async function and execute
-    const wrapped = `(async () => { ${cmd} })()`;
-    try {
-      await eval(wrapped);
-    } catch (error: any) {
-      // If eval fails, try direct execution with page context
-      // Pass both page and expect to make Playwright assertions available
-      const func = new Function('page', 'expect', `return (async () => { ${cmd} })()`);
-      await func(page, expect);
-    }
-  }
-  /**
-   * Log agent decision
-   */
-  private logAgentDecision(decision: AgentDecision, iteration: number): void {
-    this.logger?.(`[Orchestrator] 💭 REASONING: ${decision.reasoning}`);
-    if (decision.selfReflection) {
-      this.logger?.(`[Orchestrator] 🧠 SELF-REFLECTION:`);
-      this.logger?.(`[Orchestrator]    Next: ${decision.selfReflection.guidanceForNext}`);
-      if (decision.selfReflection.detectingLoop) {
-        this.logger?.(`[Orchestrator]    🔄 LOOP DETECTED: ${decision.selfReflection.loopReasoning}`, 'warn');
-      }
-    }
-    if (decision.toolCalls && decision.toolCalls.length > 0) {
-      this.logger?.(`[Orchestrator] 🔧 TOOLS: ${decision.toolCalls.map(t => t.name).join(', ')}`);
-      if (decision.toolReasoning) {
-        this.logger?.(`[Orchestrator] 📋 Why: ${decision.toolReasoning}`);
-      }
-    }
-    if (decision.blockerDetected) {
-      this.logger?.(`[Orchestrator] 🚧 BLOCKER: ${decision.blockerDetected.description}`, 'warn');
-      this.logger?.(`[Orchestrator] 🧹 Clearing with ${decision.blockerDetected.clearingCommands.length} command(s)`);
-    }
-    if (decision.stepReEvaluation?.detected) {
-      this.logger?.(`[Orchestrator] 🔍 STEP RE-EVALUATION: ${decision.stepReEvaluation.issue}`, 'warn');
-      this.logger?.(`[Orchestrator] 📝 Explanation: ${decision.stepReEvaluation.explanation}`);
-    }
-    if (decision.commands && decision.commands.length > 0) {
-      this.logger?.(`[Orchestrator] 📝 COMMANDS (${decision.commands.length}):`);
-      decision.commands.slice(0, 3).forEach((cmd, i) => {
-        this.logger?.(`[Orchestrator]    ${i + 1}. ${cmd.substring(0, 80)}...`);
-      });
-      if (decision.commands.length > 3) {
-        this.logger?.(`[Orchestrator]    ... and ${decision.commands.length - 3} more`);
-      }
-      if (decision.commandReasoning) {
-        this.logger?.(`[Orchestrator] 💡 Why: ${decision.commandReasoning}`);
-      }
-    }
-    // Experiences will be logged when added to memory, no need to log here
-  }
   /**
    * Report step progress
    */
@@ -1032,6 +986,297 @@ ${JSON.stringify(pageInfo.ariaSnapshot, null, 2)}
       agentStatus: decision.status
     });
   }
+  /**
+   * Execute exploration mode - the agent autonomously explores the page to achieve a journey goal.
+   *
+   * Each loop pass is one exploration step: build the context, ask the agent for a decision,
+   * optionally run the requested tools (and re-ask the agent when it needs their results),
+   * run any blocker-clearing commands, run the decision's commands, then record experiences
+   * and the note for the next step. onStepProgress is fired twice per step (IN_PROGRESS, then
+   * SUCCESS/FAILURE); both events carry the same stepId so a listener can pair them.
+   *
+   * @param page Page object handed to the context builder, tools and command executor.
+   * @param explorationConfig Supplies explorationPrompt, optional testDataPrompt and optional
+   *   maxExplorationSteps (a missing or falsy value falls back to 50).
+   * @param jobId Identifier forwarded to the agent call, command execution and progress events.
+   * @returns success=true with terminationReason 'complete' when the agent reports 'complete';
+   *   success=false with terminationReason 'agent_stuck' (and error set to the status reasoning)
+   *   when it reports 'stuck'; success=true with terminationReason 'system_limit' when the step
+   *   budget runs out. `commands` lists every command that was actually executed.
+   */
+  async executeExploration(
+    page: any,
+    explorationConfig: ExplorationMode,
+    jobId: string
+  ): Promise<OrchestratorStepResult> {
+    this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
+    this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
+    if (explorationConfig.testDataPrompt) {
+      this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
+    }
+    // Fresh memory for this exploration run; it is returned to the caller in every result.
+    const memory: JourneyMemory = {
+      history: [],
+      experiences: [],
+      extractedData: {}
+    };
+    const maxSteps = explorationConfig.maxExplorationSteps || 50;
+    let stepNumber = 0;
+    const commandsExecuted: string[] = [];
+    while (stepNumber < maxSteps) {
+      stepNumber++;
+      this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
+      // Build exploratory context
+      const context = await this.buildExploratoryContext(
+        page,
+        explorationConfig.explorationPrompt,
+        explorationConfig.testDataPrompt,
+        memory,
+        stepNumber,
+        maxSteps
+      );
+      // Call agent with exploratory prompt
+      const decision = await this.callExploratoryAgent(
+        context,
+        jobId,
+        stepNumber
+      );
+      this.decisionParser.log(decision, stepNumber);
+      // One id per step, shared by the start and completion events below. Generating it
+      // separately for each event would stamp them with different timestamps and make the
+      // two events impossible to correlate.
+      const stepId = `exploration-${stepNumber}-${Date.now()}`;
+      // Report step start (fires JourneyRunner's beforeStepStart callback)
+      if (this.progressReporter?.onStepProgress) {
+        const stepInfo = {
+          jobId,
+          stepNumber,
+          stepId,
+          description: decision.reasoning,
+          code: '',  // Will be filled after commands execute
+          status: StepExecutionStatus.IN_PROGRESS,
+          wasRepaired: false
+        };
+        await this.progressReporter.onStepProgress(stepInfo);
+      }
+      // Execute tools if requested
+      if (decision.toolCalls && decision.toolCalls.length > 0) {
+        const toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
+        // If needs tool results, call agent again
+        if (decision.needsToolResults) {
+          const updatedContext = { ...context, toolResults };
+          const continuedDecision = await this.callExploratoryAgent(updatedContext, jobId, stepNumber);
+          decision.commands = continuedDecision.commands || decision.commands;
+          decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
+          decision.status = continuedDecision.status;
+          // Keep the reasoning aligned with the status it explains; otherwise the termination
+          // log and the returned error would quote the pre-tool decision.
+          decision.statusReasoning = continuedDecision.statusReasoning || decision.statusReasoning;
+        }
+      }
+      // Handle blocker clearing. The outcome is not folded into commandsSucceeded; only the
+      // commands that ran are recorded.
+      if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
+        this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
+        const blockerResult = await this.executeCommands(
+          decision.blockerDetected.clearingCommands,
+          page,
+          memory,
+          stepNumber,
+          1,
+          jobId
+        );
+        commandsExecuted.push(...blockerResult.executed);
+      }
+      // Execute exploration commands (a step with no commands counts as succeeded)
+      let commandsSucceeded = true;
+      if (decision.commands && decision.commands.length > 0) {
+        const executeResult = await this.executeCommands(
+          decision.commands,
+          page,
+          memory,
+          stepNumber,
+          1,
+          jobId
+        );
+        commandsExecuted.push(...executeResult.executed);
+        commandsSucceeded = executeResult.allSucceeded;
+      }
+      // Report step completion (fires JourneyRunner's onStepComplete callback)
+      if (this.progressReporter?.onStepProgress) {
+        const stepInfo = {
+          jobId,
+          stepNumber,
+          stepId,
+          description: decision.reasoning,
+          code: decision.commands?.join('\n') || '',
+          status: commandsSucceeded ? StepExecutionStatus.SUCCESS : StepExecutionStatus.FAILURE,
+          error: commandsSucceeded ? undefined : 'Command execution failed',
+          wasRepaired: false
+        };
+        await this.progressReporter.onStepProgress(stepInfo);
+      }
+      // Add experiences (both app patterns AND exploration progress), keeping only the newest
+      // maxExperiences entries
+      if (decision.experiences) {
+        memory.experiences.push(...decision.experiences);
+        if (memory.experiences.length > this.config.maxExperiences) {
+          memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
+        }
+      }
+      // Store note for next iteration
+      if (decision.noteToFutureSelf) {
+        memory.latestNote = {
+          fromIteration: stepNumber,
+          content: decision.noteToFutureSelf
+        };
+      }
+      // Check termination
+      if (decision.status === 'complete') {
+        this.logger?.(`[Orchestrator] ✅ Journey exploration complete: ${decision.statusReasoning}`);
+        return {
+          success: true,
+          commands: commandsExecuted,
+          iterations: stepNumber,
+          terminationReason: 'complete',
+          memory
+        };
+      } else if (decision.status === 'stuck') {
+        this.logger?.(`[Orchestrator] ❌ Exploration stuck: ${decision.statusReasoning}`);
+        return {
+          success: false,
+          commands: commandsExecuted,
+          iterations: stepNumber,
+          terminationReason: 'agent_stuck',
+          memory,
+          error: decision.statusReasoning
+        };
+      }
+    }
+    // Hit max steps - not necessarily a failure
+    this.logger?.(`[Orchestrator] ⚠ Maximum exploration steps reached (budget limit)`);
+    return {
+      success: true,  // Not a failure - just budget limit
+      commands: commandsExecuted,
+      iterations: stepNumber,
+      terminationReason: 'system_limit',
+      memory
+    };
+  }
+  /**
+   * Assemble the context object passed to the exploratory agent for a single step.
+   *
+   * Collects page info (via PageInfoRetry), the current URL and the most recent history
+   * entries, and - when SoM is enabled and a handler exists - a SoM screenshot plus its
+   * element map. A failure while capturing SoM data is logged and the context is still
+   * returned without the screenshot/element map.
+   *
+   * @param page Page object queried for info, URL and load state.
+   * @param explorationPrompt Journey goal; used as both the overall and the current step goal.
+   * @param testDataPrompt Optional test-data instructions carried through to the agent.
+   * @param memory Journey memory supplying history, experiences, extracted data and latest note.
+   * @param stepNumber Current exploration step.
+   * @param maxSteps Step budget, exposed to the agent as totalSteps.
+   * @returns The assembled agent context.
+   */
+  private async buildExploratoryContext(
+    page: any,
+    explorationPrompt: string,
+    testDataPrompt: string | undefined,
+    memory: JourneyMemory,
+    stepNumber: number,
+    maxSteps: number
+  ): Promise<AgentContext> {
+    // Wait for page to be ready and elements to appear (especially important after navigation)
+    const pageInfo = await PageInfoRetry.getWithRetry(page);
+    const url = page.url();
+    const historyTail = memory.history.slice(-this.config.recentStepsCount);
+    // SoM integration for exploratory mode: both values stay undefined unless capture succeeds
+    let markedScreenshot: string | undefined;
+    let markedElementMap: string | undefined;
+    if (this.config.useSoM && this.somHandler) {
+      try {
+        this.somHandler.setPage(page);
+        // Brief stability wait (first iteration + safety net for fast SPAs). Any error here,
+        // including the 2s timeout, is intentionally ignored so the capture still proceeds.
+        try {
+          await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
+        } catch (ignored: any) {
+          // Page already loaded or timeout - continue
+        }
+        // Refresh SoM markers before taking the screenshot
+        await this.somHandler.updateSom();
+        // Per the original author's note this is a viewport-only capture - the agent can
+        // scroll or request a full page
+        markedScreenshot = await this.somHandler.getScreenshot(true, false, 60);
+        // Element map for disambiguation
+        markedElementMap = this.somHandler.getSomElementMap();
+        this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
+      } catch (somError: any) {
+        this.logger?.(`[Orchestrator] Failed to capture SoM screenshot: ${somError.message}`, 'error');
+      }
+    }
+    const context = {
+      overallGoal: explorationPrompt,
+      currentStepGoal: explorationPrompt,  // Same as overall for single journey
+      stepNumber: stepNumber,
+      totalSteps: maxSteps,
+      completedSteps: [],
+      remainingSteps: [],
+      currentPageInfo: pageInfo,
+      currentURL: url,
+      recentSteps: historyTail,
+      experiences: memory.experiences,
+      extractedData: memory.extractedData,
+      noteFromPreviousIteration: memory.latestNote,
+      testDataPrompt: testDataPrompt,  // CRITICAL: Store testDataPrompt in context
+      somScreenshot: markedScreenshot,  // SoM screenshot for exploratory mode (current)
+      somElementMap: markedElementMap   // SoM element details for disambiguation
+    };
+    // Remember this screenshot as the "previous" one for the next iteration (for tool access)
+    if (markedScreenshot) {
+      this.previousSomScreenshot = markedScreenshot;
+    }
+    return context;
+  }
+  /**
+   * Ask the LLM for the next exploratory decision and parse its answer.
+   *
+   * Builds the system prompt (SoM variant when config.useSoM is set, otherwise the standard
+   * exploratory prompt with tool descriptions) and the exploratory user prompt, attaches the
+   * SoM screenshot as an image when the context has one, calls the LLM provider, reports
+   * token usage through the progress reporter when available, and returns the parsed decision.
+   *
+   * @param context Agent context for this step; its overallGoal, testDataPrompt, totalSteps
+   *   and somScreenshot are read here.
+   * @param jobId Identifier included in the token-usage report.
+   * @param stepNumber Current exploration step, passed to the prompt and the usage report.
+   * @returns The decision parsed from the LLM answer.
+   */
+  private async callExploratoryAgent(
+    context: AgentContext,
+    jobId: string,
+    stepNumber: number
+  ): Promise<AgentDecision> {
+    const toolDescriptions = this.toolRegistry.generateToolDescriptions();
+    // Use SoM system prompt if in SoM mode, otherwise use standard exploratory prompt
+    const systemPrompt = this.config.useSoM
+      ? OrchestratorPrompts.buildSomSystemPrompt(this.config.somRestrictCoordinates)
+      : OrchestratorPrompts.buildExploratorySystemPrompt(toolDescriptions);
+    const userPrompt = OrchestratorPrompts.buildExploratoryUserPrompt(
+      context,
+      context.overallGoal,
+      context.testDataPrompt,  // Pass testDataPrompt from context
+      stepNumber,
+      context.totalSteps
+    );
+    const llmRequest: any = {
+      model: DEFAULT_MODEL,
+      systemPrompt,
+      userPrompt
+    };
+    // Include current SoM screenshot as image
+    const hasScreenshot = Boolean(context.somScreenshot);
+    if (context.somScreenshot) {
+      llmRequest.imageUrl = context.somScreenshot;
+      this.logger?.(`[Orchestrator] Including SoM screenshot in exploratory LLM request`, 'log');
+    }
+    const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
+    // Report token usage
+    if (response.usage && this.progressReporter?.onTokensUsed) {
+      await this.progressReporter.onTokensUsed({
+        jobId,
+        stepNumber,
+        iteration: 1,
+        inputTokens: response.usage.inputTokens,
+        outputTokens: response.usage.outputTokens,
+        // Reflect whether an image was actually attached to this request; a hard-coded
+        // false would misreport every SoM call as text-only.
+        includesImage: hasScreenshot,
+        model: DEFAULT_MODEL,
+        timestamp: Date.now()
+      });
+    }
+    // Parse response (same JSON format as regular mode)
+    return this.decisionParser.parse(response.answer);
+  }
 }