npm - testchimp-runner-core - Versions diffs - 0.0.39 → 0.0.41 - Mend

testchimp-runner-core 0.0.39 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +1 -3
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +7 -6
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -4
package/dist/index.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts.map +1 -1
package/dist/orchestrator/decision-parser.js +16 -0
package/dist/orchestrator/decision-parser.js.map +1 -1
package/dist/orchestrator/index.d.ts +3 -1
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +8 -1
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +10 -4
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +347 -93
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-prompts.js +364 -415
package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
package/dist/orchestrator/page-loading-utils.d.ts +15 -0
package/dist/orchestrator/page-loading-utils.d.ts.map +1 -0
package/dist/orchestrator/page-loading-utils.js +115 -0
package/dist/orchestrator/page-loading-utils.js.map +1 -0
package/dist/orchestrator/page-som-handler.d.ts +2 -1
package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
package/dist/orchestrator/page-som-handler.js +250 -33
package/dist/orchestrator/page-som-handler.js.map +1 -1
package/dist/orchestrator/site-learnings-utils.d.ts +31 -0
package/dist/orchestrator/site-learnings-utils.d.ts.map +1 -0
package/dist/orchestrator/site-learnings-utils.js +175 -0
package/dist/orchestrator/site-learnings-utils.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +2 -0
package/dist/orchestrator/som-types.d.ts.map +1 -1
package/dist/orchestrator/som-types.js.map +1 -1
package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -1
package/dist/orchestrator/tools/take-screenshot.js +10 -1
package/dist/orchestrator/tools/take-screenshot.js.map +1 -1
package/dist/orchestrator/types.d.ts +54 -9
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/progress-reporter.d.ts +23 -2
package/dist/progress-reporter.d.ts.map +1 -1
package/dist/progress-reporter.js.map +1 -1
package/dist/scenario-service.d.ts +3 -3
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +6 -5
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +7 -3
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +62 -9
package/dist/scenario-worker-class.js.map +1 -1
package/dist/types.d.ts +4 -0
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/package.json +1 -1
package/dist/testing/agent-tester.d.ts +0 -35
package/dist/testing/agent-tester.d.ts.map +0 -1
package/dist/testing/agent-tester.js +0 -84
package/dist/testing/agent-tester.js.map +0 -1
package/dist/testing/ref-translator-tester.d.ts +0 -44
package/dist/testing/ref-translator-tester.d.ts.map +0 -1
package/dist/testing/ref-translator-tester.js +0 -104
package/dist/testing/ref-translator-tester.js.map +0 -1
package/dist/utils/hierarchical-selector.d.ts +0 -47
package/dist/utils/hierarchical-selector.d.ts.map +0 -1
package/dist/utils/hierarchical-selector.js +0 -212
package/dist/utils/hierarchical-selector.js.map +0 -1
package/dist/utils/ref-attacher.d.ts +0 -21
package/dist/utils/ref-attacher.d.ts.map +0 -1
package/dist/utils/ref-attacher.js +0 -149
package/dist/utils/ref-attacher.js.map +0 -1
package/dist/utils/ref-translator.d.ts +0 -49
package/dist/utils/ref-translator.d.ts.map +0 -1
package/dist/utils/ref-translator.js +0 -276
package/dist/utils/ref-translator.js.map +0 -1

package/dist/orchestrator/orchestrator-agent.js CHANGED

@@ -9,6 +9,10 @@ const progress_reporter_1 = require("../progress-reporter");
 const page_info_utils_1 = require("../utils/page-info-utils");
 const coordinate_converter_1 = require("../utils/coordinate-converter");
 const model_constants_1 = require("../model-constants");
+// @ts-ignore - package.json exists
+const package_json_1 = require("../../package.json");
+const site_learnings_utils_1 = require("./site-learnings-utils");
+const page_loading_utils_1 = require("./page-loading-utils");
 const types_1 = require("./types");
 const orchestrator_prompts_1 = require("./orchestrator-prompts");
 const page_info_retry_1 = require("../utils/page-info-retry");
@@ -21,6 +25,14 @@ const som_types_1 = require("./som-types");
 class OrchestratorAgent {
     constructor(llmFacade, toolRegistry, config, progressReporter, logger, debugMode) {
         this.debugMode = false;
+        // Debug stats tracking
+        this.debugStats = {
+            tokensUsedIn: 0,
+            tokensUsedOut: 0,
+            imagesUsed: 0,
+            toolsUsed: {},
+            promptImproveSuggestions: []
+        };
         this.llmFacade = llmFacade;
         this.toolRegistry = toolRegistry;
         this.config = { ...types_1.DEFAULT_AGENT_CONFIG, ...config };
@@ -43,10 +55,19 @@ class OrchestratorAgent {
     nextSteps, // For repair mode: steps after this one
     successfulCommandsInStep, // For repair mode: commands that succeeded within THIS step
     failingCommand, // For repair mode: the specific command that failed
-    remainingCommandsInStep // For repair mode: commands after the failing one
+    remainingCommandsInStep, // For repair mode: commands after the failing one
+    existingSiteLearnings // Pre-existing site learnings from previous runs
     ) {
         this.logger?.(`\n[Orchestrator] ========== STEP ${stepNumber}/${totalSteps} ==========`);
+        this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
         this.logger?.(`[Orchestrator] 🎯 Goal: ${stepDescription}`);
+        // Site learnings (persistent across journeys) - initialize with existing or empty
+        const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
+        if (existingSiteLearnings) {
+            const screenCount = Object.keys(existingSiteLearnings.screens).length;
+            const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
+            this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
+        }
         let iteration = 0;
         let noteToSelf = memory.latestNote; // Start with note from previous step
         const commandsExecuted = [];
@@ -56,8 +77,8 @@ class OrchestratorAgent {
             iteration++;
             this.logger?.(`\n[Orchestrator] === Iteration ${iteration}/${this.config.maxIterationsPerStep} ===`);
             // Build context for agent
-            const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteToSelf, // Pass note from previous iteration
-            priorSteps, // Repair context: prior completed steps
+            const context = await this.buildAgentContext(page, stepDescription, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent)
+            consecutiveFailures, priorSteps, // Repair context: prior completed steps
             nextSteps, // Repair context: next steps
             successfulCommandsInStep, // Repair context: successful commands in THIS step
             failingCommand, // Repair context: the failing command
@@ -67,6 +88,30 @@ class OrchestratorAgent {
             const decision = await this.callAgent(context, jobId, stepNumber, iteration, consecutiveFailures);
             // Log agent's reasoning
             this.decisionParser.log(decision, iteration);
+            // Handle debug info from agent
+            if (decision.debugInfo) {
+                // Collect prompt improvement suggestions
+                if (decision.debugInfo.suggestedPromptUpdates) {
+                    this.debugStats.promptImproveSuggestions.push(decision.debugInfo.suggestedPromptUpdates);
+                    this.logger?.(`[Orchestrator] 💡 Prompt suggestion collected: ${decision.debugInfo.suggestedPromptUpdates.substring(0, 80)}...`, 'log');
+                }
+                // Process tool usefulness feedback (for tools from PREVIOUS iteration)
+                if (decision.debugInfo.toolUsefulnessFeedback) {
+                    for (const [toolName, rating] of Object.entries(decision.debugInfo.toolUsefulnessFeedback)) {
+                        if (this.debugStats.toolsUsed[toolName]) {
+                            const stats = this.debugStats.toolsUsed[toolName];
+                            // Calculate running average: (oldAvg * oldCount + newRating) / newCount
+                            const oldTotal = stats.averageUsefulnessScore * stats.numTimesRated;
+                            stats.numTimesRated++;
+                            stats.averageUsefulnessScore = (oldTotal + rating) / stats.numTimesRated;
+                            this.logger?.(`[Orchestrator] ⭐ Tool feedback: ${toolName} rated ${rating}/5 (avg: ${stats.averageUsefulnessScore.toFixed(2)})`, 'log');
+                        }
+                        else {
+                            this.logger?.(`[Orchestrator] ⚠️  Tool feedback for unknown tool: ${toolName}`, 'warn');
+                        }
+                    }
+                }
+            }
             // Report progress
             await this.reportStepProgress(jobId, stepNumber, stepDescription, decision, iteration);
             // Execute tools if requested (tools are READ-ONLY, they don't change state)
@@ -82,6 +127,7 @@ class OrchestratorAgent {
                 this.logger?.(`[Orchestrator] ⚠️  WARNING: ${recentScreenshots.length} screenshots in last 3 iterations - agent may be looping`, 'warn');
             }
             if (decision.toolCalls && decision.toolCalls.length > 0) {
+                this.logger?.(`[Orchestrator] 🔧 Agent using TOOL CALLS: ${decision.toolCalls.map(tc => tc.name).join(', ')}`);
                 // ENFORCE: Block screenshot tool calls if too many taken IN THIS STEP
                 if (screenshotsThisStep.length >= 3) {
                     decision.toolCalls = decision.toolCalls.filter(tc => tc.name !== 'take_screenshot');
@@ -96,27 +142,28 @@ class OrchestratorAgent {
                     }
                 }
                 if (decision.toolCalls.length > 0) {
-                    toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber, context.currentPageInfo.refMap);
-                }
-                // If agent wants to wait for tool results before proceeding, call agent again
-                if (decision.needsToolResults) {
-                    const updatedContext = { ...context, toolResults };
-                    const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
-                    // Merge continued decision
-                    decision.commands = continuedDecision.commands || decision.commands;
-                    decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
-                    decision.status = continuedDecision.status;
-                    decision.statusReasoning = continuedDecision.statusReasoning;
-                    decision.reasoning = continuedDecision.reasoning;
+                    toolResults = await this.executeTools(decision.toolCalls, page, memory, stepNumber);
                 }
             }
+            // If agent wants to wait for tool results before proceeding, call agent again
+            if (decision.toolCalls && decision.toolCalls.length > 0 && decision.needsToolResults) {
+                const updatedContext = { ...context, toolResults };
+                const continuedDecision = await this.callAgent(updatedContext, jobId, stepNumber, iteration, consecutiveFailures);
+                // Merge continued decision
+                decision.commands = continuedDecision.commands || decision.commands;
+                decision.commandReasoning = continuedDecision.commandReasoning || decision.commandReasoning;
+                decision.status = continuedDecision.status;
+                decision.statusReasoning = continuedDecision.statusReasoning;
+                decision.reasoning = continuedDecision.reasoning;
+            }
             // Execute commands sequentially
             let iterationHadFailure = false;
             // Handle blocker if detected (clear blocker FIRST, then proceed with main commands)
             if (decision.blockerDetected && decision.blockerDetected.clearingCommands && decision.blockerDetected.clearingCommands.length > 0) {
                 this.logger?.(`[Orchestrator] 🚧 BLOCKER DETECTED: ${decision.blockerDetected.description}`);
                 this.logger?.(`[Orchestrator] 🧹 Clearing blocker with ${decision.blockerDetected.clearingCommands.length} command(s)...`);
-                const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId);
+                const urlBeforeBlockerClear = page.url();
+                const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, iteration, jobId, urlBeforeBlockerClear, decision.screenState);
                 // Add blocker commands with comment to output
                 if (blockerResult.executed.length > 0) {
                     commandsExecuted.push(`// Blocker: ${decision.blockerDetected.description}`);
@@ -131,7 +178,8 @@ class OrchestratorAgent {
             }
             // Execute main commands (only if no blocker failure)
             if (!iterationHadFailure && decision.commands && decision.commands.length > 0) {
-                const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId);
+                const urlBeforeCommands = page.url();
+                const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, iteration, jobId, urlBeforeCommands, decision.screenState);
                 commandsExecuted.push(...executeResult.executed);
                 // Track failures
                 if (!executeResult.allSucceeded) {
@@ -156,7 +204,8 @@ class OrchestratorAgent {
                     this.logger?.(`[Orchestrator] Generated commands:`);
                     coordCommands.forEach(cmd => this.logger?.(`  ${cmd}`));
                     // Execute coordinate commands
-                    const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId);
+                    const urlBeforeCoord = page.url();
+                    const coordResult = await this.executeCommands(coordCommands, page, memory, stepNumber, iteration, jobId, urlBeforeCoord, decision.screenState);
                     commandsExecuted.push(...coordResult.executed);
                     if (!coordResult.allSucceeded) {
                         this.logger?.(`[Orchestrator] ❌ Coordinate action failed (Playwright error)`);
@@ -171,6 +220,7 @@ class OrchestratorAgent {
                                 iterations: iteration,
                                 terminationReason: 'agent_stuck',
                                 memory,
+                                siteLearnings,
                                 error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
                             };
                         }
@@ -240,6 +290,7 @@ class OrchestratorAgent {
                                         iterations: iteration,
                                         terminationReason: 'agent_stuck',
                                         memory,
+                                        siteLearnings,
                                         error: `Coordinate actions clicked but didn't achieve goal: ${reasoning}`
                                     };
                                 }
@@ -265,6 +316,7 @@ class OrchestratorAgent {
                             iterations: iteration,
                             terminationReason: 'agent_stuck',
                             memory,
+                            siteLearnings,
                             error: 'Coordinate fallback failed after 2 attempts - unable to proceed'
                         };
                     }
@@ -280,25 +332,39 @@ class OrchestratorAgent {
                     iterations: iteration,
                     terminationReason: 'agent_stuck',
                     memory,
+                    siteLearnings,
                     error: `Failed ${consecutiveFailures} iterations in a row - unable to proceed`
                 };
             }
-            // Update memory with experiences
-            if (decision.experiences && decision.experiences.length > 0) {
-                for (const exp of decision.experiences) {
-                    // Deduplicate - don't add if very similar experience exists
-                    const exists = memory.experiences.some(existing => existing.toLowerCase().includes(exp.toLowerCase()) ||
-                        exp.toLowerCase().includes(existing.toLowerCase()));
-                    if (!exists) {
-                        memory.experiences.push(exp);
-                        this.logger?.(`[Orchestrator] 📚 Experience: ${exp}`);
+            // Auto-track visited screen (even without explicit learnings)
+            // Filter out transient screens and loading states
+            if (decision.screenState) {
+                const { screen, state } = decision.screenState;
+                // Skip about:blank and loading states (transient, not worth persisting)
+                const isTransientScreen = screen === 'about:blank' ||
+                    screen.toLowerCase().includes('blank');
+                const isLoadingState = state.toLowerCase().includes('loading') ||
+                    state.toLowerCase().includes('spinner') ||
+                    state.toLowerCase().includes('initializing');
+                if (!isTransientScreen && !isLoadingState) {
+                    if (!siteLearnings.screens[screen]) {
+                        siteLearnings.screens[screen] = { states: {} };
+                        this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
+                    }
+                    if (!siteLearnings.screens[screen].states[state]) {
+                        siteLearnings.screens[screen].states[state] = { observations: {} };
+                        this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
                     }
                 }
-                // Cap experiences
-                if (memory.experiences.length > this.config.maxExperiences) {
-                    memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
+                else {
+                    this.logger?.(`[⏭️  Skipped] Transient screen/state: ${screen}[${state}]`);
                 }
             }
+            // Update site learnings
+            if (decision.siteLearningsUpdate) {
+                this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
+                (0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
+            }
             // Store note to future self (tactical memory across iterations AND steps)
             if (decision.noteToFutureSelf) {
                 noteToSelf = {
@@ -327,7 +393,8 @@ class OrchestratorAgent {
                         terminationReason: decision.status === 'complete' ? 'complete' :
                             decision.status === 'stuck' ? 'agent_stuck' :
                                 'infeasible',
-                        memory
+                        memory,
+                        siteLearnings
                     };
                 }
             }
@@ -340,21 +407,58 @@ class OrchestratorAgent {
             iterations: iteration,
             terminationReason: 'system_limit',
             memory,
+            siteLearnings,
             error: 'Maximum iterations reached'
         };
     }
     /**
      * Build context for agent
      */
-    async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, consecutiveFailures, noteFromPreviousIteration, priorSteps, // For repair mode: prior completed steps
+    async buildAgentContext(page, currentStepGoal, stepNumber, totalSteps, scenarioSteps, memory, siteLearnings, // Site learnings (persistent across journeys)
+    consecutiveFailures, priorSteps, // For repair mode: prior completed steps
     nextSteps, // For repair mode: next steps
     successfulCommandsInStep, // For repair mode: successful commands in THIS step
     failingCommand, // For repair mode: the failing command
     remainingCommandsInStep // For repair mode: remaining commands in THIS step
     ) {
-        // Get fresh DOM
+        // Get fresh DOM (for title only, not displayed in prompts - SoM mode uses visual markers)
         const currentPageInfo = await (0, page_info_utils_1.getEnhancedPageInfo)(page);
         const currentURL = page.url();
+        // Get page dimensions for scroll vs screenshot decisions
+        // IMPORTANT: Wait for page to stabilize and retry until dimensions stop changing (fixes lazy-loaded/dynamic content)
+        try {
+            await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
+        }
+        catch (e) {
+            // Already loaded, continue
+        }
+        // Retry approach: Measure scrollHeight multiple times until it stabilizes
+        // This handles React/Vue/Angular apps that expand the DOM after initial render
+        // Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
+        const measureHeight = `Math.max(
+      document.documentElement.scrollHeight || 0,
+      document.body.scrollHeight || 0,
+      document.documentElement.offsetHeight || 0,
+      document.body.offsetHeight || 0
+    )`;
+        let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
+        let previousHeight = 0;
+        let attempts = 0;
+        while (pageHeight !== previousHeight && attempts < 5) {
+            previousHeight = pageHeight;
+            await page.waitForTimeout(200); // Wait for potential expansion
+            pageHeight = await page.evaluate(measureHeight).catch(() => 0);
+            attempts++;
+        }
+        const viewport = page.viewportSize();
+        // @ts-expect-error - document is available in browser context during page.evaluate()
+        const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
+        // @ts-expect-error - window is available in browser context during page.evaluate()
+        const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
+        // @ts-expect-error - window is available in browser context during page.evaluate()
+        const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
+        const pageDimensions = { width: pageWidth, height: pageHeight };
+        this.logger?.(`[Orchestrator] Page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
         // Get recent steps
         const recentSteps = memory.history.slice(-this.config.recentStepsCount);
         // SoM integration: Update markers and capture screenshot with visual IDs
@@ -375,10 +479,14 @@ class OrchestratorAgent {
                 catch (error) {
                     // Page already loaded or timeout - continue
                 }
-                // Update SoM markers
-                await this.somHandler.updateSom();
-                // Get screenshot WITH markers (viewport only - agent can scroll or use take_screenshot for full page)
-                somScreenshot = await this.somHandler.getScreenshot(true, false, 60);
+                // Update SoM markers - include offscreen elements for full-page screenshots
+                await this.somHandler.updateSom(true);
+                // TEMPORARY: Always use full-page screenshot for debugging
+                // TODO: Re-enable heuristic once we verify full-page works correctly
+                const useFullPageSom = true;
+                this.logger?.(`[Orchestrator] SoM screenshot strategy: FULL PAGE (ALWAYS) - page: ${pageWidth}x${pageHeight}px, viewport: ${viewport?.width}x${viewport?.height}px`, 'log');
+                // Get screenshot WITH markers
+                somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
                 // Get element map for disambiguation
                 somElementMap = this.somHandler.getSomElementMap();
                 this.logger?.(`[Orchestrator] SoM screenshot captured for agent decision-making`, 'log');
@@ -395,12 +503,16 @@ class OrchestratorAgent {
             totalSteps,
             completedSteps: scenarioSteps.slice(0, stepNumber - 1),
             remainingSteps: scenarioSteps.slice(stepNumber),
-            currentPageInfo,
             currentURL,
-            recentSteps,
-            experiences: memory.experiences,
-            extractedData: memory.extractedData,
-            noteFromPreviousIteration, // Tactical note from previous iteration
+            currentPageTitle: currentPageInfo.title,
+            viewportWidth: viewport?.width,
+            viewportHeight: viewport?.height,
+            pageWidth: pageDimensions.width,
+            pageHeight: pageDimensions.height,
+            scrollX,
+            scrollY,
+            journeyMemory: memory, // Journey-specific memory (includes history, extractedData, latestNote)
+            siteLearnings, // Site-level learnings (persistent across journeys)
             somScreenshot, // SoM screenshot with visual markers (current)
             somElementMap, // SoM element details for disambiguation
             priorSteps, // Repair: prior completed steps
@@ -458,22 +570,32 @@ class OrchestratorAgent {
                 this.logger?.(`[Orchestrator] Including SoM screenshot in LLM request`, 'log');
             }
             const response = await this.llmFacade.llmProvider.callLLM(llmRequest);
-            // Report token usage
-            if (response.usage && this.progressReporter?.onTokensUsed) {
-                const tokenUsage = {
-                    jobId,
-                    stepNumber,
-                    iteration,
-                    inputTokens: response.usage.inputTokens,
-                    outputTokens: response.usage.outputTokens,
-                    includesImage: false,
-                    model: model_constants_1.DEFAULT_MODEL,
-                    timestamp: Date.now()
-                };
-                this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
-                await this.progressReporter.onTokensUsed(tokenUsage);
+            // Track token usage and images
+            const includesImage = !!context.somScreenshot;
+            if (response.usage) {
+                // Accumulate in debug stats
+                this.debugStats.tokensUsedIn += response.usage.inputTokens;
+                this.debugStats.tokensUsedOut += response.usage.outputTokens;
+                if (includesImage) {
+                    this.debugStats.imagesUsed++;
+                }
+                // Report to progress reporter
+                if (this.progressReporter?.onTokensUsed) {
+                    const tokenUsage = {
+                        jobId,
+                        stepNumber,
+                        iteration,
+                        inputTokens: response.usage.inputTokens,
+                        outputTokens: response.usage.outputTokens,
+                        includesImage,
+                        model: model_constants_1.DEFAULT_MODEL,
+                        timestamp: Date.now()
+                    };
+                    this.logger?.(`[Orchestrator] 💰 Reporting token usage: ${tokenUsage.inputTokens} + ${tokenUsage.outputTokens}`, 'log');
+                    await this.progressReporter.onTokensUsed(tokenUsage);
+                }
             }
-            else if (!response.usage) {
+            else {
                 this.logger?.(`[Orchestrator] ⚠ No usage data in LLM response`, 'warn');
             }
             // Parse response
@@ -492,7 +614,7 @@ class OrchestratorAgent {
     /**
      * Execute tools
      */
-    async executeTools(toolCalls, page, memory, stepNumber, refMap) {
+    async executeTools(toolCalls, page, memory, stepNumber) {
         this.logger?.(`[Orchestrator] 🔧 Executing ${toolCalls.length} tool(s)`);
         const results = {};
         const toolContext = {
@@ -500,12 +622,23 @@ class OrchestratorAgent {
             memory,
             stepNumber,
             logger: this.logger,
-            refMap, // Pass refMap for interact_with_ref tool
             previousSomScreenshot: this.previousSomScreenshot, // For view_previous_screenshot tool
             somHandler: this.somHandler // For refresh_som_markers tool
         };
         for (const toolCall of toolCalls.slice(0, this.config.maxToolCallsPerIteration)) {
-            this.logger?.(`[Orchestrator]   ▶ ${toolCall.name}(${JSON.stringify(toolCall.params).substring(0, 50)}...)`);
+            // Log full parameters for debugging (especially for take_screenshot to see if isFullPage is set)
+            this.logger?.(`[Orchestrator]   ▶ ${toolCall.name}(${JSON.stringify(toolCall.params)})`);
+            // Track tool usage in debug stats
+            if (!this.debugStats.toolsUsed[toolCall.name]) {
+                this.debugStats.toolsUsed[toolCall.name] = {
+                    count: 0,
+                    averageUsefulnessScore: 0,
+                    numTimesRated: 0
+                };
+                this.logger?.(`[DebugStats] 📊 Tracking new tool: ${toolCall.name}`);
+            }
+            this.debugStats.toolsUsed[toolCall.name].count++;
+            this.logger?.(`[DebugStats] 📊 Tool '${toolCall.name}' used (count: ${this.debugStats.toolsUsed[toolCall.name].count})`);
             const result = await this.toolRegistry.execute(toolCall, toolContext);
             results[toolCall.name] = result;
             if (result.success) {
@@ -549,7 +682,9 @@ class OrchestratorAgent {
     /**
      * Execute commands (mix of ref and playwright commands)
      */
-    async executeCommands(commands, page, memory, stepNumber, iteration, jobId) {
+    async executeCommands(commands, page, memory, stepNumber, iteration, jobId, urlBeforeAction, // URL before commands execute
+    screenState // Screen state for memory
+    ) {
         this.logger?.(`[Orchestrator] 📝 Executing ${commands.length} command(s)`);
         const executed = [];
         if (commands.length === 0) {
@@ -560,6 +695,11 @@ class OrchestratorAgent {
             this.logger?.(`[Orchestrator] Using SoM mode for command execution`, 'log');
             for (let i = 0; i < commands.length; i++) {
                 const cmd = commands[i];
+                // Skip if plain string (should not happen in SoM mode, but handle gracefully)
+                if (typeof cmd === 'string') {
+                    this.logger?.(`[Orchestrator] ⚠️  Skipping plain string command in SoM mode: "${cmd}"`, 'warn');
+                    continue;
+                }
                 // Check if verification or action command
                 if ((0, som_types_1.isSomVerification)(cmd)) {
                     // Handle verification command
@@ -580,6 +720,8 @@ class OrchestratorAgent {
                                 result: 'success',
                                 observation: `Verified: ${cmd.description || cmd.expected}`,
                                 url: page.url(),
+                                previousUrl: urlBeforeAction,
+                                screenState,
                                 timestamp: Date.now()
                             });
                         }
@@ -594,6 +736,8 @@ class OrchestratorAgent {
                                 observation: `Failed: ${result.error}`,
                                 error: result.error,
                                 url: page.url(),
+                                previousUrl: urlBeforeAction,
+                                screenState,
                                 timestamp: Date.now()
                             });
                             // Continue anyway - verification failures are non-blocking for script generation
@@ -624,6 +768,8 @@ class OrchestratorAgent {
                                 result: 'success',
                                 observation: 'Executed successfully',
                                 url: page.url(),
+                                previousUrl: urlBeforeAction,
+                                screenState,
                                 timestamp: Date.now()
                             });
                             // Small delay for form validation/animations
@@ -644,12 +790,14 @@ class OrchestratorAgent {
                                 observation: `Failed: ${result.error}`,
                                 error: result.error,
                                 url: page.url(),
+                                previousUrl: urlBeforeAction,
+                                screenState,
                                 timestamp: Date.now()
                             });
                             // Refresh SoM after batch (DOM may have changed)
                             if (this.somHandler && page) {
                                 this.somHandler.setPage(page);
-                                await this.somHandler.updateSom();
+                                await this.somHandler.updateSom(true);
                             }
                             return { executed, allSucceeded: false };
                         }
@@ -665,12 +813,14 @@ class OrchestratorAgent {
                             observation: `Exception: ${error.message}`,
                             error: error.message,
                             url: page.url(),
+                            previousUrl: urlBeforeAction,
+                            screenState,
                             timestamp: Date.now()
                         });
                         // Refresh SoM after batch (DOM may have changed)
                         if (this.somHandler && page) {
                             this.somHandler.setPage(page);
-                            await this.somHandler.updateSom();
+                            await this.somHandler.updateSom(true);
                         }
                         return { executed, allSucceeded: false };
                     }
@@ -679,19 +829,33 @@ class OrchestratorAgent {
                     this.logger?.(`[Orchestrator]   ⚠ [${i + 1}/${commands.length}] Not a valid SoM command/verification, skipping`, 'warn');
                 }
             }
+            // Action-aware stabilization: Detect if commands likely triggered navigation
+            const isNavigationAction = (0, page_loading_utils_1.detectNavigationAction)(commands, executed);
             // Always wait for page to stabilize after command batch
             // This handles both explicit navigation AND clicks that trigger navigation/SPA routes
             try {
-                this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
-                // Use networkidle with short timeout to catch navigation without blocking on SPAs with continuous requests
-                await page.waitForLoadState('networkidle', { timeout: 3000 });
-                this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
+                if (isNavigationAction) {
+                    this.logger?.(`[Orchestrator] Detected navigation action - using extended wait...`, 'log');
+                    // Extended wait for form submissions and navigation clicks
+                    await page.waitForLoadState('networkidle', { timeout: 15000 }); // Longer timeout for slow SPAs
+                    await page.waitForTimeout(1000); // Initial buffer for SPA rendering
+                    this.logger?.(`[Orchestrator] Page stabilized after navigation (networkidle + 1s buffer)`, 'log');
+                    // Smart loading detection: Check if page still shows loading indicators
+                    await (0, page_loading_utils_1.waitForLoadingToComplete)(page, this.logger);
+                }
+                else {
+                    this.logger?.(`[Orchestrator] Waiting for page to stabilize...`, 'log');
+                    // Use networkidle with short timeout for standard interactions
+                    await page.waitForLoadState('networkidle', { timeout: 3000 });
+                    this.logger?.(`[Orchestrator] Page stabilized (networkidle)`, 'log');
+                }
             }
             catch (error) {
-                // If networkidle times out, fall back to domcontentloaded
+                // If networkidle times out, fall back to domcontentloaded + buffer
                 try {
                     await page.waitForLoadState('domcontentloaded', { timeout: 2000 });
-                    this.logger?.(`[Orchestrator] Page loaded (domcontentloaded)`, 'log');
+                    await page.waitForTimeout(1000);
+                    this.logger?.(`[Orchestrator] Page loaded (domcontentloaded + buffer)`, 'log');
                 }
                 catch (error2) {
                     this.logger?.(`[Orchestrator] Page load wait timeout (continuing anyway)`, 'warn');
@@ -700,7 +864,7 @@ class OrchestratorAgent {
             // Refresh SoM after batch (DOM may have changed and page is now stable)
             if (this.somHandler && page) {
                 this.somHandler.setPage(page);
-                await this.somHandler.updateSom();
+                await this.somHandler.updateSom(true);
             }
             return { executed, allSucceeded: true };
         }
@@ -733,6 +897,8 @@ try {
                         result: 'success',
                         observation: 'Executed successfully',
                         url: page.url(),
+                        previousUrl: urlBeforeAction,
+                        screenState,
                         timestamp: Date.now()
                     });
                     executed.push(cmd);
@@ -755,6 +921,8 @@ try {
                 observation: `Failed: ${errorMessage}`,
                 error: errorMessage,
                 url: page.url(),
+                previousUrl: urlBeforeAction,
+                screenState,
                 timestamp: Date.now()
             });
             return { executed, allSucceeded: false };
@@ -778,7 +946,6 @@ try {
             agentIteration: iteration,
             agentReasoning: decision.reasoning,
             agentSelfReflection: decision.selfReflection,
-            agentExperiences: decision.experiences,
             agentToolsUsed: decision.toolCalls?.map(t => t.name),
             agentStatus: decision.status
         });
@@ -787,17 +954,25 @@ try {
      * Execute exploration mode - agent autonomously explores to achieve journey goal
      * Fires onStepProgress callbacks for each autonomous action (transparent to caller)
      */
-    async executeExploration(page, explorationConfig, jobId) {
+    async executeExploration(page, explorationConfig, jobId, existingSiteLearnings) {
         this.logger?.(`\n[Orchestrator] ========== EXPLORATION MODE ==========`);
+        this.logger?.(`[Orchestrator] 🚀 runner-core v${package_json_1.version}`);
         this.logger?.(`[Orchestrator] 🎯 Journey Goal: ${explorationConfig.explorationPrompt}`);
         if (explorationConfig.testDataPrompt) {
             this.logger?.(`[Orchestrator] 📋 Test Data: ${explorationConfig.testDataPrompt}`);
         }
+        // Journey memory (temporal)
         const memory = {
             history: [],
-            experiences: [],
             extractedData: {}
         };
+        // Site learnings (persistent across journeys) - initialize with existing or empty
+        const siteLearnings = existingSiteLearnings || { screens: {}, uxPatterns: {} };
+        if (existingSiteLearnings) {
+            const screenCount = Object.keys(existingSiteLearnings.screens).length;
+            const patternCount = Object.keys(existingSiteLearnings.uxPatterns).length;
+            this.logger?.(`[Orchestrator] 📚 Loaded existing learnings: ${screenCount} screens, ${patternCount} UX patterns`);
+        }
         const maxSteps = explorationConfig.maxExplorationSteps || 50;
         let stepNumber = 0;
         const commandsExecuted = [];
@@ -805,7 +980,7 @@ try {
             stepNumber++;
             this.logger?.(`\n[Orchestrator] === Exploration Step ${stepNumber}/${maxSteps} ===`);
             // Build exploratory context
-            const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, stepNumber, maxSteps);
+            const context = await this.buildExploratoryContext(page, explorationConfig.explorationPrompt, explorationConfig.testDataPrompt, memory, siteLearnings, stepNumber, maxSteps);
             // Call agent with exploratory prompt
             const decision = await this.callExploratoryAgent(context, jobId, stepNumber);
             this.decisionParser.log(decision, stepNumber);
@@ -837,13 +1012,15 @@ try {
             // Handle blocker clearing
             if (decision.blockerDetected && decision.blockerDetected.clearingCommands) {
                 this.logger?.(`[Orchestrator] 🚧 Clearing blocker: ${decision.blockerDetected.description}`);
-                const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId);
+                const urlBeforeBlocker = page.url();
+                const blockerResult = await this.executeCommands(decision.blockerDetected.clearingCommands, page, memory, stepNumber, 1, jobId, urlBeforeBlocker, decision.screenState);
                 commandsExecuted.push(...blockerResult.executed);
             }
             // Execute exploration commands
             let commandsSucceeded = true;
             if (decision.commands && decision.commands.length > 0) {
-                const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId);
+                const urlBeforeExploration = page.url();
+                const executeResult = await this.executeCommands(decision.commands, page, memory, stepNumber, 1, jobId, urlBeforeExploration, decision.screenState);
                 commandsExecuted.push(...executeResult.executed);
                 commandsSucceeded = executeResult.allSucceeded;
             }
@@ -861,13 +1038,35 @@ try {
                 };
                 await this.progressReporter.onStepProgress(stepInfo);
             }
-            // Add experiences (both app patterns AND exploration progress)
-            if (decision.experiences) {
-                memory.experiences.push(...decision.experiences);
-                if (memory.experiences.length > this.config.maxExperiences) {
-                    memory.experiences = memory.experiences.slice(-this.config.maxExperiences);
+            // Auto-track visited screen (even without explicit learnings)
+            // Filter out transient screens and loading states
+            if (decision.screenState) {
+                const { screen, state } = decision.screenState;
+                // Skip about:blank and loading states (transient, not worth persisting)
+                const isTransientScreen = screen === 'about:blank' ||
+                    screen.toLowerCase().includes('blank');
+                const isLoadingState = state.toLowerCase().includes('loading') ||
+                    state.toLowerCase().includes('spinner') ||
+                    state.toLowerCase().includes('initializing');
+                if (!isTransientScreen && !isLoadingState) {
+                    if (!siteLearnings.screens[screen]) {
+                        siteLearnings.screens[screen] = { states: {} };
+                        this.logger?.(`[📍 Auto-tracked] Screen: ${screen}`);
+                    }
+                    if (!siteLearnings.screens[screen].states[state]) {
+                        siteLearnings.screens[screen].states[state] = { observations: {} };
+                        this.logger?.(`[📍 Auto-tracked] State: ${screen}[${state}]`);
+                    }
+                }
+                else {
+                    this.logger?.(`[⏭️  Skipped] Transient screen/state: ${screen}[${state}]`);
                 }
             }
+            // Update site learnings
+            if (decision.siteLearningsUpdate) {
+                this.logger?.(`[🔍 DEBUG] siteLearningsUpdate from LLM:\n${JSON.stringify(decision.siteLearningsUpdate, null, 2)}`);
+                (0, site_learnings_utils_1.mergeSiteLearnings)(siteLearnings, decision.siteLearningsUpdate, this.logger);
+            }
             // Store note for next iteration
             if (decision.noteToFutureSelf) {
                 memory.latestNote = {
@@ -883,7 +1082,8 @@ try {
                     commands: commandsExecuted,
                     iterations: stepNumber,
                     terminationReason: 'complete',
-                    memory
+                    memory,
+                    siteLearnings
                 };
             }
             else if (decision.status === 'stuck') {
@@ -894,6 +1094,7 @@ try {
                     iterations: stepNumber,
                     terminationReason: 'agent_stuck',
                     memory,
+                    siteLearnings,
                     error: decision.statusReasoning
                 };
             }
@@ -905,14 +1106,47 @@ try {
             commands: commandsExecuted,
             iterations: stepNumber,
             terminationReason: 'system_limit',
-            memory
+            memory,
+            siteLearnings
         };
     }
-    async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, stepNumber, maxSteps) {
+    async buildExploratoryContext(page, explorationPrompt, testDataPrompt, memory, siteLearnings, stepNumber, maxSteps) {
         // Wait for page to be ready and elements to appear (especially important after navigation)
         const currentPageInfo = await page_info_retry_1.PageInfoRetry.getWithRetry(page);
         const currentURL = page.url();
-        const recentSteps = memory.history.slice(-this.config.recentStepsCount);
+        // Get page dimensions for scroll vs screenshot decisions
+        // IMPORTANT: Wait for page to stabilize with retry (fixes lazy-loaded/dynamic content)
+        try {
+            await page.waitForLoadState('domcontentloaded', { timeout: 10000 });
+        }
+        catch (e) {
+            // Already loaded, continue
+        }
+        // Retry approach: Measure scrollHeight multiple times until it stabilizes
+        // Check MULTIPLE sources and use the maximum (handles edge cases like overflow:hidden)
+        const measureHeight = `Math.max(
+      document.documentElement.scrollHeight || 0,
+      document.body.scrollHeight || 0,
+      document.documentElement.offsetHeight || 0,
+      document.body.offsetHeight || 0
+    )`;
+        let pageHeight = await page.evaluate(measureHeight).catch(() => 0);
+        let previousHeight = 0;
+        let attempts = 0;
+        while (pageHeight !== previousHeight && attempts < 5) {
+            previousHeight = pageHeight;
+            await page.waitForTimeout(200); // Wait for potential expansion
+            pageHeight = await page.evaluate(measureHeight).catch(() => 0);
+            attempts++;
+        }
+        const viewport = page.viewportSize();
+        // @ts-expect-error - document is available in browser context during page.evaluate()
+        const pageWidth = await page.evaluate(() => document.documentElement.scrollWidth).catch(() => 0);
+        // @ts-expect-error - window is available in browser context during page.evaluate()
+        const scrollX = await page.evaluate(() => window.scrollX || window.pageXOffset).catch(() => 0);
+        // @ts-expect-error - window is available in browser context during page.evaluate()
+        const scrollY = await page.evaluate(() => window.scrollY || window.pageYOffset).catch(() => 0);
+        this.logger?.(`[Orchestrator] Exploration page dimensions: ${pageWidth}x${pageHeight}px (viewport: ${viewport?.width}x${viewport?.height}px) - stabilized after ${attempts} checks`, 'log');
         // SoM integration for exploratory mode
         let somScreenshot = undefined;
         let somElementMap = undefined;
@@ -926,9 +1160,11 @@ try {
                 catch (error) {
                     // Page already loaded or timeout - continue
                 }
-                // Update SoM markers
-                await this.somHandler.updateSom();
-                somScreenshot = await this.somHandler.getScreenshot(true, false, 60); // Viewport only - agent can scroll or request full page
+                // Update SoM markers after coordinate action
+                await this.somHandler.updateSom(true);
+                // TEMPORARY: Always use full-page screenshot for debugging
+                const useFullPageSom = true;
+                somScreenshot = await this.somHandler.getScreenshot(true, useFullPageSom, 60);
                 // Get element map for disambiguation
                 somElementMap = this.somHandler.getSomElementMap();
                 this.logger?.(`[Orchestrator] SoM screenshot captured for exploratory agent`, 'log');
@@ -944,12 +1180,16 @@ try {
             totalSteps: maxSteps,
             completedSteps: [],
             remainingSteps: [],
-            currentPageInfo,
             currentURL,
-            recentSteps,
-            experiences: memory.experiences,
-            extractedData: memory.extractedData,
-            noteFromPreviousIteration: memory.latestNote,
+            currentPageTitle: currentPageInfo.title,
+            viewportWidth: viewport?.width,
+            viewportHeight: viewport?.height,
+            pageWidth,
+            pageHeight,
+            scrollX,
+            scrollY,
+            journeyMemory: memory, // Journey-specific memory
+            siteLearnings, // Site-level learnings
             testDataPrompt, // CRITICAL: Store testDataPrompt in context
             somScreenshot, // SoM screenshot for exploratory mode (current)
             somElementMap // SoM element details for disambiguation
@@ -996,6 +1236,20 @@ try {
         const decision = this.decisionParser.parse(response.answer);
         return decision;
     }
+    /**
+     * Log a summary of the accumulated debug stats and return a shallow copy of this.debugStats (top-level fields are copied; nested values such as toolsUsed are still shared with the instance).
+     */
+    getDebugStats() {
+        const stats = { ...this.debugStats };
+        // Emit the summary through the optional logger; each call is skipped when this.logger is not set
+        this.logger?.(`\n========== DEBUG STATS SUMMARY ==========`);
+        this.logger?.(`Tokens In: ${stats.tokensUsedIn}, Tokens Out: ${stats.tokensUsedOut}`);
+        this.logger?.(`Images Used: ${stats.imagesUsed}`);
+        this.logger?.(`Tools Used: ${Object.keys(stats.toolsUsed).length > 0 ? JSON.stringify(stats.toolsUsed, null, 2) : 'NONE'}`);
+        this.logger?.(`Prompt Suggestions: ${stats.promptImproveSuggestions.length}`);
+        this.logger?.(`=========================================\n`);
+        return stats;
+    }
 }
 exports.OrchestratorAgent = OrchestratorAgent;
 //# sourceMappingURL=orchestrator-agent.js.map