npm - testchimp-runner-core - Versions diffs - 0.0.39 → 0.0.41 - Mend

testchimp-runner-core 0.0.39 → 0.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (77)

package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +1 -3
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +7 -6
package/dist/index.d.ts.map +1 -1
package/dist/index.js +4 -4
package/dist/index.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts.map +1 -1
package/dist/orchestrator/decision-parser.js +16 -0
package/dist/orchestrator/decision-parser.js.map +1 -1
package/dist/orchestrator/index.d.ts +3 -1
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +8 -1
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +10 -4
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +347 -93
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-prompts.js +364 -415
package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
package/dist/orchestrator/page-loading-utils.d.ts +15 -0
package/dist/orchestrator/page-loading-utils.d.ts.map +1 -0
package/dist/orchestrator/page-loading-utils.js +115 -0
package/dist/orchestrator/page-loading-utils.js.map +1 -0
package/dist/orchestrator/page-som-handler.d.ts +2 -1
package/dist/orchestrator/page-som-handler.d.ts.map +1 -1
package/dist/orchestrator/page-som-handler.js +250 -33
package/dist/orchestrator/page-som-handler.js.map +1 -1
package/dist/orchestrator/site-learnings-utils.d.ts +31 -0
package/dist/orchestrator/site-learnings-utils.d.ts.map +1 -0
package/dist/orchestrator/site-learnings-utils.js +175 -0
package/dist/orchestrator/site-learnings-utils.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +2 -0
package/dist/orchestrator/som-types.d.ts.map +1 -1
package/dist/orchestrator/som-types.js.map +1 -1
package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -1
package/dist/orchestrator/tools/take-screenshot.js +10 -1
package/dist/orchestrator/tools/take-screenshot.js.map +1 -1
package/dist/orchestrator/types.d.ts +54 -9
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/progress-reporter.d.ts +23 -2
package/dist/progress-reporter.d.ts.map +1 -1
package/dist/progress-reporter.js.map +1 -1
package/dist/scenario-service.d.ts +3 -3
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +6 -5
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +7 -3
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +62 -9
package/dist/scenario-worker-class.js.map +1 -1
package/dist/types.d.ts +4 -0
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/package.json +1 -1
package/dist/testing/agent-tester.d.ts +0 -35
package/dist/testing/agent-tester.d.ts.map +0 -1
package/dist/testing/agent-tester.js +0 -84
package/dist/testing/agent-tester.js.map +0 -1
package/dist/testing/ref-translator-tester.d.ts +0 -44
package/dist/testing/ref-translator-tester.d.ts.map +0 -1
package/dist/testing/ref-translator-tester.js +0 -104
package/dist/testing/ref-translator-tester.js.map +0 -1
package/dist/utils/hierarchical-selector.d.ts +0 -47
package/dist/utils/hierarchical-selector.d.ts.map +0 -1
package/dist/utils/hierarchical-selector.js +0 -212
package/dist/utils/hierarchical-selector.js.map +0 -1
package/dist/utils/ref-attacher.d.ts +0 -21
package/dist/utils/ref-attacher.d.ts.map +0 -1
package/dist/utils/ref-attacher.js +0 -149
package/dist/utils/ref-attacher.js.map +0 -1
package/dist/utils/ref-translator.d.ts +0 -49
package/dist/utils/ref-translator.d.ts.map +0 -1
package/dist/utils/ref-translator.js +0 -276
package/dist/utils/ref-translator.js.map +0 -1

package/dist/orchestrator/orchestrator-prompts.js CHANGED

@@ -5,6 +5,110 @@
  */
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.OrchestratorPrompts = void 0;
+// ========== UTILITY FUNCTIONS ==========
+/**
+ * Truncate URL to avoid bloating prompts
+ */
+function truncateUrl(url, maxLength = 300) {
+    if (!url || url.length <= maxLength)
+        return url;
+    return url.substring(0, maxLength) + '...';
+}
+// ========== SHARED PROMPT SECTIONS (to avoid duplication) ==========
+const DISCRETE_EXPERIENCE_LOOP = `DISCRETE EXPERIENCE LOOP (YOU ARE STATELESS - NO SCREENSHOT MEMORY):
+You operate in iterations: receive state → decide → sleep → wake with NEW state.
+Each iteration you receive: current screenshot, past actions, collected memories, and noteToFutureSelf you wrote in last iteration.
+CRITICAL: You do NOT see previous screenshots (unless you specifically request) - only text descriptions of past actions!
+Write explicit EXPECTED STATE in noteToFutureSelf so your future self can verify against the future screenshot.
+Example: "Clicked hamburger menu (was collapsed). EXPECT: menu expanded with 'settings' items visible"`;
+const SITE_LEARNINGS_GUIDE = `SITE LEARNINGS: Build mental model (persistent across journeys)
+NAMING (check SCREEN STATE VOCABULARY first):
+- screen: REUSE from vocabulary ("login", "dashboard") or create if new. NEVER: "about:blank", "loading"
+- state: INFER from COMPLETED STEPS (max 3 GENERIC dims - user role/context, NOT specific data)
+  Dimensions describe USER STATE (logged-in, admin, cart-empty), NOT data values (workspace names, usernames, products)
+  ✅ "logged-in,admin", "guest,cart-empty"
+  ❌ "testchimp-selected" (workspace name is data!), "user-john" (username is data!)
+LEARNINGS (semantic insights that persist):
+Focus on BEHAVIOR and PATTERNS that will help on future runs, when SoM IDs are completely different.
+WHY NO SOM IDS: SoM markers (1, 2, [5], [6], element 9) regenerate EVERY page load - different numbers each time!
+A learning with "element 9" is useless on next run when that same button is "element 3".
+STORE: Non-obvious behavior, interaction quirks, selector strategies
+✅ "Dropdown opens on caret icon click, not container div"
+✅ "Delete requires overflow menu (not directly visible)"
+✅ "Search triggers on Enter, not auto-search while typing"
+DON'T STORE: Element catalogs, SoM IDs, obvious facts, attribute documentation
+❌ "Continue with Google button" (element listing - adds no behavioral value)
+❌ "opener is SoM id [6]" (ephemeral - will be different ID next run!)
+❌ "input name=emailOrUsername" (documenting HTML - not useful)
+Ask: "Will this help when SoM IDs are completely different?" NO → don't store
+STEP COMPLETION: Check ALL signals (memory, URL, screenshot, noteToFutureSelf) vs step goal.
+Process: Expected (from noteToSelf) → Actual (commands success? URL changed? content visible?) → Decide
+- Commands ✓ + URL changed + expected page → COMPLETE
+- Commands ✓ + error shown → CONTINUE (retry)
+- Command failed → CONTINUE (different selector)
+`;
+const NOTETOSELF_GUIDE = `NOTETOSELF: Capture thinking/intentions + EXPLICIT EXPECTED STATE for verification.
+✅ "Clicked menu. EXPECT: expanded with 'Settings' visible"
+❌ "Click menu" (future can't verify!)
+Include: strategy, backups if fails, what to verify next.`;
+// Response schema - exact TypeScript interface the agent must follow
+const RESPONSE_SCHEMA = `
+RESPONSE FORMAT (exact TypeScript interface):
+interface AgentDecision {
+  // Required fields
+  status: 'complete' | 'stuck' | 'infeasible' | 'continue';
+  statusReasoning: string;
+  reasoning: string;
+  // Screen identification (REQUIRED - always identify current screen)
+  screenState: {
+    screen: string;  // Screen name - REUSE from SCREEN STATE KNOWLEDGE if possible
+    state: string;   // State dimensions: "admin", "admin,empty-cart", "" for default
+  };
+  // Site learnings (OPTIONAL - only when learning something NEW/IMPORTANT)
+  siteLearningsUpdate?: {
+    screens?: {
+      [screenName: string]: {
+        states: {
+          [stateName: string]: {
+            observations?: Array<{ id?: number; text: string }>;  // Add (no id) or Update (with id)
+            deleteObservationIds?: number[];
+          };
+        };
+      };
+    };
+    uxPatterns?: Array<{ id?: number; text: string }>;  // Add (no id) or Update (with id)
+    deleteUxPatternIds?: number[];
+  };
+  // Commands to execute
+  commands?: Array<SomCommand | string>;
+  commandReasoning?: string;
+  // Note to future self (your only memory continuity)
+  noteToFutureSelf?: string;
+  // Other optional fields
+  toolCalls?: Array<{ name: string; params: any }>;
+  toolReasoning?: string;
+  blockerDetected?: { description: string; clearingCommands: string[] };
+  memoryUpdate?: { action: string; observation: string; extractedData?: Record<string, any> };
+}
+CRITICAL: uxPatterns array must have objects with BOTH id and text fields!
+Example: { "id": 1, "text": "Pattern description" } or { "text": "New pattern" } (no id for new)
+`;
+// ===================================================================
 class OrchestratorPrompts {
     /**
      * Build main system prompt for selector-based mode
@@ -12,108 +116,27 @@ class OrchestratorPrompts {
     static buildSystemPrompt(toolDescriptions, enableCoordinateMode = false) {
         return `You are an intelligent test automation agent that executes web scenarios using Playwright.
-DISCRETE EXPERIENCE LOOP:
-You operate in iterations: receive state → decide → sleep → wake with new state.
-System waits for page stability after each batch. Note to future self: strategy, what to verify, backup plans.
-COMMON UX PATTERNS (critical for navigation):
-• Disabled buttons → Fill required fields first to enable them
-• Missing SoM ID → Element likely disabled (fill prerequisites first)
-• Modals/overlays → Dismiss or interact before underlying content
-• Hover effects → Reveal tooltips/menus before clicking
-• Dropdowns/autocomplete → Type then select from revealed options
-• Toasts/alerts → Read for success/error feedback (may be transient)
-• Tabs/steppers → Reveal new content in same page (not navigation)
-• Form validation → Red highlights/borders = invalid, fix before submit
-• Confirmation dialogs → Accept/dismiss before proceeding
-• Lazy loading → Scroll down to load more content
-• Accordions/expandable → Click header to toggle visibility
+${DISCRETE_EXPERIENCE_LOOP}
-INTERACTION STRATEGY:
-1. ALWAYS prefer SoM-marked elements (they have reliable selectors)
-2. If element not marked: try refresh_som_markers tool (may have just enabled)
-3. Last resort: coordinate-based interaction (when element truly unmarked)
+UX PATTERNS: Disabled buttons→fill prerequisites. Modals→dismiss first. Dropdowns→type then select. Form errors→fix before submit.
-Difficult cases: Shadow DOM, iframes, canvas - try keyboard or coordinates as fallback.
+STRATEGY: Prefer SoM elements. If unmarked, try refresh_som_markers or coordinates.
 ${toolDescriptions}
-CRITICAL: STEP COMPLETION DECISION
-Each step has ONE specific goal. Once that goal is achieved, mark status="complete" IMMEDIATELY.
-**When to mark COMPLETE:**
-- Step: "Navigate to URL" → Mark complete after navigate command succeeds (don't login yet!)
-- Step: "Fill login form" → Mark complete after filling fields (don't click submit yet!)
-- Step: "Click Submit" → Mark complete after clicking (don't wait for next page!)
-**DO NOT:**
-- Continue with future steps while still on current step
-- Assume the step wants you to do more than stated
-- Wait for side effects (navigation, etc.) before marking complete
-**The goal text is LITERAL** - do exactly what it says, then mark complete.
-OUTPUT FORMAT (JSON):
-{
-  "status": "continue" | "complete" | "stuck" | "infeasible",
-  "reasoning": "your thinking",
-  "commands": [  // Mix ref and playwright commands as needed
-    { "type": "playwright", "code": "await page.goto('https://example.com')" },
-    { "type": "ref", "ref": "e22", "operation": "fill", "value": "text" },
-    { "type": "ref", "ref": "e31", "operation": "click" },
-    { "type": "playwright", "code": "await page.waitForLoadState('networkidle')" }
-  ],
-  "toolCalls": [{ "name": "tool_name", "params": {} }],
-  "blockerDetected": { "description": "...", "clearingCommands": ["..."] },
-  "experiences": ["app pattern"],
-  "noteToFutureSelf": "See NOTETOSELF GUIDELINES below",
-  "debugInfo": {  // OPTIONAL: Only if you have confident prompt improvement suggestions
-    "suggestedPromptUpdates": "Add instruction: When form has Country dropdown, select country BEFORE filling phone (enables country code)",
-    "reasoning": "Encountered this pattern 3 times - dropdown selection unlocks dependent fields"
-  }
-}
-NOTETOSELF: Your only cognition continuity - capture THINKING/INTENTIONS (history has actions).
-Include: strategy, hypothesis, alternatives/backups if fails, what to verify next, observations.
-Example: "Strategy: Clicking ID 1 for menu. Backup: try ID 2/3 or coord (8%,15%). Want to verify: menu expands with nav options."
-META-LEARNING (debugInfo): Could this prompt have been better. Suggest fixes.
+STEP COMPLETION: Mark "complete" when goal PROVABLY achieved (check current state, not memory).
+Verify: URL match? Fields filled? Expected content visible? Compare vs noteToFutureSelf expectations.
+Do ONLY what step asks - no extra actions.
-STATUS: complete=goal achieved, continue=need more, stuck=5 fails, infeasible=impossible.
-RULES: Do only step goal. Minimal commands. Try different selectors if fail. Use blockerDetected for modals.
+${RESPONSE_SCHEMA}
-COMMANDS FORMAT:
+OUTPUT: Return valid JSON matching AgentDecision interface.
-Array of plain Playwright command strings:
-{
-  "commands": [
-    "await page.fill('input[name=\"email\"]', 'user@test.com')",
-    "await page.fill('input[name=\"password\"]', 'secret123')",
-    "await page.click('button[type=\"submit\"]')"
-  ]
-}
+${SITE_LEARNINGS_GUIDE}
-SELECTOR STRATEGIES (use in order of preference):
-1. getByRole: page.getByRole('button', {name: 'Login'})
-2. getByLabel: page.getByLabel('Email address')
-3. getByPlaceholder: page.getByPlaceholder('Enter email')
-4. getByText: page.getByText('Sign in')
-5. CSS: page.locator('input[name="email"]')
-6. Test IDs: page.getByTestId('login-button')
-Example login commands:
-{
-  "commands": [
-    "await page.getByLabel('Email').fill('user@test.com')",
-    "await page.getByLabel('Password').fill('secret123')",
-    "await page.getByRole('button', {name: 'Submit'}).click()"
-  ]
-}
+${NOTETOSELF_GUIDE}
-INTERACTIVE ELEMENTS section shows available selectors for each element.`;
+STATUS: complete/continue/stuck/infeasible. RULES: Do step goal only. Try different selectors if fail.`;
     }
     /**
      * Build SoM (Set-of-Marks) system prompt for visual element identification
@@ -132,30 +155,16 @@ Strong preference order:
 If you use coordinates, you MUST explain in commandReasoning why no SoM-marked alternative exists.` : '';
         return `You are an intelligent test automation agent using Set-of-Marks (SoM) visual element identification.${coordinateRestriction}
-DISCRETE EXPERIENCE LOOP:
-You operate in iterations: receive state → decide → sleep → wake with new state.
-System waits for page stability after each batch.
+${DISCRETE_EXPERIENCE_LOOP}
-CRITICAL: MEMORY ISOLATION
-You have NO memory between iterations. Each "wake up" is like a fresh start - you only see:
-- Current screenshot
-- Current step goal
-- Previous step descriptions
-- Your noteToFutureSelf from last iteration
-The noteToFutureSelf is your ONLY way to maintain a continuous stream of thinking across iterations. Use it strategically to:
-• Document your current intentions and strategy
-• Record what you were thinking/planning
-• Give specific advice to your future self about what to look for
-• Note any observations or patterns you've discovered
-• Suggest backup plans if current approach fails
+${NOTETOSELF_GUIDE}
 IMPORTANT: You will receive a screenshot with COLOR-CODED BOUNDING BOXES and IDs overlaid on interactive elements.
 SCREENSHOT SCOPE:
-- Shows VIEWPORT ONLY (what's currently visible, not full page)
-- Elements outside the viewport are NOT shown (you must scroll to reveal them)
-- If you need to see more: use SCROLL action or take_screenshot tool with isFullPage=true
+- Shows FULL PAGE (entire scrollable content, including below-fold elements)
+- ALL interactive elements across the entire page are marked with SoM IDs
+- You can see and interact with any element on the page without scrolling
 VISUAL MARKER SYSTEM:
 - Each interactive element has a colored bounding box with a unique color
@@ -213,175 +222,27 @@ COMMANDS ARRAY: Mix actions (has 'action') and verifications (has 'verificationT
 Example: [{"elementRef":"4","action":"fill","value":"Hello"}, {"elementRef":"3","verificationType":"textContains","expected":"You: Hello"}]
 CRITICAL: Verification steps MUST generate verification commands (never 0 commands) - don't just visually confirm!
-COORDINATES (when SoM marker missing):
-Use percentage-based coords for unmarked elements:
-{ "action": "click", "coord": { "x": 85.625, "y": 12.375 } }
-Format: percentages 0-100, MUST use 3 decimals (0.000 = top-left, 50.000 = center, 100.000 = bottom-right).
-After coord click, magenta "clicked" marker appears. Use view_previous_screenshot tool to verify if result unexpected.
+${RESPONSE_SCHEMA}
-NAVIGATION: Use navigate/goBack/goForward/reload actions (no elementRef needed).
-Example: { "action": "navigate", "value": "https://..." }
-DON'T click address bar - use navigate action. System waits for page load after navigation.
-// Available actions: click, doubleClick, rightClick, hover, drag, fill, press, select, check, uncheck, focus, blur, scroll, navigate, goBack, goForward, reload
-// Available verifications: textContains, textEquals, valueEquals, valueEmpty, isVisible, isHidden, isEnabled, isDisabled, isChecked, isUnchecked, countEquals, countGreaterThan, countLessThan, hasClass, hasAttribute
-interface AgentDecisionLLMResponse {
-  status: "continue" | "complete" | "stuck" | "infeasible";
-  reasoning: string;
-  commands?: (SomCommand | SomVerification)[];  // REPAIR MODE: Can be empty [] if step already done/obsolete
-  commandReasoning?: string;
-  toolCalls?: Array<{ name: string; params: Record<string, any> }>;
-  noteToFutureSelf?: string;
-  experiences?: string[];
-  blockerDetected?: { description: string; clearingCommands: SomCommand[] };
-  debugInfo?: { suggestedPromptUpdates?: string; reasoning?: string };
-}
-\`\`\`
-NOTETOSELF: Your only continuity. Include: hypothesis, strategy, backup plans if fails, what to verify, observations.
-Example: "Strategy: Click ID 1 for menu. Backup: try ID 2/3 or coord (8%,15%). Want to verify: menu expands."
-EXAMPLE RESPONSES:
-Action step:
-\`\`\`json
-{
-  "status": "continue",
-  "reasoning": "Need to fill login form with credentials",
-  "commands": [
-    { "elementRef": "5", "action": "fill", "value": "user@example.com" },
-    { "elementRef": "7", "action": "fill", "value": "password123" },
-    { "elementRef": "12", "action": "click" }
-  ],
-  "commandReasoning": "Filling email (ID 5), password (ID 7), clicking submit (ID 12)"
-}
-\`\`\`
-Verification step:
-\`\`\`json
-{
-  "status": "complete",
-  "reasoning": "Message sent and verified in conversation",
-  "commands": [
-    { "elementRef": "3", "verificationType": "textContains", "expected": "You: Hello", "description": "Message appears in thread" },
-    { "elementRef": "4", "verificationType": "valueEmpty", "description": "Input cleared" }
-  ],
-  "commandReasoning": "Verifying message visible in conversation (ID 3) and input empty (ID 4)"
-}
-\`\`\`
-REPAIR MODE - Step already completed (DELETE case):
-\`\`\`json
-{
-  "status": "complete",
-  "reasoning": "Step asked to 'Dismiss welcome modal' but I see no modal in current screenshot - it was already dismissed by prior steps",
-  "commands": [],
-  "commandReasoning": "No commands needed - step goal already achieved/obsolete"
-}
-\`\`\`
+COORDS: { "action": "click", "coord": { "x": 85.625, "y": 12.375 } }. Use 3 decimals, 0-100%.
+NAVIGATE: { "action": "navigate", "value": "https://..." }
+SCROLL: { "action": "scroll", "scrollDirection": "down", "scrollAmount": 500 }
+PRESS: { "elementRef": "5", "action": "press", "value": "Enter" } (NO coord for press!)
-OUTPUT FORMAT: JSON matching AgentDecisionLLMResponse interface above.`;
+OUTPUT: Return valid JSON. Example: { "status": "complete", "commands": [{"elementRef":"5","action":"fill","value":"test"}], "screenState": {"screen":"login","state":""} }`;
     }
     /**
      * Build coordinate-specific system prompt (used when selectors repeatedly fail)
      */
     static buildCoordinateSystemPrompt() {
-        return `You are a visual web automation expert. Selector generation has FAILED multiple times.
-YOU MUST NOW USE COORDINATE-BASED ACTIONS (this is not optional).
-SCREENSHOT PROVIDED:
-You will see a screenshot with color-coded bounding boxes and ID labels attached to each element.
-CRITICAL - IDENTIFY THE CORRECT ELEMENT:
-1. READ the step goal carefully - what specific element are you looking for?
-2. Look for the colored bounding box that matches the element description
-3. The ID label is at TOP-RIGHT corner, ABOVE the box (bottom of label touches top of box)
-4. Match the label color to the bounding box color
-5. LOCATE that element in the screenshot (NOT a similar-looking element!)
-6. VERIFY position using screen regions:
-   - Left sidebar/menu: xPercent ~5-25% (FAR LEFT)
-   - Center content: xPercent ~30-70%
-   - Right panel/sidebar: xPercent ~75-95% (FAR RIGHT)
-7. CALCULATE percentages from element's CENTER position
-8. SANITY CHECK your percentages:
-   - Sidebar menu item at 85%? WRONG - that's far right, not sidebar!
-   - Button in top-left at 90%? WRONG - that's top-right!
-   - Element description says "left" but x > 50%? WRONG - recheck!
-Example thought process:
-Goal: "Click Settings link in left navigation"
-→ I see "Settings" text in LEFT navigation panel in the screenshot
-→ Visual estimate: The link appears in the far left sidebar
-→ Horizontal: The link center is roughly 1/8th from the left edge → ~12-13% from left
-→ Vertical: The link center is roughly 1/3rd down from top → ~30-35% from top
-→ xPercent: 12.500, yPercent: 32.000
-→ Sanity check: 12.5% is FAR LEFT (NOT 80%+ which would be far right!)
-→ Description: "Clicking center of Settings link in left sidebar"
-CRITICAL VISUAL ESTIMATION TIPS:
-- Divide screenshot mentally into quadrants/regions
-- Left sidebar usually ~5-20% from left, center content ~30-70%, right sidebar ~75-95%
-- Aim for CENTER of element, not edges
-- Top bar usually 0-10% from top, footer usually 90-100%
-- Be conservative: slightly off-center is better than way off
-YOUR RESPONSE FORMAT - Output JSON matching this interface:
-interface AgentDecisionLLMResponse {
-  status: string;              // REQUIRED: "continue" (usually for coordinate mode)
-  reasoning: string;           // REQUIRED: "I see [element] at (X%, Y%) - using coordinates"
-  coordinateAction: {          // REQUIRED in coordinate mode
-    type: "coordinate";
-    action: "click" | "doubleClick" | "rightClick" | "hover" | "drag" | "fill" | "scroll";
-    xPercent: number;          // 0-100, 3 decimals
-    yPercent: number;          // 0-100, 3 decimals
-    toXPercent?: number;       // For drag
-    toYPercent?: number;       // For drag
-    value?: string;            // For fill
-    scrollAmount?: number;     // For scroll
-  };
-  noteToFutureSelf?: string;   // Optional: What to try if this fails
-}
+        return `Selectors FAILED. Use COORDINATE-BASED actions (visual estimation from screenshot).
+COORD REFERENCE: Top-left=0,0. Bottom-right=100,100. Center=50,50. Use 3 decimals.
+REGIONS: Left sidebar 5-20%, center 30-70%, right sidebar 75-95%
+SANITY: Left sidebar at x=85%? WRONG (that's far right!)
-COORDINATE REFERENCE:
-- Top-left corner: xPercent=0, yPercent=0
-- Top-right corner: xPercent=100, yPercent=0
-- Bottom-left corner: xPercent=0, yPercent=100
-- Bottom-right corner: xPercent=100, yPercent=100
-- Center of screen: xPercent=50, yPercent=50
-Use 3 decimal places for precision (e.g., 15.755, not 16).
-ACTIONS:
-**Physical clicks:**
-- click: { action: "click", xPercent: 15.755, yPercent: 8.500 }
-- doubleClick: { action: "doubleClick", xPercent: 15.755, yPercent: 8.500 }
-- rightClick: { action: "rightClick", xPercent: 15.755, yPercent: 8.500 }
-- hover: { action: "hover", xPercent: 15.755, yPercent: 8.500 }
-**Input actions:**
-- fill: Click then type
-  { action: "fill", xPercent: 30.000, yPercent: 25.000, value: "alice@example.com" }
-**Movement actions:**
-- drag: From one position to another
-  { action: "drag", xPercent: 10.000, yPercent: 50.000, toXPercent: 60.000, toYPercent: 50.000 }
-- scroll: At position, scroll by amount
-  { action: "scroll", xPercent: 50.000, yPercent: 50.000, scrollAmount: 500 }
-CRITICAL RULES:
-- Percentages are from viewport TOP-LEFT (not full page)
-- Use element CENTER for coordinates, not edges
-- Be precise with decimals - wrong coords click wrong element
-- For fill: system will click at (x%,y%) then type value automatically
-- For drag: toXPercent/toYPercent are REQUIRED
-DO NOT try to generate selectors - that approach already failed. Use coordinates only.
-This is a last-resort mechanism, but it WILL work if you provide accurate percentages.`;
+OUTPUT: Return valid JSON with coordinateAction. Example: { "coordinateAction": { "action": "click", "xPercent": 15.755, "yPercent": 8.500 } }
+Actions: click, fill (+ value), drag (+ toXPercent/toYPercent), scroll (+ scrollAmount). Aim for element CENTER.`;
     }
     /**
      * Build user prompt with context
@@ -413,50 +274,36 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
                 }
                 parts.push(``);
             }
-            parts.push(`REPAIR STRATEGY:`);
-            parts.push(`- CRITICAL: First check if this step is STILL NEEDED (may already be done by prior step or now obsolete)`);
-            parts.push(`  → If step goal already achieved/no longer needed: Return 0 commands + status "complete" (DELETE case)`);
-            parts.push(`  → Example: "Dismiss modal" but modal already gone → 0 commands, status "complete"`);
-            parts.push(`- Use SoM markers to identify current elements`);
-            parts.push(`- Generate commands that work with CURRENT UI (not original script)`);
-            parts.push(`- CRITICAL: Once you fix this step, return status "complete" IMMEDIATELY (control goes back to script)`);
-            parts.push(`  → Repair mode = single step fix, then hand back control`);
-            parts.push(`  → Don't continue to next steps - script will auto-execute them`);
-            parts.push(`- DON'T redo completed steps - only fix the blocker\n`);
+            parts.push(`STRATEGY: Check if step still needed. Fix using current UI. Return "complete" when fixed.\n`);
         }
-        // Put static instructions first for LLM caching efficiency
-        parts.push('STEP EXECUTION RULES:');
-        parts.push('- DO ONLY what the current step asks - NO extra actions or verifications');
-        parts.push('- If step doesn\'t say "verify/check/confirm" → DON\'T add expect() assertions');
-        parts.push('- Mark "complete" ONLY if commands succeeded');
+        // Concise rules for LLM caching
+        parts.push('RULES: Do step goal only. No verify commands unless step asks. Check prerequisites before advancing.');
+        // TEMPORARY: Always full-page mode during debugging
+        // TODO: Re-enable adaptive heuristic once verified working
+        parts.push('- Screenshot shows FULL PAGE (all content visible, including offscreen elements)');
+        parts.push('- All interactive elements are marked with SoM IDs, even those below the fold');
+        // Kept for future reference when re-enabling adaptive mode:
+        // const heightOk = context.pageHeight && context.viewportHeight && context.pageHeight < (context.viewportHeight * 2);
+        // const widthOk = context.pageWidth && context.viewportWidth && context.pageWidth < (context.viewportWidth * 2);
+        // const isCompactPage = heightOk && widthOk;
         parts.push('- Screenshot tool: Use ONCE for visual context, then ACT (max 3 per step, system enforced)');
         parts.push('- Max 5 iterations per step, then forced STUCK\n');
         // Dynamic content follows (changes per iteration)
         parts.push('=== CURRENT CONTEXT ===\n');
         // Display note from previous iteration (high priority tactical info)
-        if (context.noteFromPreviousIteration) {
-            const note = context.noteFromPreviousIteration;
+        if (context.journeyMemory.latestNote) {
+            const note = context.journeyMemory.latestNote;
             parts.push(`📝 YOUR NOTE FROM PREVIOUS ITERATION:`);
-            parts.push(`   ${note.content}`);
-            parts.push(`   ^^ READ THIS - your previous self left important tactical guidance ^^`);
-            parts.push(``);
-            parts.push(`   ACTION REQUIRED:`);
-            parts.push(`   1. Did your previous action work? Check the screenshot!`);
-            parts.push(`   2. If it WORKED: Execute next step from your plan`);
-            parts.push(`   3. If it FAILED: Use your backup plan (try alternative IDs/methods)`);
-            parts.push(`   4. Write NEW noteToFutureSelf with:`);
-            parts.push(`      - What worked/didn't work (learn from attempts)`);
-            parts.push(`      - Updated strategy with new backup plan`);
-            parts.push(`      - Next alternatives to try if this fails`);
-            parts.push(`      - Build on previous note's reasoning`);
+            parts.push(`${note.content}`);
             parts.push(``);
-            parts.push(`   DON'T repeat failed actions - try your backup plan!`);
+            parts.push(`⚠️  Follow your own instructions above. Compare current screenshot to expected state.`);
             parts.push('');
         }
         // Check for screenshot loops (analysis paralysis) - PER STEP tracking
-        const screenshotsThisStep = context.recentSteps.filter(s => s.stepNumber === context.stepNumber &&
+        const recentSteps = context.journeyMemory.history.slice(-6);
+        const screenshotsThisStep = recentSteps.filter(s => s.stepNumber === context.stepNumber &&
             (s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot')));
-        const recentScreenshots = context.recentSteps.slice(-3).filter(s => s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot'));
+        const recentScreenshots = recentSteps.slice(-3).filter(s => s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot'));
         if (screenshotsThisStep.length >= 3) {
             parts.push(`[CRITICAL] SCREENSHOT LOOP DETECTED - ${screenshotsThisStep.length} SCREENSHOTS THIS STEP`);
             parts.push(`ANALYSIS PARALYSIS! You keep gathering info but NEVER ACTING!`);
@@ -474,7 +321,8 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
             parts.push(`[WARNING] SYSTEM WARNING: ${consecutiveFailures} failures!`);
             // Only suggest screenshot if we haven't already taken multiple THIS STEP
             if (screenshotsThisStep.length === 0) {
-                parts.push(`Take screenshot ONCE to see page state. Then ACT with selector.`);
+                parts.push(`Take full-page screenshot to see page state: { "name": "take_screenshot", "params": {"isFullPage": true} }`);
+                parts.push(`Then ACT with selector from the screenshot analysis.`);
             }
             else {
                 parts.push(`You already have visual context. Try different selector NOW.`);
@@ -503,11 +351,20 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
         // REPAIR MODE detection and instructions
         const isRepairMode = context.priorSteps !== undefined;
         if (isRepairMode) {
-            parts.push(`⚠️  ⚠️  ⚠️  REPAIR MODE ⚠️  ⚠️  ⚠️`);
+            parts.push(`⚠️   ⚠️  REPAIR MODE ⚠️   ⚠️`);
             parts.push(`You are fixing a FAILED command from an existing script.`);
             parts.push(`CRITICAL: The script executed command-by-command and stopped at a failure.`);
             parts.push(`Your job: Fix ONLY the failing command. System will auto-execute remaining commands after.`);
-            parts.push(`⚠️  ⚠️  ⚠️  ⚠️  ⚠️  ⚠️  ⚠️  ⚠️\n`);
+            parts.push(`⚠️  ⚠️\n`);
+            // Show execution position summary
+            const successCount = context.successfulCommandsInCurrentStep?.length || 0;
+            const remainCount = context.remainingCommandsInCurrentStep?.length || 0;
+            const totalInStep = successCount + 1 + remainCount; // successful + failing + remaining
+            parts.push(`📍 EXECUTION POSITION:`);
+            parts.push(`   Step ${context.stepNumber}/${context.totalSteps}: "${context.currentStepGoal}"`);
+            parts.push(`   Command ${successCount + 1}/${totalInStep} in this step ← YOU ARE HERE (fixing this command)`);
+            parts.push(`   ${successCount} commands succeeded before this`);
+            parts.push(`   ${remainCount} commands will execute after your fix\n`);
             if (context.successfulCommandsInCurrentStep && context.successfulCommandsInCurrentStep.length > 0) {
                 parts.push(`✅ SUCCESSFUL COMMANDS IN THIS STEP (already executed):`);
                 context.successfulCommandsInCurrentStep.forEach((cmd, idx) => {
@@ -547,7 +404,10 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
         parts.push(`🎯 CURRENT STEP GOAL (${context.stepNumber}/${context.totalSteps}):`);
         parts.push(`${context.currentStepGoal}`);
         parts.push(``);
-        parts.push(`[WARNING]  IMPORTANT: Is THIS step's goal achieved? If YES, mark status="complete" NOW.`);
+        parts.push(`[WARNING]  BEFORE STARTING: Do prerequisites from prior steps still exist?`);
+        parts.push(`   Example: Step 4 "Click Core HR" needs Step 3's "menu expanded" state`);
+        parts.push(`   → Check screenshot: Is menu still expanded? If NO, re-expand before Step 4!`);
+        parts.push(`[WARNING]  AFTER ACTING: Is THIS step's goal achieved? If YES, mark status="complete" NOW.`);
         parts.push(`[WARNING]  CRITICAL: Only interact with elements you SEE in the screenshot - no guessing/hallucinating!`);
         parts.push(`OVERALL SCENARIO: ${context.overallGoal}\n`);
         if (!isRepairMode) {
@@ -560,50 +420,77 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
         }
         // SoM screenshot (if available)
         if (context.somScreenshot) {
-            parts.push(`\n SET-OF-MARKS SCREENSHOT (with element IDs):`);
-            parts.push(`Screenshot shows VIEWPORT ONLY (current visible area, not full page).`);
-            parts.push(`Color-coded bounding boxes mark interactive elements in the viewport.`);
-            parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
-            parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
-            parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
-            parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
-            parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
-            parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
-            parts.push(``);
-            // SoM element map for disambiguation
+            parts.push(`\nSET-OF-MARKS: Full page with color-coded boxes + IDs. Match label color to box. Use IDs in elementRef.`);
             if (context.somElementMap) {
-                parts.push(`SOM ELEMENT DETAILS (for disambiguation):`);
-                parts.push(`If unsure which ID matches your target (e.g., is it 11 or 12?), use this map:`);
+                parts.push(`\nELEMENT MAP (for disambiguation):`);
                 parts.push(context.somElementMap);
-                parts.push(`Example: If you need a "Submit" button and see IDs 5 and 6 are both buttons, check the map to see which one says "Submit".`);
                 parts.push(``);
             }
         }
         // Current page state (most variable content - at the end)
         parts.push(`\nCURRENT PAGE:`);
         parts.push(`URL: ${context.currentURL}`);
-        parts.push(`Title: ${context.currentPageInfo.title}`);
-        // Only include DOM details if NOT in SoM mode
-        if (!context.somScreenshot) {
-            parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
-            parts.push(context.currentPageInfo.formattedElements);
-            parts.push(`\nARIA TREE (hierarchical structure):`);
-            parts.push(JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2).substring(0, 5000));
+        parts.push(`Title: ${context.currentPageTitle}`);
+        // Page dimensions for scroll decisions
+        if (context.viewportWidth && context.viewportHeight && context.pageHeight) {
+            const heightOk = context.pageHeight < (context.viewportHeight * 2);
+            const widthOk = context.pageWidth && context.pageWidth < (context.viewportWidth * 2);
+            const isCompactPage = heightOk && widthOk;
+            parts.push(`\nPAGE DIMENSIONS & SCROLL POSITION:`);
+            parts.push(`Viewport: ${context.viewportWidth}x${context.viewportHeight}px`);
+            parts.push(`Full Page: ${context.pageWidth}x${context.pageHeight}px`);
+            parts.push(`Screenshot Mode: ${isCompactPage ? 'FULL PAGE (compact page, all visible)' : 'VIEWPORT ONLY (large page, full-page markers would be too small)'}`);
+            if (context.scrollY !== undefined && context.scrollY > 0) {
+                parts.push(`Current Scroll: ${context.scrollY}px from top (you've already scrolled down)`);
+            }
+            else {
+                parts.push(`Current Scroll: At top of page (scrollY = 0)`);
+            }
+            const canScrollDown = context.pageHeight > context.viewportHeight;
+            const canScrollRight = context.pageWidth && context.pageWidth > context.viewportWidth;
+            if (!isCompactPage && (canScrollDown || canScrollRight)) {
+                const remainingBelow = Math.max(0, context.pageHeight - context.viewportHeight - (context.scrollY || 0));
+                const remainingRight = context.pageWidth ? Math.max(0, context.pageWidth - context.viewportWidth - (context.scrollX || 0)) : 0;
+                const hiddenContent = [];
+                if (remainingBelow > 0)
+                    hiddenContent.push(`${remainingBelow}px below`);
+                if (remainingRight > 0)
+                    hiddenContent.push(`${remainingRight}px to right`);
+                if (hiddenContent.length > 0) {
+                    parts.push(`Hidden content: ${hiddenContent.join(', ')}`);
+                    parts.push(`💡 If element not found → Call: take_screenshot with {"isFullPage": true, "purpose": "Find X"}`);
+                    parts.push(`   This shows entire page (markers small but LLM can still locate elements)`);
+                }
+            }
+            else if (isCompactPage) {
+                parts.push(`All content visible in screenshot (no need for additional tools)`);
+            }
         }
-        else {
-            // In SoM mode, skip DOM details - agent uses visual screenshot instead
-            parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+        // In SoM mode, element details are in somElementMap (visual screenshot)
+        parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+        parts.push('');
+        // Show current URL with change detection
+        const currentUrl = truncateUrl(context.currentURL || '');
+        const lastAction = context.journeyMemory.history[context.journeyMemory.history.length - 1];
+        if (lastAction && lastAction.previousUrl && lastAction.url !== lastAction.previousUrl) {
+            const prevUrl = truncateUrl(lastAction.previousUrl);
+            const newUrl = truncateUrl(lastAction.url);
+            parts.push(`🔄 URL CHANGED: ${prevUrl} → ${newUrl}`);
+            parts.push(`   ⚠️ Navigation occurred! Previous action likely succeeded and triggered page transition.\n`);
         }
-        if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
-            parts.push('... (truncated)');
+        else {
+            parts.push(`📍 Current URL: ${currentUrl}\n`);
         }
-        parts.push('');
         // Recent steps (most variable content - at the end)
-        if (context.recentSteps.length > 0) {
-            parts.push(`\nRECENT STEPS (last ${context.recentSteps.length}):`);
-            for (const step of context.recentSteps) {
+        const recentStepsDisplay = context.journeyMemory.history.slice(-6);
+        if (recentStepsDisplay.length > 0) {
+            parts.push(`RECENT STEPS (last ${recentStepsDisplay.length}):`);
+            for (const step of recentStepsDisplay) {
                 const status = step.result === 'success' ? '[OK]' : '[FAIL]';
-                parts.push(`  ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}`);
+                const urlChanged = step.previousUrl && step.url !== step.previousUrl
+                    ? ` [URL: ${step.previousUrl} → ${step.url}]`
+                    : '';
+                parts.push(`  ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}${urlChanged}`);
                 parts.push(`     Code: ${step.code}`);
                 if (step.result === 'failure' && step.error) {
                     parts.push(`     ERROR: ${step.error}`);
@@ -615,7 +502,7 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
             }
             parts.push('');
             // Detect repeated failures
-            const recentFailures = context.recentSteps.filter(s => s.result === 'failure');
+            const recentFailures = recentStepsDisplay.filter(s => s.result === 'failure');
             if (recentFailures.length >= 2) {
                 const sameSelector = recentFailures.slice(-2).every((s, i, arr) => i === 0 || s.code === arr[i - 1].code);
                 if (sameSelector) {
@@ -625,18 +512,49 @@ This is a last-resort mechanism, but it WILL work if you provide accurate percen
                 }
             }
         }
-        // Experiences (app-specific patterns learned)
-        if (context.experiences && context.experiences.length > 0) {
-            parts.push(`\nEXPERIENCES (patterns you've learned about this app):`);
-            for (const exp of context.experiences) {
-                parts.push(`  • ${exp}`);
+        // Site learnings (persistent knowledge)
+        if (context.siteLearnings) {
+            const { screens, uxPatterns } = context.siteLearnings;
+            // Display UX patterns with IDs
+            const uxPatternEntries = Object.entries(uxPatterns);
+            if (uxPatternEntries.length > 0) {
+                parts.push(`\n🎯 SITE-WIDE UX PATTERNS (reference [ID] for updates/deletes):`);
+                uxPatternEntries.forEach(([id, text]) => parts.push(`  [${id}] ${text}`));
+                parts.push('');
+            }
+            // Display screen/state vocabulary first (for consistent naming)
+            if (context.siteLearnings?.screenStateVocabulary && Object.keys(context.siteLearnings.screenStateVocabulary).length > 0) {
+                parts.push(`\n📋 SCREEN STATE VOCABULARY (use these names for consistency):`);
+                Object.entries(context.siteLearnings.screenStateVocabulary).forEach(([screenName, stateNames]) => {
+                    const statesDisplay = stateNames.length > 0
+                        ? ` → States: ${stateNames.map(s => s || '""').join(', ')}`
+                        : '';
+                    parts.push(`  • ${screenName}${statesDisplay}`);
+                });
+                parts.push('');
+            }
+            // Display screen state knowledge with IDs
+            if (screens && Object.keys(screens).length > 0) {
+                parts.push(`\n📚 SCREEN STATE KNOWLEDGE (reference [ID] for updates/deletes):`);
+                Object.entries(screens).forEach(([screenName, screenLearnings]) => {
+                    Object.entries(screenLearnings.states).forEach(([state, learning]) => {
+                        const stateLabel = state ? `[${state}]` : '';
+                        parts.push(`\n  ${screenName}${stateLabel}:`);
+                        const obsEntries = Object.entries(learning.observations);
+                        if (obsEntries.length > 0) {
+                            obsEntries.forEach(([id, text]) => {
+                                parts.push(`    [${id}] ${text}`);
+                            });
+                        }
+                    });
+                });
+                parts.push('');
             }
-            parts.push('');
         }
         // Extracted data (from previous extract_data tool calls)
-        if (context.extractedData && Object.keys(context.extractedData).length > 0) {
+        if (context.journeyMemory.extractedData && Object.keys(context.journeyMemory.extractedData).length > 0) {
             parts.push(`\nEXTRACTED DATA (available for use in commands):`);
-            parts.push(JSON.stringify(context.extractedData, null, 2));
+            parts.push(JSON.stringify(context.journeyMemory.extractedData, null, 2));
             parts.push('');
         }
         return parts.join('\n');
@@ -653,19 +571,20 @@ DISCRETE EXPERIENCE LOOP:
 You operate in iterations: receive state → decide → sleep → wake with new state.
 System waits for page stability after each batch.
-CRITICAL: MEMORY ISOLATION
-You have NO memory between iterations. Each "wake up" is like a fresh start - you only see:
-- Current screenshot
-- Current journey goal
-- Previous step descriptions
+CRITICAL: NO SCREENSHOT MEMORY (STATELESS!)
+Each iteration you receive:
+- Current screenshot (NOT previous screenshots!)
+- Past actions (text descriptions, not screenshots)
+- Ongoing memory (experiences, patterns)
 - Your noteToFutureSelf from last iteration
+- Current journey goal
-The noteToFutureSelf is your ONLY way to maintain a continuous stream of thinking across iterations. Use it strategically to:
-• Document your current intentions and strategy
-• Record what you were thinking/planning
-• Give specific advice to your future self about what to look for
-• Note any observations or patterns you've discovered
-• Suggest backup plans if current approach fails
+The noteToFutureSelf is your way to document expectations for verification. MUST include EXPLICIT EXPECTED STATE:
+• ✅ GOOD: "Clicked sidebar menu button (was collapsed). EXPECT: expanded sidebar with 'Dashboard' and 'Reports' visible"
+• ✅ GOOD: "Navigated to /settings. EXPECT: URL changed, 'Save Settings' button visible"
+• ❌ BAD: "Clicked menu" (future you can't verify if it worked!)
+• ❌ BAD: "Clicked ID 8" (ID meaningless without screenshot!)
+Also include: strategy, observations, patterns discovered, backup plans if this fails
 COMMON UX PATTERNS (critical for navigation):
 • Disabled buttons → Fill required fields first to enable them
@@ -680,31 +599,13 @@ COMMON UX PATTERNS (critical for navigation):
 • Lazy loading → Scroll down to load more content
 • Accordions/expandable → Click header to toggle visibility
-YOUR RESPONSE FORMAT - Output JSON matching this interface:
+${RESPONSE_SCHEMA}
-interface AgentDecisionLLMResponse {
-  status: string;              // "continue" | "complete" | "stuck"
-  reasoning: string;           // What you're exploring and why
-  stepSummary?: string;        // Concise 1-sentence summary of what was done in this step
-  // COMMANDS: Array of plain Playwright command strings
-  commands?: string[];         // Example: ["await page.fill('input[name=\"email\"]', 'test@example.com')", ...]
-  commandReasoning?: string;
-  toolCalls?: Array<{          // Tools to call (extract_data for menus, etc.)
-    name: string;
-    params: Record<string, any>;
-  }>;
-  toolReasoning?: string;
-  needsToolResults?: boolean;
-  noteToFutureSelf?: string;
-  coordinateAction?: { ... };
-  experiences?: string[];      // Use for BOTH app patterns AND exploration progress
-  blockerDetected?: { ... };
-  debugInfo?: {                // Meta-learning: suggest prompt improvements (only when very confident)
-    suggestedPromptUpdates?: string;
-    reasoning?: string;
-  };
-}
+YOUR RESPONSE FORMAT - Output JSON matching AgentDecision interface above.
+For exploration mode, also include:
+- stepSummary: Concise 1-sentence summary of what was accomplished this iteration
+- commands: Array of plain Playwright command strings (exploration uses string commands, not SoM)
 EXPLORATION MODE GUIDELINES:
@@ -717,7 +618,7 @@ EXPLORATION MODE GUIDELINES:
 3. **VISIBLE ELEMENTS ONLY**: Screenshot shows viewport only. Only interact with elements you SEE. If not visible, scroll or take_screenshot(isFullPage=true).
-4. **SYSTEMATIC EXPLORATION**: Use extract_data to discover, store in extractedData, track in experiences, check history to avoid repeating, prioritize unexplored areas.
+4. **SYSTEMATIC EXPLORATION**: Use extract_data to discover, store in extractedData, track in siteLearningsUpdate, check history to avoid repeating, prioritize unexplored areas.
 5. **CREATIVE TESTING**: Test functionality thoroughly - try edge cases, verify features work, look for bugs.
@@ -732,7 +633,25 @@ EXPLORATION MODE GUIDELINES:
 11. **STEP SUMMARY**: When you complete actions, provide a concise 1-sentence summary of what was accomplished (e.g., "Logged in successfully", "Navigated to dashboard", "Created new widget"). This is used for step tracking, not future planning.
-12. **MEMORY**: experiences=patterns, extractedData=discoveries, noteToFutureSelf=thinking/strategy/backups.
+12. **MEMORY (STATELESS!)**: You see only current screenshot. MUST write expected state in noteToFutureSelf:
+    - ✅ "Clicked settings button in navbar. EXPECT: settings page with 'Profile' section visible"
+    - ❌ "Clicked settings" (can't verify!)
+    - ❌ "Clicked ID 9" (ID meaningless without screenshot!)
+    - siteLearningsUpdate=persistent knowledge, extractedData=journey discoveries
+SITE LEARNINGS: Build mental model (persistent across journeys)
+- screenState: {screen, state} to identify current context (NEVER: "about:blank", "loading" states)
+- siteLearningsUpdate: Add/update/delete observations per screen-state
+  CRITICAL: NEVER include SoM IDs ("element 9", "ID 5") - they regenerate every page load!
+  ✅ "Workspace selector opens on caret icon click"
+  ❌ "Element 9 opens dropdown with entries 6,7,8"
+WHEN TO STORE:
+✅ After discovering navigation (uxPatterns)
+✅ After learning UI behavior (uxPatterns)
+✅ When understanding screen layout (observations)
+✅ When selector fails (observations)
+❌ Don't store obvious/temporary things
 CRITICAL: You're fully autonomous for THIS journey - no step-by-step instructions provided.
 YOU decide the exploration path to meet the journey goal based on: journey prompt, current state, and memory.`;
@@ -764,21 +683,20 @@ YOU decide the exploration path to meet the journey goal based on: journey promp
             parts.push(`PROGRESS: Step ${stepNumber}/${maxSteps} (you can complete earlier if journey goal met)\n`);
         }
         // Show discovered and tracked data from extractedData
-        if (context.extractedData && Object.keys(context.extractedData).length > 0) {
+        if (context.journeyMemory.extractedData && Object.keys(context.journeyMemory.extractedData).length > 0) {
             parts.push(`\nDISCOVERED DATA (this journey):`);
-            for (const [key, value] of Object.entries(context.extractedData)) {
+            for (const [key, value] of Object.entries(context.journeyMemory.extractedData)) {
                 parts.push(`  ${key}: ${value}`);
             }
         }
         // SoM screenshot (if available)
         if (context.somScreenshot) {
             parts.push(`\n SET-OF-MARKS SCREENSHOT (with element IDs):`);
-            parts.push(`Screenshot shows VIEWPORT ONLY (current visible area, not full page).`);
-            parts.push(`Color-coded bounding boxes mark interactive elements in the viewport.`);
+            parts.push(`Screenshot shows FULL PAGE (all content, including below-fold elements).`);
+            parts.push(`Color-coded bounding boxes mark ALL interactive elements across entire page.`);
             parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
             parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
             parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
-            parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
             parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
             parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
             parts.push(``);
@@ -799,41 +717,72 @@ YOU decide the exploration path to meet the journey goal based on: journey promp
         }
         parts.push(`\nCURRENT PAGE:`);
         parts.push(`URL: ${context.currentURL}`);
-        parts.push(`Title: ${context.currentPageInfo.title}`);
-        // Only include DOM details if NOT in SoM mode
-        if (!context.somScreenshot) {
-            parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
-            parts.push(context.currentPageInfo.formattedElements);
-            parts.push(`\nARIA TREE (hierarchical structure):`);
-            parts.push(JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2).substring(0, 5000));
+        parts.push(`Title: ${context.currentPageTitle}`);
+        // In SoM mode, element details are in somElementMap
+        parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+        // Recent actions
+        // Show current URL with change detection
+        const currentUrl = truncateUrl(context.currentURL || '');
+        const lastAction = context.journeyMemory.history[context.journeyMemory.history.length - 1];
+        if (lastAction && lastAction.previousUrl && lastAction.url !== lastAction.previousUrl) {
+            const prevUrl = truncateUrl(lastAction.previousUrl);
+            const newUrl = truncateUrl(lastAction.url);
+            parts.push(`\n🔄 URL CHANGED: ${prevUrl} → ${newUrl}`);
+            parts.push(`   ⚠️ Navigation occurred! Previous action likely triggered page transition.\n`);
         }
         else {
-            // In SoM mode, skip DOM details - agent uses visual screenshot
-            parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+            parts.push(`\n📍 Current URL: ${currentUrl}\n`);
         }
-        if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
-            parts.push('... (truncated)');
-        }
-        // Recent actions
-        if (context.recentSteps.length > 0) {
-            parts.push(`\nRECENT ACTIONS (last ${context.recentSteps.length}):`);
-            for (const step of context.recentSteps) {
+        const recentActions = context.journeyMemory.history.slice(-6);
+        if (recentActions.length > 0) {
+            parts.push(`RECENT ACTIONS (last ${recentActions.length}):`);
+            for (const step of recentActions) {
                 const status = step.result === 'success' ? '[OK]' : '[FAIL]';
-                parts.push(`  ${status} ${step.action}`);
+                const urlChanged = step.previousUrl && step.url !== step.previousUrl
+                    ? ` [URL: ${truncateUrl(step.previousUrl)} → ${truncateUrl(step.url)}]`
+                    : '';
+                parts.push(`  ${status} ${step.action}${urlChanged}`);
                 parts.push(`     ${step.observation}`);
             }
         }
-        // Learnings and exploration progress
-        if (context.experiences && context.experiences.length > 0) {
-            parts.push(`\nEXPLORATION NOTES & APP PATTERNS:`);
-            for (const exp of context.experiences) {
-                parts.push(`  • ${exp}`);
+        // Site learnings
+        if (context.siteLearnings) {
+            const { screens, uxPatterns } = context.siteLearnings;
+            const uxPatternEntries = Object.entries(uxPatterns);
+            if (uxPatternEntries.length > 0) {
+                parts.push(`\n🎯 SITE-WIDE UX PATTERNS (reference [ID] for updates/deletes):`);
+                uxPatternEntries.forEach(([id, text]) => parts.push(`  [${id}] ${text}`));
+            }
+            // Display screen/state vocabulary first (for consistent naming)
+            if (context.siteLearnings?.screenStateVocabulary && Object.keys(context.siteLearnings.screenStateVocabulary).length > 0) {
+                parts.push(`\n📋 SCREEN STATE VOCABULARY (use these names for consistency):`);
+                Object.entries(context.siteLearnings.screenStateVocabulary).forEach(([screenName, stateNames]) => {
+                    const statesDisplay = stateNames.length > 0
+                        ? ` → States: ${stateNames.map(s => s || '""').join(', ')}`
+                        : '';
+                    parts.push(`  • ${screenName}${statesDisplay}`);
+                });
+            }
+            if (screens && Object.keys(screens).length > 0) {
+                parts.push(`\n📚 SCREEN STATE KNOWLEDGE (reference [ID] for updates/deletes):`);
+                Object.entries(screens).forEach(([screenName, screenLearnings]) => {
+                    Object.entries(screenLearnings.states).forEach(([state, learning]) => {
+                        const stateLabel = state ? `[${state}]` : '';
+                        parts.push(`\n  ${screenName}${stateLabel}:`);
+                        const obsEntries = Object.entries(learning.observations);
+                        if (obsEntries.length > 0) {
+                            obsEntries.forEach(([id, text]) => {
+                                parts.push(`    [${id}] ${text}`);
+                            });
+                        }
+                    });
+                });
             }
         }
         // Note from previous iteration
-        if (context.noteFromPreviousIteration) {
-            parts.push(`\nYOUR NOTE FROM LAST ITERATION: ${context.noteFromPreviousIteration.content}`);
-            parts.push(`Did it work? If yes, continue plan. If failed, try backup alternatives.`);
+        if (context.journeyMemory.latestNote) {
+            parts.push(`\nYOUR NOTE FROM LAST ITERATION: ${context.journeyMemory.latestNote.content}`);
+            parts.push(`   ^^ Follow your own instructions from previous iteration ^^`);
         }
         parts.push(`\nDECIDE NEXT ACTION: What to explore/test next? Check history to avoid repeating. Is goal achieved? Mark complete.`);
         return parts.join('\n');