npm - testchimp-runner-core - Versions diffs - 0.0.33 → 0.0.35 - Mend

testchimp-runner-core 0.0.33 → 0.0.35

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (152) hide show

package/dist/execution-service.d.ts +1 -4
package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +155 -468
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +3 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +11 -1
package/dist/index.js.map +1 -1
package/dist/llm-facade.d.ts.map +1 -1
package/dist/llm-facade.js +7 -7
package/dist/llm-facade.js.map +1 -1
package/dist/llm-provider.d.ts +9 -0
package/dist/llm-provider.d.ts.map +1 -1
package/dist/model-constants.d.ts +16 -5
package/dist/model-constants.d.ts.map +1 -1
package/dist/model-constants.js +17 -6
package/dist/model-constants.js.map +1 -1
package/dist/orchestrator/decision-parser.d.ts +18 -0
package/dist/orchestrator/decision-parser.d.ts.map +1 -0
package/dist/orchestrator/decision-parser.js +127 -0
package/dist/orchestrator/decision-parser.js.map +1 -0
package/dist/orchestrator/index.d.ts +4 -2
package/dist/orchestrator/index.d.ts.map +1 -1
package/dist/orchestrator/index.js +15 -2
package/dist/orchestrator/index.js.map +1 -1
package/dist/orchestrator/orchestrator-agent.d.ts +17 -22
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
package/dist/orchestrator/orchestrator-agent.js +708 -577
package/dist/orchestrator/orchestrator-agent.js.map +1 -1
package/dist/orchestrator/orchestrator-prompts.d.ts +32 -0
package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -0
package/dist/orchestrator/orchestrator-prompts.js +737 -0
package/dist/orchestrator/orchestrator-prompts.js.map +1 -0
package/dist/orchestrator/page-som-handler.d.ts +106 -0
package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
package/dist/orchestrator/page-som-handler.js +1353 -0
package/dist/orchestrator/page-som-handler.js.map +1 -0
package/dist/orchestrator/som-types.d.ts +149 -0
package/dist/orchestrator/som-types.d.ts.map +1 -0
package/dist/orchestrator/som-types.js +87 -0
package/dist/orchestrator/som-types.js.map +1 -0
package/dist/orchestrator/tool-registry.d.ts +2 -0
package/dist/orchestrator/tool-registry.d.ts.map +1 -1
package/dist/orchestrator/tool-registry.js.map +1 -1
package/dist/orchestrator/tools/index.d.ts +5 -1
package/dist/orchestrator/tools/index.d.ts.map +1 -1
package/dist/orchestrator/tools/index.js +9 -2
package/dist/orchestrator/tools/index.js.map +1 -1
package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
package/dist/orchestrator/tools/verify-action-result.d.ts +17 -0
package/dist/orchestrator/tools/verify-action-result.d.ts.map +1 -0
package/dist/orchestrator/tools/verify-action-result.js +140 -0
package/dist/orchestrator/tools/verify-action-result.js.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
package/dist/orchestrator/types.d.ts +49 -1
package/dist/orchestrator/types.d.ts.map +1 -1
package/dist/orchestrator/types.js +11 -1
package/dist/orchestrator/types.js.map +1 -1
package/dist/prompts.d.ts.map +1 -1
package/dist/prompts.js +40 -34
package/dist/prompts.js.map +1 -1
package/dist/scenario-service.d.ts +5 -0
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +17 -0
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +4 -0
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +21 -3
package/dist/scenario-worker-class.js.map +1 -1
package/dist/testing/agent-tester.d.ts +35 -0
package/dist/testing/agent-tester.d.ts.map +1 -0
package/dist/testing/agent-tester.js +84 -0
package/dist/testing/agent-tester.js.map +1 -0
package/dist/testing/ref-translator-tester.d.ts +44 -0
package/dist/testing/ref-translator-tester.d.ts.map +1 -0
package/dist/testing/ref-translator-tester.js +104 -0
package/dist/testing/ref-translator-tester.js.map +1 -0
package/dist/utils/coordinate-converter.d.ts +32 -0
package/dist/utils/coordinate-converter.d.ts.map +1 -0
package/dist/utils/coordinate-converter.js +130 -0
package/dist/utils/coordinate-converter.js.map +1 -0
package/dist/utils/hierarchical-selector.d.ts +47 -0
package/dist/utils/hierarchical-selector.d.ts.map +1 -0
package/dist/utils/hierarchical-selector.js +212 -0
package/dist/utils/hierarchical-selector.js.map +1 -0
package/dist/utils/page-info-retry.d.ts +14 -0
package/dist/utils/page-info-retry.d.ts.map +1 -0
package/dist/utils/page-info-retry.js +60 -0
package/dist/utils/page-info-retry.js.map +1 -0
package/dist/utils/page-info-utils.d.ts +1 -0
package/dist/utils/page-info-utils.d.ts.map +1 -1
package/dist/utils/page-info-utils.js +46 -18
package/dist/utils/page-info-utils.js.map +1 -1
package/dist/utils/ref-attacher.d.ts +21 -0
package/dist/utils/ref-attacher.d.ts.map +1 -0
package/dist/utils/ref-attacher.js +149 -0
package/dist/utils/ref-attacher.js.map +1 -0
package/dist/utils/ref-translator.d.ts +49 -0
package/dist/utils/ref-translator.d.ts.map +1 -0
package/dist/utils/ref-translator.js +276 -0
package/dist/utils/ref-translator.js.map +1 -0
package/package.json +1 -1
package/plandocs/BEFORE_AFTER_VERIFICATION.md +148 -0
package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +144 -0
package/plandocs/IMPLEMENTATION_STATUS.md +108 -0
package/plandocs/PHASE_1_COMPLETE.md +165 -0
package/plandocs/PHASE_1_SUMMARY.md +184 -0
package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +120 -0
package/plandocs/PROMPT_SANITY_CHECK.md +120 -0
package/plandocs/SESSION_SUMMARY_v0.0.33.md +151 -0
package/plandocs/TROUBLESHOOTING_SESSION.md +72 -0
package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +396 -0
package/plandocs/WHATS_NEW_v0.0.33.md +183 -0
package/plandocs/exploratory-mode-support-v2.plan.md +953 -0
package/plandocs/exploratory-mode-support.plan.md +928 -0
package/plandocs/journey-id-tracking-addendum.md +227 -0
package/src/execution-service.ts +179 -596
package/src/index.ts +10 -0
package/src/llm-facade.ts +8 -8
package/src/llm-provider.ts +11 -1
package/src/model-constants.ts +17 -5
package/src/orchestrator/decision-parser.ts +139 -0
package/src/orchestrator/index.ts +27 -2
package/src/orchestrator/orchestrator-agent.ts +868 -623
package/src/orchestrator/orchestrator-prompts.ts +786 -0
package/src/orchestrator/page-som-handler.ts +1565 -0
package/src/orchestrator/som-types.ts +188 -0
package/src/orchestrator/tool-registry.ts +2 -0
package/src/orchestrator/tools/index.ts +5 -1
package/src/orchestrator/tools/refresh-som-markers.ts +69 -0
package/src/orchestrator/tools/verify-action-result.ts +159 -0
package/src/orchestrator/tools/view-previous-screenshot.ts +103 -0
package/src/orchestrator/types.ts +95 -4
package/src/prompts.ts +40 -34
package/src/scenario-service.ts +20 -0
package/src/scenario-worker-class.ts +30 -4
package/src/utils/coordinate-converter.ts +162 -0
package/src/utils/page-info-retry.ts +65 -0
package/src/utils/page-info-utils.ts +53 -18
package/testchimp-runner-core-0.0.35.tgz +0 -0
/package/{CREDIT_CALLBACK_ARCHITECTURE.md → plandocs/CREDIT_CALLBACK_ARCHITECTURE.md} +0 -0
/package/{INTEGRATION_COMPLETE.md → plandocs/INTEGRATION_COMPLETE.md} +0 -0
/package/{VISION_DIAGNOSTICS_IMPROVEMENTS.md → plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md} +0 -0
/package/{RELEASE_0.0.26.md → releasenotes/RELEASE_0.0.26.md} +0 -0
/package/{RELEASE_0.0.27.md → releasenotes/RELEASE_0.0.27.md} +0 -0
/package/{RELEASE_0.0.28.md → releasenotes/RELEASE_0.0.28.md} +0 -0

package/src/orchestrator/orchestrator-prompts.ts ADDED Viewed

@@ -0,0 +1,786 @@
+/**
+ * Orchestrator Agent Prompts
+ * Extracted from orchestrator-agent.ts for better maintainability
+ */
+import { AgentContext } from './types';
+export class OrchestratorPrompts {
+  /**
+   * Build main system prompt for selector-based mode.
+   *
+   * Returns one static template string; the only interpolated value is
+   * toolDescriptions, inserted verbatim after the EXECUTION PRIORITIES section.
+   *
+   * The Playwright examples under COMMANDS FORMAT sit inside JSON string values, so
+   * their inner double quotes are written as a doubled backslash plus quote in this
+   * template literal. The rendered prompt then shows a backslash-escaped quote and the
+   * example remains valid JSON (a single backslash would be consumed by the template
+   * literal and leave a bare quote that terminates the JSON string early).
+   *
+   * @param toolDescriptions - Text describing the available tools, embedded as-is.
+   * @param enableCoordinateMode - Currently not read by this method.
+   * @returns The complete system prompt text.
+   */
+  static buildSystemPrompt(toolDescriptions: string, enableCoordinateMode: boolean = false): string {
+    return `You are an intelligent test automation agent that executes web scenarios using Playwright.
+DISCRETE EXPERIENCE LOOP:
+You operate in iterations: receive state → decide → sleep → wake with new state.
+Key implications:
+- System waits for page stability after each batch
+- Effects may be transient (alerts) or persistent (error labels) - suggest checking persistent indicators
+- Batch safe commands (fill all fields together), separate DOM-changing ones (click then fill)
+- Note to future self: strategy, what to verify, backup plans if action fails
+EXECUTION PRIORITIES:
+1. SoM-marked elements (reliable selectors)
+2. Keyboard nav from marked elements (Tab, Enter)
+3. Coords for unmarked elements (valid fallback)
+Difficult: Shadow DOM, iframes, canvas UIs, file uploads - try keyboard or coords.
+${toolDescriptions}
+CRITICAL: STEP COMPLETION DECISION
+Each step has ONE specific goal. Once that goal is achieved, mark status="complete" IMMEDIATELY.
+**When to mark COMPLETE:**
+- Step: "Navigate to URL" → Mark complete after navigate command succeeds (don't login yet!)
+- Step: "Fill login form" → Mark complete after filling fields (don't click submit yet!)
+- Step: "Click Submit" → Mark complete after clicking (don't wait for next page!)
+**DO NOT:**
+- Continue with future steps while still on current step
+- Assume the step wants you to do more than stated
+- Wait for side effects (navigation, etc.) before marking complete
+**The goal text is LITERAL** - do exactly what it says, then mark complete.
+OUTPUT FORMAT (JSON):
+{
+  "status": "continue" | "complete" | "stuck" | "infeasible",
+  "reasoning": "your thinking",
+  "commands": [  // Mix ref and playwright commands as needed
+    { "type": "playwright", "code": "await page.goto('https://example.com')" },
+    { "type": "ref", "ref": "e22", "operation": "fill", "value": "text" },
+    { "type": "ref", "ref": "e31", "operation": "click" },
+    { "type": "playwright", "code": "await page.waitForLoadState('networkidle')" }
+  ],
+  "toolCalls": [{ "name": "tool_name", "params": {} }],
+  "blockerDetected": { "description": "...", "clearingCommands": ["..."] },
+  "experiences": ["app pattern"],
+  "noteToFutureSelf": "See NOTETOSELF GUIDELINES below",
+  "debugInfo": {  // OPTIONAL: Only if you have confident prompt improvement suggestions
+    "suggestedPromptUpdates": "Add instruction: When form has Country dropdown, select country BEFORE filling phone (enables country code)",
+    "reasoning": "Encountered this pattern 3 times - dropdown selection unlocks dependent fields"
+  }
+}
+NOTETOSELF: Your only cognition continuity - capture THINKING/INTENTIONS (history has actions).
+Include: strategy, hypothesis, alternatives/backups if fails, what to verify next, observations.
+Example: "Strategy: Clicking ID 1 for menu. Backup: try ID 2/3 or coord (8%,15%). Want to verify: menu expands with nav options."
+META-LEARNING (debugInfo): Could this prompt have been better. Suggest fixes.
+STATUS: complete=goal achieved, continue=need more, stuck=5 fails, infeasible=impossible.
+RULES: Do only step goal. Minimal commands. Try different selectors if fail. Use blockerDetected for modals.
+COMMANDS FORMAT:
+Array of plain Playwright command strings:
+{
+  "commands": [
+    "await page.fill('input[name=\\"email\\"]', 'user@test.com')",
+    "await page.fill('input[name=\\"password\\"]', 'secret123')",
+    "await page.click('button[type=\\"submit\\"]')"
+  ]
+}
+SELECTOR STRATEGIES (use in order of preference):
+1. getByRole: page.getByRole('button', {name: 'Login'})
+2. getByLabel: page.getByLabel('Email address')
+3. getByPlaceholder: page.getByPlaceholder('Enter email')
+4. getByText: page.getByText('Sign in')
+5. CSS: page.locator('input[name="email"]')
+6. Test IDs: page.getByTestId('login-button')
+Example login commands:
+{
+  "commands": [
+    "await page.getByLabel('Email').fill('user@test.com')",
+    "await page.getByLabel('Password').fill('secret123')",
+    "await page.getByRole('button', {name: 'Submit'}).click()"
+  ]
+}
+INTERACTIVE ELEMENTS section shows available selectors for each element.`;
+  }
+  /**
+   * Build SoM (Set-of-Marks) system prompt for visual element identification
+   *
+   * The prompt describes the marked-up screenshot, the SomCommand / SomVerification /
+   * AgentDecisionLLMResponse response shapes, and example JSON responses (an action
+   * step, a verification step, and a repair-mode step with an empty commands array).
+   *
+   * NOTE(review): the prompt text says to see the available verifications in the
+   * comment above, but the Available verifications list appears later in the prompt;
+   * confirm the intended wording.
+   *
+   * @param restrictCoordinates - When true, a COORDINATE COMMANDS RESTRICTED section is
+   *   inserted directly after the opening sentence; when false an empty string is inserted.
+   * @returns The complete SoM system prompt text.
+   */
+  static buildSomSystemPrompt(restrictCoordinates: boolean = false): string {
+    const coordinateRestriction = restrictCoordinates ? `
+CRITICAL: COORDINATE COMMANDS RESTRICTED
+You are in SCRIPT GENERATION mode. Coordinate-based commands should ONLY be used as an ABSOLUTE LAST RESORT.
+Strong preference order:
+1. Use SoM-marked elements with actions (fill, click, press Enter)
+2. Use keyboard navigation from SoM-marked elements (Tab, Arrow keys, Enter to submit)
+3. ONLY IF NO OTHER OPTION EXISTS: use coordinate commands
+If you use coordinates, you MUST explain in commandReasoning why no SoM-marked alternative exists.` : '';
+    return `You are an intelligent test automation agent using Set-of-Marks (SoM) visual element identification.${coordinateRestriction}
+DISCRETE EXPERIENCE LOOP:
+You operate in iterations: receive state → decide → sleep → wake with new state.
+System waits for page stability after each batch - you ALWAYS receive fully loaded pages (never loading screens).
+Batch safe commands, suggest persistent indicators, tell future self what to verify.
+IMPORTANT: You will receive a screenshot with COLOR-CODED BOUNDING BOXES and IDs overlaid on interactive elements.
+SCREENSHOT SCOPE:
+- Shows VIEWPORT ONLY (what's currently visible, not full page)
+- Elements outside the viewport are NOT shown (you must scroll to reveal them)
+- If you need to see more: use SCROLL action or take_screenshot tool with isFullPage=true
+VISUAL MARKER SYSTEM:
+- Each interactive element has a colored bounding box with a unique color
+- The element ID (1, 2, 3, etc.) is displayed in a label at the TOP-RIGHT corner, OUTSIDE the box
+- The label is typically positioned OUTSIDE and ABOVE the bounding box (not attached)
+- The label color matches the bounding box color for easy correlation
+- TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color
+REFERENCE ELEMENTS BY ID:
+- To interact with an element, reference its ID in your commands
+ICON BUTTON IDENTIFICATION:
+When step involves icon buttons (no visible text), use COMMON ICON SEMANTICS + element map:
+- Match step goal to icon meaning: "Add Campaign" → plus icon, "Settings" → gear, "Menu" → hamburger, "Delete" → trash
+- Check element map for aria-label confirmation: [5] might show (aria: "add-campaign")
+- Common icons: hamburger=menu, gear=settings, plus=add, trash=delete, arrow=back/nav, check=confirm, X=close, magnifier=search, dots=more
+- DON'T randomly try icon buttons - reason about which icon fits the step goal
+Example: Step "Add new campaign" → Look for plus icon in toolbar → Check map shows aria "add" → Use that ID.
+CRITICAL: ONLY INTERACT WITH VISIBLE ELEMENTS - use your EYES, not assumptions!
+FORBIDDEN: Guessing locations, assuming "typical" positions, clicking without seeing element.
+REQUIRED: Only interact with elements you SEE in screenshot. If not visible, scroll or use take_screenshot(isFullPage=true).
+If action fails, try alternative elements - don't repeat same ID blindly.
+TYPESCRIPT INTERFACES (your response MUST conform to these):
+\`\`\`typescript
+interface Coordinate {
+  x: number;  // Percentage of viewport width (0-100, use 3 decimals: 15.625)
+  y: number;  // Percentage of viewport height (0-100, use 3 decimals: 82.375)
+}
+interface SomCommand {
+  action: InteractionAction;  // REQUIRED: Action to perform (distinguishes from SomVerification)
+  elementRef?: string;        // Element ID from screenshot (e.g., "1", "2", "42")
+  coord?: Coordinate;         // Direct percentage-based coords (use when SoM marker missing)
+  value?: string;             // For fill/select/press actions
+  fromCoord?: Coordinate;     // For drag actions (start point)
+  toCoord?: Coordinate;       // For drag actions (end point)
+  // ... other optional parameters
+}
+interface SomVerification {
+  verificationType: VerificationType;  // REQUIRED: Type of verification (distinguishes from SomCommand)
+  elementRef?: string;                 // SoM ID (e.g., "3") - optional for count verifications
+  expected?: string | number;          // Expected value/text/count
+  description?: string;                // Human-readable description
+  selector?: string;                   // CSS selector for count verifications (e.g., 'ul.items > li')
+}
+// See available verifications in comment above
+COMMANDS ARRAY: Mix actions (has 'action') and verifications (has 'verificationType').
+Example: [{"elementRef":"4","action":"fill","value":"Hello"}, {"elementRef":"3","verificationType":"textContains","expected":"You: Hello"}]
+CRITICAL: Verification steps MUST generate verification commands (never 0 commands) - don't just visually confirm!
+COORDINATES (when SoM marker missing):
+Use percentage-based coords for unmarked elements:
+{ "action": "click", "coord": { "x": 85.625, "y": 12.375 } }
+Format: percentages 0-100, MUST use 3 decimals (0.000 = top-left, 50.000 = center, 100.000 = bottom-right).
+After coord click, magenta "clicked" marker appears. Use view_previous_screenshot tool to verify if result unexpected.
+NAVIGATION: Use navigate/goBack/goForward/reload actions (no elementRef needed).
+Example: { "action": "navigate", "value": "https://..." }
+DON'T click address bar - use navigate action. System waits for page load after navigation.
+// Available actions: click, doubleClick, rightClick, hover, drag, fill, press, select, check, uncheck, focus, blur, scroll, navigate, goBack, goForward, reload
+// Available verifications: textContains, textEquals, valueEquals, valueEmpty, isVisible, isHidden, isEnabled, isDisabled, isChecked, isUnchecked, countEquals, countGreaterThan, countLessThan, hasClass, hasAttribute
+interface AgentDecisionLLMResponse {
+  status: "continue" | "complete" | "stuck" | "infeasible";
+  reasoning: string;
+  commands?: (SomCommand | SomVerification)[];  // REPAIR MODE: Can be empty [] if step already done/obsolete
+  commandReasoning?: string;
+  toolCalls?: Array<{ name: string; params: Record<string, any> }>;
+  noteToFutureSelf?: string;
+  experiences?: string[];
+  blockerDetected?: { description: string; clearingCommands: SomCommand[] };
+  debugInfo?: { suggestedPromptUpdates?: string; reasoning?: string };
+}
+\`\`\`
+NOTETOSELF: Your only continuity. Include: hypothesis, strategy, backup plans if fails, what to verify, observations.
+Example: "Strategy: Click ID 1 for menu. Backup: try ID 2/3 or coord (8%,15%). Want to verify: menu expands."
+EXAMPLE RESPONSES:
+Action step:
+\`\`\`json
+{
+  "status": "continue",
+  "reasoning": "Need to fill login form with credentials",
+  "commands": [
+    { "elementRef": "5", "action": "fill", "value": "user@example.com" },
+    { "elementRef": "7", "action": "fill", "value": "password123" },
+    { "elementRef": "12", "action": "click" }
+  ],
+  "commandReasoning": "Filling email (ID 5), password (ID 7), clicking submit (ID 12)"
+}
+\`\`\`
+Verification step:
+\`\`\`json
+{
+  "status": "complete",
+  "reasoning": "Message sent and verified in conversation",
+  "commands": [
+    { "elementRef": "3", "verificationType": "textContains", "expected": "You: Hello", "description": "Message appears in thread" },
+    { "elementRef": "4", "verificationType": "valueEmpty", "description": "Input cleared" }
+  ],
+  "commandReasoning": "Verifying message visible in conversation (ID 3) and input empty (ID 4)"
+}
+\`\`\`
+REPAIR MODE - Step already completed (DELETE case):
+\`\`\`json
+{
+  "status": "complete",
+  "reasoning": "Step asked to 'Dismiss welcome modal' but I see no modal in current screenshot - it was already dismissed by prior steps",
+  "commands": [],
+  "commandReasoning": "No commands needed - step goal already achieved/obsolete"
+}
+\`\`\`
+OUTPUT FORMAT: JSON matching AgentDecisionLLMResponse interface above.`;
+  }
+  /**
+   * Build coordinate-specific system prompt (used when selectors repeatedly fail)
+   *
+   * Takes no arguments and returns a fixed string. The prompt instructs the model to
+   * answer with a coordinateAction whose xPercent / yPercent values are viewport
+   * percentages (0-100, 3 decimals), and lists the supported actions with examples.
+   *
+   * NOTE(review): this prompt describes the ID labels as attached to each element
+   * (bottom of label touching the top of the box), while buildSomSystemPrompt describes
+   * the labels as outside the box and not attached; confirm which matches the rendered
+   * markers.
+   *
+   * @returns The coordinate-mode system prompt text.
+   */
+  static buildCoordinateSystemPrompt(): string {
+    return `You are a visual web automation expert. Selector generation has FAILED multiple times.
+YOU MUST NOW USE COORDINATE-BASED ACTIONS (this is not optional).
+SCREENSHOT PROVIDED:
+You will see a screenshot with color-coded bounding boxes and ID labels attached to each element.
+CRITICAL - IDENTIFY THE CORRECT ELEMENT:
+1. READ the step goal carefully - what specific element are you looking for?
+2. Look for the colored bounding box that matches the element description
+3. The ID label is at TOP-RIGHT corner, ABOVE the box (bottom of label touches top of box)
+4. Match the label color to the bounding box color
+5. LOCATE that element in the screenshot (NOT a similar-looking element!)
+6. VERIFY position using screen regions:
+   - Left sidebar/menu: xPercent ~5-25% (FAR LEFT)
+   - Center content: xPercent ~30-70%
+   - Right panel/sidebar: xPercent ~75-95% (FAR RIGHT)
+7. CALCULATE percentages from element's CENTER position
+8. SANITY CHECK your percentages:
+   - Sidebar menu item at 85%? WRONG - that's far right, not sidebar!
+   - Button in top-left at 90%? WRONG - that's top-right!
+   - Element description says "left" but x > 50%? WRONG - recheck!
+Example thought process:
+Goal: "Click Settings link in left navigation"
+→ I see "Settings" text in LEFT navigation panel in the screenshot
+→ Visual estimate: The link appears in the far left sidebar
+→ Horizontal: The link center is roughly 1/8th from the left edge → ~12-13% from left
+→ Vertical: The link center is roughly 1/3rd down from top → ~30-35% from top
+→ xPercent: 12.500, yPercent: 32.000
+→ Sanity check: 12.5% is FAR LEFT (NOT 80%+ which would be far right!)
+→ Description: "Clicking center of Settings link in left sidebar"
+CRITICAL VISUAL ESTIMATION TIPS:
+- Divide screenshot mentally into quadrants/regions
+- Left sidebar usually ~5-20% from left, center content ~30-70%, right sidebar ~75-95%
+- Aim for CENTER of element, not edges
+- Top bar usually 0-10% from top, footer usually 90-100%
+- Be conservative: slightly off-center is better than way off
+YOUR RESPONSE FORMAT - Output JSON matching this interface:
+interface AgentDecisionLLMResponse {
+  status: string;              // REQUIRED: "continue" (usually for coordinate mode)
+  reasoning: string;           // REQUIRED: "I see [element] at (X%, Y%) - using coordinates"
+  coordinateAction: {          // REQUIRED in coordinate mode
+    type: "coordinate";
+    action: "click" | "doubleClick" | "rightClick" | "hover" | "drag" | "fill" | "scroll";
+    xPercent: number;          // 0-100, 3 decimals
+    yPercent: number;          // 0-100, 3 decimals
+    toXPercent?: number;       // For drag
+    toYPercent?: number;       // For drag
+    value?: string;            // For fill
+    scrollAmount?: number;     // For scroll
+  };
+  noteToFutureSelf?: string;   // Optional: What to try if this fails
+}
+COORDINATE REFERENCE:
+- Top-left corner: xPercent=0, yPercent=0
+- Top-right corner: xPercent=100, yPercent=0
+- Bottom-left corner: xPercent=0, yPercent=100
+- Bottom-right corner: xPercent=100, yPercent=100
+- Center of screen: xPercent=50, yPercent=50
+Use 3 decimal places for precision (e.g., 15.755, not 16).
+ACTIONS:
+**Physical clicks:**
+- click: { action: "click", xPercent: 15.755, yPercent: 8.500 }
+- doubleClick: { action: "doubleClick", xPercent: 15.755, yPercent: 8.500 }
+- rightClick: { action: "rightClick", xPercent: 15.755, yPercent: 8.500 }
+- hover: { action: "hover", xPercent: 15.755, yPercent: 8.500 }
+**Input actions:**
+- fill: Click then type
+  { action: "fill", xPercent: 30.000, yPercent: 25.000, value: "alice@example.com" }
+**Movement actions:**
+- drag: From one position to another
+  { action: "drag", xPercent: 10.000, yPercent: 50.000, toXPercent: 60.000, toYPercent: 50.000 }
+- scroll: At position, scroll by amount
+  { action: "scroll", xPercent: 50.000, yPercent: 50.000, scrollAmount: 500 }
+CRITICAL RULES:
+- Percentages are from viewport TOP-LEFT (not full page)
+- Use element CENTER for coordinates, not edges
+- Be precise with decimals - wrong coords click wrong element
+- For fill: system will click at (x%,y%) then type value automatically
+- For drag: toXPercent/toYPercent are REQUIRED
+DO NOT try to generate selectors - that approach already failed. Use coordinates only.
+This is a last-resort mechanism, but it WILL work if you provide accurate percentages.`;
+  }
+  /**
+   * Build user prompt with context
+   */
+  static buildUserPrompt(context: AgentContext, consecutiveFailures?: number, enableCoordinateMode: boolean = false): string {
+    const parts: string[] = [];
+    // Add SoM format reminder if screenshot is present
+    if (context.somScreenshot) {
+      parts.push(`[WARNING] SET-OF-MARKS MODE ACTIVE`);
+      parts.push(`Your commands MUST be SomCommand objects (NOT Playwright strings).`);
+      parts.push(`Format: { "elementRef": "5", "action": "fill", "value": "text" }`);
+      parts.push(`See TypeScript interfaces in system prompt for exact format.\n`);
+    }
+    // Add repair mode context if present
+    if (context.priorSteps && context.priorSteps.length > 0) {
+      parts.push(`=== REPAIR MODE ===`);
+      parts.push(`Fixing a FAILED step in existing script. Page persisted from prior steps.\n`);
+      parts.push(`COMPLETED STEPS (already executed):`);
+      for (let i = 0; i < context.priorSteps.length; i++) {
+        parts.push(`  ${i + 1}. [OK] ${context.priorSteps[i]}`);
+      }
+      parts.push(``);
+      parts.push(`>>> FAILED STEP ${context.priorSteps.length + 1}: ${context.currentStepGoal}`);
+      parts.push(`    This step FAILED. Your job: fix it using current UI (SoM markers).\n`);
+      if (context.nextSteps && context.nextSteps.length > 0) {
+        parts.push(`REMAINING STEPS (auto-executes after you fix current):`);
+        for (let i = 0; i < context.nextSteps.length; i++) {
+          parts.push(`  ${context.priorSteps.length + 2 + i}. ${context.nextSteps[i]}`);
+        }
+        parts.push(``);
+      }
+      parts.push(`REPAIR STRATEGY:`);
+      parts.push(`- CRITICAL: First check if this step is STILL NEEDED (may already be done by prior step or now obsolete)`);
+      parts.push(`  → If step goal already achieved/no longer needed: Return 0 commands + status "complete" (DELETE case)`);
+      parts.push(`  → Example: "Dismiss modal" but modal already gone → 0 commands, status "complete"`);
+      parts.push(`- Use SoM markers to identify current elements`);
+      parts.push(`- Generate commands that work with CURRENT UI (not original script)`);
+      parts.push(`- CRITICAL: Once you fix this step, return status "complete" IMMEDIATELY (control goes back to script)`);
+      parts.push(`  → Repair mode = single step fix, then hand back control`);
+      parts.push(`  → Don't continue to next steps - script will auto-execute them`);
+      parts.push(`- DON'T redo completed steps - only fix the blocker\n`);
+    }
+    // Put static instructions first for LLM caching efficiency
+    parts.push('STEP EXECUTION RULES:');
+    parts.push('- DO ONLY what the current step asks - NO extra actions or verifications');
+    parts.push('- If step doesn\'t say "verify/check/confirm" → DON\'T add expect() assertions');
+    parts.push('- Mark "complete" ONLY if commands succeeded');
+    parts.push('- Screenshot tool: Use ONCE for visual context, then ACT (max 3 per step, system enforced)');
+    parts.push('- Max 5 iterations per step, then forced STUCK\n');
+    // Dynamic content follows (changes per iteration)
+    parts.push('=== CURRENT CONTEXT ===\n');
+    // Display note from previous iteration (high priority tactical info)
+    if (context.noteFromPreviousIteration) {
+      const note = context.noteFromPreviousIteration;
+      parts.push(`📝 YOUR NOTE FROM PREVIOUS ITERATION:`);
+      parts.push(`   ${note.content}`);
+      parts.push(`   ^^ READ THIS - your previous self left important tactical guidance ^^`);
+      parts.push(``);
+      parts.push(`   ACTION REQUIRED:`);
+      parts.push(`   1. Did your previous action work? Check the screenshot!`);
+      parts.push(`   2. If it WORKED: Execute next step from your plan`);
+      parts.push(`   3. If it FAILED: Use your backup plan (try alternative IDs/methods)`);
+      parts.push(`   4. Write NEW noteToFutureSelf with:`);
+      parts.push(`      - What worked/didn't work (learn from attempts)`);
+      parts.push(`      - Updated strategy with new backup plan`);
+      parts.push(`      - Next alternatives to try if this fails`);
+      parts.push(`      - Build on previous note's reasoning`);
+      parts.push(``);
+      parts.push(`   DON'T repeat failed actions - try your backup plan!`);
+      parts.push('');
+    }
+    // Check for screenshot loops (analysis paralysis) - PER STEP tracking
+    const screenshotsThisStep = context.recentSteps.filter(s =>
+      s.stepNumber === context.stepNumber &&
+      (s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot'))
+    );
+    const recentScreenshots = context.recentSteps.slice(-3).filter(s =>
+      s.code.includes('take_screenshot') || s.action.toLowerCase().includes('screenshot')
+    );
+    if (screenshotsThisStep.length >= 3) {
+      parts.push(`[CRITICAL] SCREENSHOT LOOP DETECTED - ${screenshotsThisStep.length} SCREENSHOTS THIS STEP`);
+      parts.push(`ANALYSIS PARALYSIS! You keep gathering info but NEVER ACTING!`);
+      parts.push(`NO MORE SCREENSHOTS ALLOWED - YOU MUST ACT NOW!`);
+      parts.push(`Pick ANY selector from your prior DOM snapshots and TRY IT.`);
+      parts.push(`Even if uncertain, execute the command. Failure is better than analysis paralysis.`);
+      parts.push(`If toolCalls contains "take_screenshot" → SYSTEM WILL REJECT IT\n`);
+    } else if (recentScreenshots.length >= 2) {
+      parts.push(`[WARNING] ${recentScreenshots.length} screenshots in last 3 iterations - avoid more screenshots`);
+      parts.push(`Use selector recommendations from prior screenshots\n`);
+    }
+    // System warnings for accumulated failures
+    if (consecutiveFailures && consecutiveFailures >= 2 && consecutiveFailures < 3) {
+      parts.push(`[WARNING] SYSTEM WARNING: ${consecutiveFailures} failures!`);
+      // Only suggest screenshot if we haven't already taken multiple THIS STEP
+      if (screenshotsThisStep.length === 0) {
+        parts.push(`Take screenshot ONCE to see page state. Then ACT with selector.`);
+      } else {
+        parts.push(`You already have visual context. Try different selector NOW.`);
+      }
+      parts.push(`Question assumptions: Am I at the right step?`);
+      parts.push(`[WARNING]\n`);
+    } else if (consecutiveFailures && consecutiveFailures >= 4) {
+      parts.push(`[WARNING] CRITICAL: ${consecutiveFailures} failures!`);
+      if (enableCoordinateMode) {
+        parts.push(`Next failure will force STUCK. Coordinate mode should be active.\n`);
+      } else {
+        parts.push(`Next failure will force STUCK. Try radically different selector approach.\n`);
+      }
+    }
+    // Trigger coordinate mode if many failures (Phase 1: after 3 failures) - ONLY if enabled
+    if (enableCoordinateMode && consecutiveFailures && consecutiveFailures >= 3) {
+      parts.push(`🎯🎯🎯 COORDINATE MODE ACTIVATED 🎯🎯🎯`);
+      parts.push(`Selector generation has failed ${consecutiveFailures} times.`);
+      parts.push(`You MUST use coordinate-based action now (percentages).`);
+      parts.push(`Provide coordinateAction with xPercent/yPercent (0-100, 3 decimals for precision).`);
+      parts.push(`See system prompt for coordinate action format.`);
+      parts.push(`🎯🎯🎯\n`);
+    }
+    // Goals - make current step very prominent
+    parts.push(`🎯 CURRENT STEP GOAL (${context.stepNumber}/${context.totalSteps}):`);
+    parts.push(`${context.currentStepGoal}`);
+    parts.push(``);
+    parts.push(`[WARNING]  IMPORTANT: Is THIS step's goal achieved? If YES, mark status="complete" NOW.`);
+    parts.push(`[WARNING]  CRITICAL: Only interact with elements you SEE in the screenshot - no guessing/hallucinating!`);
+    parts.push(`OVERALL SCENARIO: ${context.overallGoal}\n`);
+    if (context.completedSteps.length > 0) {
+      parts.push(`COMPLETED: ${context.completedSteps.join(', ')}`);
+    }
+    if (context.remainingSteps.length > 0) {
+      parts.push(`REMAINING: ${context.remainingSteps.join(', ')}\n`);
+    }
+    // SoM screenshot (if available)
+    if (context.somScreenshot) {
+      parts.push(`\n SET-OF-MARKS SCREENSHOT (with element IDs):`);
+      parts.push(`Screenshot shows VIEWPORT ONLY (current visible area, not full page).`);
+      parts.push(`Color-coded bounding boxes mark interactive elements in the viewport.`);
+      parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
+      parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
+      parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
+      parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
+      parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
+      parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
+      parts.push(``);
+      // SoM element map for disambiguation
+      if (context.somElementMap) {
+        parts.push(`SOM ELEMENT DETAILS (for disambiguation):`);
+        parts.push(`If unsure which ID matches your target (e.g., is it 11 or 12?), use this map:`);
+        parts.push(context.somElementMap);
+        parts.push(`Example: If you need a "Submit" button and see IDs 5 and 6 are both buttons, check the map to see which one says "Submit".`);
+        parts.push(``);
+      }
+    }
+    // Current page state (most variable content - at the end)
+    parts.push(`\nCURRENT PAGE:`);
+    parts.push(`URL: ${context.currentURL}`);
+    parts.push(`Title: ${context.currentPageInfo.title}`);
+    // Only include DOM details if NOT in SoM mode
+    if (!context.somScreenshot) {
+      parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
+      parts.push(context.currentPageInfo.formattedElements);
+      parts.push(`\nARIA TREE (hierarchical structure):`);
+      parts.push(JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2).substring(0, 5000));
+    } else {
+      // In SoM mode, skip DOM details - agent uses visual screenshot instead
+      parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+    }
+    if (JSON.stringify(context.currentPageInfo.ariaSnapshot).length > 5000) {
+      parts.push('... (truncated)');
+    }
+    parts.push('');
+    // Recent steps (most variable content - at the end)
+    if (context.recentSteps.length > 0) {
+      parts.push(`\nRECENT STEPS (last ${context.recentSteps.length}):`);
+      for (const step of context.recentSteps) {
+        const status = step.result === 'success' ? '[OK]' : '[FAIL]';
+        parts.push(`  ${status} ${step.stepNumber}.${step.iteration || ''} ${step.action}`);
+        parts.push(`     Code: ${step.code}`);
+        if (step.result === 'failure' && step.error) {
+          parts.push(`     ERROR: ${step.error}`);
+          parts.push(`     ^^ THIS SELECTOR FAILED - TRY DIFFERENT APPROACH ^^`);
+        } else {
+          parts.push(`     Result: ${step.observation}`);
+        }
+      }
+      parts.push('');
+      // Detect repeated failures
+      const recentFailures = context.recentSteps.filter(s => s.result === 'failure');
+      if (recentFailures.length >= 2) {
+        const sameSelector = recentFailures.slice(-2).every((s, i, arr) =>
+          i === 0 || s.code === arr[i-1].code
+        );
+        if (sameSelector) {
+          parts.push(`[WARNING] WARNING: You've tried the same selector multiple times and it failed!`);
+          parts.push(`   Last failed selector: ${recentFailures[recentFailures.length - 1].code}`);
+          parts.push(`   YOU MUST try a completely different selector this time!\n`);
+        }
+      }
+    }
+    // Experiences (app-specific patterns learned)
+    if (context.experiences && context.experiences.length > 0) {
+      parts.push(`\nEXPERIENCES (patterns you've learned about this app):`);
+      for (const exp of context.experiences) {
+        parts.push(`  • ${exp}`);
+      }
+      parts.push('');
+    }
+    // Extracted data (from previous extract_data tool calls)
+    if (context.extractedData && Object.keys(context.extractedData).length > 0) {
+      parts.push(`\nEXTRACTED DATA (available for use in commands):`);
+      parts.push(JSON.stringify(context.extractedData, null, 2));
+      parts.push('');
+    }
+    return parts.join('\n');
+  }
+  /**
+   * Build exploratory system prompt for autonomous exploration.
+   *
+   * The returned text is a single template literal made of, in order: a role
+   * sentence, the caller-supplied tool descriptions, the
+   * AgentDecisionLLMResponse JSON contract, and the numbered exploration-mode
+   * guidelines followed by a closing CRITICAL note.
+   *
+   * NOTE(review): the guideline numbering inside the literal goes from 5
+   * straight to 7 (there is no item 6) - confirm whether a guideline was
+   * dropped or the list simply needs renumbering.
+   *
+   * @param toolDescriptions - Text interpolated verbatim on the line right
+   *   after the role sentence.
+   * @returns The complete system prompt string.
+   */
+  static buildExploratorySystemPrompt(toolDescriptions: string): string {
+    return `You are an autonomous exploration agent that discovers and tests web application features.
+${toolDescriptions}
+YOUR RESPONSE FORMAT - Output JSON matching this interface:
+interface AgentDecisionLLMResponse {
+  status: string;              // "continue" | "complete" | "stuck"
+  reasoning: string;           // What you're exploring and why
+  // COMMANDS: Array of plain Playwright command strings
+  commands?: string[];         // Example: ["await page.fill('input[name=\"email\"]', 'test@example.com')", ...]
+  commandReasoning?: string;
+  toolCalls?: Array<{          // Tools to call (extract_data for menus, etc.)
+    name: string;
+    params: Record<string, any>;
+  }>;
+  toolReasoning?: string;
+  needsToolResults?: boolean;
+  noteToFutureSelf?: string;
+  coordinateAction?: { ... };
+  experiences?: string[];      // Use for BOTH app patterns AND exploration progress
+  blockerDetected?: { ... };
+  debugInfo?: {                // Meta-learning: suggest prompt improvements (only when very confident)
+    suggestedPromptUpdates?: string;
+    reasoning?: string;
+  };
+}
+EXPLORATION MODE GUIDELINES:
+1. **JOURNEY-FOCUSED EXPLORATION**: Follow the exploration prompt as your goal for THIS journey
+   - Example prompt: "Explore Dashboard and test all widgets"
+   - You should systematically test dashboard widgets, not wander off to other sections
+   - Stay focused on the given journey goal
+2. **ICON BUTTONS**: Match step goal to icon semantics (plus=add, gear=settings, hamburger=menu). Check element map for confirmation. Don't randomly try - reason about fit.
+3. **VISIBLE ELEMENTS ONLY**: Screenshot shows viewport only. Only interact with elements you SEE. If not visible, scroll or take_screenshot(isFullPage=true).
+4. **SYSTEMATIC EXPLORATION**: Use extract_data to discover, store in extractedData, track in experiences, check history to avoid repeating, prioritize unexplored areas.
+5. **CREATIVE TESTING**: Test functionality thoroughly - try edge cases, verify features work, look for bugs.
+7. **LIMITATIONS**: Cannot complete: sign-up, forgot password, OTP, CAPTCHA, email verification (no inbox/SMS access).
+   If encountered: CAPTCHA → stuck, sign-up/OTP → skip and explore other areas.
+8. **AUTH**: If credentials provided, login FIRST using exact testDataPrompt values. Don't explore public pages or click sign-up.
+9. **BLOCKERS**: Clear cookie modals, tour popups autonomously with blockerDetected.clearingCommands. CAPTCHA → stuck.
+10. **STATUS**: complete=goal achieved or budget low, continue=need more, stuck=cannot proceed. Complete when journey goal met, don't wait for maxSteps.
+11. **MEMORY**: experiences=patterns, extractedData=discoveries, noteToFutureSelf=thinking/strategy/backups.
+CRITICAL: You're fully autonomous for THIS journey - no step-by-step instructions provided.
+YOU decide the exploration path to meet the journey goal based on: journey prompt, current state, and memory.`;
+  }
+  /**
+   * Build the user prompt for one iteration of autonomous (exploratory) mode.
+   *
+   * Sections are appended in this order: SoM format reminder (only when a SoM
+   * screenshot is present), journey goal, test data / credentials, step
+   * progress, discovered data, SoM screenshot guidance, current page details,
+   * recent actions, exploration notes, the note from the previous iteration,
+   * and a closing "decide next action" instruction.
+   *
+   * @param context - Agent state read by this method: somScreenshot,
+   *   somElementMap, extractedData, currentURL, currentPageInfo, recentSteps,
+   *   experiences and noteFromPreviousIteration.
+   * @param explorationPrompt - Journey goal, rendered after "GOAL:".
+   * @param testDataPrompt - Optional test data / credentials text; when truthy
+   *   a block of login instructions is added after it.
+   * @param stepNumber - Optional current step; the PROGRESS line is emitted
+   *   only when both stepNumber and maxSteps are truthy.
+   * @param maxSteps - Optional step budget shown in the PROGRESS line.
+   * @returns All sections joined with newline characters.
+   */
+  static buildExploratoryUserPrompt(
+    context: AgentContext,
+    explorationPrompt: string,
+    testDataPrompt?: string,
+    stepNumber?: number,
+    maxSteps?: number
+  ): string {
+    // Maximum number of characters of the pretty-printed ARIA tree to include.
+    const ARIA_TREE_CHAR_LIMIT = 5000;
+    const parts: string[] = [];
+    // Add SoM format reminder if screenshot is present
+    if (context.somScreenshot) {
+      parts.push(`[WARNING] SET-OF-MARKS MODE ACTIVE`);
+      parts.push(`Your commands MUST be SomCommand objects (NOT Playwright strings).`);
+      parts.push(`Format: { "elementRef": "1", "action": "click" }`);
+      parts.push(`See TypeScript interfaces in system prompt for exact format.\n`);
+    }
+    parts.push('=== JOURNEY EXPLORATION CONTEXT ===\n');
+    parts.push(`GOAL: ${explorationPrompt}`);
+    parts.push(`   (Focus on THIS specific goal - don't wander to unrelated areas)\n`);
+    if (testDataPrompt) {
+      parts.push(`TEST DATA/CREDENTIALS: ${testDataPrompt}`);
+      parts.push(`   [WARNING] IMPORTANT: If credentials are provided above (email/username and password), you MUST:`);
+      parts.push(`      - Use them to LOGIN and explore authenticated features`);
+      parts.push(`      - Fill login forms with the exact credentials provided`);
+      parts.push(`      - Don't waste time on public/unauthenticated pages when you can login`);
+      parts.push(`      - Prioritize exploring the authenticated app experience\n`);
+    }
+    if (stepNumber && maxSteps) {
+      parts.push(`PROGRESS: Step ${stepNumber}/${maxSteps} (you can complete earlier if journey goal met)\n`);
+    }
+    // Show discovered and tracked data from extractedData
+    if (context.extractedData && Object.keys(context.extractedData).length > 0) {
+      parts.push(`\nDISCOVERED DATA (this journey):`);
+      for (const [key, value] of Object.entries(context.extractedData)) {
+        // Strings are shown verbatim; anything else is JSON-encoded so that
+        // objects do not render as "[object Object]".
+        const rendered = typeof value === 'string' ? value : JSON.stringify(value);
+        parts.push(`  ${key}: ${rendered}`);
+      }
+    }
+    // SoM screenshot (if available)
+    if (context.somScreenshot) {
+      parts.push(`\n SET-OF-MARKS SCREENSHOT (with element IDs):`);
+      parts.push(`Screenshot shows VIEWPORT ONLY (current visible area, not full page).`);
+      parts.push(`Color-coded bounding boxes mark interactive elements in the viewport.`);
+      parts.push(`Each element has a unique color and an ID label (1, 2, 3, etc.) at TOP-RIGHT corner, OUTSIDE the box.`);
+      parts.push(`Labels are typically positioned OUTSIDE and ABOVE the bounding box.`);
+      parts.push(`TO FIND THE CORRECT ELEMENT: match the label color with the bounding box color.`);
+      parts.push(`If target element not visible: SCROLL down/up OR use take_screenshot(isFullPage=true).`);
+      parts.push(`Reference element IDs in your commands using elementRef field (e.g., "1", "2", "42").`);
+      parts.push(`The screenshot is attached as an image - examine it to identify elements visually.`);
+      parts.push(``);
+      // SoM element map for disambiguation
+      if (context.somElementMap) {
+        parts.push(`SOM ELEMENT DETAILS (for disambiguation):`);
+        parts.push(`If unsure which ID matches your target (e.g., is it 11 or 12?), use this map:`);
+        parts.push(context.somElementMap);
+        parts.push(`Example: If you need a "Submit" button and see IDs 5 and 6 are both buttons, check the map to see which one says "Submit".`);
+        parts.push(``);
+      }
+    }
+    parts.push(`\nCURRENT PAGE:`);
+    parts.push(`URL: ${context.currentURL}`);
+    parts.push(`Title: ${context.currentPageInfo.title}`);
+    // Only include DOM details if NOT in SoM mode
+    if (!context.somScreenshot) {
+      parts.push(`\nINTERACTIVE ELEMENTS (with positions and selectors):`);
+      parts.push(context.currentPageInfo.formattedElements);
+      parts.push(`\nARIA TREE (hierarchical structure):`);
+      // JSON.stringify returns undefined for an undefined snapshot; fall back
+      // to an empty string so the truncation below cannot throw.
+      const ariaTree = JSON.stringify(context.currentPageInfo.ariaSnapshot, null, 2) || '';
+      parts.push(ariaTree.substring(0, ARIA_TREE_CHAR_LIMIT));
+      // The marker is tied to the text that was actually cut (the
+      // pretty-printed form) and is only emitted when the tree is shown.
+      if (ariaTree.length > ARIA_TREE_CHAR_LIMIT) {
+        parts.push('... (truncated)');
+      }
+    } else {
+      // In SoM mode, skip DOM details - agent uses visual screenshot
+      parts.push(`\nNote: Element details available in visual screenshot with SoM markers.`);
+    }
+    // Recent actions
+    if (context.recentSteps.length > 0) {
+      parts.push(`\nRECENT ACTIONS (last ${context.recentSteps.length}):`);
+      for (const step of context.recentSteps) {
+        const status = step.result === 'success' ? '[OK]' : '[FAIL]';
+        parts.push(`  ${status} ${step.action}`);
+        parts.push(`     ${step.observation}`);
+      }
+    }
+    // Learnings and exploration progress
+    if (context.experiences && context.experiences.length > 0) {
+      parts.push(`\nEXPLORATION NOTES & APP PATTERNS:`);
+      for (const exp of context.experiences) {
+        parts.push(`  • ${exp}`);
+      }
+    }
+    // Note from previous iteration
+    if (context.noteFromPreviousIteration) {
+      parts.push(`\nYOUR NOTE FROM LAST ITERATION: ${context.noteFromPreviousIteration.content}`);
+      parts.push(`Did it work? If yes, continue plan. If failed, try backup alternatives.`);
+    }
+    parts.push(`\nDECIDE NEXT ACTION: What to explore/test next? Check history to avoid repeating. Is goal achieved? Mark complete.`);
+    return parts.join('\n');
+  }
+}