npm - testchimp-runner-core - Versions diffs - 0.0.21 → 0.0.23 - Mend

testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (146)

package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
package/dist/credit-usage-service.d.ts +9 -0
package/dist/credit-usage-service.d.ts.map +1 -1
package/dist/credit-usage-service.js +20 -5
package/dist/credit-usage-service.js.map +1 -1
package/dist/execution-service.d.ts +7 -2
package/dist/execution-service.d.ts.map +1 -1
package/dist/execution-service.js +91 -36
package/dist/execution-service.js.map +1 -1
package/dist/index.d.ts +30 -2
package/dist/index.d.ts.map +1 -1
package/dist/index.js +91 -26
package/dist/index.js.map +1 -1
package/dist/llm-facade.d.ts +64 -8
package/dist/llm-facade.d.ts.map +1 -1
package/dist/llm-facade.js +361 -109
package/dist/llm-facade.js.map +1 -1
package/dist/llm-provider.d.ts +39 -0
package/dist/llm-provider.d.ts.map +1 -0
package/dist/llm-provider.js +7 -0
package/dist/llm-provider.js.map +1 -0
package/dist/model-constants.d.ts +21 -0
package/dist/model-constants.d.ts.map +1 -0
package/dist/model-constants.js +24 -0
package/dist/model-constants.js.map +1 -0
package/dist/orchestrator/index.d.ts +8 -0
package/dist/orchestrator/index.d.ts.map +1 -0
package/dist/orchestrator/index.js +23 -0
package/dist/orchestrator/index.js.map +1 -0
package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
package/dist/orchestrator/orchestrator-agent.js +855 -0
package/dist/orchestrator/orchestrator-agent.js.map +1 -0
package/dist/orchestrator/tool-registry.d.ts +74 -0
package/dist/orchestrator/tool-registry.d.ts.map +1 -0
package/dist/orchestrator/tool-registry.js +131 -0
package/dist/orchestrator/tool-registry.js.map +1 -0
package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
package/dist/orchestrator/tools/check-page-ready.js +72 -0
package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
package/dist/orchestrator/tools/extract-data.d.ts +13 -0
package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
package/dist/orchestrator/tools/extract-data.js +84 -0
package/dist/orchestrator/tools/extract-data.js.map +1 -0
package/dist/orchestrator/tools/index.d.ts +10 -0
package/dist/orchestrator/tools/index.d.ts.map +1 -0
package/dist/orchestrator/tools/index.js +18 -0
package/dist/orchestrator/tools/index.js.map +1 -0
package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
package/dist/orchestrator/tools/inspect-page.js +39 -0
package/dist/orchestrator/tools/inspect-page.js.map +1 -0
package/dist/orchestrator/tools/recall-history.d.ts +13 -0
package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
package/dist/orchestrator/tools/recall-history.js +64 -0
package/dist/orchestrator/tools/recall-history.js.map +1 -0
package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
package/dist/orchestrator/tools/take-screenshot.js +112 -0
package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
package/dist/orchestrator/types.d.ts +133 -0
package/dist/orchestrator/types.d.ts.map +1 -0
package/dist/orchestrator/types.js +28 -0
package/dist/orchestrator/types.js.map +1 -0
package/dist/playwright-mcp-service.d.ts +9 -0
package/dist/playwright-mcp-service.d.ts.map +1 -1
package/dist/playwright-mcp-service.js +20 -5
package/dist/playwright-mcp-service.js.map +1 -1
package/dist/progress-reporter.d.ts +97 -0
package/dist/progress-reporter.d.ts.map +1 -0
package/dist/progress-reporter.js +18 -0
package/dist/progress-reporter.js.map +1 -0
package/dist/prompts.d.ts +24 -0
package/dist/prompts.d.ts.map +1 -1
package/dist/prompts.js +593 -68
package/dist/prompts.js.map +1 -1
package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
package/dist/providers/backend-proxy-llm-provider.js +76 -0
package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
package/dist/providers/local-llm-provider.d.ts +21 -0
package/dist/providers/local-llm-provider.d.ts.map +1 -0
package/dist/providers/local-llm-provider.js +35 -0
package/dist/providers/local-llm-provider.js.map +1 -0
package/dist/scenario-service.d.ts +27 -1
package/dist/scenario-service.d.ts.map +1 -1
package/dist/scenario-service.js +48 -12
package/dist/scenario-service.js.map +1 -1
package/dist/scenario-worker-class.d.ts +39 -2
package/dist/scenario-worker-class.d.ts.map +1 -1
package/dist/scenario-worker-class.js +614 -86
package/dist/scenario-worker-class.js.map +1 -1
package/dist/script-utils.d.ts +2 -0
package/dist/script-utils.d.ts.map +1 -1
package/dist/script-utils.js +44 -4
package/dist/script-utils.js.map +1 -1
package/dist/types.d.ts +11 -0
package/dist/types.d.ts.map +1 -1
package/dist/types.js.map +1 -1
package/dist/utils/browser-utils.d.ts +20 -1
package/dist/utils/browser-utils.d.ts.map +1 -1
package/dist/utils/browser-utils.js +102 -51
package/dist/utils/browser-utils.js.map +1 -1
package/dist/utils/page-info-utils.d.ts +23 -4
package/dist/utils/page-info-utils.d.ts.map +1 -1
package/dist/utils/page-info-utils.js +174 -43
package/dist/utils/page-info-utils.js.map +1 -1
package/package.json +1 -2
package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
package/src/credit-usage-service.ts +23 -5
package/src/execution-service.ts +152 -42
package/src/index.ts +169 -26
package/src/llm-facade.ts +500 -126
package/src/llm-provider.ts +43 -0
package/src/model-constants.ts +23 -0
package/src/orchestrator/index.ts +33 -0
package/src/orchestrator/orchestrator-agent.ts +1037 -0
package/src/orchestrator/tool-registry.ts +182 -0
package/src/orchestrator/tools/check-page-ready.ts +75 -0
package/src/orchestrator/tools/extract-data.ts +92 -0
package/src/orchestrator/tools/index.ts +11 -0
package/src/orchestrator/tools/inspect-page.ts +42 -0
package/src/orchestrator/tools/recall-history.ts +72 -0
package/src/orchestrator/tools/take-screenshot.ts +128 -0
package/src/orchestrator/types.ts +200 -0
package/src/playwright-mcp-service.ts +23 -5
package/src/progress-reporter.ts +109 -0
package/src/prompts.ts +606 -69
package/src/providers/backend-proxy-llm-provider.ts +91 -0
package/src/providers/local-llm-provider.ts +38 -0
package/src/scenario-service.ts +83 -13
package/src/scenario-worker-class.ts +740 -72
package/src/script-utils.ts +50 -5
package/src/types.ts +13 -1
package/src/utils/browser-utils.ts +123 -51
package/src/utils/page-info-utils.ts +210 -53
package/testchimp-runner-core-0.0.22.tgz +0 -0

package/src/prompts.ts CHANGED

@@ -19,56 +19,424 @@ export const PROMPTS = {
   // Scenario breakdown
   SCENARIO_BREAKDOWN: {
-    SYSTEM: `You are an expert test automation engineer that breaks down user scenarios into precise, actionable Playwright steps.
-          RULES:
-          - Each step should be a single, specific action
-          - Use clear, imperative language (Go to, Click, Type, Verify, etc.)
-          - Include specific details (URLs, text content, element descriptions)
-          - Order steps logically (navigation first, then interactions, then verifications)
-          - Be specific about what to verify/assert
-          COMMON STEP PATTERNS:
-          - "Go to [URL]" - for navigation
-          - "Click on [element description]" - for clicking
-          - "Type '[text]' into [field description]" - for text input
-          - "Verify that [condition]" - for assertions
-          - "Wait for [element/condition]" - for waiting
-          Respond with JSON: {"steps": ["step1", "step2", "step3"]}`,
-    USER: (scenario: string) => `Break down this scenario into specific, actionable steps for Playwright automation:\n\n"${scenario}"`
+    SYSTEM: `Split user scenarios into individual steps. Copy each step exactly as provided. Do not add, expand, or modify.`,
+    USER: (scenario: string) => `Split this into steps. Keep each step exactly as written.
+${scenario}
+Return JSON: {"steps": ["step 1", "step 2", ...]}`
+  },
+  // Goal completion assessment
+  GOAL_COMPLETION_CHECK: {
+    SYSTEM: 'You are an expert test automation analyst. Evaluate whether a goal has been fully achieved. Be EXTREMELY CONSERVATIVE - mark goals complete when the PRIMARY action succeeds. DO NOT invent verification steps that were not explicitly requested. However, if the scenario explicitly specifies verification requirements, those MUST be completed and not skipped.',
+    USER: (goalDescription: string, completedActions: string[], pageInfo: any) => `Analyze whether the following goal has been fully completed:
+    GOAL: "${goalDescription}"
+    COMPLETED ACTIONS IN THIS STEP:
+    ${completedActions.map((action, idx) => `${idx + 1}. ${action}`).join('\n')}
+    CURRENT PAGE STATE:
+    - URL: ${pageInfo.url}
+    - Title: ${pageInfo.title}
+    - Interactive Elements:
+${pageInfo.formattedElements}
+    CRITICAL GUIDELINES - MARK COMPLETE AGGRESSIVELY:
+    1. **Action Goals vs Verification Goals**:
+       - If goal is an ACTION (click, type, select, send, submit), mark COMPLETE after successful action
+       - If goal is VERIFICATION (verify, check, ensure, assert), mark COMPLETE after assertion passes
+       - NEVER add verification to action goals - if the goal doesn't mention verification, don't require it
+       - HOWEVER: If verification is EXPLICITLY mentioned in the goal, it MUST be completed - do not skip it
+    2. **Understand Action Semantics** (what does the action verb really mean):
+       Some actions are ATOMIC (one operation):
+       - "Click X" → Just click
+       - "Type X into field" → Just type
+       - "Navigate to URL" → Just navigate
+       - "Select option" → Just select
+       Other actions imply a WORKFLOW with implicit final trigger:
+       - ANY action verb that implies submission/sending/triggering
+       - If the action includes data to provide, it usually implies using that data
+       - If the action name is a business process (login, register, send, post, etc.), think about what the user expects to happen
+       **General Pattern Recognition:**
+       Ask yourself: "In normal usage, does [ACTION VERB] require a final trigger/button?"
+       - "Login" → Yes, requires clicking a login/submit button after entering credentials
+       - "Send" → Yes, requires clicking a send button after typing content
+       - "Post" → Yes, requires clicking a post/publish button after entering content
+       - "Search for X" → Yes, requires triggering search after entering search term
+       - "Filter by X" → Maybe, depends on if filter auto-applies or needs button
+       - "Fill in X" → No, just data entry unless goal says "fill AND submit"
+       Mark COMPLETE when the BUSINESS ACTION is done from user perspective:
+       - Not complete if you only prepared data (filled fields) but didn't trigger the action
+       - Complete when the system would have processed/submitted/executed the action
+       Examples:
+       - "Login with credentials: X" → Incomplete until credentials submitted (button clicked)
+       - "Send message: Y" → Incomplete until message sent (send button clicked)
+       - "Fill in name field" → Complete after fill (no submission implied)
+       - "Search for products" → Incomplete until search triggered
+       Think: "From a user's perspective, is the action done?" not "Did I type the data?"
+    3. **Multi-part Goals** (explicit multiple requirements):
+       - "Fill in ALL fields" → Need multiple fills for each field
+       - "Click submit AND verify success message appears" → Need both click + explicit verification
+       - Goals with explicit "and" requiring multiple distinct actions
+    4. **NEVER Create Hallucinated Verification Sub-goals, BUT Honor Explicit Verification Requirements**:
+       - Don't invent verification steps that weren't in the original goal
+       - Don't look for confirmation messages unless goal explicitly asks for them
+       - Don't check for success indicators unless goal explicitly requires verification
+       - Trust Playwright's execution - if action succeeded without error, it worked
+       - Action success IS the completion criteria for action goals
+       - CRITICAL: If the goal explicitly says "verify", "check", "ensure", "confirm" something, that verification MUST be completed
+    5. **State Changes After Actions Are SUCCESS, Not Failure**:
+       - Button becomes disabled after click → SUCCESS (expected behavior)
+       - Form clears after submit → SUCCESS (expected behavior)
+       - Page navigates after action → SUCCESS (expected behavior)
+       - Element disappears after interaction → SUCCESS (expected behavior)
+    6. **What "nextSubGoal" Should Look Like**:
+       - For "Fill in all fields" with 5 fields, if 2 filled: "Fill in the remaining 3 fields" ✅
+       - For "Click submit AND verify", if clicked but not verified: "Verify the success message appears" ✅
+       - For "Click send button" after click succeeds: NO nextSubGoal - COMPLETE ✅
+       - DON'T create nextSubGoal for verification unless goal explicitly asks for it ❌
+       CRITICAL - Preserve specific values in nextSubGoal:
+       - Original: "Login with credentials: admin, pass123" (username filled, password not)
+         ✅ nextSubGoal: "Enter password: pass123"
+         ❌ NOT: "Complete the login" (loses the password value!)
+       - Original: "Enter user details: Name: John, Email: john@test.com" (name done, email not)
+         ✅ nextSubGoal: "Enter email: john@test.com"
+         ❌ NOT: "Enter email address" (loses specific email!)
+    Examples:
+    ✅ PURE ACTION GOALS (no verification in description - complete after action):
+    - Goal: "Click the send button" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
+    - Goal: "Enter email address" + Action: fill() succeeded → COMPLETE ✅ (no verification needed)
+    - Goal: "Navigate to dashboard" + Action: goto() succeeded → COMPLETE ✅ (no verification needed)
+    - Goal: "Submit the form" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
+    ⏳ GOALS WITH EXPLICIT VERIFICATION (must complete BOTH action AND verification):
+    - Goal: "Click send and verify message sent" + Action: click() succeeded → INCOMPLETE ⏳ nextSubGoal: "Verify message sent confirmation"
+    - Goal: "Submit form and check for success message" + Action: submit clicked → INCOMPLETE ⏳ nextSubGoal: "Check for success message"
+    - Goal: "Login and verify dashboard appears" + Action: login completed → INCOMPLETE ⏳ nextSubGoal: "Verify dashboard appears"
+    ✅ PURE VERIFICATION GOALS (complete after verification):
+    - Goal: "Verify page title is correct" + Action: assertion passed → COMPLETE ✅
+    - Goal: "Check that the error message is displayed" + Action: assertion passed → COMPLETE ✅
+    - Goal: "Ensure user is logged in" + Action: assertion passed → COMPLETE ✅
+    ⏳ MULTI-STEP ACTION GOALS (complete all parts):
+    - Goal: "Fill in all required fields" + Action: filled 2 of 5 → INCOMPLETE ⏳ nextSubGoal: "Fill in remaining 3 fields"
+    GOLDEN RULE:
+    - If the goal is a SIMPLE ACTION and that action SUCCEEDED, mark COMPLETE immediately
+    - Don't hallucinate verification requirements that weren't explicitly requested
+    - BUT if verification IS explicitly mentioned in the goal, it MUST be completed before marking COMPLETE
+    - Only verify what is instructed to be verified, nothing more, nothing less
+    Respond with JSON:
+    {
+      "isComplete": true/false,
+      "reason": "brief explanation - if action succeeded and goal was just the action, mark complete",
+      "nextSubGoal": "ONLY if goal has multiple parts and not all parts done yet - must be based on ACTUAL goal requirements, not invented verification"
+    }`
+  },
+  // Screenshot need assessment
+  SCREENSHOT_NEED_ASSESSMENT: {
+    SYSTEM: 'You are an expert test automation analyst. Determine if visual screenshot analysis is ABSOLUTELY NECESSARY to solve this failure. Vision mode is expensive (GPT-4o), so only recommend when there is CLEAR diagnostic value that DOM cannot provide.',
+    USER: (stepDescription: string, errorMessage: string, attemptCount: number, pageInfo: any) => `After 2 failures, determine if VISION MODE is absolutely necessary. This is the ONLY chance to use vision.
+    STEP: "${stepDescription}"
+    ERROR: "${errorMessage}"
+    ATTEMPT: ${attemptCount} (after ${attemptCount - 1} failures - vision can diagnose the issue)
+    CURRENT DOM INFO AVAILABLE:
+    - URL: ${pageInfo.url}
+    - Interactive Elements:
+${pageInfo.formattedElements}
+    🎯 VISION MODE - USE SPARINGLY (Expensive GPT-4o):
+    Vision provides diagnostic value ONLY when DOM information is truly insufficient.
+    ✅ **RECOMMEND SCREENSHOT only for these HIGH-VALUE cases:**
+    1. **Suspected Element Hallucination** (HIGH priority):
+       - Previous attempts tried getByText/toBeVisible for elements that might not exist
+       - Error: "not found" or "timeout" on verification attempts
+       - Visual will definitively show if elements exist or if we need alternative verification
+    2. **Visual-Only Elements**:
+       - Icons, images, or visual indicators without text/roles
+       - Elements identified by position ("button on the right")
+       - Shadow DOM or complex component structures
+    3. **Visual Blockers**:
+       - Overlays, modals, or popups blocking interactions
+       - Z-index issues preventing clicks
+       - Scrolling problems that DOM doesn't reveal
+    ❌ **DO NOT RECOMMEND SCREENSHOT when:**
+    - **Strict mode violations / multiple matches** - Accessibility tree shows duplicates, use DOM info to scope selector
+    - Simple selector errors (try different selector strategies first)
+    - Navigation issues (URL problems are not visual)
+    - Invalid Playwright API (syntax errors)
+    - Timing issues that can be solved with better waits
+    - DOM clearly shows the solution (IDs, data-testid available)
+    - Error has obvious DOM-based fix
+    **Conservative Assessment Required:**
+    - Vision mode is EXPENSIVE (uses GPT-4o)
+    - This is the ONLY chance (attempt 3 of 4)
+    - Only recommend if DOM truly cannot solve it
+    - If in doubt, suggest DOM-based alternative instead
+    Respond with JSON:
+    {
+      "needsScreenshot": true/false,
+      "reason": "If true: [specific diagnostic value vision provides]. If false: [why DOM-based approach is sufficient]",
+      "alternativeApproach": "REQUIRED if needsScreenshot=false: [specific DOM-based solution to try next]"
+    }
+    Remember: Default to NO unless there's compelling evidence that visual analysis is the ONLY way to solve this.`
   },
   // Playwright command generation
   PLAYWRIGHT_COMMAND: {
-    SYSTEM: 'You are an expert Playwright automation engineer. Generate clean, concise, and reliable commands. Use Playwright\'s built-in auto-waiting instead of explicit timeouts. Keep code readable and maintainable. Learn from previous failures and adapt your approach accordingly.',
+    SYSTEM: 'You are an expert Playwright automation engineer with strong self-awareness and problem-solving skills. You understand cause-and-effect, learn from your own actions, and can reason about application state changes.',
-    USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `You are an expert Playwright automation engineer. Generate a single, precise Playwright command for the given step.
+    USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `You are working to achieve a specific goal. Generate ONE precise Playwright command that makes progress.
+    🎯 CURRENT GOAL: "${stepDescription}"
+    📋 WHAT YOU'VE ALREADY DONE IN THIS STEP:
+    ${previousCommands || 'Nothing yet - this is the first action for this goal'}
+    ${errorContext ? `⚠️ PREVIOUS ATTEMPT FAILED:\n${errorContext}\n` : ''}
+    ${attemptHistory ? `📊 ALL ATTEMPTS SO FAR:\n${attemptHistory}\n` : ''}
+    🧠 SELF-AWARENESS & REASONING:
+    1. **Analyze Your Own Actions**:
+       - Review what you've ALREADY done in this step above
+       - Did your previous actions CAUSE the current state?
+       - Ask: "What is the LOGICAL consequence of what I just did?"
+       - Understand that your actions change the application state
+    2. **Understand Cause & Effect**:
+       - Element state changed? → Did YOUR previous action cause it?
+       - Element not found? → Did YOUR action remove it or navigate away?
+       - Validation error? → Did YOUR action trigger it (empty field, wrong format)?
+       - Before retrying, ask: "Is this the EXPECTED result of my actions?"
+    3. **Self-Correction Logic**:
+       - If you caused the problem → Fix it (don't just retry)
+       - If you achieved the goal (even with side effects) → Move on!
+       - If you're stuck in a loop → You're fighting expected behavior, change approach
+       - Don't undo successful work or fight against normal state transitions
+    4. **Smart Recovery**:
+       - Element not ready/unavailable → Identify what prerequisite is missing, complete it first
+       - Element not found → Distinguish between: your action removed it (success) vs genuine error
+       - Multiple failures on same approach → Fundamentally rethink strategy, don't iterate blindly
+       - Stuck in retry loop → Step back, analyze root cause, try completely different approach
+    5. **NEVER Hallucinate Verification Elements**:
+       - ONLY verify elements that ACTUALLY EXIST in the current DOM state
+       - Check the "CURRENT PAGE STATE" section for what elements are available
+       - Don't look for "success message", "confirmation text", or "sent message" unless you see them in the DOM
+       - Don't invent text patterns or regex for elements that don't exist
+       - If verification is needed but element doesn't exist, use alternative methods:
+         * Check for state changes (button disabled, form cleared, URL changed)
+         * Wait for page load state changes
+         * Check for element detachment/attachment
+         * Use waitForResponse for network verification
+       - When previous attempts failed looking for non-existent elements, STOP trying to find them
+    6. **Navigation and Redirects** (CRITICAL):
+       Handle redirects properly - DON'T keep retrying original URL if navigation succeeded:
+       - For navigation, use explicit 10-second timeout (default is 5s, too short for redirects):
+         await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 })
+       - Why longer timeout for navigation:
+         * Redirects take extra time
+         * Initial page loads are slower
+         * Default 5s timeout is for fast element operations only
+       - If navigation times out or throws "execution context destroyed":
+         * CHECK CURRENT URL FIRST: const currentUrl = page.url()
+         * If URL changed from about:blank → Navigation SUCCEEDED (even if redirected)
+         * DON'T retry goto() if already on a page
+         * Proceed with next step
+       - Navigation succeeded if:
+         * page.url() is NOT 'about:blank'
+         * page.url() changed from previous URL
+         * Even if different from target URL (redirects are normal)
+       - Only retry navigation if:
+         * page.url() is still 'about:blank' or previous URL
+         * AND no redirect happened
+    7. **Real-World Web App Resilience**:
+       Common Interruptions (handle gracefully):
+       - Cookie consent banners → Dismiss if blocking main UI (look for "Accept", "OK", "Close")
+       - Modal popups → Close if not relevant to current goal (look for X button, "Dismiss")
+       - Page refreshes → Re-find elements, don't assume page state persists
+       - Loading states → Wait for content, check for spinners/loading indicators
+       - Overlays → Dismiss or wait for them to disappear before proceeding
+       Detection Patterns:
+       - If element suddenly not found → Check if overlay/modal appeared
+       - If click fails → Check if cookie banner is blocking element
+       - If page URL changed unexpectedly → Handle redirect/refresh gracefully
+       - If timeout occurs → Check for loading indicators, wait for them to disappear
+       Resilience Strategies:
+       - Before critical interactions, check for and dismiss blocking overlays
+       - After page loads, wait for dynamic content (networkidle, specific elements)
+       - If element covered/blocked, look for overlay and close it
+       - Use flexible selectors that work across page refreshes
+       - Add waits for elements that load dynamically
+       Examples:
+       - If cookie banner present: await page.getByRole('button', {name: /accept|ok|agree/i}).click();
+       - If modal blocking: await page.getByRole('button', {name: /close|dismiss|x/i}).click();
+       - After action that might refresh: await page.waitForLoadState('domcontentloaded');
+       - For dynamic content: await page.getByText('expected content').waitFor();
+    8. **Use Specific Values from Goal Description**:
+       CRITICAL: Extract and use exact values mentioned in the goal.
+       Examples:
+       - Goal: "Login with credentials: Willy, Willy@1234"
+         ✅ Use: await page.fill('username', 'Willy'); await page.fill('password', 'Willy@1234');
+         ❌ NOT: await page.fill('username', process.env.USERNAME);
+       - Goal: "Enter name: John Doe"
+         ✅ Use: await page.fill('[name="name"]', 'John Doe');
+         ❌ NOT: await page.fill('[name="name"]', 'Test User');
+       Apply this to ANY specific value in the goal (amounts, dates, selections, text, etc.).
+       NEVER:
+       - Replace specific values with environment variables
+       - Replace specific values with generic test data
+       - Hallucinate different values than what's in the goal
+       - Use process.env, config, or placeholder values
+       Be creative ONLY when goal doesn't specify values:
+       - "Login with valid credentials" → Infer reasonable values
+       - "Login with credentials: admin, pass123" → Use EXACTLY those values
+    GOAL-ORIENTED APPROACH:
+    - What needs to be done to achieve this goal?
+    - Have I ALREADY done parts of this? (check "WHAT YOU'VE ALREADY DONE")
+    - If yes, what's the NEXT logical action?
+    - If retrying after failure, WHY did it fail? Did I cause it?
+    - Is something blocking the UI? (cookie banner, modal, overlay)
+    - Extract any specific values from the goal and use them EXACTLY
+    9. **Strict Mode Violations & Multiple Matches** (CRITICAL):
+       Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
+       **PROACTIVE DETECTION** - Check BEFORE generating command:
+       - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
+       - Look for duplicate elements with same role/text (e.g., multiple links with "Employee Information")
+       - If duplicates exist, generate a MORE SPECIFIC selector from the start
+       - Don't wait for strict mode error - prevent it by analyzing the DOM structure
+       🚨 ERROR PATTERNS:
+       - "strict mode violation" → Your selector matched multiple elements
+       - "Multiple elements found" → Same issue
+       - Command chain with multiple strategies → Sign of selector problems
+       ✅ SOLUTIONS (in order of preference):
+       **Option 1: Be More Specific** (BEST):
+       - BAD: page.locator('a', { hasText: 'Employee Information' }).click() → Matches multiple links
+       - GOOD: page.locator('nav a', { hasText: 'Employee Information' }).click() → Scoped to nav
+       - GOOD: page.getByRole('navigation').getByRole('link', { name: 'Employee Information' }).click() → Role-based scoping
+       - GOOD: page.locator('a[href*="/employee"]', { hasText: 'Employee Information' }).click() → Combined attributes
+       **Option 2: Use Position-Based Selection**:
+       - If multiple matches are expected: page.locator('a', { hasText: 'Employee Information' }).first().click()
+       - Or use: .nth(0) for first, .last() for last
+       **Option 3: Filter by Visibility/State**:
+       - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
+       🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
+       - BAD: Chaining multiple selector strategies in one command with semicolons
+       - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
+       - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Employee Information' }).click()
+       **When You See Strict Mode Errors:**
+       1. Analyze - Why did my selector match multiple elements?
+       2. Narrow Down - Add parent context (nav, sidebar, header)
+       3. Combine - Use multiple attributes (role + text, class + href)
+       4. Position - If truly ambiguous, use .first() or .nth()
+       5. NEVER - Chain multiple selector attempts or use page.evaluate()
+       **Key Principle:**
+       - ONE command = ONE clear selector strategy
+       - Don't hedge your bets with multiple approaches
+       - Trust Playwright's auto-waiting and built-in selectors
     CRITICAL RULES:
-    - Generate ONLY ONE command per step
+    - Generate ONLY ONE command that moves toward the goal
+    - NEVER undo your own successful work (don't clear fields you just filled!)
+    - If previous attempts failed, analyze WHY before trying different approach
+    - Learn from failures and your own action history
     - Use the most reliable selectors (prefer getByRole, getByText, getByLabel)
-    - Always wait for elements before interacting (use waitFor, waitForSelector)
-    - Use proper error handling and timeouts
-    - If previous attempts failed, try a COMPLETELY DIFFERENT approach
-    - Learn from failures and adapt your strategy
+    - Trust Playwright's auto-waiting - if click succeeded, it worked!
+    - If strict mode violation: Make selector MORE SPECIFIC or use .first()
+    - Generate ONE clear command, not multiple chained selector attempts
     ELEMENT SELECTION PRIORITY:
-    1. getByRole() - Most reliable for interactive elements
-    2. getByText() - For text content
-    3. getByLabel() - For form inputs
-    4. getByPlaceholder() - For input placeholders
-    5. getByTestId() - For test-specific elements
-    6. locator() with CSS selectors - Last resort
-    COMMON PATTERNS:
+    1. getByTestId() - BEST if data-testid is available (most stable, designed for tests)
+    2. locator('#id') - EXCELLENT if element has unique ID (stable, direct targeting)
+    3. getByRole() - Very reliable for interactive elements (semantic)
+    4. getByText() - For text content (good for unique text)
+    5. getByLabel() - For form inputs (semantic)
+    6. getByPlaceholder() - For input placeholders
+    7. locator() with CSS classes - Last resort (brittle, changes frequently)
+    COMMON PATTERNS (prefer IDs/data-testid when available):
     - Navigation: await page.goto('url')
-    - Click: await page.getByRole('button', { name: 'text' }).click()
-    - Type: await page.getByRole('textbox', { name: 'label' }).fill('text')
+    - Click with testid: await page.getByTestId('submit-btn').click()
+    - Click with ID: await page.locator('#login-button').click()
+    - Click with role: await page.getByRole('button', { name: 'text' }).click()
+    - Type with testid: await page.getByTestId('username-input').fill('text')
+    - Type with ID: await page.locator('#email').fill('text')
+    - Type with role: await page.getByRole('textbox', { name: 'label' }).fill('text')
     - Wait: await page.waitForLoadState('networkidle')
     - Verify: await expect(page).toHaveTitle(/expected/)
+    IMPORTANT: Use IDs/data attributes in COMMANDS, but keep goal descriptions semantic!
     CODE STYLE GUIDELINES:
     - Keep commands concise and clean
     - Avoid explicit timeouts unless necessary
@@ -76,11 +444,30 @@ export const PROMPTS = {
     - Only add timeouts for specific slow operations
     - Prefer single-line commands when possible
+    VALID PLAYWRIGHT API REFERENCE:
+    - locator.waitFor({ state: 'visible'|'hidden'|'attached'|'detached' }) - ONLY these states
+    - NEVER use waitFor({ state: 'enabled' }) - THIS IS INVALID
+    - For disabled elements: Use page.waitForFunction() with DOM check
+    - CSS selectors: Standard CSS only (no :has-text(), :enabled pseudo-classes)
+    - Playwright pseudo-selectors only work in locator(), NOT in querySelector()
     RETRY STRATEGIES:
-    - Timeout errors: Add waitFor() or increase timeout
-    - Not found errors: Try different selectors or wait for element
-    - Not visible errors: Scroll into view or wait for visibility
-    - Not enabled errors: Wait for element to be enabled
+    - Timeout errors: Add waitFor() or increase timeout, check for loading states
+    - Not found errors: Try different selectors, wait for element, or check if DOM changed
+    - Not visible errors: Scroll into view, dismiss overlays, or wait for visibility
+    - Not enabled/Disabled errors: Identify and complete prerequisites that enable the element
+    - Detached errors: Element removed from DOM, refetch or use different selector
+    - Covered/Blocked errors: Close overlays, modals, or popups blocking the element
+    ELEMENT STATE AWARENESS:
+    - Element disabled/inactive? → Identify and complete the prerequisite (fill required fields, check boxes, select options)
+    - Interacting with unavailable elements ALWAYS fails → Enable/prepare element state first
+    - Review your action history → Did you reverse a prerequisite? Complete it again before proceeding
+    - Different element states need different handling:
+      * Disabled → Complete prerequisites (validation, required fields, agreements)
+      * Hidden/Not visible → Scroll, dismiss overlays, or wait for visibility
+      * Detached → Element removed from DOM, may need navigation or different selector
+      * Loading → Wait for completion before interaction
     TIMEOUT GUIDELINES:
     - Only add explicit timeouts for slow operations (file uploads, large data loads)
@@ -98,10 +485,8 @@ export const PROMPTS = {
     Current State:
     - URL: ${pageInfo.url}
     - Title: ${pageInfo.title}
-    - Page Structure: ${pageInfo.pageStructure}
-    - Interactive Elements: ${pageInfo.interactiveElements}
-    - Form Fields: ${pageInfo.formFields}
-    - All Elements: ${pageInfo.elements}
+    - Interactive Elements:
+${pageInfo.formattedElements}
     Previous Commands:
     \`\`\`javascript
@@ -115,33 +500,142 @@ export const PROMPTS = {
     Step to execute: "${stepDescription}"`
   },
+  // Vision diagnostic analysis (supervisor reviewing screenshot).
+  // SYSTEM is the fixed system prompt. USER builds the user prompt from the step
+  // description, pageInfo (url, title, formattedElements), previousCommands and
+  // errorContext; the last two fall back to 'None' when falsy. The prompt asks
+  // for a JSON reply with visualAnalysis, rootCause, specificInstructions,
+  // recommendedApproach, elementsFound and elementsNotFound.
+  // NOTE(review): attemptHistory is accepted but never interpolated into the
+  // template - confirm whether it should be rendered or removed.
+  VISION_DIAGNOSTIC_ANALYSIS: {
+    SYSTEM: 'You are a senior QA supervisor with vision capabilities. Analyze the screenshot AND DOM snapshot together to identify what went wrong and provide specific instructions with accurate selectors.',
+    USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Analyze screenshot + DOM snapshot to diagnose failures and provide specific instructions.
+🎯 GOAL: "${stepDescription}"
+📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
+Correlate visual elements in screenshot with DOM structure below.
+**DOM Snapshot:**
+- URL: ${pageInfo.url}
+- Title: ${pageInfo.title}
+- Interactive Elements:
+${pageInfo.formattedElements}
+**Previous Failed Attempts:**
+${previousCommands || 'None'}
+**Errors:**
+${errorContext || 'None'}
+**Your Task:**
+1. Look at screenshot - identify target element visually
+2. Look at DOM - find matching element in ARIA tree
+3. Check if element has ID or data-testid (best selectors)
+4. Provide EXACT selector from DOM
+Respond with JSON:
+{
+  "visualAnalysis": "I see...",
+  "rootCause": "Failed because...",
+  "specificInstructions": "Click element with [exact selector from DOM]...",
+  "recommendedApproach": "Use page.locator('[exact-attribute]')...",
+  "elementsFound": ["element with id='x'", "button[name='y']"],
+  "elementsNotFound": ["element worker looked for but doesn't exist"]
+}`
+  },
+  // Playwright command generation with supervisor instructions.
+  // USER embeds the step description, the supervisor's analysis and
+  // instructions, and pageInfo.url / pageInfo.formattedElements (the title is
+  // not used here). elementsFound and elementsNotFound are each rendered as a
+  // 1-based numbered list joined by newlines, or 'None specified' when the
+  // array is empty. The prompt asks for a JSON reply with command and reasoning.
+  PLAYWRIGHT_COMMAND_WITH_SUPERVISOR: {
+    SYSTEM: 'You are a Playwright automation engineer. Your supervisor has analyzed a screenshot and provided specific instructions. Follow their instructions EXACTLY to generate the correct command.',
+    USER: (stepDescription: string, supervisorInstructions: string, supervisorAnalysis: string, elementsFound: string[], elementsNotFound: string[], pageInfo: any) => `Your supervisor has reviewed the screenshot and provided specific instructions. Follow them EXACTLY.
+    🎯 ORIGINAL GOAL: "${stepDescription}"
+    👔 SUPERVISOR'S VISUAL ANALYSIS:
+    ${supervisorAnalysis}
+    📋 SUPERVISOR'S SPECIFIC INSTRUCTIONS:
+    ${supervisorInstructions}
+    ✅ ELEMENTS THAT EXIST (confirmed by supervisor from screenshot):
+    ${elementsFound.length > 0 ? elementsFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
+    ❌ ELEMENTS THAT DON'T EXIST (confirmed absent from screenshot):
+    ${elementsNotFound.length > 0 ? elementsNotFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
+    **YOUR TASK:**
+    Generate ONE Playwright command that implements the supervisor's instructions EXACTLY.
+    **CRITICAL RULES:**
+    1. Follow supervisor's instructions to the letter
+    2. ONLY use elements from "ELEMENTS THAT EXIST" list
+    3. NEVER try to find elements from "ELEMENTS THAT DON'T EXIST" list
+    4. Use the exact selectors/strategies supervisor recommended
+    5. If supervisor said "don't verify X, check Y instead" - do exactly that
+    **Current DOM Context:**
+    - URL: ${pageInfo.url}
+    - Interactive Elements:
+${pageInfo.formattedElements}
+    Respond with JSON:
+    {
+      "command": "await page.locator('#exact-selector').click();",
+      "reasoning": "Following supervisor's instruction to [what you're doing]"
+    }`
+  },
+  // Legacy vision command generation (kept for compatibility).
+  // USER builds the prompt from the step description, pageInfo (url, title,
+  // formattedElements), previousCommands and errorContext; the last two fall
+  // back to 'None' when falsy. The prompt asks for a JSON reply with command,
+  // reasoning, visualInsights, failureRootCause and recommendedAlternative.
+  // NOTE(review): attemptHistory is accepted but never interpolated into the
+  // template - confirm whether it should be rendered or removed.
+  PLAYWRIGHT_COMMAND_WITH_VISION: {
+    SYSTEM: 'Analyze screenshot + DOM together. Correlate visual elements with DOM to generate accurate Playwright commands with precise selectors.',
+    USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Vision mode: Correlate screenshot with DOM to generate command.
+🎯 GOAL: "${stepDescription}"
+📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
+Correlate visual elements in screenshot with DOM structure below.
+**DOM Snapshot:**
+- URL: ${pageInfo.url}
+- Title: ${pageInfo.title}
+- Interactive Elements:
+${pageInfo.formattedElements}
+**Previous Failed Attempts:**
+${previousCommands || 'None'}
+**Errors:**
+${errorContext || 'None'}
+**Your Task:**
+1. Look at screenshot - identify target element visually
+2. Look at DOM - find matching element in ARIA tree
+3. Check if element has ID or data-testid (best selectors)
+4. Generate command with EXACT selector from DOM
+Respond with JSON:
+{
+  "command": "await page.locator('[exact-selector-from-dom]').click();",
+  "reasoning": "Visual element matches DOM element with [attribute]",
+  "visualInsights": "I see [element] in screenshot",
+  "failureRootCause": "Previous failed because [reason]",
+  "recommendedAlternative": "Use [strategy]"
+}`
+  },
   // Script parsing for AI repair
+  // USER embeds the raw script text and asks for a JSON reply of the form
+  // {"steps": [{"description": ..., "code": ...}]}; the new prompt tells the
+  // model to treat each // comment inside the test callback as a step boundary.
   SCRIPT_PARSING: {
-    SYSTEM: 'You are an expert at parsing Playwright test scripts into logical steps. IGNORE doc comments at the top (/** ... */) as they are repair advice, not test steps. ALWAYS prioritize existing step comments over generating new ones. If the script has "// Step N:" comments, use those exactly as they are. Only generate new descriptions if no existing step comments are found. Be conservative and preserve exact code formatting.',
+    SYSTEM: 'Parse Playwright scripts into steps. Use existing // comments as step boundaries.',
-    USER: (script: string) => `Parse this Playwright test script into logical steps. Be conservative and preserve the exact code.
+    USER: (script: string) => `Extract steps from this script.
-            Instructions:
-            1. IGNORE any doc comments at the top of the script (e.g., /** ... */ or /* ... */) - these are repair advice and should not be parsed as steps
-            2. FIRST, look for existing step comments (e.g., "// Step 1:", "// Step 2:", etc.) and use those as step boundaries
-            3. If existing step comments are found, use them exactly as they are - do not modify or regenerate descriptions
-            4. If no existing step comments, then group related commands that work together logically
-            5. Preserve ALL code exactly as written - do not modify, reformat, or change any code
-            6. Each step should contain commands that belong together (e.g., navigation + wait, form filling, verification)
-            7. Keep steps focused and not too granular
-            Script:
-            ${script}
-            Return JSON object with steps array:
-            {
-              "steps": [
-                {
-                  "description": "use existing comment if available, otherwise create meaningful description",
-                  "code": "exact code from script - preserve all formatting and content"
-                }
-              ]
-            }`
+Find code INSIDE: test('...', async ({ page, browser, context }) => { ... })
+Each // comment starts a new step. Use comment text (without //) as description.
+Preserve code exactly.
+Script:
+${script}
+Return JSON: {"steps": [{"description": "...", "code": "..."}, ...]}`
   },
   // Repair suggestion
@@ -158,8 +652,8 @@ export const PROMPTS = {
     Current Page State:
     - URL: ${pageInfo.url}
     - Title: ${pageInfo.title}
-    - Interactive Elements: ${pageInfo.interactiveElements}
-    - Form Fields: ${pageInfo.formFields}
+    - Interactive Elements:
+${pageInfo.formattedElements}
     ${failureHistory}
@@ -250,5 +744,48 @@ export const PROMPTS = {
             {
               "script": "complete final script that can be pasted into the original file"
             }`
+  },
+  // Script cleanup (minor adjustments only).
+  // SYSTEM is the fixed system prompt. USER embeds the generated script
+  // verbatim, lists the allowed and forbidden kinds of change with examples,
+  // and asks for a JSON reply with script, changes and skipped.
+  SCRIPT_CLEANUP: {
+    SYSTEM: 'You are a Playwright test script reviewer. Your job is to do MINOR cleanup only - remove obvious redundancies, but preserve the core structure and logic.',
+    USER: (script: string) => `Review this generated Playwright test script and make MINOR adjustments only.
+SCRIPT:
+${script}
+YOUR TASK (MINOR ADJUSTMENTS ONLY):
+1. Remove duplicate/redundant expect() assertions (e.g., same assertion repeated twice)
+2. Remove duplicate step comments without code
+3. Fix obvious formatting issues (inconsistent spacing, etc.)
+4. Consolidate multiple identical assertions into one
+5. Remove any obviously redundant waits or checks
+DO NOT:
+- Change the test logic or flow
+- Remove legitimate assertions
+- Restructure the code
+- Change selectors
+- Add new functionality
+- Remove important waits
+EXAMPLES:
+❌ REMOVE redundancy:
+await expect(page.getByText('Hello')).toBeVisible();
+await expect(page.getByText('Hello')).toBeVisible();  // duplicate
+✅ KEEP legitimate checks:
+await expect(page.getByPlaceholder('Message...')).toBeEmpty();
+await page.getByPlaceholder('Message...').fill('Hello');
+await expect(page.getByPlaceholder('Message...')).toHaveValue('Hello');  // different checks
+Return JSON:
+{
+  "script": "cleaned script (or original if no changes needed)",
+  "changes": ["list of minor changes made, or empty array if none"],
+  "skipped": "reason if you chose not to make changes"
+}`
   }
 };