testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
  2. package/dist/credit-usage-service.d.ts +9 -0
  3. package/dist/credit-usage-service.d.ts.map +1 -1
  4. package/dist/credit-usage-service.js +20 -5
  5. package/dist/credit-usage-service.js.map +1 -1
  6. package/dist/execution-service.d.ts +7 -2
  7. package/dist/execution-service.d.ts.map +1 -1
  8. package/dist/execution-service.js +91 -36
  9. package/dist/execution-service.js.map +1 -1
  10. package/dist/index.d.ts +30 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +91 -26
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm-facade.d.ts +64 -8
  15. package/dist/llm-facade.d.ts.map +1 -1
  16. package/dist/llm-facade.js +361 -109
  17. package/dist/llm-facade.js.map +1 -1
  18. package/dist/llm-provider.d.ts +39 -0
  19. package/dist/llm-provider.d.ts.map +1 -0
  20. package/dist/llm-provider.js +7 -0
  21. package/dist/llm-provider.js.map +1 -0
  22. package/dist/model-constants.d.ts +21 -0
  23. package/dist/model-constants.d.ts.map +1 -0
  24. package/dist/model-constants.js +24 -0
  25. package/dist/model-constants.js.map +1 -0
  26. package/dist/orchestrator/index.d.ts +8 -0
  27. package/dist/orchestrator/index.d.ts.map +1 -0
  28. package/dist/orchestrator/index.js +23 -0
  29. package/dist/orchestrator/index.js.map +1 -0
  30. package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
  31. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-agent.js +855 -0
  33. package/dist/orchestrator/orchestrator-agent.js.map +1 -0
  34. package/dist/orchestrator/tool-registry.d.ts +74 -0
  35. package/dist/orchestrator/tool-registry.d.ts.map +1 -0
  36. package/dist/orchestrator/tool-registry.js +131 -0
  37. package/dist/orchestrator/tool-registry.js.map +1 -0
  38. package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
  39. package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
  40. package/dist/orchestrator/tools/check-page-ready.js +72 -0
  41. package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
  42. package/dist/orchestrator/tools/extract-data.d.ts +13 -0
  43. package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
  44. package/dist/orchestrator/tools/extract-data.js +84 -0
  45. package/dist/orchestrator/tools/extract-data.js.map +1 -0
  46. package/dist/orchestrator/tools/index.d.ts +10 -0
  47. package/dist/orchestrator/tools/index.d.ts.map +1 -0
  48. package/dist/orchestrator/tools/index.js +18 -0
  49. package/dist/orchestrator/tools/index.js.map +1 -0
  50. package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
  51. package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
  52. package/dist/orchestrator/tools/inspect-page.js +39 -0
  53. package/dist/orchestrator/tools/inspect-page.js.map +1 -0
  54. package/dist/orchestrator/tools/recall-history.d.ts +13 -0
  55. package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
  56. package/dist/orchestrator/tools/recall-history.js +64 -0
  57. package/dist/orchestrator/tools/recall-history.js.map +1 -0
  58. package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
  59. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
  60. package/dist/orchestrator/tools/take-screenshot.js +112 -0
  61. package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
  62. package/dist/orchestrator/types.d.ts +133 -0
  63. package/dist/orchestrator/types.d.ts.map +1 -0
  64. package/dist/orchestrator/types.js +28 -0
  65. package/dist/orchestrator/types.js.map +1 -0
  66. package/dist/playwright-mcp-service.d.ts +9 -0
  67. package/dist/playwright-mcp-service.d.ts.map +1 -1
  68. package/dist/playwright-mcp-service.js +20 -5
  69. package/dist/playwright-mcp-service.js.map +1 -1
  70. package/dist/progress-reporter.d.ts +97 -0
  71. package/dist/progress-reporter.d.ts.map +1 -0
  72. package/dist/progress-reporter.js +18 -0
  73. package/dist/progress-reporter.js.map +1 -0
  74. package/dist/prompts.d.ts +24 -0
  75. package/dist/prompts.d.ts.map +1 -1
  76. package/dist/prompts.js +593 -68
  77. package/dist/prompts.js.map +1 -1
  78. package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
  79. package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
  80. package/dist/providers/backend-proxy-llm-provider.js +76 -0
  81. package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
  82. package/dist/providers/local-llm-provider.d.ts +21 -0
  83. package/dist/providers/local-llm-provider.d.ts.map +1 -0
  84. package/dist/providers/local-llm-provider.js +35 -0
  85. package/dist/providers/local-llm-provider.js.map +1 -0
  86. package/dist/scenario-service.d.ts +27 -1
  87. package/dist/scenario-service.d.ts.map +1 -1
  88. package/dist/scenario-service.js +48 -12
  89. package/dist/scenario-service.js.map +1 -1
  90. package/dist/scenario-worker-class.d.ts +39 -2
  91. package/dist/scenario-worker-class.d.ts.map +1 -1
  92. package/dist/scenario-worker-class.js +614 -86
  93. package/dist/scenario-worker-class.js.map +1 -1
  94. package/dist/script-utils.d.ts +2 -0
  95. package/dist/script-utils.d.ts.map +1 -1
  96. package/dist/script-utils.js +44 -4
  97. package/dist/script-utils.js.map +1 -1
  98. package/dist/types.d.ts +11 -0
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/dist/utils/browser-utils.d.ts +20 -1
  102. package/dist/utils/browser-utils.d.ts.map +1 -1
  103. package/dist/utils/browser-utils.js +102 -51
  104. package/dist/utils/browser-utils.js.map +1 -1
  105. package/dist/utils/page-info-utils.d.ts +23 -4
  106. package/dist/utils/page-info-utils.d.ts.map +1 -1
  107. package/dist/utils/page-info-utils.js +174 -43
  108. package/dist/utils/page-info-utils.js.map +1 -1
  109. package/package.json +1 -2
  110. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
  111. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
  112. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
  113. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
  114. package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
  115. package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
  116. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
  117. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
  118. package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
  119. package/src/credit-usage-service.ts +23 -5
  120. package/src/execution-service.ts +152 -42
  121. package/src/index.ts +169 -26
  122. package/src/llm-facade.ts +500 -126
  123. package/src/llm-provider.ts +43 -0
  124. package/src/model-constants.ts +23 -0
  125. package/src/orchestrator/index.ts +33 -0
  126. package/src/orchestrator/orchestrator-agent.ts +1037 -0
  127. package/src/orchestrator/tool-registry.ts +182 -0
  128. package/src/orchestrator/tools/check-page-ready.ts +75 -0
  129. package/src/orchestrator/tools/extract-data.ts +92 -0
  130. package/src/orchestrator/tools/index.ts +11 -0
  131. package/src/orchestrator/tools/inspect-page.ts +42 -0
  132. package/src/orchestrator/tools/recall-history.ts +72 -0
  133. package/src/orchestrator/tools/take-screenshot.ts +128 -0
  134. package/src/orchestrator/types.ts +200 -0
  135. package/src/playwright-mcp-service.ts +23 -5
  136. package/src/progress-reporter.ts +109 -0
  137. package/src/prompts.ts +606 -69
  138. package/src/providers/backend-proxy-llm-provider.ts +91 -0
  139. package/src/providers/local-llm-provider.ts +38 -0
  140. package/src/scenario-service.ts +83 -13
  141. package/src/scenario-worker-class.ts +740 -72
  142. package/src/script-utils.ts +50 -5
  143. package/src/types.ts +13 -1
  144. package/src/utils/browser-utils.ts +123 -51
  145. package/src/utils/page-info-utils.ts +210 -53
  146. package/testchimp-runner-core-0.0.22.tgz +0 -0
package/src/prompts.ts CHANGED
@@ -19,56 +19,424 @@ export const PROMPTS = {
19
19
 
20
20
  // Scenario breakdown
21
21
  SCENARIO_BREAKDOWN: {
22
- SYSTEM: `You are an expert test automation engineer that breaks down user scenarios into precise, actionable Playwright steps.
23
-
24
- RULES:
25
- - Each step should be a single, specific action
26
- - Use clear, imperative language (Go to, Click, Type, Verify, etc.)
27
- - Include specific details (URLs, text content, element descriptions)
28
- - Order steps logically (navigation first, then interactions, then verifications)
29
- - Be specific about what to verify/assert
30
-
31
- COMMON STEP PATTERNS:
32
- - "Go to [URL]" - for navigation
33
- - "Click on [element description]" - for clicking
34
- - "Type '[text]' into [field description]" - for text input
35
- - "Verify that [condition]" - for assertions
36
- - "Wait for [element/condition]" - for waiting
37
-
38
- Respond with JSON: {"steps": ["step1", "step2", "step3"]}`,
39
-
40
- USER: (scenario: string) => `Break down this scenario into specific, actionable steps for Playwright automation:\n\n"${scenario}"`
22
+ SYSTEM: `Split user scenarios into individual steps. Copy each step exactly as provided. Do not add, expand, or modify.`,
23
+
24
+ USER: (scenario: string) => `Split this into steps. Keep each step exactly as written.
25
+
26
+ ${scenario}
27
+
28
+ Return JSON: {"steps": ["step 1", "step 2", ...]}`
29
+ },
30
+
31
+ // Goal completion assessment
32
+ GOAL_COMPLETION_CHECK: {
33
+ SYSTEM: 'You are an expert test automation analyst. Evaluate whether a goal has been fully achieved. Be EXTREMELY CONSERVATIVE about adding requirements - mark a goal complete as soon as its PRIMARY action succeeds. DO NOT invent verification steps that were not explicitly requested. However, if the scenario explicitly specifies verification requirements, those MUST be completed and not skipped.',
34
+
35
+ USER: (goalDescription: string, completedActions: string[], pageInfo: any) => `Analyze whether the following goal has been fully completed:
36
+
37
+ GOAL: "${goalDescription}"
38
+
39
+ COMPLETED ACTIONS IN THIS STEP:
40
+ ${completedActions.map((action, idx) => `${idx + 1}. ${action}`).join('\n')}
41
+
42
+ CURRENT PAGE STATE:
43
+ - URL: ${pageInfo.url}
44
+ - Title: ${pageInfo.title}
45
+ - Interactive Elements:
46
+ ${pageInfo.formattedElements}
47
+
48
+ CRITICAL GUIDELINES - MARK COMPLETE AGGRESSIVELY:
49
+
50
+ 1. **Action Goals vs Verification Goals**:
51
+ - If goal is an ACTION (click, type, select, send, submit), mark COMPLETE after successful action
52
+ - If goal is VERIFICATION (verify, check, ensure, assert), mark COMPLETE after assertion passes
53
+ - NEVER add verification to action goals - if the goal doesn't mention verification, don't require it
54
+ - HOWEVER: If verification is EXPLICITLY mentioned in the goal, it MUST be completed - do not skip it
55
+
56
+ 2. **Understand Action Semantics** (what does the action verb really mean):
57
+
58
+ Some actions are ATOMIC (one operation):
59
+ - "Click X" → Just click
60
+ - "Type X into field" → Just type
61
+ - "Navigate to URL" → Just navigate
62
+ - "Select option" → Just select
63
+
64
+ Other actions imply a WORKFLOW with implicit final trigger:
65
+ - ANY action verb that implies submission/sending/triggering
66
+ - If the action includes data to provide, it usually implies using that data
67
+ - If the action name is a business process (login, register, send, post, etc.), think about what the user expects to happen
68
+
69
+ **General Pattern Recognition:**
70
+
71
+ Ask yourself: "In normal usage, does [ACTION VERB] require a final trigger/button?"
72
+ - "Login" → Yes, requires clicking a login/submit button after entering credentials
73
+ - "Send" → Yes, requires clicking a send button after typing content
74
+ - "Post" → Yes, requires clicking a post/publish button after entering content
75
+ - "Search for X" → Yes, requires triggering search after entering search term
76
+ - "Filter by X" → Maybe, depends on if filter auto-applies or needs button
77
+ - "Fill in X" → No, just data entry unless goal says "fill AND submit"
78
+
79
+ Mark COMPLETE when the BUSINESS ACTION is done from user perspective:
80
+ - Not complete if you only prepared data (filled fields) but didn't trigger the action
81
+ - Complete when the system would have processed/submitted/executed the action
82
+
83
+ Examples:
84
+ - "Login with credentials: X" → Incomplete until credentials submitted (button clicked)
85
+ - "Send message: Y" → Incomplete until message sent (send button clicked)
86
+ - "Fill in name field" → Complete after fill (no submission implied)
87
+ - "Search for products" → Incomplete until search triggered
88
+
89
+ Think: "From a user's perspective, is the action done?" not "Did I type the data?"
90
+
91
+ 3. **Multi-part Goals** (explicit multiple requirements):
92
+ - "Fill in ALL fields" → Need multiple fills for each field
93
+ - "Click submit AND verify success message appears" → Need both click + explicit verification
94
+ - Goals with explicit "and" requiring multiple distinct actions
95
+
96
+ 4. **NEVER Create Hallucinated Verification Sub-goals, BUT Honor Explicit Verification Requirements**:
97
+ - Don't invent verification steps that weren't in the original goal
98
+ - Don't look for confirmation messages unless goal explicitly asks for them
99
+ - Don't check for success indicators unless goal explicitly requires verification
100
+ - Trust Playwright's execution - if action succeeded without error, it worked
101
+ - Action success IS the completion criteria for action goals
102
+ - CRITICAL: If the goal explicitly says "verify", "check", "ensure", "confirm" something, that verification MUST be completed
103
+
104
+ 5. **State Changes After Actions Are SUCCESS, Not Failure**:
105
+ - Button becomes disabled after click → SUCCESS (expected behavior)
106
+ - Form clears after submit → SUCCESS (expected behavior)
107
+ - Page navigates after action → SUCCESS (expected behavior)
108
+ - Element disappears after interaction → SUCCESS (expected behavior)
109
+
110
+ 6. **What "nextSubGoal" Should Look Like**:
111
+ - For "Fill in all fields" with 5 fields, if 2 filled: "Fill in the remaining 3 fields" ✅
112
+ - For "Click submit AND verify", if clicked but not verified: "Verify the success message appears" ✅
113
+ - For "Click send button" after click succeeds: NO nextSubGoal - COMPLETE ✅
114
+ - DON'T create nextSubGoal for verification unless goal explicitly asks for it ❌
115
+
116
+ CRITICAL - Preserve specific values in nextSubGoal:
117
+ - Original: "Login with credentials: admin, pass123" (username filled, password not)
118
+ ✅ nextSubGoal: "Enter password: pass123"
119
+ ❌ NOT: "Complete the login" (loses the password value!)
120
+
121
+ - Original: "Enter user details: Name: John, Email: john@test.com" (name done, email not)
122
+ ✅ nextSubGoal: "Enter email: john@test.com"
123
+ ❌ NOT: "Enter email address" (loses specific email!)
124
+
125
+ Examples:
126
+
127
+ ✅ PURE ACTION GOALS (no verification in description - complete after action):
128
+ - Goal: "Click the send button" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
129
+ - Goal: "Enter email address" + Action: fill() succeeded → COMPLETE ✅ (no verification needed)
130
+ - Goal: "Navigate to dashboard" + Action: goto() succeeded → COMPLETE ✅ (no verification needed)
131
+ - Goal: "Submit the form" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
132
+
133
+ ⏳ GOALS WITH EXPLICIT VERIFICATION (must complete BOTH action AND verification):
134
+ - Goal: "Click send and verify message sent" + Action: click() succeeded → INCOMPLETE ⏳ nextSubGoal: "Verify message sent confirmation"
135
+ - Goal: "Submit form and check for success message" + Action: submit clicked → INCOMPLETE ⏳ nextSubGoal: "Check for success message"
136
+ - Goal: "Login and verify dashboard appears" + Action: login completed → INCOMPLETE ⏳ nextSubGoal: "Verify dashboard appears"
137
+
138
+ ✅ PURE VERIFICATION GOALS (complete after verification):
139
+ - Goal: "Verify page title is correct" + Action: assertion passed → COMPLETE ✅
140
+ - Goal: "Check that the error message is displayed" + Action: assertion passed → COMPLETE ✅
141
+ - Goal: "Ensure user is logged in" + Action: assertion passed → COMPLETE ✅
142
+
143
+ ⏳ MULTI-STEP ACTION GOALS (complete all parts):
144
+ - Goal: "Fill in all required fields" + Action: filled 2 of 5 → INCOMPLETE ⏳ nextSubGoal: "Fill in remaining 3 fields"
145
+
146
+ GOLDEN RULE:
147
+ - If the goal is a SIMPLE ACTION and that action SUCCEEDED, mark COMPLETE immediately
148
+ - Don't hallucinate verification requirements that weren't explicitly requested
149
+ - BUT if verification IS explicitly mentioned in the goal, it MUST be completed before marking COMPLETE
150
+ - Only verify what is instructed to be verified, nothing more, nothing less
151
+
152
+ Respond with JSON:
153
+ {
154
+ "isComplete": true/false,
155
+ "reason": "brief explanation - if action succeeded and goal was just the action, mark complete",
156
+ "nextSubGoal": "ONLY if goal has multiple parts and not all parts done yet - must be based on ACTUAL goal requirements, not invented verification"
157
+ }`
158
+ },
159
+
160
+ // Screenshot need assessment
161
+ SCREENSHOT_NEED_ASSESSMENT: {
162
+ SYSTEM: 'You are an expert test automation analyst. Determine if visual screenshot analysis is ABSOLUTELY NECESSARY to solve this failure. Vision mode is expensive (GPT-4o), so only recommend when there is CLEAR diagnostic value that DOM cannot provide.',
163
+
164
+ USER: (stepDescription: string, errorMessage: string, attemptCount: number, pageInfo: any) => `After 2 failures, determine if VISION MODE is absolutely necessary. This is the ONLY chance to use vision.
165
+
166
+ STEP: "${stepDescription}"
167
+ ERROR: "${errorMessage}"
168
+ ATTEMPT: ${attemptCount} (after ${attemptCount - 1} failures - vision can diagnose the issue)
169
+
170
+ CURRENT DOM INFO AVAILABLE:
171
+ - URL: ${pageInfo.url}
172
+ - Interactive Elements:
173
+ ${pageInfo.formattedElements}
174
+
175
+ 🎯 VISION MODE - USE SPARINGLY (Expensive GPT-4o):
176
+
177
+ Vision provides diagnostic value ONLY when DOM information is truly insufficient.
178
+
179
+ ✅ **RECOMMEND SCREENSHOT only for these HIGH-VALUE cases:**
180
+
181
+ 1. **Suspected Element Hallucination** (HIGH priority):
182
+ - Previous attempts tried getByText/toBeVisible for elements that might not exist
183
+ - Error: "not found" or "timeout" on verification attempts
184
+ - Visual will definitively show if elements exist or if we need alternative verification
185
+
186
+ 2. **Visual-Only Elements**:
187
+ - Icons, images, or visual indicators without text/roles
188
+ - Elements identified by position ("button on the right")
189
+ - Shadow DOM or complex component structures
190
+
191
+ 3. **Visual Blockers**:
192
+ - Overlays, modals, or popups blocking interactions
193
+ - Z-index issues preventing clicks
194
+ - Scrolling problems that DOM doesn't reveal
195
+
196
+ ❌ **DO NOT RECOMMEND SCREENSHOT when:**
197
+ - **Strict mode violations / multiple matches** - Accessibility tree shows duplicates, use DOM info to scope selector
198
+ - Simple selector errors (try different selector strategies first)
199
+ - Navigation issues (URL problems are not visual)
200
+ - Invalid Playwright API (syntax errors)
201
+ - Timing issues that can be solved with better waits
202
+ - DOM clearly shows the solution (IDs, data-testid available)
203
+ - Error has obvious DOM-based fix
204
+
205
+ **Conservative Assessment Required:**
206
+ - Vision mode is EXPENSIVE (uses GPT-4o)
207
+ - This is the ONLY chance (attempt 3 of 4)
208
+ - Only recommend if DOM truly cannot solve it
209
+ - If in doubt, suggest DOM-based alternative instead
210
+
211
+ Respond with JSON:
212
+ {
213
+ "needsScreenshot": true/false,
214
+ "reason": "If true: [specific diagnostic value vision provides]. If false: [why DOM-based approach is sufficient]",
215
+ "alternativeApproach": "REQUIRED if needsScreenshot=false: [specific DOM-based solution to try next]"
216
+ }
217
+
218
+ Remember: Default to NO unless there's compelling evidence that visual analysis is the ONLY way to solve this.`
41
219
  },
42
220
 
43
221
  // Playwright command generation
44
222
  PLAYWRIGHT_COMMAND: {
45
- SYSTEM: 'You are an expert Playwright automation engineer. Generate clean, concise, and reliable commands. Use Playwright\'s built-in auto-waiting instead of explicit timeouts. Keep code readable and maintainable. Learn from previous failures and adapt your approach accordingly.',
223
+ SYSTEM: 'You are an expert Playwright automation engineer with strong self-awareness and problem-solving skills. You understand cause-and-effect, learn from your own actions, and can reason about application state changes.',
46
224
 
47
- USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `You are an expert Playwright automation engineer. Generate a single, precise Playwright command for the given step.
225
+ USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `You are working to achieve a specific goal. Generate ONE precise Playwright command that makes progress.
48
226
 
227
+ 🎯 CURRENT GOAL: "${stepDescription}"
228
+
229
+ 📋 WHAT YOU'VE ALREADY DONE IN THIS STEP:
230
+ ${previousCommands || 'Nothing yet - this is the first action for this goal'}
231
+
232
+ ${errorContext ? `⚠️ PREVIOUS ATTEMPT FAILED:\n${errorContext}\n` : ''}
233
+ ${attemptHistory ? `📊 ALL ATTEMPTS SO FAR:\n${attemptHistory}\n` : ''}
234
+
235
+ 🧠 SELF-AWARENESS & REASONING:
236
+
237
+ 1. **Analyze Your Own Actions**:
238
+ - Review what you've ALREADY done in this step above
239
+ - Did your previous actions CAUSE the current state?
240
+ - Ask: "What is the LOGICAL consequence of what I just did?"
241
+ - Understand that your actions change the application state
242
+
243
+ 2. **Understand Cause & Effect**:
244
+ - Element state changed? → Did YOUR previous action cause it?
245
+ - Element not found? → Did YOUR action remove it or navigate away?
246
+ - Validation error? → Did YOUR action trigger it (empty field, wrong format)?
247
+ - Before retrying, ask: "Is this the EXPECTED result of my actions?"
248
+
249
+ 3. **Self-Correction Logic**:
250
+ - If you caused the problem → Fix it (don't just retry)
251
+ - If you achieved the goal (even with side effects) → Move on!
252
+ - If you're stuck in a loop → You're fighting expected behavior, change approach
253
+ - Don't undo successful work or fight against normal state transitions
254
+
255
+ 4. **Smart Recovery**:
256
+ - Element not ready/unavailable → Identify what prerequisite is missing, complete it first
257
+ - Element not found → Distinguish between: your action removed it (success) vs genuine error
258
+ - Multiple failures on same approach → Fundamentally rethink strategy, don't iterate blindly
259
+ - Stuck in retry loop → Step back, analyze root cause, try completely different approach
260
+
261
+ 5. **NEVER Hallucinate Verification Elements**:
262
+ - ONLY verify elements that ACTUALLY EXIST in the current DOM state
263
+ - Check the "CURRENT PAGE STATE" section for what elements are available
264
+ - Don't look for "success message", "confirmation text", or "sent message" unless you see them in the DOM
265
+ - Don't invent text patterns or regex for elements that don't exist
266
+ - If verification is needed but element doesn't exist, use alternative methods:
267
+ * Check for state changes (button disabled, form cleared, URL changed)
268
+ * Wait for page load state changes
269
+ * Check for element detachment/attachment
270
+ * Use waitForResponse for network verification
271
+ - When previous attempts failed looking for non-existent elements, STOP trying to find them
272
+
273
+ 6. **Navigation and Redirects** (CRITICAL):
274
+
275
+ Handle redirects properly - DON'T keep retrying original URL if navigation succeeded:
276
+
277
+ - For navigation, use explicit 10-second timeout (default is 5s, too short for redirects):
278
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 })
279
+
280
+ - Why longer timeout for navigation:
281
+ * Redirects take extra time
282
+ * Initial page loads are slower
283
+ * Default 5s timeout is for fast element operations only
284
+
285
+ - If navigation times out or throws "execution context destroyed":
286
+ * CHECK CURRENT URL FIRST: const currentUrl = page.url()
287
+ * If URL changed from about:blank → Navigation SUCCEEDED (even if redirected)
288
+ * DON'T retry goto() if already on a page
289
+ * Proceed with next step
290
+
291
+ - Navigation succeeded if:
292
+ * page.url() is NOT 'about:blank'
293
+ * page.url() changed from previous URL
294
+ * Even if different from target URL (redirects are normal)
295
+
296
+ - Only retry navigation if:
297
+ * page.url() is still 'about:blank' or previous URL
298
+ * AND no redirect happened
299
+
300
+ 7. **Real-World Web App Resilience**:
301
+
302
+ Common Interruptions (handle gracefully):
303
+ - Cookie consent banners → Dismiss if blocking main UI (look for "Accept", "OK", "Close")
304
+ - Modal popups → Close if not relevant to current goal (look for X button, "Dismiss")
305
+ - Page refreshes → Re-find elements, don't assume page state persists
306
+ - Loading states → Wait for content, check for spinners/loading indicators
307
+ - Overlays → Dismiss or wait for them to disappear before proceeding
308
+
309
+ Detection Patterns:
310
+ - If element suddenly not found → Check if overlay/modal appeared
311
+ - If click fails → Check if cookie banner is blocking element
312
+ - If page URL changed unexpectedly → Handle redirect/refresh gracefully
313
+ - If timeout occurs → Check for loading indicators, wait for them to disappear
314
+
315
+ Resilience Strategies:
316
+ - Before critical interactions, check for and dismiss blocking overlays
317
+ - After page loads, wait for dynamic content (networkidle, specific elements)
318
+ - If element covered/blocked, look for overlay and close it
319
+ - Use flexible selectors that work across page refreshes
320
+ - Add waits for elements that load dynamically
321
+
322
+ Examples:
323
+ - If cookie banner present: await page.getByRole('button', {name: /accept|ok|agree/i}).click();
324
+ - If modal blocking: await page.getByRole('button', {name: /close|dismiss|^x$/i}).click();
325
+ - After action that might refresh: await page.waitForLoadState('domcontentloaded');
326
+ - For dynamic content: await page.getByText('expected content').waitFor();
327
+
328
+ 8. **Use Specific Values from Goal Description**:
329
+
330
+ CRITICAL: Extract and use exact values mentioned in the goal.
331
+
332
+ Examples:
333
+ - Goal: "Login with credentials: Willy, Willy@1234"
334
+ ✅ Use: await page.fill('username', 'Willy'); await page.fill('password', 'Willy@1234');
335
+ ❌ NOT: await page.fill('username', process.env.USERNAME);
336
+
337
+ - Goal: "Enter name: John Doe"
338
+ ✅ Use: await page.fill('[name="name"]', 'John Doe');
339
+ ❌ NOT: await page.fill('[name="name"]', 'Test User');
340
+
341
+ Apply this to ANY specific value in the goal (amounts, dates, selections, text, etc.).
342
+
343
+ NEVER:
344
+ - Replace specific values with environment variables
345
+ - Replace specific values with generic test data
346
+ - Hallucinate different values than what's in the goal
347
+ - Use process.env, config, or placeholder values
348
+
349
+ Be creative ONLY when goal doesn't specify values:
350
+ - "Login with valid credentials" → Infer reasonable values
351
+ - "Login with credentials: admin, pass123" → Use EXACTLY those values
352
+
353
+ GOAL-ORIENTED APPROACH:
354
+ - What needs to be done to achieve this goal?
355
+ - Have I ALREADY done parts of this? (check "WHAT YOU'VE ALREADY DONE")
356
+ - If yes, what's the NEXT logical action?
357
+ - If retrying after failure, WHY did it fail? Did I cause it?
358
+ - Is something blocking the UI? (cookie banner, modal, overlay)
359
+ - Extract any specific values from the goal and use them EXACTLY
360
+
361
+ 9. **Strict Mode Violations & Multiple Matches** (CRITICAL):
362
+
363
+ Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
364
+
365
+ **PROACTIVE DETECTION** - Check BEFORE generating command:
366
+ - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
+ - Look for duplicate elements with same role/text (e.g., multiple links with "Employee Information")
368
+ - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
+ - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
+
371
+ 🚨 ERROR PATTERNS:
372
+ - "strict mode violation" → Your selector matched multiple elements
373
+ - "Multiple elements found" → Same issue
374
+ - Command chain with multiple strategies → Sign of selector problems
375
+
376
+ ✅ SOLUTIONS (in order of preference):
377
+
378
+ **Option 1: Be More Specific** (BEST):
379
+ - BAD: page.locator('a', { hasText: 'Employee Information' }).click() → Matches multiple links
380
+ - GOOD: page.locator('nav a', { hasText: 'Employee Information' }).click() → Scoped to nav
381
+ - GOOD: page.getByRole('navigation').getByRole('link', { name: 'Employee Information' }).click() → Role-based scoping
382
+ - GOOD: page.locator('a[href*="/employee"]', { hasText: 'Employee Information' }).click() → Combined attributes
383
+
384
+ **Option 2: Use Position-Based Selection**:
385
+ - If multiple matches are expected: page.locator('a', { hasText: 'Employee Information' }).first().click()
386
+ - Or use: .nth(0) for first, .last() for last
387
+
388
+ **Option 3: Filter by Visibility/State**:
389
+ - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
390
+
391
+ 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
392
+ - BAD: Chaining multiple selector strategies in one command with semicolons
393
+ - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
394
+ - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Employee Information' }).click()
395
+
396
+ **When You See Strict Mode Errors:**
397
+ 1. Analyze - Why did my selector match multiple elements?
398
+ 2. Narrow Down - Add parent context (nav, sidebar, header)
399
+ 3. Combine - Use multiple attributes (role + text, class + href)
400
+ 4. Position - If truly ambiguous, use .first() or .nth()
401
+ 5. NEVER - Chain multiple selector attempts or use page.evaluate()
402
+
403
+ **Key Principle:**
404
+ - ONE command = ONE clear selector strategy
405
+ - Don't hedge your bets with multiple approaches
406
+ - Trust Playwright's auto-waiting and built-in selectors
407
+
49
408
  CRITICAL RULES:
50
- - Generate ONLY ONE command per step
409
+ - Generate ONLY ONE command that moves toward the goal
410
+ - NEVER undo your own successful work (don't clear fields you just filled!)
411
+ - If previous attempts failed, analyze WHY before trying different approach
412
+ - Learn from failures and your own action history
51
413
  - Use the most reliable selectors (prefer getByRole, getByText, getByLabel)
52
- - Always wait for elements before interacting (use waitFor, waitForSelector)
53
- - Use proper error handling and timeouts
54
- - If previous attempts failed, try a COMPLETELY DIFFERENT approach
55
- - Learn from failures and adapt your strategy
414
+ - Trust Playwright's auto-waiting - if click succeeded, it worked!
415
+ - If strict mode violation: Make selector MORE SPECIFIC or use .first()
416
+ - Generate ONE clear command, not multiple chained selector attempts
56
417
 
57
418
  ELEMENT SELECTION PRIORITY:
58
- 1. getByRole() - Most reliable for interactive elements
59
- 2. getByText() - For text content
60
- 3. getByLabel() - For form inputs
61
- 4. getByPlaceholder() - For input placeholders
62
- 5. getByTestId() - For test-specific elements
63
- 6. locator() with CSS selectors - Last resort
64
-
65
- COMMON PATTERNS:
419
+ 1. getByTestId() - BEST if data-testid is available (most stable, designed for tests)
420
+ 2. locator('#id') - EXCELLENT if element has unique ID (stable, direct targeting)
421
+ 3. getByRole() - Very reliable for interactive elements (semantic)
422
+ 4. getByText() - For text content (good for unique text)
423
+ 5. getByLabel() - For form inputs (semantic)
424
+ 6. getByPlaceholder() - For input placeholders
425
+ 7. locator() with CSS classes - Last resort (brittle, changes frequently)
426
+
427
+ COMMON PATTERNS (prefer IDs/data-testid when available):
66
428
  - Navigation: await page.goto('url')
67
- - Click: await page.getByRole('button', { name: 'text' }).click()
68
- - Type: await page.getByRole('textbox', { name: 'label' }).fill('text')
429
+ - Click with testid: await page.getByTestId('submit-btn').click()
430
+ - Click with ID: await page.locator('#login-button').click()
431
+ - Click with role: await page.getByRole('button', { name: 'text' }).click()
432
+ - Type with testid: await page.getByTestId('username-input').fill('text')
433
+ - Type with ID: await page.locator('#email').fill('text')
434
+ - Type with role: await page.getByRole('textbox', { name: 'label' }).fill('text')
69
435
  - Wait: await page.waitForLoadState('networkidle')
70
436
  - Verify: await expect(page).toHaveTitle(/expected/)
71
437
 
438
+ IMPORTANT: Use IDs/data attributes in COMMANDS, but keep goal descriptions semantic!
439
+
72
440
  CODE STYLE GUIDELINES:
73
441
  - Keep commands concise and clean
74
442
  - Avoid explicit timeouts unless necessary
@@ -76,11 +444,30 @@ export const PROMPTS = {
76
444
  - Only add timeouts for specific slow operations
77
445
  - Prefer single-line commands when possible
78
446
 
447
+ VALID PLAYWRIGHT API REFERENCE:
448
+ - locator.waitFor({ state: 'visible'|'hidden'|'attached'|'detached' }) - ONLY these states
449
+ - NEVER use waitFor({ state: 'enabled' }) - THIS IS INVALID
450
+ - To wait for a disabled element to become enabled: Use page.waitForFunction() with a DOM check
451
+ - CSS selectors passed to querySelector(): Standard CSS only (no Playwright-only pseudo-classes such as :has-text() or :visible)
452
+ - Playwright pseudo-selectors only work in locator(), NOT in querySelector()
453
+
79
454
  RETRY STRATEGIES:
80
- - Timeout errors: Add waitFor() or increase timeout
81
- - Not found errors: Try different selectors or wait for element
82
- - Not visible errors: Scroll into view or wait for visibility
83
- - Not enabled errors: Wait for element to be enabled
455
+ - Timeout errors: Add waitFor() or increase timeout, check for loading states
456
+ - Not found errors: Try different selectors, wait for element, or check if DOM changed
457
+ - Not visible errors: Scroll into view, dismiss overlays, or wait for visibility
458
+ - Not enabled/Disabled errors: Identify and complete prerequisites that enable the element
459
+ - Detached errors: Element was removed from the DOM; re-query it or use a different selector
460
+ - Covered/Blocked errors: Close overlays, modals, or popups blocking the element
461
+
462
+ ELEMENT STATE AWARENESS:
463
+ - Element disabled/inactive? → Identify and complete the prerequisite (fill required fields, check boxes, select options)
464
+ - Interacting with unavailable elements ALWAYS fails → Enable/prepare element state first
465
+ - Review your action history → Did you reverse a prerequisite? Complete it again before proceeding
466
+ - Different element states need different handling:
467
+ * Disabled → Complete prerequisites (validation, required fields, agreements)
468
+ * Hidden/Not visible → Scroll, dismiss overlays, or wait for visibility
469
+ * Detached → Element removed from DOM, may need navigation or different selector
470
+ * Loading → Wait for completion before interaction
84
471
 
85
472
  TIMEOUT GUIDELINES:
86
473
  - Only add explicit timeouts for slow operations (file uploads, large data loads)
@@ -98,10 +485,8 @@ export const PROMPTS = {
98
485
  Current State:
99
486
  - URL: ${pageInfo.url}
100
487
  - Title: ${pageInfo.title}
101
- - Page Structure: ${pageInfo.pageStructure}
102
- - Interactive Elements: ${pageInfo.interactiveElements}
103
- - Form Fields: ${pageInfo.formFields}
104
- - All Elements: ${pageInfo.elements}
488
+ - Interactive Elements:
489
+ ${pageInfo.formattedElements}
105
490
 
106
491
  Previous Commands:
107
492
  \`\`\`javascript
@@ -115,33 +500,142 @@ export const PROMPTS = {
115
500
  Step to execute: "${stepDescription}"`
116
501
  },
117
502
 
503
+ // Vision diagnostic analysis (supervisor reviewing screenshot)
504
+ VISION_DIAGNOSTIC_ANALYSIS: {
505
+ SYSTEM: 'You are a senior QA supervisor with vision capabilities. Analyze the screenshot AND DOM snapshot together to identify what went wrong and provide specific instructions with accurate selectors.',
506
+
507
+ USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Analyze screenshot + DOM snapshot to diagnose failures and provide specific instructions.
508
+
509
+ 🎯 GOAL: "${stepDescription}"
510
+
511
+ 📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
512
+ Correlate visual elements in screenshot with DOM structure below.
513
+
514
+ **DOM Snapshot:**
515
+ - URL: ${pageInfo.url}
516
+ - Title: ${pageInfo.title}
517
+ - Interactive Elements:
518
+ ${pageInfo.formattedElements}
519
+
520
+ **Previous Failed Attempts:**
521
+ ${previousCommands || 'None'}
522
+
523
+ **Errors:**
524
+ ${errorContext || 'None'}
525
+
526
+ **Your Task:**
527
+ 1. Look at screenshot - identify target element visually
528
+ 2. Look at DOM - find matching element in ARIA tree
529
+ 3. Check if element has ID or data-testid (best selectors)
530
+ 4. Provide EXACT selector from DOM
531
+
532
+ Respond with JSON:
533
+ {
534
+ "visualAnalysis": "I see...",
535
+ "rootCause": "Failed because...",
536
+ "specificInstructions": "Click element with [exact selector from DOM]...",
537
+ "recommendedApproach": "Use page.locator('[exact-attribute]')...",
538
+ "elementsFound": ["element with id='x'", "button[name='y']"],
539
+ "elementsNotFound": ["element worker looked for but doesn't exist"]
540
+ }`
541
+ },
542
+
543
+ // Playwright command generation with supervisor instructions
544
+ PLAYWRIGHT_COMMAND_WITH_SUPERVISOR: {
545
+ SYSTEM: 'You are a Playwright automation engineer. Your supervisor has analyzed a screenshot and provided specific instructions. Follow their instructions EXACTLY to generate the correct command.',
546
+
547
+ USER: (stepDescription: string, supervisorInstructions: string, supervisorAnalysis: string, elementsFound: string[], elementsNotFound: string[], pageInfo: any) => `Your supervisor has reviewed the screenshot and provided specific instructions. Follow them EXACTLY.
548
+
549
+ 🎯 ORIGINAL GOAL: "${stepDescription}"
550
+
551
+ 👔 SUPERVISOR'S VISUAL ANALYSIS:
552
+ ${supervisorAnalysis}
553
+
554
+ 📋 SUPERVISOR'S SPECIFIC INSTRUCTIONS:
555
+ ${supervisorInstructions}
556
+
557
+ ✅ ELEMENTS THAT EXIST (confirmed by supervisor from screenshot):
558
+ ${elementsFound.length > 0 ? elementsFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
559
+
560
+ ❌ ELEMENTS THAT DON'T EXIST (confirmed absent from screenshot):
561
+ ${elementsNotFound.length > 0 ? elementsNotFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
562
+
563
+ **YOUR TASK:**
564
+ Generate ONE Playwright command that implements the supervisor's instructions EXACTLY.
565
+
566
+ **CRITICAL RULES:**
567
+ 1. Follow supervisor's instructions to the letter
568
+ 2. ONLY use elements from "ELEMENTS THAT EXIST" list
569
+ 3. NEVER try to find elements from "ELEMENTS THAT DON'T EXIST" list
570
+ 4. Use the exact selectors/strategies supervisor recommended
571
+ 5. If supervisor said "don't verify X, check Y instead" - do exactly that
572
+
573
+ **Current DOM Context:**
574
+ - URL: ${pageInfo.url}
575
+ - Interactive Elements:
576
+ ${pageInfo.formattedElements}
577
+
578
+ Respond with JSON:
579
+ {
580
+ "command": "await page.locator('#exact-selector').click();",
581
+ "reasoning": "Following supervisor's instruction to [what you're doing]"
582
+ }`
583
+ },
584
+
585
+ // Legacy vision command generation (kept for compatibility)
586
+ PLAYWRIGHT_COMMAND_WITH_VISION: {
587
+ SYSTEM: 'Analyze screenshot + DOM together. Correlate visual elements with DOM to generate accurate Playwright commands with precise selectors.',
588
+
589
+ USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Vision mode: Correlate screenshot with DOM to generate command.
590
+
591
+ 🎯 GOAL: "${stepDescription}"
592
+
593
+ 📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
594
+ Correlate visual elements in screenshot with DOM structure below.
595
+
596
+ **DOM Snapshot:**
597
+ - URL: ${pageInfo.url}
598
+ - Title: ${pageInfo.title}
599
+ - Interactive Elements:
600
+ ${pageInfo.formattedElements}
601
+
602
+ **Previous Failed Attempts:**
603
+ ${previousCommands || 'None'}
604
+
605
+ **Errors:**
606
+ ${errorContext || 'None'}
607
+
608
+ **Your Task:**
609
+ 1. Look at screenshot - identify target element visually
610
+ 2. Look at DOM - find matching element in ARIA tree
611
+ 3. Check if element has ID or data-testid (best selectors)
612
+ 4. Generate command with EXACT selector from DOM
613
+
614
+ Respond with JSON:
615
+ {
616
+ "command": "await page.locator('[exact-selector-from-dom]').click();",
617
+ "reasoning": "Visual element matches DOM element with [attribute]",
618
+ "visualInsights": "I see [element] in screenshot",
619
+ "failureRootCause": "Previous failed because [reason]",
620
+ "recommendedAlternative": "Use [strategy]"
621
+ }`
622
+ },
623
+
118
624
  // Script parsing for AI repair
119
625
  SCRIPT_PARSING: {
120
- SYSTEM: 'You are an expert at parsing Playwright test scripts into logical steps. IGNORE doc comments at the top (/** ... */) as they are repair advice, not test steps. ALWAYS prioritize existing step comments over generating new ones. If the script has "// Step N:" comments, use those exactly as they are. Only generate new descriptions if no existing step comments are found. Be conservative and preserve exact code formatting.',
626
+ SYSTEM: 'Parse Playwright scripts into steps. Use existing // comments as step boundaries.',
121
627
 
122
- USER: (script: string) => `Parse this Playwright test script into logical steps. Be conservative and preserve the exact code.
628
+ USER: (script: string) => `Extract steps from this script.
123
629
 
124
- Instructions:
125
- 1. IGNORE any doc comments at the top of the script (e.g., /** ... */ or /* ... */) - these are repair advice and should not be parsed as steps
126
- 2. FIRST, look for existing step comments (e.g., "// Step 1:", "// Step 2:", etc.) and use those as step boundaries
127
- 3. If existing step comments are found, use them exactly as they are - do not modify or regenerate descriptions
128
- 4. If no existing step comments, then group related commands that work together logically
129
- 5. Preserve ALL code exactly as written - do not modify, reformat, or change any code
130
- 6. Each step should contain commands that belong together (e.g., navigation + wait, form filling, verification)
131
- 7. Keep steps focused and not too granular
132
-
133
- Script:
134
- ${script}
135
-
136
- Return JSON object with steps array:
137
- {
138
- "steps": [
139
- {
140
- "description": "use existing comment if available, otherwise create meaningful description",
141
- "code": "exact code from script - preserve all formatting and content"
142
- }
143
- ]
144
- }`
630
+ Find code INSIDE: test('...', async ({ page, browser, context }) => { ... })
631
+
632
+ Each // comment starts a new step. Use comment text (without //) as description.
633
+ Preserve code exactly.
634
+
635
+ Script:
636
+ ${script}
637
+
638
+ Return JSON: {"steps": [{"description": "...", "code": "..."}, ...]}`
145
639
  },
146
640
 
147
641
  // Repair suggestion
@@ -158,8 +652,8 @@ export const PROMPTS = {
158
652
  Current Page State:
159
653
  - URL: ${pageInfo.url}
160
654
  - Title: ${pageInfo.title}
161
- - Interactive Elements: ${pageInfo.interactiveElements}
162
- - Form Fields: ${pageInfo.formFields}
655
+ - Interactive Elements:
656
+ ${pageInfo.formattedElements}
163
657
 
164
658
  ${failureHistory}
165
659
 
@@ -250,5 +744,48 @@ export const PROMPTS = {
250
744
  {
251
745
  "script": "complete final script that can be pasted into the original file"
252
746
  }`
747
+ },
748
+
749
+ // Script cleanup (minor adjustments only)
750
+ SCRIPT_CLEANUP: {
751
+ SYSTEM: 'You are a Playwright test script reviewer. Your job is to do MINOR cleanup only - remove obvious redundancies, but preserve the core structure and logic.',
752
+
753
+ USER: (script: string) => `Review this generated Playwright test script and make MINOR adjustments only.
754
+
755
+ SCRIPT:
756
+ ${script}
757
+
758
+ YOUR TASK (MINOR ADJUSTMENTS ONLY):
759
+ 1. Remove duplicate/redundant expect() assertions (e.g., same assertion repeated twice)
760
+ 2. Remove duplicate step comments that have no code under them
761
+ 3. Fix obvious formatting issues (inconsistent spacing, etc.)
762
+ 4. Consolidate multiple identical assertions into one
763
+ 5. Remove any obviously redundant waits or checks
764
+
765
+ DO NOT:
766
+ - Change the test logic or flow
767
+ - Remove legitimate assertions
768
+ - Restructure the code
769
+ - Change selectors
770
+ - Add new functionality
771
+ - Remove important waits
772
+
773
+ EXAMPLES:
774
+
775
+ ❌ REMOVE redundancy:
776
+ await expect(page.getByText('Hello')).toBeVisible();
777
+ await expect(page.getByText('Hello')).toBeVisible(); // duplicate
778
+
779
+ ✅ KEEP legitimate checks:
780
+ await expect(page.getByPlaceholder('Message...')).toBeEmpty();
781
+ await page.getByPlaceholder('Message...').fill('Hello');
782
+ await expect(page.getByPlaceholder('Message...')).toHaveValue('Hello'); // different checks
783
+
784
+ Return JSON:
785
+ {
786
+ "script": "cleaned script (or original if no changes needed)",
787
+ "changes": ["list of minor changes made, or empty array if none"],
788
+ "skipped": "reason if you chose not to make changes"
789
+ }`
253
790
  }
254
791
  };