testchimp-runner-core 0.0.34 → 0.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/execution-service.d.ts +1 -4
  2. package/dist/execution-service.d.ts.map +1 -1
  3. package/dist/execution-service.js +155 -468
  4. package/dist/execution-service.js.map +1 -1
  5. package/dist/index.d.ts +3 -1
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +11 -1
  8. package/dist/index.js.map +1 -1
  9. package/dist/orchestrator/decision-parser.d.ts +18 -0
  10. package/dist/orchestrator/decision-parser.d.ts.map +1 -0
  11. package/dist/orchestrator/decision-parser.js +127 -0
  12. package/dist/orchestrator/decision-parser.js.map +1 -0
  13. package/dist/orchestrator/index.d.ts +4 -2
  14. package/dist/orchestrator/index.d.ts.map +1 -1
  15. package/dist/orchestrator/index.js +14 -2
  16. package/dist/orchestrator/index.js.map +1 -1
  17. package/dist/orchestrator/orchestrator-agent.d.ts +17 -14
  18. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -1
  19. package/dist/orchestrator/orchestrator-agent.js +534 -204
  20. package/dist/orchestrator/orchestrator-agent.js.map +1 -1
  21. package/dist/orchestrator/orchestrator-prompts.d.ts +14 -2
  22. package/dist/orchestrator/orchestrator-prompts.d.ts.map +1 -1
  23. package/dist/orchestrator/orchestrator-prompts.js +529 -247
  24. package/dist/orchestrator/orchestrator-prompts.js.map +1 -1
  25. package/dist/orchestrator/page-som-handler.d.ts +106 -0
  26. package/dist/orchestrator/page-som-handler.d.ts.map +1 -0
  27. package/dist/orchestrator/page-som-handler.js +1353 -0
  28. package/dist/orchestrator/page-som-handler.js.map +1 -0
  29. package/dist/orchestrator/som-types.d.ts +149 -0
  30. package/dist/orchestrator/som-types.d.ts.map +1 -0
  31. package/dist/orchestrator/som-types.js +87 -0
  32. package/dist/orchestrator/som-types.js.map +1 -0
  33. package/dist/orchestrator/tool-registry.d.ts +2 -0
  34. package/dist/orchestrator/tool-registry.d.ts.map +1 -1
  35. package/dist/orchestrator/tool-registry.js.map +1 -1
  36. package/dist/orchestrator/tools/index.d.ts +4 -1
  37. package/dist/orchestrator/tools/index.d.ts.map +1 -1
  38. package/dist/orchestrator/tools/index.js +7 -2
  39. package/dist/orchestrator/tools/index.js.map +1 -1
  40. package/dist/orchestrator/tools/refresh-som-markers.d.ts +12 -0
  41. package/dist/orchestrator/tools/refresh-som-markers.d.ts.map +1 -0
  42. package/dist/orchestrator/tools/refresh-som-markers.js +64 -0
  43. package/dist/orchestrator/tools/refresh-som-markers.js.map +1 -0
  44. package/dist/orchestrator/tools/view-previous-screenshot.d.ts +15 -0
  45. package/dist/orchestrator/tools/view-previous-screenshot.d.ts.map +1 -0
  46. package/dist/orchestrator/tools/view-previous-screenshot.js +92 -0
  47. package/dist/orchestrator/tools/view-previous-screenshot.js.map +1 -0
  48. package/dist/orchestrator/types.d.ts +23 -1
  49. package/dist/orchestrator/types.d.ts.map +1 -1
  50. package/dist/orchestrator/types.js +11 -1
  51. package/dist/orchestrator/types.js.map +1 -1
  52. package/dist/scenario-service.d.ts +5 -0
  53. package/dist/scenario-service.d.ts.map +1 -1
  54. package/dist/scenario-service.js +17 -0
  55. package/dist/scenario-service.js.map +1 -1
  56. package/dist/scenario-worker-class.d.ts +4 -0
  57. package/dist/scenario-worker-class.d.ts.map +1 -1
  58. package/dist/scenario-worker-class.js +18 -3
  59. package/dist/scenario-worker-class.js.map +1 -1
  60. package/dist/testing/agent-tester.d.ts +35 -0
  61. package/dist/testing/agent-tester.d.ts.map +1 -0
  62. package/dist/testing/agent-tester.js +84 -0
  63. package/dist/testing/agent-tester.js.map +1 -0
  64. package/dist/testing/ref-translator-tester.d.ts +44 -0
  65. package/dist/testing/ref-translator-tester.d.ts.map +1 -0
  66. package/dist/testing/ref-translator-tester.js +104 -0
  67. package/dist/testing/ref-translator-tester.js.map +1 -0
  68. package/dist/utils/hierarchical-selector.d.ts +47 -0
  69. package/dist/utils/hierarchical-selector.d.ts.map +1 -0
  70. package/dist/utils/hierarchical-selector.js +212 -0
  71. package/dist/utils/hierarchical-selector.js.map +1 -0
  72. package/dist/utils/page-info-retry.d.ts +14 -0
  73. package/dist/utils/page-info-retry.d.ts.map +1 -0
  74. package/dist/utils/page-info-retry.js +60 -0
  75. package/dist/utils/page-info-retry.js.map +1 -0
  76. package/dist/utils/page-info-utils.d.ts +1 -0
  77. package/dist/utils/page-info-utils.d.ts.map +1 -1
  78. package/dist/utils/page-info-utils.js +46 -18
  79. package/dist/utils/page-info-utils.js.map +1 -1
  80. package/dist/utils/ref-attacher.d.ts +21 -0
  81. package/dist/utils/ref-attacher.d.ts.map +1 -0
  82. package/dist/utils/ref-attacher.js +149 -0
  83. package/dist/utils/ref-attacher.js.map +1 -0
  84. package/dist/utils/ref-translator.d.ts +49 -0
  85. package/dist/utils/ref-translator.d.ts.map +1 -0
  86. package/dist/utils/ref-translator.js +276 -0
  87. package/dist/utils/ref-translator.js.map +1 -0
  88. package/package.json +6 -1
  89. package/RELEASE_0.0.26.md +0 -165
  90. package/RELEASE_0.0.27.md +0 -236
  91. package/RELEASE_0.0.28.md +0 -286
  92. package/plandocs/BEFORE_AFTER_VERIFICATION.md +0 -148
  93. package/plandocs/COORDINATE_MODE_DIAGNOSIS.md +0 -144
  94. package/plandocs/CREDIT_CALLBACK_ARCHITECTURE.md +0 -253
  95. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +0 -642
  96. package/plandocs/IMPLEMENTATION_STATUS.md +0 -108
  97. package/plandocs/INTEGRATION_COMPLETE.md +0 -322
  98. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +0 -844
  99. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +0 -539
  100. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +0 -241
  101. package/plandocs/PHASE1_FINAL_STATUS.md +0 -210
  102. package/plandocs/PHASE_1_COMPLETE.md +0 -165
  103. package/plandocs/PHASE_1_SUMMARY.md +0 -184
  104. package/plandocs/PLANNING_SESSION_SUMMARY.md +0 -372
  105. package/plandocs/PROMPT_OPTIMIZATION_ANALYSIS.md +0 -120
  106. package/plandocs/PROMPT_SANITY_CHECK.md +0 -120
  107. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +0 -201
  108. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +0 -364
  109. package/plandocs/SELECTOR_IMPROVEMENTS.md +0 -139
  110. package/plandocs/SESSION_SUMMARY_v0.0.33.md +0 -151
  111. package/plandocs/TROUBLESHOOTING_SESSION.md +0 -72
  112. package/plandocs/VISION_DIAGNOSTICS_IMPROVEMENTS.md +0 -336
  113. package/plandocs/VISUAL_AGENT_EVOLUTION_PLAN.md +0 -396
  114. package/plandocs/WHATS_NEW_v0.0.33.md +0 -183
  115. package/src/auth-config.ts +0 -84
  116. package/src/credit-usage-service.ts +0 -188
  117. package/src/env-loader.ts +0 -103
  118. package/src/execution-service.ts +0 -1413
  119. package/src/file-handler.ts +0 -104
  120. package/src/index.ts +0 -422
  121. package/src/llm-facade.ts +0 -821
  122. package/src/llm-provider.ts +0 -53
  123. package/src/model-constants.ts +0 -35
  124. package/src/orchestrator/index.ts +0 -34
  125. package/src/orchestrator/orchestrator-agent.ts +0 -862
  126. package/src/orchestrator/orchestrator-agent.ts.backup +0 -1386
  127. package/src/orchestrator/orchestrator-prompts.ts +0 -474
  128. package/src/orchestrator/tool-registry.ts +0 -182
  129. package/src/orchestrator/tools/check-page-ready.ts +0 -75
  130. package/src/orchestrator/tools/extract-data.ts +0 -92
  131. package/src/orchestrator/tools/index.ts +0 -12
  132. package/src/orchestrator/tools/inspect-page.ts +0 -42
  133. package/src/orchestrator/tools/recall-history.ts +0 -72
  134. package/src/orchestrator/tools/take-screenshot.ts +0 -128
  135. package/src/orchestrator/tools/verify-action-result.ts +0 -159
  136. package/src/orchestrator/types.ts +0 -248
  137. package/src/playwright-mcp-service.ts +0 -224
  138. package/src/progress-reporter.ts +0 -144
  139. package/src/prompts.ts +0 -842
  140. package/src/providers/backend-proxy-llm-provider.ts +0 -91
  141. package/src/providers/local-llm-provider.ts +0 -38
  142. package/src/scenario-service.ts +0 -232
  143. package/src/scenario-worker-class.ts +0 -1089
  144. package/src/script-utils.ts +0 -203
  145. package/src/types.ts +0 -239
  146. package/src/utils/browser-utils.ts +0 -348
  147. package/src/utils/coordinate-converter.ts +0 -162
  148. package/src/utils/page-info-utils.ts +0 -250
  149. package/testchimp-runner-core-0.0.33.tgz +0 -0
  150. package/tsconfig.json +0 -19
package/src/prompts.ts DELETED
@@ -1,842 +0,0 @@
1
- /**
2
- * All LLM prompts used throughout the application
3
- */
4
-
5
- export const PROMPTS = {
6
- // Test name generation
7
- TEST_NAME_GENERATION: {
8
- SYSTEM: 'You are an AI assistant that generates meaningful test names for user journey tests. Carefully analyze the scenario description to look for any hints, indicators, or explicit mentions of what this test should be called. Pay attention to phrases like "test", "scenario", "check", "verify", "flow", or any descriptive terms that suggest the test purpose. If you find such indicators, use them as the basis for the test name. Otherwise, analyze the overall user journey and business purpose. Generate a concise test name (under 30 characters) in camelCase format. Respond with a JSON object in this format: {"testName": "userJourneyName"}',
9
-
10
- USER: (scenario: string) => `Analyze this scenario description and generate a meaningful test name:\n\n"${scenario}"\n\nInstructions:\n1. Look for ANY hints or indicators in the text that suggest what this test should be called:\n - Explicit mentions: "Test: ...", "Scenario: ...", "Check: ...", "Verify: ..."\n - Descriptive phrases: "...flow", "...process", "...journey", "...workflow"\n - Action-focused terms: "login", "registration", "purchase", "messaging", "search"\n - Business context: "user onboarding", "checkout process", "team collaboration"\n2. If you find such indicators, use them as the basis for the test name\n3. If not found, analyze the user journey and business purpose\n4. Generate a concise name under 30 characters in camelCase\n\nExamples:\n- "Test: User login and messaging flow" -> "userLoginAndMessagingFlow"\n- "Checkout process with payment" -> "checkoutProcess"\n- "User registration and email verification" -> "userRegistration"\n- "Team messaging and collaboration" -> "teamMessaging"`
11
- },
12
-
13
- // Hashtag generation for semantic grouping
14
- HASHTAG_GENERATION: {
15
- SYSTEM: 'You are an AI assistant that generates relevant hashtags for test scenarios to enable semantic grouping across test files. Analyze the scenario description and generate 2-5 hashtags that capture the key aspects of the test such as functionality, user journey, or domain.',
16
-
17
- USER: (scenario: string) => `Analyze this scenario description and generate relevant hashtags for semantic grouping:\n\n"${scenario}"\n\nInstructions:\n1. Identify the main functionality being tested (e.g., #login, #checkout, #messaging)\n2. Identify the user journey type (e.g., #registration, #onboarding, #payment)\n3. Identify the domain/area (e.g., #ecommerce, #social, #admin)\n4. Identify any specific features (e.g., #search, #upload, #notification)\n5. Generate 2-5 hashtags that are:\n - Concise and descriptive\n - Lowercase with no spaces or special characters\n - Relevant for grouping similar tests\n\nExamples:\n- "User login and messaging flow" -> ["#login", "#messaging", "#social"]\n- "Checkout process with payment" -> ["#checkout", "#payment", "#ecommerce"]\n- "User registration and email verification" -> ["#registration", "#verification", "#onboarding"]\n\nRespond with JSON: {"hashtags": ["#tag1", "#tag2", "#tag3"]}`
18
- },
19
-
20
- // Scenario breakdown
21
- SCENARIO_BREAKDOWN: {
22
- SYSTEM: `Split user scenarios into individual steps. Copy each step exactly as provided. Do not add, expand, or modify.`,
23
-
24
- USER: (scenario: string) => `Split this into steps. Keep each step exactly as written.
25
-
26
- ${scenario}
27
-
28
- Return JSON: {"steps": ["step 1", "step 2", ...]}`
29
- },
30
-
31
- // Goal completion assessment
32
- GOAL_COMPLETION_CHECK: {
33
- SYSTEM: 'You are an expert test automation analyst. Evaluate whether a goal has been fully achieved. Be EXTREMELY CONSERVATIVE - mark goals complete when the PRIMARY action succeeds. DO NOT invent verification steps that were not explicitly requested. However, if the scenario explicitly specifies verification requirements, those MUST be completed and not skipped.',
34
-
35
- USER: (goalDescription: string, completedActions: string[], pageInfo: any) => `Analyze whether the following goal has been fully completed:
36
-
37
- GOAL: "${goalDescription}"
38
-
39
- COMPLETED ACTIONS IN THIS STEP:
40
- ${completedActions.map((action, idx) => `${idx + 1}. ${action}`).join('\n')}
41
-
42
- CURRENT PAGE STATE:
43
- - URL: ${pageInfo.url}
44
- - Title: ${pageInfo.title}
45
- - Interactive Elements:
46
- ${pageInfo.formattedElements}
47
-
48
- CRITICAL GUIDELINES - MARK COMPLETE AGGRESSIVELY:
49
-
50
- 1. **Action Goals vs Verification Goals**:
51
- - If goal is an ACTION (click, type, select, send, submit), mark COMPLETE after successful action
52
- - If goal is VERIFICATION (verify, check, ensure, assert), mark COMPLETE after assertion passes
53
- - NEVER add verification to action goals - if the goal doesn't mention verification, don't require it
54
- - HOWEVER: If verification is EXPLICITLY mentioned in the goal, it MUST be completed - do not skip it
55
-
56
- 2. **Understand Action Semantics** (what does the action verb really mean):
57
-
58
- Some actions are ATOMIC (one operation):
59
- - "Click X" → Just click
60
- - "Type X into field" → Just type
61
- - "Navigate to URL" → Just navigate
62
- - "Select option" → Just select
63
-
64
- Other actions imply a WORKFLOW with implicit final trigger:
65
- - ANY action verb that implies submission/sending/triggering
66
- - If the action includes data to provide, it usually implies using that data
67
- - If the action name is a business process (login, register, send, post, etc.), think about what the user expects to happen
68
-
69
- **General Pattern Recognition:**
70
-
71
- Ask yourself: "In normal usage, does [ACTION VERB] require a final trigger/button?"
72
- - "Login" → Yes, requires clicking a login/submit button after entering credentials
73
- - "Send" → Yes, requires clicking a send button after typing content
74
- - "Post" → Yes, requires clicking a post/publish button after entering content
75
- - "Search for X" → Yes, requires triggering search after entering search term
76
- - "Filter by X" → Maybe, depends on if filter auto-applies or needs button
77
- - "Fill in X" → No, just data entry unless goal says "fill AND submit"
78
-
79
- Mark COMPLETE when the BUSINESS ACTION is done from user perspective:
80
- - Not complete if you only prepared data (filled fields) but didn't trigger the action
81
- - Complete when the system would have processed/submitted/executed the action
82
-
83
- Examples:
84
- - "Login with credentials: X" → Incomplete until credentials submitted (button clicked)
85
- - "Send message: Y" → Incomplete until message sent (send button clicked)
86
- - "Fill in name field" → Complete after fill (no submission implied)
87
- - "Search for products" → Incomplete until search triggered
88
-
89
- Think: "From a user's perspective, is the action done?" not "Did I type the data?"
90
-
91
- 3. **Multi-part Goals** (explicit multiple requirements):
92
- - "Fill in ALL fields" → Need multiple fills for each field
93
- - "Click submit AND verify success message appears" → Need both click + explicit verification
94
- - Goals with explicit "and" requiring multiple distinct actions
95
-
96
- 4. **NEVER Create Hallucinated Verification Sub-goals, BUT Honor Explicit Verification Requirements**:
97
- - Don't invent verification steps that weren't in the original goal
98
- - Don't look for confirmation messages unless goal explicitly asks for them
99
- - Don't check for success indicators unless goal explicitly requires verification
100
- - Trust Playwright's execution - if action succeeded without error, it worked
101
- - Action success IS the completion criteria for action goals
102
- - CRITICAL: If the goal explicitly says "verify", "check", "ensure", "confirm" something, that verification MUST be completed
103
-
104
- 5. **State Changes After Actions Are SUCCESS, Not Failure**:
105
- - Button becomes disabled after click → SUCCESS (expected behavior)
106
- - Form clears after submit → SUCCESS (expected behavior)
107
- - Page navigates after action → SUCCESS (expected behavior)
108
- - Element disappears after interaction → SUCCESS (expected behavior)
109
-
110
- 6. **What "nextSubGoal" Should Look Like**:
111
- - For "Fill in all fields" with 5 fields, if 2 filled: "Fill in the remaining 3 fields" ✅
112
- - For "Click submit AND verify", if clicked but not verified: "Verify the success message appears" ✅
113
- - For "Click send button" after click succeeds: NO nextSubGoal - COMPLETE ✅
114
- - DON'T create nextSubGoal for verification unless goal explicitly asks for it ❌
115
-
116
- CRITICAL - Preserve specific values in nextSubGoal:
117
- - Original: "Login with credentials: admin, pass123" (username filled, password not)
118
- ✅ nextSubGoal: "Enter password: pass123"
119
- ❌ NOT: "Complete the login" (loses the password value!)
120
-
121
- - Original: "Enter user details: Name: John, Email: john@test.com" (name done, email not)
122
- ✅ nextSubGoal: "Enter email: john@test.com"
123
- ❌ NOT: "Enter email address" (loses specific email!)
124
-
125
- Examples:
126
-
127
- ✅ PURE ACTION GOALS (no verification in description - complete after action):
128
- - Goal: "Click the send button" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
129
- - Goal: "Enter email address" + Action: fill() succeeded → COMPLETE ✅ (no verification needed)
130
- - Goal: "Navigate to dashboard" + Action: goto() succeeded → COMPLETE ✅ (no verification needed)
131
- - Goal: "Submit the form" + Action: click() succeeded → COMPLETE ✅ (no verification needed)
132
-
133
- ⏳ GOALS WITH EXPLICIT VERIFICATION (must complete BOTH action AND verification):
134
- - Goal: "Click send and verify message sent" + Action: click() succeeded → INCOMPLETE ⏳ nextSubGoal: "Verify message sent confirmation"
135
- - Goal: "Submit form and check for success message" + Action: submit clicked → INCOMPLETE ⏳ nextSubGoal: "Check for success message"
136
- - Goal: "Login and verify dashboard appears" + Action: login completed → INCOMPLETE ⏳ nextSubGoal: "Verify dashboard appears"
137
-
138
- ✅ PURE VERIFICATION GOALS (complete after verification):
139
- - Goal: "Verify page title is correct" + Action: assertion passed → COMPLETE ✅
140
- - Goal: "Check that the error message is displayed" + Action: assertion passed → COMPLETE ✅
141
- - Goal: "Ensure user is logged in" + Action: assertion passed → COMPLETE ✅
142
-
143
- ⏳ MULTI-STEP ACTION GOALS (complete all parts):
144
- - Goal: "Fill in all required fields" + Action: filled 2 of 5 → INCOMPLETE ⏳ nextSubGoal: "Fill in remaining 3 fields"
145
-
146
- GOLDEN RULE:
147
- - If the goal is a SIMPLE ACTION and that action SUCCEEDED, mark COMPLETE immediately
148
- - Don't hallucinate verification requirements that weren't explicitly requested
149
- - BUT if verification IS explicitly mentioned in the goal, it MUST be completed before marking COMPLETE
150
- - Only verify what is instructed to be verified, nothing more, nothing less
151
-
152
- Respond with JSON:
153
- {
154
- "isComplete": true/false,
155
- "reason": "brief explanation - if action succeeded and goal was just the action, mark complete",
156
- "nextSubGoal": "ONLY if goal has multiple parts and not all parts done yet - must be based on ACTUAL goal requirements, not invented verification"
157
- }`
158
- },
159
-
160
- // Screenshot need assessment
161
- SCREENSHOT_NEED_ASSESSMENT: {
162
- SYSTEM: 'You are an expert test automation analyst. Determine if visual screenshot analysis is ABSOLUTELY NECESSARY to solve this failure. Vision mode is expensive (GPT-4o), so only recommend when there is CLEAR diagnostic value that DOM cannot provide.',
163
-
164
- USER: (stepDescription: string, errorMessage: string, attemptCount: number, pageInfo: any) => `After 2 failures, determine if VISION MODE is absolutely necessary. This is the ONLY chance to use vision.
165
-
166
- STEP: "${stepDescription}"
167
- ERROR: "${errorMessage}"
168
- ATTEMPT: ${attemptCount} (after ${attemptCount - 1} failures - vision can diagnose the issue)
169
-
170
- CURRENT DOM INFO AVAILABLE:
171
- - URL: ${pageInfo.url}
172
- - Interactive Elements:
173
- ${pageInfo.formattedElements}
174
-
175
- 🎯 VISION MODE - USE SPARINGLY (Expensive GPT-4o):
176
-
177
- Vision provides diagnostic value ONLY when DOM information is truly insufficient.
178
-
179
- ✅ **RECOMMEND SCREENSHOT only for these HIGH-VALUE cases:**
180
-
181
- 1. **Suspected Element Hallucination** (HIGH priority):
182
- - Previous attempts tried getByText/toBeVisible for elements that might not exist
183
- - Error: "not found" or "timeout" on verification attempts
184
- - Visual will definitively show if elements exist or if we need alternative verification
185
-
186
- 2. **Visual-Only Elements**:
187
- - Icons, images, or visual indicators without text/roles
188
- - Elements identified by position ("button on the right")
189
- - Shadow DOM or complex component structures
190
-
191
- 3. **Visual Blockers**:
192
- - Overlays, modals, or popups blocking interactions
193
- - Z-index issues preventing clicks
194
- - Scrolling problems that DOM doesn't reveal
195
-
196
- ❌ **DO NOT RECOMMEND SCREENSHOT when:**
197
- - **Strict mode violations / multiple matches** - Accessibility tree shows duplicates, use DOM info to scope selector
198
- - Simple selector errors (try different selector strategies first)
199
- - Navigation issues (URL problems are not visual)
200
- - Invalid Playwright API (syntax errors)
201
- - Timing issues that can be solved with better waits
202
- - DOM clearly shows the solution (IDs, data-testid available)
203
- - Error has obvious DOM-based fix
204
-
205
- **Conservative Assessment Required:**
206
- - Vision mode is EXPENSIVE (uses GPT-4o)
207
- - This is the ONLY chance (attempt 3 of 4)
208
- - Only recommend if DOM truly cannot solve it
209
- - If in doubt, suggest DOM-based alternative instead
210
-
211
- Respond with JSON:
212
- {
213
- "needsScreenshot": true/false,
214
- "reason": "If true: [specific diagnostic value vision provides]. If false: [why DOM-based approach is sufficient]",
215
- "alternativeApproach": "REQUIRED if needsScreenshot=false: [specific DOM-based solution to try next]"
216
- }
217
-
218
- Remember: Default to NO unless there's compelling evidence that visual analysis is the ONLY way to solve this.`
219
- },
220
-
221
- // Playwright command generation
222
- PLAYWRIGHT_COMMAND: {
223
- SYSTEM: 'You are an expert Playwright automation engineer with strong self-awareness and problem-solving skills. You understand cause-and-effect, learn from your own actions, and can reason about application state changes.',
224
-
225
- USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `You are working to achieve a specific goal. Generate ONE precise Playwright command that makes progress.
226
-
227
- 🎯 CURRENT GOAL: "${stepDescription}"
228
-
229
- 📋 WHAT YOU'VE ALREADY DONE IN THIS STEP:
230
- ${previousCommands || 'Nothing yet - this is the first action for this goal'}
231
-
232
- ${errorContext ? `⚠️ PREVIOUS ATTEMPT FAILED:\n${errorContext}\n` : ''}
233
- ${attemptHistory ? `📊 ALL ATTEMPTS SO FAR:\n${attemptHistory}\n` : ''}
234
-
235
- 🧠 SELF-AWARENESS & REASONING:
236
-
237
- 1. **Analyze Your Own Actions**:
238
- - Review what you've ALREADY done in this step above
239
- - Did your previous actions CAUSE the current state?
240
- - Ask: "What is the LOGICAL consequence of what I just did?"
241
- - Understand that your actions change the application state
242
-
243
- 2. **Understand Cause & Effect**:
244
- - Element state changed? → Did YOUR previous action cause it?
245
- - Element not found? → Did YOUR action remove it or navigate away?
246
- - Validation error? → Did YOUR action trigger it (empty field, wrong format)?
247
- - Before retrying, ask: "Is this the EXPECTED result of my actions?"
248
-
249
- 3. **Self-Correction Logic**:
250
- - If you caused the problem → Fix it (don't just retry)
251
- - If you achieved the goal (even with side effects) → Move on!
252
- - If you're stuck in a loop → You're fighting expected behavior, change approach
253
- - Don't undo successful work or fight against normal state transitions
254
-
255
- 4. **Smart Recovery**:
256
- - Element not ready/unavailable → Identify what prerequisite is missing, complete it first
257
- - Element not found → Distinguish between: your action removed it (success) vs genuine error
258
- - Multiple failures on same approach → Fundamentally rethink strategy, don't iterate blindly
259
- - Stuck in retry loop → Step back, analyze root cause, try completely different approach
260
-
261
- 5. **NEVER Hallucinate Verification Elements**:
262
- - ONLY verify elements that ACTUALLY EXIST in the current DOM state
263
- - Check the "CURRENT PAGE STATE" section for what elements are available
264
- - Don't look for "success message", "confirmation text", or "sent message" unless you see them in the DOM
265
- - Don't invent text patterns or regex for elements that don't exist
266
- - If verification is needed but element doesn't exist, use alternative methods:
267
- * Check for state changes (button disabled, form cleared, URL changed)
268
- * Wait for page load state changes
269
- * Check for element detachment/attachment
270
- * Use waitForResponse for network verification
271
- - When previous attempts failed looking for non-existent elements, STOP trying to find them
272
-
273
- 6. **Navigation and Redirects** (CRITICAL):
274
-
275
- Handle redirects properly - DON'T keep retrying original URL if navigation succeeded:
276
-
277
- - For navigation, use explicit 10-second timeout (default is 5s, too short for redirects):
278
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 })
279
-
280
- - Why longer timeout for navigation:
281
- * Redirects take extra time
282
- * Initial page loads are slower
283
- * Default 5s timeout is for fast element operations only
284
-
285
- - If navigation times out or throws "execution context destroyed":
286
- * CHECK CURRENT URL FIRST: const currentUrl = page.url()
287
- * If URL changed from about:blank → Navigation SUCCEEDED (even if redirected)
288
- * DON'T retry goto() if already on a page
289
- * Proceed with next step
290
-
291
- - Navigation succeeded if:
292
- * page.url() is NOT 'about:blank'
293
- * page.url() changed from previous URL
294
- * Even if different from target URL (redirects are normal)
295
-
296
- - Only retry navigation if:
297
- * page.url() is still 'about:blank' or previous URL
298
- * AND no redirect happened
299
-
300
- 7. **Real-World Web App Resilience**:
301
-
302
- Common Interruptions (handle gracefully):
303
- - Cookie consent banners → Dismiss if blocking main UI (look for "Accept", "OK", "Close")
304
- - Modal popups → Close if not relevant to current goal (look for X button, "Dismiss")
305
- - Page refreshes → Re-find elements, don't assume page state persists
306
- - Loading states → Wait for content, check for spinners/loading indicators
307
- - Overlays → Dismiss or wait for them to disappear before proceeding
308
-
309
- Detection Patterns:
310
- - If element suddenly not found → Check if overlay/modal appeared
311
- - If click fails → Check if cookie banner is blocking element
312
- - If page URL changed unexpectedly → Handle redirect/refresh gracefully
313
- - If timeout occurs → Check for loading indicators, wait for them to disappear
314
-
315
- Resilience Strategies:
316
- - Before critical interactions, check for and dismiss blocking overlays
317
- - After page loads, wait for dynamic content (networkidle, specific elements)
318
- - If element covered/blocked, look for overlay and close it
319
- - Use flexible selectors that work across page refreshes
320
- - Add waits for elements that load dynamically
321
-
322
- Examples:
323
- - If cookie banner present: await page.getByRole('button', {name: /accept|ok|agree/i}).click();
324
- - If modal blocking: await page.getByRole('button', {name: /close|dismiss|x/i}).click();
325
- - After action that might refresh: await page.waitForLoadState('domcontentloaded');
326
- - For dynamic content: await page.getByText('expected content').waitFor();
327
-
328
- 8. **Use Specific Values from Goal Description**:
329
-
330
- CRITICAL: Extract and use exact values mentioned in the goal.
331
-
332
- Examples:
333
- - Goal: "Login with credentials: alice, pass123"
334
- ✅ Use: await page.fill('username', 'alice'); await page.fill('password', 'pass123');
335
- ❌ NOT: await page.fill('username', process.env.USERNAME);
336
-
337
- - Goal: "Enter name: John Doe"
338
- ✅ Use: await page.fill('[name="name"]', 'John Doe');
339
- ❌ NOT: await page.fill('[name="name"]', 'Test User');
340
-
341
- Apply this to ANY specific value in the goal (amounts, dates, selections, text, etc.).
342
-
343
- NEVER:
344
- - Replace specific values with environment variables
345
- - Replace specific values with generic test data
346
- - Hallucinate different values than what's in the goal
347
- - Use process.env, config, or placeholder values
348
-
349
- Be creative ONLY when goal doesn't specify values:
350
- - "Login with valid credentials" → Infer reasonable values
351
- - "Login with credentials: admin, pass123" → Use EXACTLY those values
352
-
353
- GOAL-ORIENTED APPROACH:
354
- - What needs to be done to achieve this goal?
355
- - Have I ALREADY done parts of this? (check "WHAT YOU'VE ALREADY DONE")
356
- - If yes, what's the NEXT logical action?
357
- - If retrying after failure, WHY did it fail? Did I cause it?
358
- - Is something blocking the UI? (cookie banner, modal, overlay)
359
- - Extract any specific values from the goal and use them EXACTLY
360
-
361
- 9. **Strict Mode Violations & Multiple Matches** (CRITICAL):
362
-
363
- Playwright throws "strict mode violation" when a selector matches MULTIPLE elements.
364
-
365
- **PROACTIVE DETECTION** - Check BEFORE generating command:
366
- - Review the "CURRENT PAGE STATE" section below (accessibility tree / aria snapshot)
367
- - Look for duplicate elements with same role/text (e.g., multiple links with "Settings")
368
- - If duplicates exist, generate a MORE SPECIFIC selector from the start
369
- - Don't wait for strict mode error - prevent it by analyzing the DOM structure
370
-
371
- 🚨 ERROR PATTERNS:
372
- - "strict mode violation" → Your selector matched multiple elements
373
- - "Multiple elements found" → Same issue
374
- - "locator resolved to 2 elements" → Often one is a tooltip/popover with duplicate text
375
- - Command chain with multiple strategies → Sign of selector problems
376
-
377
- ✅ SOLUTIONS (in order of preference):
378
-
379
- **Option 1: Use Role-Based Selectors** (BEST - avoids tooltips):
380
- - BAD: page.getByText('Settings').click() → Matches button AND its tooltip
381
- - GOOD: page.getByRole('button', { name: 'Settings' }).click() → Only matches button role
382
- - GOOD: page.locator('button').getByText('Settings').click() → Scoped to button tag
383
- - GOOD: page.locator('[role="button"]').getByText('Settings').click() → Scoped to role
384
-
385
- **Option 2: Scope to Container**:
386
- - BAD: page.locator('a', { hasText: 'Settings' }).click() → Matches multiple links
387
- - GOOD: page.locator('nav a', { hasText: 'Settings' }).click() → Scoped to nav
388
- - GOOD: page.locator('a[href*="/settings"]').click() → Use unique attribute
389
-
390
- **Option 3: Use Position-Based Selection**:
391
- - If multiple matches are expected: page.locator('a', { hasText: 'Settings' }).first().click()
392
- - Or use: .nth(0) for first, .last() for last
393
-
394
- **Option 4: Filter by Visibility/State**:
395
- - page.locator('button', { hasText: 'Submit' }).filter({ hasNotText: 'Draft' }).click()
396
-
397
- 🚫 **ANTI-PATTERNS (DON'T DO THIS)**:
398
- - BAD: Chaining multiple selector strategies in one command with semicolons
399
- - BAD: Using page.evaluate() to find/click elements (defeats Playwright's auto-waiting)
400
- - GOOD: ONE clear, specific selector like page.locator('nav a', { hasText: 'Settings' }).click()
401
-
402
- **When You See Strict Mode Errors:**
403
- 1. Analyze - Why did my selector match multiple elements?
404
- 2. Narrow Down - Add parent context (nav, sidebar, header)
405
- 3. Combine - Use multiple attributes (role + text, class + href)
406
- 4. Position - If truly ambiguous, use .first() or .nth()
407
- 5. NEVER - Chain multiple selector attempts or use page.evaluate()
408
-
409
- **Key Principle:**
410
- - ONE command = ONE clear selector strategy
411
- - Don't hedge your bets with multiple approaches
412
- - Trust Playwright's auto-waiting and built-in selectors
413
-
414
- CRITICAL RULES:
415
- - Generate ONLY ONE command that moves toward the goal
416
- - NEVER undo your own successful work (don't clear fields you just filled!)
417
- - If previous attempts failed, analyze WHY before trying a different approach
418
- - Learn from failures and your own action history
419
- - Use the most reliable selectors (prefer getByTestId and unique IDs when available, then getByRole, getByText, getByLabel)
420
- - Trust Playwright's auto-waiting - if click succeeded, it worked!
421
- - If strict mode violation: Make selector MORE SPECIFIC or use .first()
422
- - Generate ONE clear command, not multiple chained selector attempts
423
-
424
- ELEMENT SELECTION PRIORITY:
425
- 1. getByTestId() - BEST if data-testid is available (most stable, designed for tests)
426
- 2. locator('#id') - EXCELLENT if element has unique ID (stable, direct targeting)
427
- 3. getByRole() - Very reliable for interactive elements (semantic)
428
- 4. getByText() - For text content (good for unique text)
429
- 5. getByLabel() - For form inputs (semantic)
430
- 6. getByPlaceholder() - For input placeholders
431
- 7. locator() with CSS classes - Last resort (brittle, changes frequently)
432
-
433
- COMMON PATTERNS (prefer IDs/data-testid when available):
434
- - Navigation: await page.goto('url')
435
- - Click with testid: await page.getByTestId('submit-btn').click()
436
- - Click with ID: await page.locator('#login-button').click()
437
- - Click with role: await page.getByRole('button', { name: 'text' }).click()
438
- - Type with testid: await page.getByTestId('username-input').fill('text')
439
- - Type with ID: await page.locator('#email').fill('text')
440
- - Type with role: await page.getByRole('textbox', { name: 'label' }).fill('text')
441
- - Wait: await page.waitForLoadState('networkidle')
442
- - Verify: await expect(page).toHaveTitle(/expected/)
443
-
444
- IMPORTANT: Use IDs/data attributes in COMMANDS, but keep goal descriptions semantic!
445
-
446
- CODE STYLE GUIDELINES:
447
- - Keep commands concise and clean
448
- - Avoid explicit timeouts unless necessary
449
- - Use Playwright's built-in auto-waiting
450
- - Only add timeouts for specific slow operations
451
- - Prefer single-line commands when possible
452
-
453
- VALID PLAYWRIGHT API REFERENCE:
454
- - locator.waitFor({ state: 'visible'|'hidden'|'attached'|'detached' }) - ONLY these states
455
- - NEVER use waitFor({ state: 'enabled' }) - THIS IS INVALID
456
- - For disabled elements: Use page.waitForFunction() with DOM check
457
- - CSS selectors: Standard CSS only (no :has-text(), :enabled pseudo-classes)
458
- - Playwright pseudo-selectors only work in locator(), NOT in querySelector()
459
-
460
- RETRY STRATEGIES:
461
- - Timeout errors: Add waitFor() or increase timeout, check for loading states
462
- - Not found errors: Try different selectors, wait for element, or check if DOM changed
463
- - Not visible errors: Scroll into view, dismiss overlays, or wait for visibility
464
- - Not enabled/Disabled errors: Identify and complete prerequisites that enable the element
465
- - Detached errors: Element removed from DOM, refetch or use different selector
466
- - Covered/Blocked errors: Close overlays, modals, or popups blocking the element
467
-
468
- ELEMENT STATE AWARENESS:
469
- - Element disabled/inactive? → Identify and complete the prerequisite (fill required fields, check boxes, select options)
470
- - Interacting with unavailable elements ALWAYS fails → Enable/prepare element state first
471
- - Review your action history → Did you reverse a prerequisite? Complete it again before proceeding
472
- - Different element states need different handling:
473
- * Disabled → Complete prerequisites (validation, required fields, agreements)
474
- * Hidden/Not visible → Scroll, dismiss overlays, or wait for visibility
475
- * Detached → Element removed from DOM, may need navigation or different selector
476
- * Loading → Wait for completion before interaction
477
-
478
- TIMEOUT GUIDELINES:
479
- - Only add explicit timeouts for slow operations (file uploads, large data loads)
480
- - Use page.waitForLoadState('networkidle') for page navigation
481
- - Use element.waitFor() only when waiting for specific conditions
482
- - Let Playwright's auto-waiting handle most interactions
483
-
484
- Respond with JSON:
485
- {
486
- "command": "await page.goto('https://www.google.com');",
487
- "reasoning": "Direct navigation to target URL",
488
- "selectorStrategy": "direct_navigation"
489
- }
490
-
491
- Current State:
492
- - URL: ${pageInfo.url}
493
- - Title: ${pageInfo.title}
494
- - Interactive Elements:
495
- ${pageInfo.formattedElements}
496
-
497
- Previous Commands:
498
- \`\`\`javascript
499
- ${previousCommands}
500
- \`\`\`
501
-
502
- ${attemptHistory}
503
-
504
- ${errorContext}
505
-
506
- Step to execute: "${stepDescription}"`
507
- },
508
-
509
- // Vision diagnostic analysis (supervisor reviewing screenshot)
510
- VISION_DIAGNOSTIC_ANALYSIS: {
511
- SYSTEM: 'You are a senior QA supervisor with vision capabilities. Analyze the screenshot AND DOM snapshot together to identify what went wrong and provide specific instructions with accurate selectors.',
512
-
513
- USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Analyze screenshot + DOM snapshot to diagnose failures and provide specific instructions.
514
-
515
- 🎯 GOAL: "${stepDescription}"
516
-
517
- 📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
518
- Correlate visual elements in screenshot with DOM structure below.
519
-
520
- **DOM Snapshot:**
521
- - URL: ${pageInfo.url}
522
- - Title: ${pageInfo.title}
523
- - Interactive Elements:
524
- ${pageInfo.formattedElements}
525
-
526
- **Previous Failed Attempts:**
527
- ${previousCommands || 'None'}
528
-
529
- **Errors:**
530
- ${errorContext || 'None'}
531
-
532
- **Your Task:**
533
- 1. Look at screenshot - identify target element visually
534
- 2. Look at DOM - find matching element in ARIA tree
535
- 3. Check if element has ID or data-testid (best selectors)
536
- 4. Provide EXACT selector from DOM
537
-
538
- Respond with JSON:
539
- {
540
- "visualAnalysis": "I see...",
541
- "rootCause": "Failed because...",
542
- "specificInstructions": "Click element with [exact selector from DOM]...",
543
- "recommendedApproach": "Use page.locator('[exact-attribute]')...",
544
- "elementsFound": ["element with id='x'", "button[name='y']"],
545
- "elementsNotFound": ["element worker looked for but doesn't exist"]
546
- }`
547
- },
548
-
549
- // Playwright command generation with supervisor instructions
550
- PLAYWRIGHT_COMMAND_WITH_SUPERVISOR: {
551
- SYSTEM: 'You are a Playwright automation engineer. Your supervisor has analyzed a screenshot and provided specific instructions. Follow their instructions EXACTLY to generate the correct command.',
552
-
553
- USER: (stepDescription: string, supervisorInstructions: string, supervisorAnalysis: string, elementsFound: string[], elementsNotFound: string[], pageInfo: any) => `Your supervisor has reviewed the screenshot and provided specific instructions. Follow them EXACTLY.
554
-
555
- 🎯 ORIGINAL GOAL: "${stepDescription}"
556
-
557
- 👔 SUPERVISOR'S VISUAL ANALYSIS:
558
- ${supervisorAnalysis}
559
-
560
- 📋 SUPERVISOR'S SPECIFIC INSTRUCTIONS:
561
- ${supervisorInstructions}
562
-
563
- ✅ ELEMENTS THAT EXIST (confirmed by supervisor from screenshot):
564
- ${elementsFound.length > 0 ? elementsFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
565
-
566
- ❌ ELEMENTS THAT DON'T EXIST (confirmed absent from screenshot):
567
- ${elementsNotFound.length > 0 ? elementsNotFound.map((el, i) => `${i + 1}. ${el}`).join('\n') : 'None specified'}
568
-
569
- **YOUR TASK:**
570
- Generate ONE Playwright command that implements the supervisor's instructions EXACTLY.
571
-
572
- **CRITICAL RULES:**
573
- 1. Follow supervisor's instructions to the letter
574
- 2. ONLY use elements from "ELEMENTS THAT EXIST" list
575
- 3. NEVER try to find elements from "ELEMENTS THAT DON'T EXIST" list
576
- 4. Use the exact selectors/strategies supervisor recommended
577
- 5. If supervisor said "don't verify X, check Y instead" - do exactly that
578
-
579
- **Current DOM Context:**
580
- - URL: ${pageInfo.url}
581
- - Interactive Elements:
582
- ${pageInfo.formattedElements}
583
-
584
- Respond with JSON:
585
- {
586
- "command": "await page.locator('#exact-selector').click();",
587
- "reasoning": "Following supervisor's instruction to [what you're doing]"
588
- }`
589
- },
590
-
591
- // Legacy vision command generation (kept for compatibility)
592
- PLAYWRIGHT_COMMAND_WITH_VISION: {
593
- SYSTEM: 'Analyze screenshot + DOM together. Correlate visual elements with DOM to generate accurate Playwright commands with precise selectors.',
594
-
595
- USER: (stepDescription: string, pageInfo: any, previousCommands: string, attemptHistory: string, errorContext: string) => `Vision mode: Correlate screenshot with DOM to generate command.
596
-
597
- 🎯 GOAL: "${stepDescription}"
598
-
599
- 📸 SCREENSHOT + 🌳 DOM SNAPSHOT:
600
- Correlate visual elements in screenshot with DOM structure below.
601
-
602
- **DOM Snapshot:**
603
- - URL: ${pageInfo.url}
604
- - Title: ${pageInfo.title}
605
- - Interactive Elements:
606
- ${pageInfo.formattedElements}
607
-
608
- **Previous Failed Attempts:**
609
- ${previousCommands || 'None'}
610
-
611
- **Errors:**
612
- ${errorContext || 'None'}
613
-
614
- **Your Task:**
615
- 1. Look at screenshot - identify target element visually
616
- 2. Look at DOM - find matching element in ARIA tree
617
- 3. Check if element has ID or data-testid (best selectors)
618
- 4. Generate command with EXACT selector from DOM
619
-
620
- Respond with JSON:
621
- {
622
- "command": "await page.locator('[exact-selector-from-dom]').click();",
623
- "reasoning": "Visual element matches DOM element with [attribute]",
624
- "visualInsights": "I see [element] in screenshot",
625
- "failureRootCause": "Previous failed because [reason]",
626
- "recommendedAlternative": "Use [strategy]"
627
- }`
628
- },
629
-
630
- // Script parsing for AI repair
631
- SCRIPT_PARSING: {
632
- SYSTEM: 'Parse Playwright scripts into steps. Use existing // comments as step boundaries.',
633
-
634
- USER: (script: string) => `Extract steps from this script.
635
-
636
- Find code INSIDE: test('...', async ({ page, browser, context }) => { ... })
637
-
638
- Each // comment starts a new step. Use comment text (without //) as description.
639
- Preserve code exactly.
640
-
641
- Script:
642
- ${script}
643
-
644
- Return JSON: {"steps": [{"description": "...", "code": "..."}, ...]}`
645
- },
646
-
647
- // Repair suggestion
648
- REPAIR_SUGGESTION: {
649
- SYSTEM: 'You are an expert test automation engineer specializing in fixing failing Playwright tests. Analyze the current DOM state, error message, and step description to suggest the best repair action. Consider the failure history to avoid repeating the same mistakes. CRITICAL: Only use valid Playwright API methods.',
650
-
651
- USER: (stepDescription: string, stepCode: string, errorMessage: string, pageInfo: any, failureHistory: string, recentRepairs: string) => `Analyze this failing Playwright test step and suggest a repair action.
652
-
653
- Current Step:
654
- Description: ${stepDescription}
655
- Code: ${stepCode}
656
- Error: ${errorMessage}
657
-
658
- Current Page State:
659
- - URL: ${pageInfo.url}
660
- - Title: ${pageInfo.title}
661
- - Interactive Elements:
662
- ${pageInfo.formattedElements}
663
-
664
- ${failureHistory}
665
-
666
- ${recentRepairs}
667
-
668
- VALID PLAYWRIGHT API METHODS (DO NOT use methods not listed here):
669
-
670
- Locator Methods:
671
- - .click({ force: true }) // Force click even if element is covered
672
- - .click({ timeout: 10000 }) // Increase timeout
673
- - .fill(value)
674
- - .type(value, { delay: 100 }) // Type with delay
675
- - .press('Enter')
676
- - .selectOption(value)
677
- - .check() / .uncheck()
678
- - .scrollIntoViewIfNeeded() // NOT scrollIntoView() - that doesn't exist!
679
- - .waitFor({ state: 'visible' })
680
- - .waitFor({ state: 'attached' })
681
- - .isVisible()
682
- - .isEnabled()
683
- - .count()
684
- - .first() / .last() / .nth(index)
685
- - .filter({ hasText: 'text' })
686
-
687
- Page Methods:
688
- - page.goto(url, { waitUntil: 'networkidle' })
689
- - page.waitForLoadState('networkidle')
690
- - page.waitForTimeout(ms)
691
- - page.evaluate(() => window.scrollBy(0, 500)) // Scroll page
692
- - page.getByRole(role, { name: 'text' })
693
- - page.getByText('text')
694
- - page.getByLabel('text')
695
- - page.getByPlaceholder('text')
696
- - page.getByTestId('id')
697
- - page.locator('selector')
698
-
699
- COMMON FIXES FOR "element outside viewport":
700
- - Use { force: true } option: await locator.click({ force: true });
701
- - Scroll page first: await page.evaluate(() => window.scrollBy(0, 300));
702
- - Wait longer: await locator.click({ timeout: 10000 });
703
- - Click parent/container instead if element has overlays
704
-
705
- INVALID METHODS TO AVOID:
706
- - ❌ .scrollIntoView() - DOES NOT EXIST (use .scrollIntoViewIfNeeded() instead)
707
- - ❌ .scrollIntoView({ behavior: 'smooth' }) - WRONG API
708
- - ❌ Any jQuery methods - This is Playwright, not jQuery
709
-
710
- Choose the best repair action:
711
- 1. MODIFY - Fix the current step with better selectors, waits, or logic
712
- 2. INSERT - Add a new step before the current one (e.g., wait for element, handle popups)
713
- 3. REMOVE - Skip this step entirely if it's not essential
714
-
715
- IMPORTANT: Your repair code MUST use ONLY valid Playwright API methods listed above!
716
-
717
- Respond with JSON:
718
- {
719
- "shouldContinue": true/false,
720
- "reason": "explanation of decision",
721
- "action": {
722
- "operation": "MODIFY|INSERT|REMOVE",
723
- "newStep": {
724
- "description": "step description",
725
- "code": "await page.getByRole('button', { name: 'Submit' }).click({ force: true });"
726
- }
727
- }
728
- }`
729
- },
730
-
731
- // Repair confidence assessment
732
- REPAIR_CONFIDENCE: {
733
- SYSTEM: 'You are an expert test automation engineer who writes concise repair advice to build a running understanding of this test\'s behavior and the repairs done.',
734
-
735
- USER: (originalScript: string, updatedScript: string) => `You are an expert test automation engineer. Generate short repair advice that will be used to build a running understanding of this test.
736
-
737
- Original Script:
738
- ${originalScript}
739
-
740
- Repaired Script:
741
- ${updatedScript}
742
-
743
- Instructions:
744
- 1. Compare the original and repaired scripts to identify what was fixed
745
- 2. Determine confidence level (0-5) where:
746
- - 0 = Low confidence, repairs may be unreliable
747
- - 5 = High confidence, repairs are solid and maintainable
748
- 3. Write SHORT advice (few short sentences max) that:
749
- - States what specific fix was made
750
- - Builds on any previous repair advice found in the original script
751
- - Captures patterns (e.g., "usually fails on selector issues", "often needs deflaking")
752
- - Will help future repairs understand this test's quirks
753
-
754
- IMPORTANT:
755
- - Step comments are EXPECTED and GOOD - do not mention them as issues
756
- - Be concise and factual
757
- - Focus on the actual fix made, not general recommendations
758
- - Build a running understanding of this test's behavior relating to the repairs done
759
- - If the original script contains previous repair advice, build upon it to create a cumulative understanding
760
-
761
- Respond with JSON:
762
- {
763
- "confidence": 0-5,
764
- "advice": "short factual statement about the fix and test patterns"
765
- }`
766
- },
767
-
768
- // Final script generation
769
- FINAL_SCRIPT: {
770
- SYSTEM: 'You are an expert at creating drop-in replacement scripts. Generate a complete, properly formatted script that preserves the original structure while incorporating repairs and new advice.',
771
-
772
- USER: (originalScript: string, updatedScript: string, newRepairAdvice: string) => `You are an expert at generating drop-in replacement scripts. Create a final script that can be pasted directly into the original file.
773
-
774
- Original Script (with existing repair advice):
775
- ${originalScript}
776
-
777
- Updated Script (with repairs):
778
- ${updatedScript}
779
-
780
- New Repair Advice:
781
- ${newRepairAdvice}
782
-
783
- Instructions:
784
- 1. Create a drop-in replacement that preserves the original test name and structure
785
- 2. Update the TestChimp comment block at the top to include BOTH existing and new repair advice
786
- 3. If there was existing repair advice, combine it with the new advice to build a running understanding
787
- 4. Use the repaired code from the updated script
788
- 5. Preserve the original test name (don't use 'repairedTest')
789
- 6. Keep the same import statements and overall structure
790
- 7. Ensure the script is properly formatted and ready to use
791
- 8. The repair advice should accumulate knowledge about this test's behavior patterns
792
-
793
- Return JSON object with the final script:
794
- {
795
- "script": "complete final script that can be pasted into the original file"
796
- }`
797
- },
798
-
799
- // Script cleanup (minor adjustments only)
800
- SCRIPT_CLEANUP: {
801
- SYSTEM: 'You are a Playwright test script reviewer. Your job is to do MINOR cleanup only - remove obvious redundancies, but preserve the core structure and logic.',
802
-
803
- USER: (script: string) => `Review this generated Playwright test script and make MINOR adjustments only.
804
-
805
- SCRIPT:
806
- ${script}
807
-
808
- YOUR TASK (MINOR ADJUSTMENTS ONLY):
809
- 1. Remove duplicate/redundant expect() assertions (e.g., same assertion repeated twice)
810
- 2. Fix obvious formatting issues (inconsistent spacing, etc.)
811
- 3. Consolidate multiple identical assertions into one
812
- 4. Remove any obviously redundant waits or checks
813
-
814
- DO NOT:
815
- - Remove the TestChimp header comment (/* This is a TestChimp Smart Test... */) - this must be preserved
816
- - Remove step comments (e.g., "// Step 1: ..." or "// Navigate to...") - these are important for readability
817
- - Change the test logic or flow
818
- - Remove legitimate assertions
819
- - Restructure the code
820
- - Change selectors
821
- - Add new functionality
822
- - Remove important waits
823
-
824
- EXAMPLES:
825
-
826
- ❌ REMOVE redundancy:
827
- await expect(page.getByText('Hello')).toBeVisible();
828
- await expect(page.getByText('Hello')).toBeVisible(); // duplicate
829
-
830
- ✅ KEEP legitimate checks:
831
- await expect(page.getByPlaceholder('Message...')).toBeEmpty();
832
- await page.getByPlaceholder('Message...').fill('Hello');
833
- await expect(page.getByPlaceholder('Message...')).toHaveValue('Hello'); // different checks
834
-
835
- Return JSON:
836
- {
837
- "script": "cleaned script (or original if no changes needed)",
838
- "changes": ["list of minor changes made, or empty array if none"],
839
- "skipped": "reason if you chose not to make changes"
840
- }`
841
- }
842
- };