testchimp-runner-core 0.0.21 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
  2. package/dist/credit-usage-service.d.ts +9 -0
  3. package/dist/credit-usage-service.d.ts.map +1 -1
  4. package/dist/credit-usage-service.js +20 -5
  5. package/dist/credit-usage-service.js.map +1 -1
  6. package/dist/execution-service.d.ts +7 -2
  7. package/dist/execution-service.d.ts.map +1 -1
  8. package/dist/execution-service.js +91 -36
  9. package/dist/execution-service.js.map +1 -1
  10. package/dist/index.d.ts +30 -2
  11. package/dist/index.d.ts.map +1 -1
  12. package/dist/index.js +91 -26
  13. package/dist/index.js.map +1 -1
  14. package/dist/llm-facade.d.ts +64 -8
  15. package/dist/llm-facade.d.ts.map +1 -1
  16. package/dist/llm-facade.js +361 -109
  17. package/dist/llm-facade.js.map +1 -1
  18. package/dist/llm-provider.d.ts +39 -0
  19. package/dist/llm-provider.d.ts.map +1 -0
  20. package/dist/llm-provider.js +7 -0
  21. package/dist/llm-provider.js.map +1 -0
  22. package/dist/model-constants.d.ts +21 -0
  23. package/dist/model-constants.d.ts.map +1 -0
  24. package/dist/model-constants.js +24 -0
  25. package/dist/model-constants.js.map +1 -0
  26. package/dist/orchestrator/index.d.ts +8 -0
  27. package/dist/orchestrator/index.d.ts.map +1 -0
  28. package/dist/orchestrator/index.js +23 -0
  29. package/dist/orchestrator/index.js.map +1 -0
  30. package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
  31. package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
  32. package/dist/orchestrator/orchestrator-agent.js +855 -0
  33. package/dist/orchestrator/orchestrator-agent.js.map +1 -0
  34. package/dist/orchestrator/tool-registry.d.ts +74 -0
  35. package/dist/orchestrator/tool-registry.d.ts.map +1 -0
  36. package/dist/orchestrator/tool-registry.js +131 -0
  37. package/dist/orchestrator/tool-registry.js.map +1 -0
  38. package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
  39. package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
  40. package/dist/orchestrator/tools/check-page-ready.js +72 -0
  41. package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
  42. package/dist/orchestrator/tools/extract-data.d.ts +13 -0
  43. package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
  44. package/dist/orchestrator/tools/extract-data.js +84 -0
  45. package/dist/orchestrator/tools/extract-data.js.map +1 -0
  46. package/dist/orchestrator/tools/index.d.ts +10 -0
  47. package/dist/orchestrator/tools/index.d.ts.map +1 -0
  48. package/dist/orchestrator/tools/index.js +18 -0
  49. package/dist/orchestrator/tools/index.js.map +1 -0
  50. package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
  51. package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
  52. package/dist/orchestrator/tools/inspect-page.js +39 -0
  53. package/dist/orchestrator/tools/inspect-page.js.map +1 -0
  54. package/dist/orchestrator/tools/recall-history.d.ts +13 -0
  55. package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
  56. package/dist/orchestrator/tools/recall-history.js +64 -0
  57. package/dist/orchestrator/tools/recall-history.js.map +1 -0
  58. package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
  59. package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
  60. package/dist/orchestrator/tools/take-screenshot.js +112 -0
  61. package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
  62. package/dist/orchestrator/types.d.ts +133 -0
  63. package/dist/orchestrator/types.d.ts.map +1 -0
  64. package/dist/orchestrator/types.js +28 -0
  65. package/dist/orchestrator/types.js.map +1 -0
  66. package/dist/playwright-mcp-service.d.ts +9 -0
  67. package/dist/playwright-mcp-service.d.ts.map +1 -1
  68. package/dist/playwright-mcp-service.js +20 -5
  69. package/dist/playwright-mcp-service.js.map +1 -1
  70. package/dist/progress-reporter.d.ts +97 -0
  71. package/dist/progress-reporter.d.ts.map +1 -0
  72. package/dist/progress-reporter.js +18 -0
  73. package/dist/progress-reporter.js.map +1 -0
  74. package/dist/prompts.d.ts +24 -0
  75. package/dist/prompts.d.ts.map +1 -1
  76. package/dist/prompts.js +593 -68
  77. package/dist/prompts.js.map +1 -1
  78. package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
  79. package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
  80. package/dist/providers/backend-proxy-llm-provider.js +76 -0
  81. package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
  82. package/dist/providers/local-llm-provider.d.ts +21 -0
  83. package/dist/providers/local-llm-provider.d.ts.map +1 -0
  84. package/dist/providers/local-llm-provider.js +35 -0
  85. package/dist/providers/local-llm-provider.js.map +1 -0
  86. package/dist/scenario-service.d.ts +27 -1
  87. package/dist/scenario-service.d.ts.map +1 -1
  88. package/dist/scenario-service.js +48 -12
  89. package/dist/scenario-service.js.map +1 -1
  90. package/dist/scenario-worker-class.d.ts +39 -2
  91. package/dist/scenario-worker-class.d.ts.map +1 -1
  92. package/dist/scenario-worker-class.js +614 -86
  93. package/dist/scenario-worker-class.js.map +1 -1
  94. package/dist/script-utils.d.ts +2 -0
  95. package/dist/script-utils.d.ts.map +1 -1
  96. package/dist/script-utils.js +44 -4
  97. package/dist/script-utils.js.map +1 -1
  98. package/dist/types.d.ts +11 -0
  99. package/dist/types.d.ts.map +1 -1
  100. package/dist/types.js.map +1 -1
  101. package/dist/utils/browser-utils.d.ts +20 -1
  102. package/dist/utils/browser-utils.d.ts.map +1 -1
  103. package/dist/utils/browser-utils.js +102 -51
  104. package/dist/utils/browser-utils.js.map +1 -1
  105. package/dist/utils/page-info-utils.d.ts +23 -4
  106. package/dist/utils/page-info-utils.d.ts.map +1 -1
  107. package/dist/utils/page-info-utils.js +174 -43
  108. package/dist/utils/page-info-utils.js.map +1 -1
  109. package/package.json +1 -2
  110. package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
  111. package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
  112. package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
  113. package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
  114. package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
  115. package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
  116. package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
  117. package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
  118. package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
  119. package/src/credit-usage-service.ts +23 -5
  120. package/src/execution-service.ts +152 -42
  121. package/src/index.ts +169 -26
  122. package/src/llm-facade.ts +500 -126
  123. package/src/llm-provider.ts +43 -0
  124. package/src/model-constants.ts +23 -0
  125. package/src/orchestrator/index.ts +33 -0
  126. package/src/orchestrator/orchestrator-agent.ts +1037 -0
  127. package/src/orchestrator/tool-registry.ts +182 -0
  128. package/src/orchestrator/tools/check-page-ready.ts +75 -0
  129. package/src/orchestrator/tools/extract-data.ts +92 -0
  130. package/src/orchestrator/tools/index.ts +11 -0
  131. package/src/orchestrator/tools/inspect-page.ts +42 -0
  132. package/src/orchestrator/tools/recall-history.ts +72 -0
  133. package/src/orchestrator/tools/take-screenshot.ts +128 -0
  134. package/src/orchestrator/types.ts +200 -0
  135. package/src/playwright-mcp-service.ts +23 -5
  136. package/src/progress-reporter.ts +109 -0
  137. package/src/prompts.ts +606 -69
  138. package/src/providers/backend-proxy-llm-provider.ts +91 -0
  139. package/src/providers/local-llm-provider.ts +38 -0
  140. package/src/scenario-service.ts +83 -13
  141. package/src/scenario-worker-class.ts +740 -72
  142. package/src/script-utils.ts +50 -5
  143. package/src/types.ts +13 -1
  144. package/src/utils/browser-utils.ts +123 -51
  145. package/src/utils/page-info-utils.ts +210 -53
  146. package/testchimp-runner-core-0.0.22.tgz +0 -0
@@ -0,0 +1,364 @@
1
+ # Script Generation Architecture & Work Plan
2
+
3
+ ## Overview
4
+ AI-powered test script generation from natural language scenarios using LLM-guided Playwright automation with vision-based fallback diagnostics.
5
+
6
+ ## Architecture Flow
7
+
8
+ ```
9
+ User Scenario (text file)
10
+
11
+ 1. Scenario Breakdown (LLM)
12
+
13
+ 2. Step-by-Step Execution
14
+
15
+ 3. Command Generation (LLM + DOM)
16
+
17
+ 4. Playwright Execution
18
+
19
+ 5. Goal Completion Check (LLM)
20
+
21
+ 6. Vision Fallback (if needed)
22
+
23
+ 7. Script Generation
24
+ ```
25
+
26
+ ## Components
27
+
28
+ ### 1. Scenario Breakdown
29
+ **File:** `llm-facade.ts` → `breakdownScenario()`
30
+ **Prompt:** `PROMPTS.SCENARIO_BREAKDOWN`
31
+
32
+ **Input:** Natural language scenario
33
+ ```
34
+ - Go to https://app.com
35
+ - Login with credentials: admin, pass123
36
+ - Click on settings
37
+ ```
38
+
39
+ **Output:** Structured steps
40
+ ```json
41
+ {
42
+ "steps": [
43
+ "Go to https://app.com",
44
+ "Login with credentials: admin, pass123",
45
+ "Click on settings"
46
+ ]
47
+ }
48
+ ```
49
+
50
+ **Key Principles:**
51
+ - ✅ Preserve ALL specific values (credentials, names, amounts, etc.)
52
+ - ✅ Keep steps semantic (no technical selectors)
53
+ - ✅ One clear action per step
54
+ - ❌ Never replace values with variables/placeholders
55
+
56
+ ### 2. Step Execution Loop
57
+ **File:** `scenario-worker-class.ts` → `processScenarioJob()`
58
+
59
+ **For each step:**
60
+ 1. Initialize step tracking
61
+ 2. Execute sub-actions until goal complete
62
+ 3. Track failures and successes
63
+ 4. Generate final script
64
+
65
+ **Counters:**
66
+ - `subActionCount`: Number of different commands tried for this step
67
+ - `totalFailedAttemptsForStep`: Total failures across all sub-actions
68
+ - `attempt`: Retry count within current sub-action (0-3)
69
+
70
+ ### 3. Command Generation
71
+ **File:** `llm-facade.ts` → `generatePlaywrightCommand()`
72
+ **Prompt:** `PROMPTS.PLAYWRIGHT_COMMAND`
73
+
74
+ **Context Provided:**
75
+ - Goal description
76
+ - Current page state (DOM snapshot)
77
+ - Previous commands in this step
78
+ - Previous step history
79
+ - Last error (if retry)
80
+
81
+ **Key Principles:**
82
+ 1. **Extract specific values from goal** - Use exact credentials, names, amounts from goal description
83
+ 2. **Navigation handling** - Use `{ waitUntil: 'domcontentloaded', timeout: 10000 }` for redirects
84
+ 3. **Check current URL** - Don't retry navigation if already navigated (even if redirected)
85
+ 4. **Never hallucinate verification** - Only verify what goal explicitly asks for
86
+ 5. **Semantic action completion** - "Login" means fill + click, not just fill
87
+
88
+ ### 4. Goal Completion Assessment
89
+ **File:** `llm-facade.ts` → `checkGoalCompletion()`
90
+ **Prompt:** `PROMPTS.GOAL_COMPLETION_CHECK`
91
+
92
+ **Decision Matrix:**
93
+
94
+ | Goal Type | Completion Criteria | Example |
95
+ |-----------|-------------------|---------|
96
+ | Simple action | Action succeeded | "Click button" → complete after click |
97
+ | Semantic action | All implicit steps done | "Login" → complete after fill + click |
98
+ | Multi-part action | All parts done | "Fill all fields" → complete after all fields |
99
+ | Verification | Assertion passed | "Verify message" → complete after assertion |
100
+
101
+ **Semantic Action Recognition:**
102
+ - **"Login with credentials"** → Fill username, fill password, click login button
103
+ - **"Send message"** → Type message, click send button
104
+ - **"Submit form"** → Fill fields, click submit button
105
+ - **"Register/Signup"** → Fill registration, click register button
106
+
107
+ Mark INCOMPLETE until the final implicit action completes.
108
+
109
+ ### 5. Vision-Based Fallback Diagnostics
110
+ **File:** `scenario-worker-class.ts` (lines 215-272)
111
+ **Prompts:** `SCREENSHOT_NEED_ASSESSMENT`, `VISION_DIAGNOSTIC_ANALYSIS`
112
+
113
+ **Trigger Condition:**
114
+ ```typescript
115
+ totalFailedAttemptsForStep >= 2 && !usedVisionMode && lastError
116
+ ```
117
+
118
+ **When:** After 2+ total failures across all sub-actions
119
+
120
+ **Two-Step Process:**
121
+
122
+ **Step 1: Assess Screenshot Need** (gpt-4.1-mini)
123
+ - Quick check: Would visual analysis help?
124
+ - Conservative: Only recommend if DOM info insufficient
125
+ - Returns: needsScreenshot (boolean) + reason
126
+
127
+ **Step 2: Vision Diagnostics** (gpt-4o - only if assessment says yes)
128
+ - Supervisor analyzes screenshot
129
+ - Identifies: What's visible vs what was assumed
130
+ - Diagnoses: Why previous attempts failed
131
+ - Recommends: Better approach based on visual reality
132
+
133
+ **Output:**
134
+ - Visual analysis
135
+ - Root cause of failures
136
+ - Specific instructions for next attempt
137
+ - Elements found/not found
138
+
139
+ ### 6. Script Generation
140
+ **File:** `script-utils.ts` → `generateTestScript()`
141
+
142
+ **Output Format:**
143
+ ```javascript
144
+ /*
145
+ This is a TestChimp Smart Test.
146
+ Version: 1.0
147
+
148
+ #login #coreHR #peopleHR
149
+ */
150
+
151
+ import { test, expect } from '@playwright/test';
152
+ test('testName', async ({ page, browser, context }) => {
153
+ // Step 1: Go to URL
154
+ await page.goto('https://...', { waitUntil: 'domcontentloaded', timeout: 10000 });
155
+
156
+ // Step 2: Login with credentials: Willy, Willy@1234
157
+ await page.fill('username', 'Willy');
158
+ await page.fill('password', 'Willy@1234');
159
+ await page.click('button[name="Login"]');
160
+
161
+ // Step 3: Click on All Modules [FAILED]
162
+ // Attempted: await page.getByText('All Modules').click();
163
+ });
164
+ ```
165
+
166
+ ## Configuration & Timeouts
167
+
168
+ **Default Timeout:** 5 seconds (fast feedback on wrong selectors)
169
+
170
+ **Navigation Timeout:** 10 seconds explicit (handles redirects)
171
+ ```typescript
172
+ await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 })
173
+ ```
174
+
175
+ **Why:**
176
+ - 5s for element operations → fast failure on wrong selectors (not 10s wait per wrong selector)
177
+ - 10s explicit for navigation → handles redirects properly
178
+ - Best of both: fast iteration + reliable navigation
179
+
180
+ ## Key Improvements
181
+
182
+ ### 1. Value Preservation Throughout Flow
183
+
184
+ **Problem:** Losing specific values (credentials, amounts, etc.)
185
+
186
+ **Solution:** Preserve at every stage
187
+
188
+ | Stage | Before | After |
189
+ |-------|--------|-------|
190
+ | Breakdown | "Login with user/pass" | "Login with credentials: Willy, Willy@1234" |
191
+ | Goal | "Complete login" | "Login with credentials: Willy, Willy@1234" |
192
+ | Command | `process.env.USERNAME` | `'Willy'` |
193
+
194
+ ### 2. Semantic Action Understanding
195
+
196
+ **Problem:** Marking "Login" complete after just filling fields
197
+
198
+ **Solution:** Recognize implicit final actions
199
+
200
+ - "Login" → fill + **click login button**
201
+ - "Send" → type + **click send button**
202
+ - "Submit" → fill + **click submit button**
203
+
204
+ ### 3. Navigation & Redirect Handling
205
+
206
+ **Problem:** Retrying original URL after successful redirect
207
+
208
+ **Solution:**
209
+ - Check current URL after navigation errors
210
+ - If URL changed from `about:blank` → navigation succeeded
211
+ - Use `domcontentloaded` for redirects (more reliable than `load`)
212
+ - Don't retry if already on a page
213
+
214
+ ### 4. Vision Diagnostics
215
+
216
+ **Problem:** Vision never triggered, because the check used the per-sub-action `attempt` counter rather than the total failures for the step
217
+
218
+ **Solution:** Trigger on total failures across all sub-actions
219
+ - Changed from `attempt === 2` → `totalFailedAttemptsForStep >= 2`
220
+ - Now triggers after 2+ failures regardless of sub-action boundaries
221
+ - Detailed logging shows when/why vision triggers or doesn't
222
+
223
+ ### 5. Enhanced Logging
224
+
225
+ **Visibility:**
226
+ - ✅ Console logs for Debug Console
227
+ - ✅ outputChannel for Output panel
228
+ - ✅ Timestamps on all logs
229
+ - ✅ Version markers
230
+ - ✅ Vision trigger decision logs
231
+
232
+ **Format:**
233
+ ```
234
+ [02:58:15.613] [ScenarioWorker] 🚀 RUNNER-CORE VERSION: v1.5.0-vision-preserve-values
235
+ [02:58:15.614] [ScenarioWorker] Step 1 - Sub-action 1, Attempt 1: Go to URL
236
+ [02:58:15.650] [ScenarioWorker] 🔍 Vision trigger check: subAction=1, attempt=0, totalFailed=0, usedVision=false
237
+ [02:58:15.651] [ScenarioWorker] 📝 Using DOM-based approach (0 failures so far, need 2+)
238
+ ```
239
+
240
+ ## Retry & Failure Budget
241
+
242
+ **Per Step Limits:**
243
+ - `MAX_RETRIES_PER_STEP = 3` → 4 attempts per sub-action (0, 1, 2, 3)
244
+ - `MAX_SUBACTIONS_PER_STEP = 5` → Max 5 different commands for one step
245
+ - `MAX_FAILED_ATTEMPTS_PER_STEP = 12` → Hard limit on total failures
246
+
247
+ **Early Termination:**
248
+ - After 2 consecutive step failures → stop execution
249
+ - Saves resources, prevents runaway costs
250
+
251
+ ## Error Context Enhancement
252
+
253
+ **Navigation errors now include current URL:**
254
+ ```
255
+ Error: Timeout 10000ms exceeded | Current URL: https://redirected-url.com
256
+ ```
257
+
258
+ This helps LLM understand:
259
+ - Navigation succeeded but redirected
260
+ - Don't retry original URL
261
+ - Proceed with current page
262
+
263
+ ## Workflow Example
264
+
265
+ **Scenario:**
266
+ ```
267
+ - Go to https://app.com/login
268
+ - Login with credentials: admin, pass123
269
+ - Click dashboard
270
+ ```
271
+
272
+ **Execution:**
273
+
274
+ **Step 1: Navigate**
275
+ ```
276
+ Attempt 1: goto(url, {domcontentloaded, timeout: 10000}) → ✅ Success
277
+ Goal check: COMPLETE (navigation is single-step action)
278
+ ```
279
+
280
+ **Step 2: Login**
281
+ ```
282
+ Sub-action 1, Attempt 1: fill(username, 'admin') → ✅ Success
283
+ Goal check: INCOMPLETE (login needs username + password + click)
284
+ nextSubGoal: "Enter password and click login"
285
+
286
+ Sub-action 2, Attempt 1: fill(password, 'pass123') → ✅ Success
287
+ Goal check: INCOMPLETE (still need to click login button)
288
+ nextSubGoal: "Click login button to submit credentials"
289
+
290
+ Sub-action 3, Attempt 1: click(login button) → ✅ Success
291
+ Goal check: COMPLETE (all parts of login done)
292
+ ```
293
+
294
+ **Step 3: Click dashboard**
295
+ ```
296
+ Sub-action 1, Attempt 1: click(dashboard) → ❌ Fail (not visible)
297
+ 🔍 Vision check: totalFailed=1, need 2+
298
+ 📝 Using DOM (1 failure, need 2+)
299
+
300
+ Sub-action 1, Attempt 2: waitFor + click → ❌ Fail (still not visible)
301
+ 🔍 Vision check: totalFailed=2, usedVision=false
302
+ 🎯 VISION TRIGGER: 2 total failures - assessing...
303
+ 💭 LLM: SCREENSHOT NEEDED ✅
304
+ 📸 Taking screenshot...
305
+ 👔 Supervisor analyzing...
306
+ 🔨 Generating vision-aided command...
307
+
308
+ Sub-action 1, Attempt 3: [vision-aided command] → ✅ Success
309
+ Goal check: COMPLETE
310
+ ```
311
+
312
+ ## Testing Checklist
313
+
314
+ - [ ] Specific values preserved (credentials, names, amounts)
315
+ - [ ] Semantic actions complete fully (login includes button click)
316
+ - [ ] Navigation redirects handled (no URL retry loops)
317
+ - [ ] Vision triggers after 2+ failures
318
+ - [ ] Vision logs show decision reasoning
319
+ - [ ] Timeouts appropriate (5s default, 10s navigation)
320
+ - [ ] Error context includes current URL
321
+ - [ ] Failed steps don't show previous step commands
322
+ - [ ] Version marker visible in logs
323
+
324
+ ## Version Tracking
325
+
326
+ **Current Version:** `v1.5.0-vision-preserve-values`
327
+
328
+ **Version log location:**
329
+ - During initialization: `[ScenarioWorker] 🚀 RUNNER-CORE VERSION: v1.5.0-vision-preserve-values`
330
+ - Increment for each significant change
331
+
332
+ ## Build & Deploy
333
+
334
+ **Local Development:**
335
+ ```bash
336
+ cd /Users/nuwansam/IdeaProjects/AwareRepo/local/vs-ext
337
+ ./build_local.sh
338
+ ```
339
+
340
+ **What it does:**
341
+ 1. Builds runner-core
342
+ 2. Packs runner-core (0.0.22)
343
+ 3. Installs in vs-ext
344
+ 4. Builds vs-ext for staging
345
+
346
+ **Verification:**
347
+ ```bash
348
+ grep "v1.5.0-vision-preserve-values" node_modules/testchimp-runner-core/dist/scenario-worker-class.js
349
+ ```
350
+
351
+ ## Related Documentation
352
+
353
+ - `VISION_DIAGNOSTICS_IMPROVEMENTS.md` - Vision system details
354
+ - `prompts.ts` - All LLM prompts and guidance
355
+ - `types.ts` - Type definitions
356
+
357
+ ## Future Enhancements
358
+
359
+ 1. **Learn from vision insights** - Build library of common patterns
360
+ 2. **Optimize vision timing** - Better cost/benefit analysis
361
+ 3. **Cross-flow learning** - Share insights between generation and repair
362
+ 4. **Smarter goal parsing** - Better semantic action recognition
363
+ 5. **Dynamic timeout adjustment** - Based on operation type
364
+
@@ -0,0 +1,139 @@
1
+ # Selector Preference Improvements
2
+
3
+ ## Summary
4
+ Updated the orchestrator agent to prefer user-friendly, semantic Playwright selectors over auto-generated IDs, following Playwright's official best practices.
5
+
6
+ ## Problem
7
+ The agent was generating commands like:
8
+ ```typescript
9
+ await page.fill('#«r3»-form-item', 'alice@example.com')
10
+ await page.fill('#«r4»-form-item', 'TestPass123')
11
+ ```
12
+
13
+ These auto-generated IDs (especially with unicode characters like `«r3»`) are:
14
+ - Not user-friendly or readable
15
+ - Brittle: they break when component instances change
16
+ - Not maintainable
17
+ - Not following Playwright best practices
18
+
19
+ ## Solution
20
+ Implemented a comprehensive selector preference strategy across three key files, plus updated EXPERIENCES examples (item 4 below):
21
+
22
+ ### 1. Orchestrator Agent Prompt (`orchestrator-agent.ts`)
23
+
24
+ Added new section **"5b. SELECTOR PREFERENCE"** with explicit guidance:
25
+
26
+ **Preferred selectors (in order):**
27
+ 1. `page.getByRole('role', {name: 'text'})` - Accessible, semantic, resilient
28
+ 2. `page.getByLabel('label text')` - Great for form inputs
29
+ 3. `page.getByPlaceholder('placeholder')` - Good for inputs without labels
30
+ 4. `page.getByText('visible text')` - Clear and readable
31
+ 5. `page.getByTestId('test-id')` - Stable if available
32
+
33
+ **Avoid (last resort only):**
34
+ - CSS selectors with auto-generated IDs: `#r3-form-item`, `#«r3»-form-item`
35
+ - CSS selectors with unicode characters
36
+ - Complex CSS paths
37
+
38
+ **Examples provided:**
39
+ ```typescript
40
+ // ❌ BAD
41
+ await page.fill('#«r3»-form-item', 'alice@example.com')
42
+
43
+ // ✅ GOOD
44
+ await page.getByLabel('Email').fill('alice@example.com')
45
+ await page.getByRole('textbox', {name: 'Email'}).fill('alice@example.com')
46
+ await page.getByPlaceholder('Enter your email').fill('alice@example.com')
47
+ ```
48
+
49
+ ### 2. Page Info Utils (`page-info-utils.ts`)
50
+
51
+ **Reordered selector generation priority:**
52
+
53
+ Before:
54
+ 1. ID selector (e.g., `#«r3»-form-item`)
55
+ 2. data-testid
56
+ 3. getByRole
57
+
58
+ After:
59
+ 1. getByLabel (for inputs with associated labels)
60
+ 2. getByRole with name (semantic and accessible)
61
+ 3. getByPlaceholder (for inputs with placeholders)
62
+ 4. getByText (visible text fallback)
63
+ 5. getByTestId (stable, explicit)
64
+ 6. ID selector - ONLY if stable (filters out auto-generated IDs with unicode or patterns like `rc_`, `:r[0-9]+:`, `__`)
65
+
66
+ **Enhanced display:**
67
+ - Shows best selector first, with up to 2 alternatives
68
+ - Example: `getByLabel('Email') (or: getByRole('textbox', {name: 'Email'}), getByPlaceholder('Enter your email'))`
69
+
70
+ ### 3. Screenshot Tool (`take-screenshot.ts`)
71
+
72
+ **Updated vision analysis prompts:**
73
+ - System prompt now emphasizes: "ALWAYS prioritize semantic selectors (getByRole, getByLabel, getByText) over CSS selectors with auto-generated IDs"
74
+ - Analysis task explicitly instructs: "Recommend SEMANTIC SELECTORS FIRST" and "AVOID auto-generated IDs with unicode"
75
+
76
+ ### 4. Updated EXPERIENCES Section
77
+
78
+ Enhanced examples to capture semantic selector patterns:
79
+ ```
80
+ ✅ GOOD - App-specific patterns:
81
+ - "Login form fields accessible via getByLabel: 'Email' and 'Password'"
82
+ - "Submit buttons consistently use role=button with text matching action"
83
+ - "Input fields have clear placeholders - prefer getByPlaceholder over IDs"
84
+
85
+ ❌ BAD:
86
+ - Noting auto-generated IDs like #«r3»-form-item (these are unreliable)
87
+ ```
88
+
89
+ ## Benefits
90
+
91
+ 1. **More Maintainable**: Semantic selectors are resilient to UI changes
92
+ 2. **Self-Documenting**: Code reads like natural language
93
+ 3. **Accessibility**: Ensures UI elements are properly accessible
94
+ 4. **Best Practices**: Follows Playwright's official recommendations
95
+ 5. **User-Friendly**: Generated tests are more readable and easier to understand
96
+
97
+ ## Expected Output
98
+
99
+ After these changes, the agent will generate:
100
+
101
+ ```typescript
102
+ // Login form example
103
+ await page.getByLabel('Email').fill('alice@example.com')
104
+ await page.getByLabel('Password').fill('TestPass123')
105
+ await page.getByRole('button', {name: 'Sign In'}).click()
106
+
107
+ // Navigation example
108
+ await page.getByRole('link', {name: 'Dashboard'}).click()
109
+
110
+ // Form with placeholders
111
+ await page.getByPlaceholder('Search...').fill('test query')
112
+ ```
113
+
114
+ Instead of:
115
+
116
+ ```typescript
117
+ // Old style with auto-generated IDs
118
+ await page.fill('#«r3»-form-item', 'alice@example.com')
119
+ await page.fill('#«r4»-form-item', 'TestPass123')
120
+ await page.click('#«r5»-button')
121
+ ```
122
+
123
+ ## Testing
124
+
125
+ The changes are backward compatible - the agent will still use ID selectors as a last resort when semantic selectors are not available. The build completed successfully with no linter errors.
126
+
127
+ ## Files Modified
128
+
129
+ 1. `/src/orchestrator/orchestrator-agent.ts` - System prompt enhancements
130
+ 2. `/src/utils/page-info-utils.ts` - Selector priority reordering
131
+ 3. `/src/orchestrator/tools/take-screenshot.ts` - Vision analysis updates
132
+
133
+ ## Next Steps
134
+
135
+ Test the changes with real scenarios to verify that:
136
+ 1. Generated commands use semantic selectors when available
137
+ 2. Tests remain stable and maintainable
138
+ 3. Fallback to ID selectors works when semantic options aren't available
139
+
@@ -22,6 +22,7 @@ export interface InsertCreditUsageResponse {
22
22
  export class CreditUsageService {
23
23
  private backendUrl: string;
24
24
  private authConfig: AuthConfig | null;
25
+ private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
25
26
 
26
27
  constructor(authConfig?: AuthConfig, backendUrl?: string) {
27
28
  // Use provided backend URL or fall back to environment configuration
@@ -36,6 +37,23 @@ export class CreditUsageService {
36
37
  this.authConfig = authConfig || null;
37
38
  }
38
39
 
40
+ /**
41
+ * Set a logger callback for capturing execution logs
42
+ */
43
+ setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
44
+ this.logger = logger;
45
+ }
46
+
47
+ /**
48
+ * Log a message using the configured logger
49
+ */
50
+ private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
51
+ if (this.logger) {
52
+ this.logger(message, level);
53
+ }
54
+ // No console fallback - if no logger has been set the message is dropped; otherwise it is routed to the consumer
55
+ }
56
+
39
57
  /**
40
58
  * Update authentication configuration
41
59
  */
@@ -82,7 +100,7 @@ export class CreditUsageService {
82
100
 
83
101
  return response.data;
84
102
  } catch (error: any) {
85
- console.error('Credit usage report failed:', error);
103
+ this.log(`Credit usage report failed: ${error}`, 'error');
86
104
  throw new Error(`Credit usage report failed: ${error.message}`);
87
105
  }
88
106
  }
@@ -93,9 +111,9 @@ export class CreditUsageService {
93
111
  async reportScriptGenerationCredit(jobId?: string): Promise<void> {
94
112
  try {
95
113
  await this.reportCreditUsage(1, CreditUsageReason.SCRIPT_GENERATE, jobId);
96
- console.log(`Credit usage reported for script generation${jobId ? ` (job: ${jobId})` : ''}`);
114
+ this.log(`Credit usage reported for script generation${jobId ? ` (job: ${jobId})` : ''}`);
97
115
  } catch (error) {
98
- console.error('Failed to report script generation credit usage:', error);
116
+ this.log(`Failed to report script generation credit usage: ${error}`, 'error');
99
117
  // Don't throw - credit reporting should not break the main flow
100
118
  }
101
119
  }
@@ -106,9 +124,9 @@ export class CreditUsageService {
106
124
  async reportAIRepairCredit(jobId?: string): Promise<void> {
107
125
  try {
108
126
  await this.reportCreditUsage(1, CreditUsageReason.TEST_REPAIR, jobId);
109
- console.log(`Credit usage reported for AI repair${jobId ? ` (job: ${jobId})` : ''}`);
127
+ this.log(`Credit usage reported for AI repair${jobId ? ` (job: ${jobId})` : ''}`);
110
128
  } catch (error) {
111
- console.error('Failed to report AI repair credit usage:', error);
129
+ this.log(`Failed to report AI repair credit usage: ${error}`, 'error');
112
130
  // Don't throw - credit reporting should not break the main flow
113
131
  }
114
132
  }