testchimp-runner-core 0.0.21 → 0.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VISION_DIAGNOSTICS_IMPROVEMENTS.md +336 -0
- package/dist/credit-usage-service.d.ts +9 -0
- package/dist/credit-usage-service.d.ts.map +1 -1
- package/dist/credit-usage-service.js +20 -5
- package/dist/credit-usage-service.js.map +1 -1
- package/dist/execution-service.d.ts +7 -2
- package/dist/execution-service.d.ts.map +1 -1
- package/dist/execution-service.js +91 -36
- package/dist/execution-service.js.map +1 -1
- package/dist/index.d.ts +30 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +91 -26
- package/dist/index.js.map +1 -1
- package/dist/llm-facade.d.ts +64 -8
- package/dist/llm-facade.d.ts.map +1 -1
- package/dist/llm-facade.js +361 -109
- package/dist/llm-facade.js.map +1 -1
- package/dist/llm-provider.d.ts +39 -0
- package/dist/llm-provider.d.ts.map +1 -0
- package/dist/llm-provider.js +7 -0
- package/dist/llm-provider.js.map +1 -0
- package/dist/model-constants.d.ts +21 -0
- package/dist/model-constants.d.ts.map +1 -0
- package/dist/model-constants.js +24 -0
- package/dist/model-constants.js.map +1 -0
- package/dist/orchestrator/index.d.ts +8 -0
- package/dist/orchestrator/index.d.ts.map +1 -0
- package/dist/orchestrator/index.js +23 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/orchestrator-agent.d.ts +66 -0
- package/dist/orchestrator/orchestrator-agent.d.ts.map +1 -0
- package/dist/orchestrator/orchestrator-agent.js +855 -0
- package/dist/orchestrator/orchestrator-agent.js.map +1 -0
- package/dist/orchestrator/tool-registry.d.ts +74 -0
- package/dist/orchestrator/tool-registry.d.ts.map +1 -0
- package/dist/orchestrator/tool-registry.js +131 -0
- package/dist/orchestrator/tool-registry.js.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts +13 -0
- package/dist/orchestrator/tools/check-page-ready.d.ts.map +1 -0
- package/dist/orchestrator/tools/check-page-ready.js +72 -0
- package/dist/orchestrator/tools/check-page-ready.js.map +1 -0
- package/dist/orchestrator/tools/extract-data.d.ts +13 -0
- package/dist/orchestrator/tools/extract-data.d.ts.map +1 -0
- package/dist/orchestrator/tools/extract-data.js +84 -0
- package/dist/orchestrator/tools/extract-data.js.map +1 -0
- package/dist/orchestrator/tools/index.d.ts +10 -0
- package/dist/orchestrator/tools/index.d.ts.map +1 -0
- package/dist/orchestrator/tools/index.js +18 -0
- package/dist/orchestrator/tools/index.js.map +1 -0
- package/dist/orchestrator/tools/inspect-page.d.ts +13 -0
- package/dist/orchestrator/tools/inspect-page.d.ts.map +1 -0
- package/dist/orchestrator/tools/inspect-page.js +39 -0
- package/dist/orchestrator/tools/inspect-page.js.map +1 -0
- package/dist/orchestrator/tools/recall-history.d.ts +13 -0
- package/dist/orchestrator/tools/recall-history.d.ts.map +1 -0
- package/dist/orchestrator/tools/recall-history.js +64 -0
- package/dist/orchestrator/tools/recall-history.js.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts +15 -0
- package/dist/orchestrator/tools/take-screenshot.d.ts.map +1 -0
- package/dist/orchestrator/tools/take-screenshot.js +112 -0
- package/dist/orchestrator/tools/take-screenshot.js.map +1 -0
- package/dist/orchestrator/types.d.ts +133 -0
- package/dist/orchestrator/types.d.ts.map +1 -0
- package/dist/orchestrator/types.js +28 -0
- package/dist/orchestrator/types.js.map +1 -0
- package/dist/playwright-mcp-service.d.ts +9 -0
- package/dist/playwright-mcp-service.d.ts.map +1 -1
- package/dist/playwright-mcp-service.js +20 -5
- package/dist/playwright-mcp-service.js.map +1 -1
- package/dist/progress-reporter.d.ts +97 -0
- package/dist/progress-reporter.d.ts.map +1 -0
- package/dist/progress-reporter.js +18 -0
- package/dist/progress-reporter.js.map +1 -0
- package/dist/prompts.d.ts +24 -0
- package/dist/prompts.d.ts.map +1 -1
- package/dist/prompts.js +593 -68
- package/dist/prompts.js.map +1 -1
- package/dist/providers/backend-proxy-llm-provider.d.ts +25 -0
- package/dist/providers/backend-proxy-llm-provider.d.ts.map +1 -0
- package/dist/providers/backend-proxy-llm-provider.js +76 -0
- package/dist/providers/backend-proxy-llm-provider.js.map +1 -0
- package/dist/providers/local-llm-provider.d.ts +21 -0
- package/dist/providers/local-llm-provider.d.ts.map +1 -0
- package/dist/providers/local-llm-provider.js +35 -0
- package/dist/providers/local-llm-provider.js.map +1 -0
- package/dist/scenario-service.d.ts +27 -1
- package/dist/scenario-service.d.ts.map +1 -1
- package/dist/scenario-service.js +48 -12
- package/dist/scenario-service.js.map +1 -1
- package/dist/scenario-worker-class.d.ts +39 -2
- package/dist/scenario-worker-class.d.ts.map +1 -1
- package/dist/scenario-worker-class.js +614 -86
- package/dist/scenario-worker-class.js.map +1 -1
- package/dist/script-utils.d.ts +2 -0
- package/dist/script-utils.d.ts.map +1 -1
- package/dist/script-utils.js +44 -4
- package/dist/script-utils.js.map +1 -1
- package/dist/types.d.ts +11 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/dist/utils/browser-utils.d.ts +20 -1
- package/dist/utils/browser-utils.d.ts.map +1 -1
- package/dist/utils/browser-utils.js +102 -51
- package/dist/utils/browser-utils.js.map +1 -1
- package/dist/utils/page-info-utils.d.ts +23 -4
- package/dist/utils/page-info-utils.d.ts.map +1 -1
- package/dist/utils/page-info-utils.js +174 -43
- package/dist/utils/page-info-utils.js.map +1 -1
- package/package.json +1 -2
- package/plandocs/HUMAN_LIKE_IMPROVEMENTS.md +642 -0
- package/plandocs/MULTI_AGENT_ARCHITECTURE_REVIEW.md +844 -0
- package/plandocs/ORCHESTRATOR_MVP_SUMMARY.md +539 -0
- package/plandocs/PHASE1_ABSTRACTION_COMPLETE.md +241 -0
- package/plandocs/PHASE1_FINAL_STATUS.md +210 -0
- package/plandocs/PLANNING_SESSION_SUMMARY.md +372 -0
- package/plandocs/SCRIPT_CLEANUP_FEATURE.md +201 -0
- package/plandocs/SCRIPT_GENERATION_ARCHITECTURE.md +364 -0
- package/plandocs/SELECTOR_IMPROVEMENTS.md +139 -0
- package/src/credit-usage-service.ts +23 -5
- package/src/execution-service.ts +152 -42
- package/src/index.ts +169 -26
- package/src/llm-facade.ts +500 -126
- package/src/llm-provider.ts +43 -0
- package/src/model-constants.ts +23 -0
- package/src/orchestrator/index.ts +33 -0
- package/src/orchestrator/orchestrator-agent.ts +1037 -0
- package/src/orchestrator/tool-registry.ts +182 -0
- package/src/orchestrator/tools/check-page-ready.ts +75 -0
- package/src/orchestrator/tools/extract-data.ts +92 -0
- package/src/orchestrator/tools/index.ts +11 -0
- package/src/orchestrator/tools/inspect-page.ts +42 -0
- package/src/orchestrator/tools/recall-history.ts +72 -0
- package/src/orchestrator/tools/take-screenshot.ts +128 -0
- package/src/orchestrator/types.ts +200 -0
- package/src/playwright-mcp-service.ts +23 -5
- package/src/progress-reporter.ts +109 -0
- package/src/prompts.ts +606 -69
- package/src/providers/backend-proxy-llm-provider.ts +91 -0
- package/src/providers/local-llm-provider.ts +38 -0
- package/src/scenario-service.ts +83 -13
- package/src/scenario-worker-class.ts +740 -72
- package/src/script-utils.ts +50 -5
- package/src/types.ts +13 -1
- package/src/utils/browser-utils.ts +123 -51
- package/src/utils/page-info-utils.ts +210 -53
- package/testchimp-runner-core-0.0.22.tgz +0 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# Script Generation Architecture & Work Plan
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
AI-powered test script generation from natural language scenarios using LLM-guided Playwright automation with vision-based fallback diagnostics.
|
|
5
|
+
|
|
6
|
+
## Architecture Flow
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
User Scenario (text file)
|
|
10
|
+
↓
|
|
11
|
+
1. Scenario Breakdown (LLM)
|
|
12
|
+
↓
|
|
13
|
+
2. Step-by-Step Execution
|
|
14
|
+
↓
|
|
15
|
+
3. Command Generation (LLM + DOM)
|
|
16
|
+
↓
|
|
17
|
+
4. Playwright Execution
|
|
18
|
+
↓
|
|
19
|
+
5. Goal Completion Check (LLM)
|
|
20
|
+
↓
|
|
21
|
+
6. Vision Fallback (if needed)
|
|
22
|
+
↓
|
|
23
|
+
7. Script Generation
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Components
|
|
27
|
+
|
|
28
|
+
### 1. Scenario Breakdown
|
|
29
|
+
**File:** `llm-facade.ts` → `breakdownScenario()`
|
|
30
|
+
**Prompt:** `PROMPTS.SCENARIO_BREAKDOWN`
|
|
31
|
+
|
|
32
|
+
**Input:** Natural language scenario
|
|
33
|
+
```
|
|
34
|
+
- Go to https://app.com
|
|
35
|
+
- Login with credentials: admin, pass123
|
|
36
|
+
- Click on settings
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Output:** Structured steps
|
|
40
|
+
```json
|
|
41
|
+
{
|
|
42
|
+
"steps": [
|
|
43
|
+
"Go to https://app.com",
|
|
44
|
+
"Login with credentials: admin, pass123",
|
|
45
|
+
"Click on settings"
|
|
46
|
+
]
|
|
47
|
+
}
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Key Principles:**
|
|
51
|
+
- ✅ Preserve ALL specific values (credentials, names, amounts, etc.)
|
|
52
|
+
- ✅ Keep steps semantic (no technical selectors)
|
|
53
|
+
- ✅ One clear action per step
|
|
54
|
+
- ❌ Never replace values with variables/placeholders
|
|
55
|
+
|
|
56
|
+
### 2. Step Execution Loop
|
|
57
|
+
**File:** `scenario-worker-class.ts` → `processScenarioJob()`
|
|
58
|
+
|
|
59
|
+
**For each step:**
|
|
60
|
+
1. Initialize step tracking
|
|
61
|
+
2. Execute sub-actions until goal complete
|
|
62
|
+
3. Track failures and successes
|
|
63
|
+
4. Generate final script
|
|
64
|
+
|
|
65
|
+
**Counters:**
|
|
66
|
+
- `subActionCount`: Number of different commands tried for this step
|
|
67
|
+
- `totalFailedAttemptsForStep`: Total failures across all sub-actions
|
|
68
|
+
- `attempt`: Retry count within current sub-action (0-3)
|
|
69
|
+
|
|
70
|
+
### 3. Command Generation
|
|
71
|
+
**File:** `llm-facade.ts` → `generatePlaywrightCommand()`
|
|
72
|
+
**Prompt:** `PROMPTS.PLAYWRIGHT_COMMAND`
|
|
73
|
+
|
|
74
|
+
**Context Provided:**
|
|
75
|
+
- Goal description
|
|
76
|
+
- Current page state (DOM snapshot)
|
|
77
|
+
- Previous commands in this step
|
|
78
|
+
- Previous step history
|
|
79
|
+
- Last error (if retry)
|
|
80
|
+
|
|
81
|
+
**Key Principles:**
|
|
82
|
+
1. **Extract specific values from goal** - Use exact credentials, names, amounts from goal description
|
|
83
|
+
2. **Navigation handling** - Use `{ waitUntil: 'domcontentloaded', timeout: 10000 }` for redirects
|
|
84
|
+
3. **Check current URL** - Don't retry navigation if the page has already navigated (even if it was redirected)
|
|
85
|
+
4. **Never hallucinate verification** - Only verify what goal explicitly asks for
|
|
86
|
+
5. **Semantic action completion** - "Login" means fill + click, not just fill
|
|
87
|
+
|
|
88
|
+
### 4. Goal Completion Assessment
|
|
89
|
+
**File:** `llm-facade.ts` → `checkGoalCompletion()`
|
|
90
|
+
**Prompt:** `PROMPTS.GOAL_COMPLETION_CHECK`
|
|
91
|
+
|
|
92
|
+
**Decision Matrix:**
|
|
93
|
+
|
|
94
|
+
| Goal Type | Completion Criteria | Example |
|
|
95
|
+
|-----------|-------------------|---------|
|
|
96
|
+
| Simple action | Action succeeded | "Click button" → complete after click |
|
|
97
|
+
| Semantic action | All implicit steps done | "Login" → complete after fill + click |
|
|
98
|
+
| Multi-part action | All parts done | "Fill all fields" → complete after all fields |
|
|
99
|
+
| Verification | Assertion passed | "Verify message" → complete after assertion |
|
|
100
|
+
|
|
101
|
+
**Semantic Action Recognition:**
|
|
102
|
+
- **"Login with credentials"** → Fill username, fill password, click login button
|
|
103
|
+
- **"Send message"** → Type message, click send button
|
|
104
|
+
- **"Submit form"** → Fill fields, click submit button
|
|
105
|
+
- **"Register/Signup"** → Fill registration, click register button
|
|
106
|
+
|
|
107
|
+
Mark INCOMPLETE until the final implicit action completes.
|
|
108
|
+
|
|
109
|
+
### 5. Vision-Based Fallback Diagnostics
|
|
110
|
+
**File:** `scenario-worker-class.ts` (lines 215-272)
|
|
111
|
+
**Prompts:** `SCREENSHOT_NEED_ASSESSMENT`, `VISION_DIAGNOSTIC_ANALYSIS`
|
|
112
|
+
|
|
113
|
+
**Trigger Condition:**
|
|
114
|
+
```typescript
|
|
115
|
+
totalFailedAttemptsForStep >= 2 && !usedVisionMode && lastError
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
**When:** After 2+ total failures across all sub-actions
|
|
119
|
+
|
|
120
|
+
**Two-Step Process:**
|
|
121
|
+
|
|
122
|
+
**Step 1: Assess Screenshot Need** (gpt-4.1-mini)
|
|
123
|
+
- Quick check: Would visual analysis help?
|
|
124
|
+
- Conservative: Only recommend if DOM info insufficient
|
|
125
|
+
- Returns: needsScreenshot (boolean) + reason
|
|
126
|
+
|
|
127
|
+
**Step 2: Vision Diagnostics** (gpt-4o - only if assessment says yes)
|
|
128
|
+
- Supervisor analyzes screenshot
|
|
129
|
+
- Identifies: What's visible vs what was assumed
|
|
130
|
+
- Diagnoses: Why previous attempts failed
|
|
131
|
+
- Recommends: Better approach based on visual reality
|
|
132
|
+
|
|
133
|
+
**Output:**
|
|
134
|
+
- Visual analysis
|
|
135
|
+
- Root cause of failures
|
|
136
|
+
- Specific instructions for next attempt
|
|
137
|
+
- Elements found/not found
|
|
138
|
+
|
|
139
|
+
### 6. Script Generation
|
|
140
|
+
**File:** `script-utils.ts` → `generateTestScript()`
|
|
141
|
+
|
|
142
|
+
**Output Format:**
|
|
143
|
+
```javascript
|
|
144
|
+
/*
|
|
145
|
+
This is a TestChimp Smart Test.
|
|
146
|
+
Version: 1.0
|
|
147
|
+
|
|
148
|
+
#login #coreHR #peopleHR
|
|
149
|
+
*/
|
|
150
|
+
|
|
151
|
+
import { test, expect } from '@playwright/test';
|
|
152
|
+
test('testName', async ({ page, browser, context }) => {
|
|
153
|
+
// Step 1: Go to URL
|
|
154
|
+
await page.goto('https://...', { waitUntil: 'domcontentloaded', timeout: 10000 });
|
|
155
|
+
|
|
156
|
+
// Step 2: Login with credentials: Willy, Willy@1234
|
|
157
|
+
await page.fill('username', 'Willy');
|
|
158
|
+
await page.fill('password', 'Willy@1234');
|
|
159
|
+
await page.click('button[name="Login"]');
|
|
160
|
+
|
|
161
|
+
// Step 3: Click on All Modules [FAILED]
|
|
162
|
+
// Attempted: await page.getByText('All Modules').click();
|
|
163
|
+
});
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## Configuration & Timeouts
|
|
167
|
+
|
|
168
|
+
**Default Timeout:** 5 seconds (fast feedback on wrong selectors)
|
|
169
|
+
|
|
170
|
+
**Navigation Timeout:** 10 seconds explicit (handles redirects)
|
|
171
|
+
```typescript
|
|
172
|
+
await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 10000 })
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Why:**
|
|
176
|
+
- 5s for element operations → fast failure on wrong selectors (not 10s wait per wrong selector)
|
|
177
|
+
- 10s explicit for navigation → handles redirects properly
|
|
178
|
+
- Best of both: fast iteration + reliable navigation
|
|
179
|
+
|
|
180
|
+
## Key Improvements
|
|
181
|
+
|
|
182
|
+
### 1. Value Preservation Throughout Flow
|
|
183
|
+
|
|
184
|
+
**Problem:** Specific values (credentials, amounts, etc.) were being lost between stages
|
|
185
|
+
|
|
186
|
+
**Solution:** Preserve at every stage
|
|
187
|
+
|
|
188
|
+
| Stage | Before | After |
|
|
189
|
+
|-------|--------|-------|
|
|
190
|
+
| Breakdown | "Login with user/pass" | "Login with credentials: Willy, Willy@1234" |
|
|
191
|
+
| Goal | "Complete login" | "Login with credentials: Willy, Willy@1234" |
|
|
192
|
+
| Command | `process.env.USERNAME` | `'Willy'` |
|
|
193
|
+
|
|
194
|
+
### 2. Semantic Action Understanding
|
|
195
|
+
|
|
196
|
+
**Problem:** Marking "Login" complete after just filling fields
|
|
197
|
+
|
|
198
|
+
**Solution:** Recognize implicit final actions
|
|
199
|
+
|
|
200
|
+
- "Login" → fill + **click login button**
|
|
201
|
+
- "Send" → type + **click send button**
|
|
202
|
+
- "Submit" → fill + **click submit button**
|
|
203
|
+
|
|
204
|
+
### 3. Navigation & Redirect Handling
|
|
205
|
+
|
|
206
|
+
**Problem:** Retrying original URL after successful redirect
|
|
207
|
+
|
|
208
|
+
**Solution:**
|
|
209
|
+
- Check current URL after navigation errors
|
|
210
|
+
- If URL changed from `about:blank` → navigation succeeded
|
|
211
|
+
- Use `domcontentloaded` for redirects (more reliable than `load`)
|
|
212
|
+
- Don't retry if already on a page
|
|
213
|
+
|
|
214
|
+
### 4. Vision Diagnostics
|
|
215
|
+
|
|
216
|
+
**Problem:** Vision never triggered, because the check used the per-sub-action `attempt` count instead of the total failures for the step
|
|
217
|
+
|
|
218
|
+
**Solution:** Trigger on total failures across all sub-actions
|
|
219
|
+
- Changed from `attempt === 2` → `totalFailedAttemptsForStep >= 2`
|
|
220
|
+
- Now triggers after 2+ failures regardless of sub-action boundaries
|
|
221
|
+
- Detailed logging shows when/why vision triggers or doesn't
|
|
222
|
+
|
|
223
|
+
### 5. Enhanced Logging
|
|
224
|
+
|
|
225
|
+
**Visibility:**
|
|
226
|
+
- ✅ Console logs for Debug Console
|
|
227
|
+
- ✅ outputChannel for Output panel
|
|
228
|
+
- ✅ Timestamps on all logs
|
|
229
|
+
- ✅ Version markers
|
|
230
|
+
- ✅ Vision trigger decision logs
|
|
231
|
+
|
|
232
|
+
**Format:**
|
|
233
|
+
```
|
|
234
|
+
[02:58:15.613] [ScenarioWorker] 🚀 RUNNER-CORE VERSION: v1.5.0-vision-preserve-values
|
|
235
|
+
[02:58:15.614] [ScenarioWorker] Step 1 - Sub-action 1, Attempt 1: Go to URL
|
|
236
|
+
[02:58:15.650] [ScenarioWorker] 🔍 Vision trigger check: subAction=1, attempt=0, totalFailed=0, usedVision=false
|
|
237
|
+
[02:58:15.651] [ScenarioWorker] 📝 Using DOM-based approach (0 failures so far, need 2+)
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Retry & Failure Budget
|
|
241
|
+
|
|
242
|
+
**Per Step Limits:**
|
|
243
|
+
- `MAX_RETRIES_PER_STEP = 3` → 4 attempts per sub-action (0, 1, 2, 3)
|
|
244
|
+
- `MAX_SUBACTIONS_PER_STEP = 5` → Max 5 different commands for one step
|
|
245
|
+
- `MAX_FAILED_ATTEMPTS_PER_STEP = 12` → Hard limit on total failures
|
|
246
|
+
|
|
247
|
+
**Early Termination:**
|
|
248
|
+
- After 2 consecutive step failures → stop execution
|
|
249
|
+
- Saves resources, prevents runaway costs
|
|
250
|
+
|
|
251
|
+
## Error Context Enhancement
|
|
252
|
+
|
|
253
|
+
**Navigation errors now include current URL:**
|
|
254
|
+
```
|
|
255
|
+
Error: Timeout 10000ms exceeded | Current URL: https://redirected-url.com
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
This helps LLM understand:
|
|
259
|
+
- Navigation succeeded but redirected
|
|
260
|
+
- Don't retry original URL
|
|
261
|
+
- Proceed with current page
|
|
262
|
+
|
|
263
|
+
## Workflow Example
|
|
264
|
+
|
|
265
|
+
**Scenario:**
|
|
266
|
+
```
|
|
267
|
+
- Go to https://app.com/login
|
|
268
|
+
- Login with credentials: admin, pass123
|
|
269
|
+
- Click dashboard
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Execution:**
|
|
273
|
+
|
|
274
|
+
**Step 1: Navigate**
|
|
275
|
+
```
|
|
276
|
+
Attempt 1: goto(url, {domcontentloaded, timeout: 10000}) → ✅ Success
|
|
277
|
+
Goal check: COMPLETE (navigation is single-step action)
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
**Step 2: Login**
|
|
281
|
+
```
|
|
282
|
+
Sub-action 1, Attempt 1: fill(username, 'admin') → ✅ Success
|
|
283
|
+
Goal check: INCOMPLETE (login needs username + password + click)
|
|
284
|
+
nextSubGoal: "Enter password and click login"
|
|
285
|
+
|
|
286
|
+
Sub-action 2, Attempt 1: fill(password, 'pass123') → ✅ Success
|
|
287
|
+
Goal check: INCOMPLETE (still need to click login button)
|
|
288
|
+
nextSubGoal: "Click login button to submit credentials"
|
|
289
|
+
|
|
290
|
+
Sub-action 3, Attempt 1: click(login button) → ✅ Success
|
|
291
|
+
Goal check: COMPLETE (all parts of login done)
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
**Step 3: Click dashboard**
|
|
295
|
+
```
|
|
296
|
+
Sub-action 1, Attempt 1: click(dashboard) → ❌ Fail (not visible)
|
|
297
|
+
🔍 Vision check: totalFailed=1, need 2+
|
|
298
|
+
📝 Using DOM (1 failure, need 2+)
|
|
299
|
+
|
|
300
|
+
Sub-action 1, Attempt 2: waitFor + click → ❌ Fail (still not visible)
|
|
301
|
+
🔍 Vision check: totalFailed=2, usedVision=false
|
|
302
|
+
🎯 VISION TRIGGER: 2 total failures - assessing...
|
|
303
|
+
💭 LLM: SCREENSHOT NEEDED ✅
|
|
304
|
+
📸 Taking screenshot...
|
|
305
|
+
👔 Supervisor analyzing...
|
|
306
|
+
🔨 Generating vision-aided command...
|
|
307
|
+
|
|
308
|
+
Sub-action 1, Attempt 3: [vision-aided command] → ✅ Success
|
|
309
|
+
Goal check: COMPLETE
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Testing Checklist
|
|
313
|
+
|
|
314
|
+
- [ ] Specific values preserved (credentials, names, amounts)
|
|
315
|
+
- [ ] Semantic actions complete fully (login includes button click)
|
|
316
|
+
- [ ] Navigation redirects handled (no URL retry loops)
|
|
317
|
+
- [ ] Vision triggers after 2+ failures
|
|
318
|
+
- [ ] Vision logs show decision reasoning
|
|
319
|
+
- [ ] Timeouts appropriate (5s default, 10s navigation)
|
|
320
|
+
- [ ] Error context includes current URL
|
|
321
|
+
- [ ] Failed steps don't show commands from the previous step
|
|
322
|
+
- [ ] Version marker visible in logs
|
|
323
|
+
|
|
324
|
+
## Version Tracking
|
|
325
|
+
|
|
326
|
+
**Current Version:** `v1.5.0-vision-preserve-values`
|
|
327
|
+
|
|
328
|
+
**Version log location:**
|
|
329
|
+
- During initialization: `[ScenarioWorker] 🚀 RUNNER-CORE VERSION: v1.5.0-vision-preserve-values`
|
|
330
|
+
- Increment the version marker for each significant change
|
|
331
|
+
|
|
332
|
+
## Build & Deploy
|
|
333
|
+
|
|
334
|
+
**Local Development:**
|
|
335
|
+
```bash
|
|
336
|
+
cd /Users/nuwansam/IdeaProjects/AwareRepo/local/vs-ext
|
|
337
|
+
./build_local.sh
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
**What it does:**
|
|
341
|
+
1. Builds runner-core
|
|
342
|
+
2. Packs runner-core (0.0.22)
|
|
343
|
+
3. Installs in vs-ext
|
|
344
|
+
4. Builds vs-ext for staging
|
|
345
|
+
|
|
346
|
+
**Verification:**
|
|
347
|
+
```bash
|
|
348
|
+
grep "v1.5.0-vision-preserve-values" node_modules/testchimp-runner-core/dist/scenario-worker-class.js
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
## Related Documentation
|
|
352
|
+
|
|
353
|
+
- `VISION_DIAGNOSTICS_IMPROVEMENTS.md` - Vision system details
|
|
354
|
+
- `prompts.ts` - All LLM prompts and guidance
|
|
355
|
+
- `types.ts` - Type definitions
|
|
356
|
+
|
|
357
|
+
## Future Enhancements
|
|
358
|
+
|
|
359
|
+
1. **Learn from vision insights** - Build library of common patterns
|
|
360
|
+
2. **Optimize vision timing** - Better cost/benefit analysis
|
|
361
|
+
3. **Cross-flow learning** - Share insights between generation and repair
|
|
362
|
+
4. **Smarter goal parsing** - Better semantic action recognition
|
|
363
|
+
5. **Dynamic timeout adjustment** - Based on operation type
|
|
364
|
+
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Selector Preference Improvements
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
Updated the orchestrator agent to prefer user-friendly, semantic Playwright selectors over auto-generated IDs, following Playwright's official best practices.
|
|
5
|
+
|
|
6
|
+
## Problem
|
|
7
|
+
The agent was generating commands like:
|
|
8
|
+
```typescript
|
|
9
|
+
await page.fill('#«r3»-form-item', 'alice@example.com')
|
|
10
|
+
await page.fill('#«r4»-form-item', 'TestPass123')
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
These auto-generated IDs (especially with unicode characters like `«r3»`) are:
|
|
14
|
+
- Not user-friendly or readable
|
|
15
|
+
- Brittle (they break when component instances change)
|
|
16
|
+
- Not maintainable
|
|
17
|
+
- Contrary to Playwright best practices
|
|
18
|
+
|
|
19
|
+
## Solution
|
|
20
|
+
Implemented a comprehensive selector preference strategy across three key files:
|
|
21
|
+
|
|
22
|
+
### 1. Orchestrator Agent Prompt (`orchestrator-agent.ts`)
|
|
23
|
+
|
|
24
|
+
Added new section **"5b. SELECTOR PREFERENCE"** with explicit guidance:
|
|
25
|
+
|
|
26
|
+
**Preferred selectors (in order):**
|
|
27
|
+
1. `page.getByRole('role', {name: 'text'})` - Accessible, semantic, resilient
|
|
28
|
+
2. `page.getByLabel('label text')` - Great for form inputs
|
|
29
|
+
3. `page.getByPlaceholder('placeholder')` - Good for inputs without labels
|
|
30
|
+
4. `page.getByText('visible text')` - Clear and readable
|
|
31
|
+
5. `page.getByTestId('test-id')` - Stable if available
|
|
32
|
+
|
|
33
|
+
**Avoid (last resort only):**
|
|
34
|
+
- CSS selectors with auto-generated IDs: `#r3-form-item`, `#«r3»-form-item`
|
|
35
|
+
- CSS selectors with unicode characters
|
|
36
|
+
- Complex CSS paths
|
|
37
|
+
|
|
38
|
+
**Examples provided:**
|
|
39
|
+
```typescript
|
|
40
|
+
// ❌ BAD
|
|
41
|
+
await page.fill('#«r3»-form-item', 'alice@example.com')
|
|
42
|
+
|
|
43
|
+
// ✅ GOOD
|
|
44
|
+
await page.getByLabel('Email').fill('alice@example.com')
|
|
45
|
+
await page.getByRole('textbox', {name: 'Email'}).fill('alice@example.com')
|
|
46
|
+
await page.getByPlaceholder('Enter your email').fill('alice@example.com')
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 2. Page Info Utils (`page-info-utils.ts`)
|
|
50
|
+
|
|
51
|
+
**Reordered selector generation priority:**
|
|
52
|
+
|
|
53
|
+
Before:
|
|
54
|
+
1. ID selector (e.g., `#«r3»-form-item`)
|
|
55
|
+
2. data-testid
|
|
56
|
+
3. getByRole
|
|
57
|
+
|
|
58
|
+
After:
|
|
59
|
+
1. getByLabel (for inputs with associated labels)
|
|
60
|
+
2. getByRole with name (semantic and accessible)
|
|
61
|
+
3. getByPlaceholder (for inputs with placeholders)
|
|
62
|
+
4. getByText (visible text fallback)
|
|
63
|
+
5. getByTestId (stable, explicit)
|
|
64
|
+
6. ID selector - ONLY if stable (filters out auto-generated IDs with unicode or patterns like `rc_`, `:r[0-9]+:`, `__`)
|
|
65
|
+
|
|
66
|
+
**Enhanced display:**
|
|
67
|
+
- Shows best selector first, with up to 2 alternatives
|
|
68
|
+
- Example: `getByLabel('Email') (or: getByRole('textbox', {name: 'Email'}), getByPlaceholder('Enter your email'))`
|
|
69
|
+
|
|
70
|
+
### 3. Screenshot Tool (`take-screenshot.ts`)
|
|
71
|
+
|
|
72
|
+
**Updated vision analysis prompts:**
|
|
73
|
+
- System prompt now emphasizes: "ALWAYS prioritize semantic selectors (getByRole, getByLabel, getByText) over CSS selectors with auto-generated IDs"
|
|
74
|
+
- Analysis task explicitly instructs: "Recommend SEMANTIC SELECTORS FIRST" and "AVOID auto-generated IDs with unicode"
|
|
75
|
+
|
|
76
|
+
### 4. Updated EXPERIENCES Section
|
|
77
|
+
|
|
78
|
+
Enhanced examples to capture semantic selector patterns:
|
|
79
|
+
```
|
|
80
|
+
✅ GOOD - App-specific patterns:
|
|
81
|
+
- "Login form fields accessible via getByLabel: 'Email' and 'Password'"
|
|
82
|
+
- "Submit buttons consistently use role=button with text matching action"
|
|
83
|
+
- "Input fields have clear placeholders - prefer getByPlaceholder over IDs"
|
|
84
|
+
|
|
85
|
+
❌ BAD:
|
|
86
|
+
- Noting auto-generated IDs like #«r3»-form-item (these are unreliable)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Benefits
|
|
90
|
+
|
|
91
|
+
1. **More Maintainable**: Semantic selectors are resilient to UI changes
|
|
92
|
+
2. **Self-Documenting**: Code reads like natural language
|
|
93
|
+
3. **Accessibility**: Ensures UI elements are properly accessible
|
|
94
|
+
4. **Best Practices**: Follows Playwright's official recommendations
|
|
95
|
+
5. **User-Friendly**: Generated tests are more readable and easier to understand
|
|
96
|
+
|
|
97
|
+
## Expected Output
|
|
98
|
+
|
|
99
|
+
After these changes, the agent will generate:
|
|
100
|
+
|
|
101
|
+
```typescript
|
|
102
|
+
// Login form example
|
|
103
|
+
await page.getByLabel('Email').fill('alice@example.com')
|
|
104
|
+
await page.getByLabel('Password').fill('TestPass123')
|
|
105
|
+
await page.getByRole('button', {name: 'Sign In'}).click()
|
|
106
|
+
|
|
107
|
+
// Navigation example
|
|
108
|
+
await page.getByRole('link', {name: 'Dashboard'}).click()
|
|
109
|
+
|
|
110
|
+
// Form with placeholders
|
|
111
|
+
await page.getByPlaceholder('Search...').fill('test query')
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Instead of:
|
|
115
|
+
|
|
116
|
+
```typescript
|
|
117
|
+
// Old style with auto-generated IDs
|
|
118
|
+
await page.fill('#«r3»-form-item', 'alice@example.com')
|
|
119
|
+
await page.fill('#«r4»-form-item', 'TestPass123')
|
|
120
|
+
await page.click('#«r5»-button')
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Testing
|
|
124
|
+
|
|
125
|
+
The changes are backward compatible - the agent will still use ID selectors as a last resort when semantic selectors are not available. The build completed successfully with no linter errors.
|
|
126
|
+
|
|
127
|
+
## Files Modified
|
|
128
|
+
|
|
129
|
+
1. `/src/orchestrator/orchestrator-agent.ts` - System prompt enhancements
|
|
130
|
+
2. `/src/utils/page-info-utils.ts` - Selector priority reordering
|
|
131
|
+
3. `/src/orchestrator/tools/take-screenshot.ts` - Vision analysis updates
|
|
132
|
+
|
|
133
|
+
## Next Steps
|
|
134
|
+
|
|
135
|
+
Test the changes with real scenarios to verify that:
|
|
136
|
+
1. Generated commands use semantic selectors when available
|
|
137
|
+
2. Tests remain stable and maintainable
|
|
138
|
+
3. Fallback to ID selectors works when semantic options aren't available
|
|
139
|
+
|
|
@@ -22,6 +22,7 @@ export interface InsertCreditUsageResponse {
|
|
|
22
22
|
export class CreditUsageService {
|
|
23
23
|
private backendUrl: string;
|
|
24
24
|
private authConfig: AuthConfig | null;
|
|
25
|
+
private logger?: (message: string, level?: 'log' | 'error' | 'warn') => void;
|
|
25
26
|
|
|
26
27
|
constructor(authConfig?: AuthConfig, backendUrl?: string) {
|
|
27
28
|
// Use provided backend URL or fall back to environment configuration
|
|
@@ -36,6 +37,23 @@ export class CreditUsageService {
|
|
|
36
37
|
this.authConfig = authConfig || null;
|
|
37
38
|
}
|
|
38
39
|
|
|
40
|
+
/**
|
|
41
|
+
* Set a logger callback for capturing execution logs
|
|
42
|
+
*/
|
|
43
|
+
setLogger(logger: (message: string, level?: 'log' | 'error' | 'warn') => void): void {
|
|
44
|
+
this.logger = logger;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Log a message using the configured logger
|
|
49
|
+
*/
|
|
50
|
+
private log(message: string, level: 'log' | 'error' | 'warn' = 'log'): void {
|
|
51
|
+
if (this.logger) {
|
|
52
|
+
this.logger(message, level);
|
|
53
|
+
}
|
|
54
|
+
// No console fallback - logs are routed to consumer
|
|
55
|
+
}
|
|
56
|
+
|
|
39
57
|
/**
|
|
40
58
|
* Update authentication configuration
|
|
41
59
|
*/
|
|
@@ -82,7 +100,7 @@ export class CreditUsageService {
|
|
|
82
100
|
|
|
83
101
|
return response.data;
|
|
84
102
|
} catch (error: any) {
|
|
85
|
-
|
|
103
|
+
this.log(`Credit usage report failed: ${error}`, 'error');
|
|
86
104
|
throw new Error(`Credit usage report failed: ${error.message}`);
|
|
87
105
|
}
|
|
88
106
|
}
|
|
@@ -93,9 +111,9 @@ export class CreditUsageService {
|
|
|
93
111
|
async reportScriptGenerationCredit(jobId?: string): Promise<void> {
|
|
94
112
|
try {
|
|
95
113
|
await this.reportCreditUsage(1, CreditUsageReason.SCRIPT_GENERATE, jobId);
|
|
96
|
-
|
|
114
|
+
this.log(`Credit usage reported for script generation${jobId ? ` (job: ${jobId})` : ''}`);
|
|
97
115
|
} catch (error) {
|
|
98
|
-
|
|
116
|
+
this.log(`Failed to report script generation credit usage: ${error}`, 'error');
|
|
99
117
|
// Don't throw - credit reporting should not break the main flow
|
|
100
118
|
}
|
|
101
119
|
}
|
|
@@ -106,9 +124,9 @@ export class CreditUsageService {
|
|
|
106
124
|
async reportAIRepairCredit(jobId?: string): Promise<void> {
|
|
107
125
|
try {
|
|
108
126
|
await this.reportCreditUsage(1, CreditUsageReason.TEST_REPAIR, jobId);
|
|
109
|
-
|
|
127
|
+
this.log(`Credit usage reported for AI repair${jobId ? ` (job: ${jobId})` : ''}`);
|
|
110
128
|
} catch (error) {
|
|
111
|
-
|
|
129
|
+
this.log(`Failed to report AI repair credit usage: ${error}`, 'error');
|
|
112
130
|
// Don't throw - credit reporting should not break the main flow
|
|
113
131
|
}
|
|
114
132
|
}
|