greenrun-cli 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,7 +50,7 @@ export declare class ApiClient {
50
50
  name?: string;
51
51
  }): Promise<unknown>;
52
52
  deletePage(id: string): Promise<unknown>;
53
- listTests(projectId: string): Promise<unknown>;
53
+ listTests(projectId: string, compact?: boolean): Promise<unknown>;
54
54
  createTest(projectId: string, data: {
55
55
  name: string;
56
56
  instructions: string;
@@ -98,12 +98,10 @@ export declare class ApiClient {
98
98
  test_id: any;
99
99
  test_name: any;
100
100
  run_id: any;
101
- instructions: any;
102
101
  credential_name: any;
103
102
  pages: any;
104
103
  tags: any;
105
- script: any;
106
- script_generated_at: any;
104
+ has_script: any;
107
105
  }[];
108
106
  }>;
109
107
  }
@@ -61,8 +61,9 @@ export class ApiClient {
61
61
  return this.request('DELETE', `/pages/${id}`);
62
62
  }
63
63
  // Tests
64
- async listTests(projectId) {
65
- return this.request('GET', `/projects/${projectId}/tests`);
64
+ async listTests(projectId, compact) {
65
+ const query = compact ? '?compact=1' : '';
66
+ return this.request('GET', `/projects/${projectId}/tests${query}`);
66
67
  }
67
68
  async createTest(projectId, data) {
68
69
  return this.request('POST', `/projects/${projectId}/tests`, data);
@@ -106,7 +107,7 @@ export class ApiClient {
106
107
  async prepareTestBatch(projectId, filter, testIds) {
107
108
  const [projectResult, testsResult] = await Promise.all([
108
109
  this.getProject(projectId),
109
- this.listTests(projectId),
110
+ this.listTests(projectId, true),
110
111
  ]);
111
112
  const project = projectResult.project;
112
113
  let tests = (testsResult.tests || []).filter((t) => t.status === 'active');
@@ -127,44 +128,30 @@ export class ApiClient {
127
128
  tests = tests.filter((t) => (t.name || '').toLowerCase().includes(term));
128
129
  }
129
130
  }
131
+ const projectSummary = {
132
+ id: project.id, name: project.name, base_url: project.base_url,
133
+ auth_mode: project.auth_mode ?? 'none',
134
+ login_url: project.login_url ?? null,
135
+ register_url: project.register_url ?? null,
136
+ login_instructions: project.login_instructions ?? null,
137
+ register_instructions: project.register_instructions ?? null,
138
+ credentials: project.credentials ?? null,
139
+ };
130
140
  if (tests.length === 0) {
131
- return {
132
- project: {
133
- id: project.id, name: project.name, base_url: project.base_url,
134
- auth_mode: project.auth_mode ?? 'none',
135
- login_url: project.login_url ?? null,
136
- register_url: project.register_url ?? null,
137
- login_instructions: project.login_instructions ?? null,
138
- register_instructions: project.register_instructions ?? null,
139
- credentials: project.credentials ?? null,
140
- },
141
- tests: [],
142
- };
141
+ return { project: projectSummary, tests: [] };
143
142
  }
144
- // Fetch full test details in parallel
145
- const fullTests = await Promise.all(tests.map((t) => this.getTest(t.id)));
146
- // Start runs in parallel
143
+ // Start runs in parallel (the batch is built from the compact listTests rows and no longer returns instructions/script, so no per-test getTest call is needed)
147
144
  const runs = await Promise.all(tests.map((t) => this.startRun(t.id)));
148
145
  return {
149
- project: {
150
- id: project.id, name: project.name, base_url: project.base_url,
151
- auth_mode: project.auth_mode ?? 'none',
152
- login_url: project.login_url ?? null,
153
- register_url: project.register_url ?? null,
154
- login_instructions: project.login_instructions ?? null,
155
- register_instructions: project.register_instructions ?? null,
156
- credentials: project.credentials ?? null,
157
- },
158
- tests: fullTests.map((ft, i) => ({
159
- test_id: ft.test.id,
160
- test_name: ft.test.name,
146
+ project: projectSummary,
147
+ tests: tests.map((t, i) => ({
148
+ test_id: t.id,
149
+ test_name: t.name,
161
150
  run_id: runs[i].run.id,
162
- instructions: ft.test.instructions,
163
- credential_name: ft.test.credential_name ?? null,
164
- pages: ft.test.pages || [],
165
- tags: ft.test.tags || [],
166
- script: ft.test.script ?? null,
167
- script_generated_at: ft.test.script_generated_at ?? null,
151
+ credential_name: t.credential_name ?? null,
152
+ pages: (t.pages || []).map((p) => ({ id: p.id, url: p.url })),
153
+ tags: (t.tags || []).map((tg) => tg.name || tg),
154
+ has_script: t.has_script ?? !!t.script,
168
155
  })),
169
156
  };
170
157
  }
@@ -36,6 +36,43 @@ function prompt(rl, question) {
36
36
  });
37
37
  });
38
38
  }
39
+ function detectSystemChrome() {
40
+ const platform = process.platform;
41
+ if (platform === 'darwin') {
42
+ return existsSync('/Applications/Google Chrome.app/Contents/MacOS/Google Chrome');
43
+ }
44
+ if (platform === 'win32') {
45
+ const dirs = [process.env['PROGRAMFILES'], process.env['PROGRAMFILES(X86)'], process.env['LOCALAPPDATA']];
46
+ return dirs.some(dir => dir && existsSync(join(dir, 'Google', 'Chrome', 'Application', 'chrome.exe')));
47
+ }
48
+ // Linux
49
+ try {
50
+ execSync('which google-chrome-stable || which google-chrome || which chromium-browser || which chromium', { stdio: 'pipe' });
51
+ return true;
52
+ }
53
+ catch {
54
+ return false;
55
+ }
56
+ }
57
+ function installPlaywrightChromium() {
58
+ try {
59
+ console.log(' Installing @playwright/test (this may take a minute)...');
60
+ execSync('npm install -g @playwright/test@latest', { stdio: 'inherit' });
61
+ console.log(' Installing Chromium browser...');
62
+ execSync('npx playwright install --with-deps chromium', { stdio: 'inherit' });
63
+ return true;
64
+ }
65
+ catch {
66
+ console.error(' Failed to install Playwright. You can install manually:');
67
+ console.error(' npm install -g @playwright/test@latest');
68
+ console.error(' npx playwright install --with-deps chromium\n');
69
+ return false;
70
+ }
71
+ }
72
+ function checkNodeVersion() {
73
+ const match = process.version.match(/^v(\d+)\./);
74
+ return match ? parseInt(match[1], 10) >= 18 : false;
75
+ }
39
76
  function checkPrerequisites() {
40
77
  let claude = false;
41
78
  try {
@@ -55,14 +92,15 @@ async function validateToken(token) {
55
92
  'Accept': 'application/json',
56
93
  },
57
94
  });
58
- if (!response.ok)
59
- return { valid: false };
95
+ if (!response.ok) {
96
+ return { valid: false, error: `API returned HTTP ${response.status}` };
97
+ }
60
98
  const data = await response.json();
61
99
  const projects = Array.isArray(data) ? data : (data.data ?? []);
62
100
  return { valid: true, projectCount: projects.length };
63
101
  }
64
- catch {
65
- return { valid: false };
102
+ catch (err) {
103
+ return { valid: false, error: err?.message || String(err) };
66
104
  }
67
105
  }
68
106
  function getClaudeConfigPath() {
@@ -107,24 +145,24 @@ function configureMcpLocal(token) {
107
145
  console.error(` claude mcp add greenrun --transport stdio -e GREENRUN_API_TOKEN=${token} -- npx -y greenrun-cli@latest\n`);
108
146
  }
109
147
  }
110
- function configurePlaywrightMcp() {
148
+ function configurePlaywrightMcp(browser = 'chrome') {
111
149
  try {
112
150
  setLocalMcpServer('playwright', {
113
151
  type: 'stdio',
114
152
  command: 'npx',
115
153
  args: [
116
154
  '@playwright/mcp@latest',
117
- '--browser', 'chrome',
155
+ '--browser', browser,
118
156
  '--user-data-dir', join(homedir(), '.greenrun', 'browser-profile'),
119
157
  ],
120
158
  env: {},
121
159
  });
122
- console.log(' Configured playwright MCP server');
160
+ console.log(` Configured playwright MCP server (${browser})`);
123
161
  }
124
162
  catch {
125
163
  console.error('\nFailed to write Playwright MCP config to ~/.claude.json');
126
164
  console.error('You can add it manually:\n');
127
- console.error(' claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile\n');
165
+ console.error(` claude mcp add playwright -- npx @playwright/mcp@latest --browser ${browser} --user-data-dir ~/.greenrun/browser-profile\n`);
128
166
  }
129
167
  }
130
168
  function configureMcpProject(token) {
@@ -276,8 +314,61 @@ function installCommands() {
276
314
  console.log(` Installed /${cmd.replace('.md', '')}`);
277
315
  }
278
316
  }
317
+ function checkDependencies() {
318
+ console.log('Checking dependencies...');
319
+ let allGood = true;
320
+ // Node version
321
+ if (checkNodeVersion()) {
322
+ console.log(` [x] Node.js ${process.version}`);
323
+ }
324
+ else {
325
+ console.log(` [ ] Node.js ${process.version} (18+ required)`);
326
+ allGood = false;
327
+ }
328
+ // Claude Code
329
+ const prereqs = checkPrerequisites();
330
+ if (prereqs.claude) {
331
+ console.log(' [x] Claude Code CLI');
332
+ }
333
+ else {
334
+ console.log(' [ ] Claude Code CLI not found');
335
+ allGood = false;
336
+ }
337
+ // @playwright/test
338
+ try {
339
+ execSync('npx playwright --version', { stdio: 'pipe' });
340
+ console.log(' [x] @playwright/test');
341
+ }
342
+ catch {
343
+ console.log(' [ ] @playwright/test not installed');
344
+ console.log(' Run: npm install -g @playwright/test@latest');
345
+ allGood = false;
346
+ }
347
+ // Browser (Chrome or Chromium)
348
+ if (detectSystemChrome()) {
349
+ console.log(' [x] Chrome detected');
350
+ }
351
+ else {
352
+ try {
353
+ execSync('npx playwright install --dry-run chromium', { stdio: 'pipe' });
354
+ console.log(' [x] Playwright Chromium');
355
+ }
356
+ catch {
357
+ console.log(' [ ] No browser detected (Chrome or Playwright Chromium)');
358
+ console.log(' Run: npx playwright install --with-deps chromium');
359
+ allGood = false;
360
+ }
361
+ }
362
+ if (allGood) {
363
+ console.log(' All dependencies installed.\n');
364
+ }
365
+ else {
366
+ console.log('\n Some dependencies are missing. Install them and run again.\n');
367
+ }
368
+ }
279
369
  export function runUpdate() {
280
370
  console.log('\nGreenrun - Updating templates\n');
371
+ checkDependencies();
281
372
  installCommands();
282
373
  installSettings();
283
374
  installClaudeMd();
@@ -287,6 +378,12 @@ export async function runInit(args) {
287
378
  const opts = parseFlags(args);
288
379
  const interactive = !opts.token;
289
380
  console.log('\nGreenrun - Browser Test Management for Claude Code\n');
381
+ // Node version gate
382
+ if (!checkNodeVersion()) {
383
+ console.error(`Error: Node.js 18 or later is required (detected ${process.version}).`);
384
+ console.error('Install a newer version: https://nodejs.org/\n');
385
+ process.exit(1);
386
+ }
290
387
  // Prerequisites
291
388
  console.log('Prerequisites:');
292
389
  const prereqs = checkPrerequisites();
@@ -318,7 +415,7 @@ export async function runInit(args) {
318
415
  process.stdout.write(' Validating... ');
319
416
  const validation = await validateToken(token);
320
417
  if (!validation.valid) {
321
- console.log('Failed! Invalid token or cannot reach the API.');
418
+ console.log(`Failed! ${validation.error || 'Invalid token or cannot reach the API.'}`);
322
419
  rl.close();
323
420
  process.exit(1);
324
421
  }
@@ -348,12 +445,37 @@ export async function runInit(args) {
348
445
  process.stdout.write('Validating token... ');
349
446
  const validation = await validateToken(token);
350
447
  if (!validation.valid) {
351
- console.log('Failed!');
448
+ console.log(`Failed! ${validation.error || 'Invalid token or cannot reach the API.'}`);
352
449
  process.exit(1);
353
450
  }
354
451
  console.log(`Connected! (${validation.projectCount} project${validation.projectCount === 1 ? '' : 's'} found)`);
355
452
  scope = scope || 'local';
356
453
  }
454
+ // Detect browser
455
+ let browser = 'chrome';
456
+ if (!detectSystemChrome()) {
457
+ if (interactive) {
458
+ const rl2 = createInterface({ input: process.stdin, output: process.stdout });
459
+ console.log('Chrome not detected on this system.');
460
+ const installChoice = await prompt(rl2, ' Install Playwright Chromium? [Y/n]: ');
461
+ rl2.close();
462
+ if (installChoice.toLowerCase() !== 'n') {
463
+ if (installPlaywrightChromium()) {
464
+ browser = 'chromium';
465
+ }
466
+ else {
467
+ console.log(' Continuing with chrome config. You can install Chrome manually later.\n');
468
+ }
469
+ }
470
+ }
471
+ else {
472
+ console.log('Chrome not detected. Installing Playwright Chromium...');
473
+ if (installPlaywrightChromium()) {
474
+ browser = 'chromium';
475
+ }
476
+ }
477
+ console.log();
478
+ }
357
479
  // Configure MCP
358
480
  console.log('Configuring MCP servers...');
359
481
  if (scope === 'project') {
@@ -362,7 +484,7 @@ export async function runInit(args) {
362
484
  else {
363
485
  configureMcpLocal(token);
364
486
  }
365
- configurePlaywrightMcp();
487
+ configurePlaywrightMcp(browser);
366
488
  console.log(' MCP servers configured.\n');
367
489
  // Install extras
368
490
  if (opts.claudeMd) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "greenrun-cli",
3
- "version": "0.2.9",
3
+ "version": "0.2.11",
4
4
  "description": "CLI and MCP server for Greenrun - browser test management for Claude Code",
5
5
  "type": "module",
6
6
  "main": "dist/server.js",
@@ -23,9 +23,9 @@ If auth fails (login form still visible after following instructions), report al
23
23
 
24
24
  ## Execute
25
25
 
26
- You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `instructions`, `credential_name`, `pages`, `tags`, `has_script`).
26
+ You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `credential_name`, `pages`, `tags`, `has_script`).
27
27
 
28
- Note: `has_script` is a boolean indicating whether a cached Playwright script exists. To fetch the actual script content, call `get_test(test_id)` — only do this when you need the script (e.g. in Step 5 when writing test files).
28
+ Note: The batch does not include `instructions` or `script` content. Use `get_test(test_id)` to fetch these when needed.
29
29
 
30
30
  If `tests` is empty, tell the user no matching active tests were found and stop.
31
31
 
@@ -42,25 +42,17 @@ Split the batch into two groups:
42
42
 
43
43
  If all tests are scripted, skip to Step 4.
44
44
 
45
- ### Step 3: Score and generate scripts (easy-first)
45
+ ### Step 3: Generate scripts for unscripted tests
46
46
 
47
- For each **unscripted** test, assign a difficulty score based on the instructions:
47
+ For each **unscripted** test, one at a time:
48
48
 
49
- - **easy** (1): Single-page tests with simple actions — navigate, check text/headings, verify static content, click a link and check the URL. Typically 1-4 steps, no form submissions, no multi-step flows.
50
- - **medium** (2): Tests involving form input, button clicks that trigger state changes, checking error/success messages, or verifying a redirect after an action. Typically 3-8 steps.
51
- - **hard** (3): Multi-page flows, tests requiring specific sequences of actions (e.g. add to cart then checkout), tests with complex assertions (table data, dynamic content), or tests involving file uploads, modals, or dialogs.
52
-
53
- Sort unscripted tests by difficulty ascending (easy first). This ensures simple tests get scripts generated quickly so native execution can start sooner.
54
-
55
- #### Walk-through script generation
56
-
57
- For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
58
-
59
- 1. Navigate to the test's starting page via `browser_navigate`
60
- 2. Take a `browser_snapshot` to see initial elements
61
- 3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
62
- 4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
63
- 5. Collect all observed elements and selectors as context
49
+ 1. Call `get_test(test_id)` to fetch the full instructions
50
+ 2. Do a **scouting pass** — follow the test instructions in the browser to observe all UI states:
51
+ - Navigate to the test's starting page via `browser_navigate`
52
+ - Take a `browser_snapshot` to see initial elements
53
+ - Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
54
+ - Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
55
+ - Collect all observed elements and selectors as context
64
56
 
65
57
  #### Handling failures during scouting
66
58
 
@@ -113,21 +105,6 @@ test('{test_name}', async ({ page }) => {
113
105
 
114
106
  Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
115
107
 
116
- **Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for next run.
117
-
118
- To launch the background generation agent:
119
-
120
- ```
121
- Task tool with:
122
- - subagent_type: "general-purpose"
123
- - run_in_background: true
124
- - max_turns: 50
125
- - model: "sonnet"
126
- - prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
127
- ```
128
-
129
- The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
130
-
131
108
  ### Step 4: Export auth state
132
109
 
133
110
  If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
@@ -175,17 +152,9 @@ npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
175
152
 
176
153
  5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
177
154
 
178
- 6. **Clean up browsers**: After native execution completes, close any browsers left behind by the test runner:
179
- ```bash
180
- npx playwright test --config /tmp/greenrun-tests/playwright.config.ts --list 2>/dev/null; true
181
- ```
182
- The Playwright Test runner normally cleans up after itself, but if tests crash or timeout, browser processes may linger. Also call `browser_close` to reset the MCP browser context before any subsequent AI fallback execution.
183
-
184
- ### Step 6: Handle unscripted tests without scripts
185
-
186
- Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 7 for these tests.
155
+ 6. **Clean up**: Call `browser_close` to reset the MCP browser context.
187
156
 
188
- ### Step 7: Circuit breaker
157
+ ### Step 6: Circuit breaker
189
158
 
190
159
  After parsing all native results, walk through them in completion order. Track consecutive failures:
191
160
 
@@ -194,170 +163,25 @@ After parsing all native results, walk through them in completion order. Track c
194
163
  - Skip AI fallback for remaining tests
195
164
  - The counter resets on any pass
196
165
 
197
- ### Step 8: AI-agent fallback for native failures
166
+ ### Step 7: AI fallback for native failures
198
167
 
199
- For tests that **failed** in native execution (and circuit breaker has not tripped):
168
+ For tests that **failed** in native execution (and circuit breaker has not tripped), execute them one at a time using the AI agent approach:
200
169
 
201
170
  1. Close the current browser context with `browser_close` so the fallback starts fresh
202
171
  2. Re-authenticate by navigating to the login page and following the Authenticate procedure
203
- 3. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
204
- 4. Launch background Task agents using the tab-isolation pattern:
205
-
206
- Create tabs and launch agents in batches of 20:
207
-
208
- #### Create tab
209
- ```js
210
- async (page) => {
211
- const newPage = await page.context().newPage();
212
- await newPage.goto(START_URL);
213
- return { index: page.context().pages().length - 1, url: newPage.url() };
214
- }
215
- ```
216
-
217
- #### Launch agent
218
- ```
219
- Task tool with:
220
- - subagent_type: "general-purpose"
221
- - run_in_background: true
222
- - max_turns: 25
223
- - model: "sonnet"
224
- - prompt: (agent prompt below, including the native failure message for diagnosis)
225
- ```
226
-
227
- #### Agent prompt
228
-
229
- ```
230
- Greenrun browser test (AI fallback). Run ID: {run_id}
231
- Tab index: {INDEX}
232
-
233
- **{test_name}**
234
-
235
- {paste the full test instructions here}
236
-
237
- **Native execution failed with:** {failure_message}
238
-
239
- Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.
240
-
241
- ## CRITICAL: Tab isolation
172
+ 3. For each failed test:
173
+ - Call `get_test(test_id)` to fetch the full instructions
174
+ - Start a new run via `start_run(test_id)` (the original run was already completed in Step 5)
175
+ - Navigate to the test's starting page via `browser_navigate`
176
+ - Follow the test instructions step by step using Playwright MCP tools
177
+ - Determine if this is a stale script (UI changed) or an actual bug
178
+ - If the test passes manually, invalidate the cached script: `update_test(test_id, { script: null, script_generated_at: null })`
179
+ - Call `complete_run(run_id, status, brief_summary)`
180
+ - Call `browser_close` before the next test to reset state
242
181
 
243
- You are assigned to tab index {INDEX}. You MUST use ONLY `browser_run_code` for ALL browser interactions. Do NOT use `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate`, or any other Playwright MCP tools. The only non-browser tool you may call is `complete_run`.
182
+ ### Step 8: Handle tests that still have no script
244
183
 
245
- Every `browser_run_code` call must scope to your tab:
246
- ```js
247
- async (page) => {
248
- const p = page.context().pages()[INDEX];
249
- // ... your action here ...
250
- }
251
- ```
252
-
253
- ## Auth
254
- No authentication needed — the main page already authenticated and cookies are shared to your tab.
255
-
256
- ## Interaction patterns
257
-
258
- **Navigate:**
259
- ```js
260
- async (page) => {
261
- const p = page.context().pages()[INDEX];
262
- await p.goto('https://example.com/path');
263
- return p.url();
264
- }
265
- ```
266
-
267
- **Read page state (replaces browser_snapshot):**
268
- ```js
269
- async (page) => {
270
- const p = page.context().pages()[INDEX];
271
- const url = p.url();
272
- const title = await p.title();
273
- const text = await p.locator('body').innerText();
274
- const headings = await p.getByRole('heading').allTextContents();
275
- const buttons = await p.getByRole('button').allTextContents();
276
- const links = await p.getByRole('link').allTextContents();
277
- const textboxes = await p.getByRole('textbox').evaluateAll(els =>
278
- els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
279
- );
280
- return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
281
- }
282
- ```
283
-
284
- **Click an element:**
285
- ```js
286
- async (page) => {
287
- const p = page.context().pages()[INDEX];
288
- await p.getByRole('button', { name: 'Submit' }).click();
289
- return p.url();
290
- }
291
- ```
292
-
293
- **Fill a form field:**
294
- ```js
295
- async (page) => {
296
- const p = page.context().pages()[INDEX];
297
- await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
298
- return 'filled';
299
- }
300
- ```
301
-
302
- **Handle a dialog:**
303
- ```js
304
- async (page) => {
305
- const p = page.context().pages()[INDEX];
306
- p.once('dialog', d => d.accept());
307
- await p.getByRole('button', { name: 'Delete' }).click();
308
- return p.url();
309
- }
310
- ```
311
-
312
- **Check for specific text (verification):**
313
- ```js
314
- async (page) => {
315
- const p = page.context().pages()[INDEX];
316
- const visible = await p.getByText('Success').isVisible();
317
- return { found: visible };
318
- }
319
- ```
320
-
321
- ## Rules
322
- - ONLY use `browser_run_code` — no other browser tools
323
- - Always scope to `page.context().pages()[INDEX]`
324
- - Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
325
- - Read page state to find elements before interacting
326
- - Navigate with absolute URLs via `p.goto(url)` — never click nav links
327
-
328
- ## FORBIDDEN — never use these:
329
- - `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
330
- - `browser_wait` — NEVER call this
331
- - `browser_screenshot` — NEVER use
332
-
333
- ## Error recovery
334
- - On ANY failure: retry the failing step ONCE, then skip to Finish.
335
-
336
- ## Finish (MANDATORY — always reach this step)
337
- 1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
338
- 2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
339
- 3. Return: {test_name} | {status} | {summary}
340
- ```
341
-
342
- #### Wait and clean up
343
-
344
- Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
345
-
346
- ```js
347
- async (page) => {
348
- const pages = page.context().pages();
349
- for (let i = pages.length - 1; i >= 1; i--) {
350
- await pages[i].close();
351
- }
352
- return { remainingPages: page.context().pages().length };
353
- }
354
- ```
355
-
356
- Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
357
-
358
- ### Step 9: Wait for background generation
359
-
360
- If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
184
+ Any tests that didn't get scripts generated in Step 3 (e.g. if script generation failed) need to be executed the same way as Step 7 — one at a time using the AI agent approach. Follow the same pattern: get instructions, start run, execute in browser, complete run, close browser.
361
185
 
362
186
  ## Summarize
363
187
 
@@ -376,6 +200,4 @@ Total: "X passed, Y failed, Z errors out of N tests"
376
200
 
377
201
  If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
378
202
 
379
- If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
380
-
381
203
  If any tests failed, highlight what went wrong and suggest next steps.