greenrun-cli 0.2.9 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api-client.d.ts +2 -4
- package/dist/api-client.js +23 -36
- package/dist/commands/init.js +133 -11
- package/package.json +1 -1
- package/templates/commands/procedures.md +26 -204
package/dist/api-client.d.ts
CHANGED
|
@@ -50,7 +50,7 @@ export declare class ApiClient {
|
|
|
50
50
|
name?: string;
|
|
51
51
|
}): Promise<unknown>;
|
|
52
52
|
deletePage(id: string): Promise<unknown>;
|
|
53
|
-
listTests(projectId: string): Promise<unknown>;
|
|
53
|
+
listTests(projectId: string, compact?: boolean): Promise<unknown>;
|
|
54
54
|
createTest(projectId: string, data: {
|
|
55
55
|
name: string;
|
|
56
56
|
instructions: string;
|
|
@@ -98,12 +98,10 @@ export declare class ApiClient {
|
|
|
98
98
|
test_id: any;
|
|
99
99
|
test_name: any;
|
|
100
100
|
run_id: any;
|
|
101
|
-
instructions: any;
|
|
102
101
|
credential_name: any;
|
|
103
102
|
pages: any;
|
|
104
103
|
tags: any;
|
|
105
|
-
|
|
106
|
-
script_generated_at: any;
|
|
104
|
+
has_script: any;
|
|
107
105
|
}[];
|
|
108
106
|
}>;
|
|
109
107
|
}
|
package/dist/api-client.js
CHANGED
|
@@ -61,8 +61,9 @@ export class ApiClient {
|
|
|
61
61
|
return this.request('DELETE', `/pages/${id}`);
|
|
62
62
|
}
|
|
63
63
|
// Tests
|
|
64
|
-
async listTests(projectId) {
|
|
65
|
-
|
|
64
|
+
async listTests(projectId, compact) {
|
|
65
|
+
const query = compact ? '?compact=1' : '';
|
|
66
|
+
return this.request('GET', `/projects/${projectId}/tests${query}`);
|
|
66
67
|
}
|
|
67
68
|
async createTest(projectId, data) {
|
|
68
69
|
return this.request('POST', `/projects/${projectId}/tests`, data);
|
|
@@ -106,7 +107,7 @@ export class ApiClient {
|
|
|
106
107
|
async prepareTestBatch(projectId, filter, testIds) {
|
|
107
108
|
const [projectResult, testsResult] = await Promise.all([
|
|
108
109
|
this.getProject(projectId),
|
|
109
|
-
this.listTests(projectId),
|
|
110
|
+
this.listTests(projectId, true),
|
|
110
111
|
]);
|
|
111
112
|
const project = projectResult.project;
|
|
112
113
|
let tests = (testsResult.tests || []).filter((t) => t.status === 'active');
|
|
@@ -127,44 +128,30 @@ export class ApiClient {
|
|
|
127
128
|
tests = tests.filter((t) => (t.name || '').toLowerCase().includes(term));
|
|
128
129
|
}
|
|
129
130
|
}
|
|
131
|
+
const projectSummary = {
|
|
132
|
+
id: project.id, name: project.name, base_url: project.base_url,
|
|
133
|
+
auth_mode: project.auth_mode ?? 'none',
|
|
134
|
+
login_url: project.login_url ?? null,
|
|
135
|
+
register_url: project.register_url ?? null,
|
|
136
|
+
login_instructions: project.login_instructions ?? null,
|
|
137
|
+
register_instructions: project.register_instructions ?? null,
|
|
138
|
+
credentials: project.credentials ?? null,
|
|
139
|
+
};
|
|
130
140
|
if (tests.length === 0) {
|
|
131
|
-
return {
|
|
132
|
-
project: {
|
|
133
|
-
id: project.id, name: project.name, base_url: project.base_url,
|
|
134
|
-
auth_mode: project.auth_mode ?? 'none',
|
|
135
|
-
login_url: project.login_url ?? null,
|
|
136
|
-
register_url: project.register_url ?? null,
|
|
137
|
-
login_instructions: project.login_instructions ?? null,
|
|
138
|
-
register_instructions: project.register_instructions ?? null,
|
|
139
|
-
credentials: project.credentials ?? null,
|
|
140
|
-
},
|
|
141
|
-
tests: [],
|
|
142
|
-
};
|
|
141
|
+
return { project: projectSummary, tests: [] };
|
|
143
142
|
}
|
|
144
|
-
//
|
|
145
|
-
const fullTests = await Promise.all(tests.map((t) => this.getTest(t.id)));
|
|
146
|
-
// Start runs in parallel
|
|
143
|
+
// Start runs in parallel (listTests already has full details, no need for getTest)
|
|
147
144
|
const runs = await Promise.all(tests.map((t) => this.startRun(t.id)));
|
|
148
145
|
return {
|
|
149
|
-
project:
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
register_url: project.register_url ?? null,
|
|
154
|
-
login_instructions: project.login_instructions ?? null,
|
|
155
|
-
register_instructions: project.register_instructions ?? null,
|
|
156
|
-
credentials: project.credentials ?? null,
|
|
157
|
-
},
|
|
158
|
-
tests: fullTests.map((ft, i) => ({
|
|
159
|
-
test_id: ft.test.id,
|
|
160
|
-
test_name: ft.test.name,
|
|
146
|
+
project: projectSummary,
|
|
147
|
+
tests: tests.map((t, i) => ({
|
|
148
|
+
test_id: t.id,
|
|
149
|
+
test_name: t.name,
|
|
161
150
|
run_id: runs[i].run.id,
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
script: ft.test.script ?? null,
|
|
167
|
-
script_generated_at: ft.test.script_generated_at ?? null,
|
|
151
|
+
credential_name: t.credential_name ?? null,
|
|
152
|
+
pages: (t.pages || []).map((p) => ({ id: p.id, url: p.url })),
|
|
153
|
+
tags: (t.tags || []).map((tg) => tg.name || tg),
|
|
154
|
+
has_script: t.has_script ?? !!t.script,
|
|
168
155
|
})),
|
|
169
156
|
};
|
|
170
157
|
}
|
package/dist/commands/init.js
CHANGED
|
@@ -36,6 +36,43 @@ function prompt(rl, question) {
|
|
|
36
36
|
});
|
|
37
37
|
});
|
|
38
38
|
}
|
|
39
|
+
function detectSystemChrome() {
|
|
40
|
+
const platform = process.platform;
|
|
41
|
+
if (platform === 'darwin') {
|
|
42
|
+
return existsSync('/Applications/Google Chrome.app/Contents/MacOS/Google Chrome');
|
|
43
|
+
}
|
|
44
|
+
if (platform === 'win32') {
|
|
45
|
+
const dirs = [process.env['PROGRAMFILES'], process.env['PROGRAMFILES(X86)'], process.env['LOCALAPPDATA']];
|
|
46
|
+
return dirs.some(dir => dir && existsSync(join(dir, 'Google', 'Chrome', 'Application', 'chrome.exe')));
|
|
47
|
+
}
|
|
48
|
+
// Linux
|
|
49
|
+
try {
|
|
50
|
+
execSync('which google-chrome-stable || which google-chrome || which chromium-browser || which chromium', { stdio: 'pipe' });
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
catch {
|
|
54
|
+
return false;
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
function installPlaywrightChromium() {
|
|
58
|
+
try {
|
|
59
|
+
console.log(' Installing @playwright/test (this may take a minute)...');
|
|
60
|
+
execSync('npm install -g @playwright/test@latest', { stdio: 'inherit' });
|
|
61
|
+
console.log(' Installing Chromium browser...');
|
|
62
|
+
execSync('npx playwright install --with-deps chromium', { stdio: 'inherit' });
|
|
63
|
+
return true;
|
|
64
|
+
}
|
|
65
|
+
catch {
|
|
66
|
+
console.error(' Failed to install Playwright. You can install manually:');
|
|
67
|
+
console.error(' npm install -g @playwright/test@latest');
|
|
68
|
+
console.error(' npx playwright install --with-deps chromium\n');
|
|
69
|
+
return false;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
function checkNodeVersion() {
|
|
73
|
+
const match = process.version.match(/^v(\d+)\./);
|
|
74
|
+
return match ? parseInt(match[1], 10) >= 18 : false;
|
|
75
|
+
}
|
|
39
76
|
function checkPrerequisites() {
|
|
40
77
|
let claude = false;
|
|
41
78
|
try {
|
|
@@ -55,14 +92,15 @@ async function validateToken(token) {
|
|
|
55
92
|
'Accept': 'application/json',
|
|
56
93
|
},
|
|
57
94
|
});
|
|
58
|
-
if (!response.ok)
|
|
59
|
-
return { valid: false };
|
|
95
|
+
if (!response.ok) {
|
|
96
|
+
return { valid: false, error: `API returned HTTP ${response.status}` };
|
|
97
|
+
}
|
|
60
98
|
const data = await response.json();
|
|
61
99
|
const projects = Array.isArray(data) ? data : (data.data ?? []);
|
|
62
100
|
return { valid: true, projectCount: projects.length };
|
|
63
101
|
}
|
|
64
|
-
catch {
|
|
65
|
-
return { valid: false };
|
|
102
|
+
catch (err) {
|
|
103
|
+
return { valid: false, error: err?.message || String(err) };
|
|
66
104
|
}
|
|
67
105
|
}
|
|
68
106
|
function getClaudeConfigPath() {
|
|
@@ -107,24 +145,24 @@ function configureMcpLocal(token) {
|
|
|
107
145
|
console.error(` claude mcp add greenrun --transport stdio -e GREENRUN_API_TOKEN=${token} -- npx -y greenrun-cli@latest\n`);
|
|
108
146
|
}
|
|
109
147
|
}
|
|
110
|
-
function configurePlaywrightMcp() {
|
|
148
|
+
function configurePlaywrightMcp(browser = 'chrome') {
|
|
111
149
|
try {
|
|
112
150
|
setLocalMcpServer('playwright', {
|
|
113
151
|
type: 'stdio',
|
|
114
152
|
command: 'npx',
|
|
115
153
|
args: [
|
|
116
154
|
'@playwright/mcp@latest',
|
|
117
|
-
'--browser',
|
|
155
|
+
'--browser', browser,
|
|
118
156
|
'--user-data-dir', join(homedir(), '.greenrun', 'browser-profile'),
|
|
119
157
|
],
|
|
120
158
|
env: {},
|
|
121
159
|
});
|
|
122
|
-
console.log(
|
|
160
|
+
console.log(` Configured playwright MCP server (${browser})`);
|
|
123
161
|
}
|
|
124
162
|
catch {
|
|
125
163
|
console.error('\nFailed to write Playwright MCP config to ~/.claude.json');
|
|
126
164
|
console.error('You can add it manually:\n');
|
|
127
|
-
console.error(
|
|
165
|
+
console.error(` claude mcp add playwright -- npx @playwright/mcp@latest --browser ${browser} --user-data-dir ~/.greenrun/browser-profile\n`);
|
|
128
166
|
}
|
|
129
167
|
}
|
|
130
168
|
function configureMcpProject(token) {
|
|
@@ -276,8 +314,61 @@ function installCommands() {
|
|
|
276
314
|
console.log(` Installed /${cmd.replace('.md', '')}`);
|
|
277
315
|
}
|
|
278
316
|
}
|
|
317
|
+
function checkDependencies() {
|
|
318
|
+
console.log('Checking dependencies...');
|
|
319
|
+
let allGood = true;
|
|
320
|
+
// Node version
|
|
321
|
+
if (checkNodeVersion()) {
|
|
322
|
+
console.log(` [x] Node.js ${process.version}`);
|
|
323
|
+
}
|
|
324
|
+
else {
|
|
325
|
+
console.log(` [ ] Node.js ${process.version} (18+ required)`);
|
|
326
|
+
allGood = false;
|
|
327
|
+
}
|
|
328
|
+
// Claude Code
|
|
329
|
+
const prereqs = checkPrerequisites();
|
|
330
|
+
if (prereqs.claude) {
|
|
331
|
+
console.log(' [x] Claude Code CLI');
|
|
332
|
+
}
|
|
333
|
+
else {
|
|
334
|
+
console.log(' [ ] Claude Code CLI not found');
|
|
335
|
+
allGood = false;
|
|
336
|
+
}
|
|
337
|
+
// @playwright/test
|
|
338
|
+
try {
|
|
339
|
+
execSync('npx playwright --version', { stdio: 'pipe' });
|
|
340
|
+
console.log(' [x] @playwright/test');
|
|
341
|
+
}
|
|
342
|
+
catch {
|
|
343
|
+
console.log(' [ ] @playwright/test not installed');
|
|
344
|
+
console.log(' Run: npm install -g @playwright/test@latest');
|
|
345
|
+
allGood = false;
|
|
346
|
+
}
|
|
347
|
+
// Browser (Chrome or Chromium)
|
|
348
|
+
if (detectSystemChrome()) {
|
|
349
|
+
console.log(' [x] Chrome detected');
|
|
350
|
+
}
|
|
351
|
+
else {
|
|
352
|
+
try {
|
|
353
|
+
execSync('npx playwright install --dry-run chromium', { stdio: 'pipe' });
|
|
354
|
+
console.log(' [x] Playwright Chromium');
|
|
355
|
+
}
|
|
356
|
+
catch {
|
|
357
|
+
console.log(' [ ] No browser detected (Chrome or Playwright Chromium)');
|
|
358
|
+
console.log(' Run: npx playwright install --with-deps chromium');
|
|
359
|
+
allGood = false;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
if (allGood) {
|
|
363
|
+
console.log(' All dependencies installed.\n');
|
|
364
|
+
}
|
|
365
|
+
else {
|
|
366
|
+
console.log('\n Some dependencies are missing. Install them and run again.\n');
|
|
367
|
+
}
|
|
368
|
+
}
|
|
279
369
|
export function runUpdate() {
|
|
280
370
|
console.log('\nGreenrun - Updating templates\n');
|
|
371
|
+
checkDependencies();
|
|
281
372
|
installCommands();
|
|
282
373
|
installSettings();
|
|
283
374
|
installClaudeMd();
|
|
@@ -287,6 +378,12 @@ export async function runInit(args) {
|
|
|
287
378
|
const opts = parseFlags(args);
|
|
288
379
|
const interactive = !opts.token;
|
|
289
380
|
console.log('\nGreenrun - Browser Test Management for Claude Code\n');
|
|
381
|
+
// Node version gate
|
|
382
|
+
if (!checkNodeVersion()) {
|
|
383
|
+
console.error(`Error: Node.js 18 or later is required (detected ${process.version}).`);
|
|
384
|
+
console.error('Install a newer version: https://nodejs.org/\n');
|
|
385
|
+
process.exit(1);
|
|
386
|
+
}
|
|
290
387
|
// Prerequisites
|
|
291
388
|
console.log('Prerequisites:');
|
|
292
389
|
const prereqs = checkPrerequisites();
|
|
@@ -318,7 +415,7 @@ export async function runInit(args) {
|
|
|
318
415
|
process.stdout.write(' Validating... ');
|
|
319
416
|
const validation = await validateToken(token);
|
|
320
417
|
if (!validation.valid) {
|
|
321
|
-
console.log(
|
|
418
|
+
console.log(`Failed! ${validation.error || 'Invalid token or cannot reach the API.'}`);
|
|
322
419
|
rl.close();
|
|
323
420
|
process.exit(1);
|
|
324
421
|
}
|
|
@@ -348,12 +445,37 @@ export async function runInit(args) {
|
|
|
348
445
|
process.stdout.write('Validating token... ');
|
|
349
446
|
const validation = await validateToken(token);
|
|
350
447
|
if (!validation.valid) {
|
|
351
|
-
console.log(
|
|
448
|
+
console.log(`Failed! ${validation.error || 'Invalid token or cannot reach the API.'}`);
|
|
352
449
|
process.exit(1);
|
|
353
450
|
}
|
|
354
451
|
console.log(`Connected! (${validation.projectCount} project${validation.projectCount === 1 ? '' : 's'} found)`);
|
|
355
452
|
scope = scope || 'local';
|
|
356
453
|
}
|
|
454
|
+
// Detect browser
|
|
455
|
+
let browser = 'chrome';
|
|
456
|
+
if (!detectSystemChrome()) {
|
|
457
|
+
if (interactive) {
|
|
458
|
+
const rl2 = createInterface({ input: process.stdin, output: process.stdout });
|
|
459
|
+
console.log('Chrome not detected on this system.');
|
|
460
|
+
const installChoice = await prompt(rl2, ' Install Playwright Chromium? [Y/n]: ');
|
|
461
|
+
rl2.close();
|
|
462
|
+
if (installChoice.toLowerCase() !== 'n') {
|
|
463
|
+
if (installPlaywrightChromium()) {
|
|
464
|
+
browser = 'chromium';
|
|
465
|
+
}
|
|
466
|
+
else {
|
|
467
|
+
console.log(' Continuing with chrome config. You can install Chrome manually later.\n');
|
|
468
|
+
}
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
else {
|
|
472
|
+
console.log('Chrome not detected. Installing Playwright Chromium...');
|
|
473
|
+
if (installPlaywrightChromium()) {
|
|
474
|
+
browser = 'chromium';
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
console.log();
|
|
478
|
+
}
|
|
357
479
|
// Configure MCP
|
|
358
480
|
console.log('Configuring MCP servers...');
|
|
359
481
|
if (scope === 'project') {
|
|
@@ -362,7 +484,7 @@ export async function runInit(args) {
|
|
|
362
484
|
else {
|
|
363
485
|
configureMcpLocal(token);
|
|
364
486
|
}
|
|
365
|
-
configurePlaywrightMcp();
|
|
487
|
+
configurePlaywrightMcp(browser);
|
|
366
488
|
console.log(' MCP servers configured.\n');
|
|
367
489
|
// Install extras
|
|
368
490
|
if (opts.claudeMd) {
|
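package/package.json
CHANGED
(hunk not captured in this extract; the file list above reports +1 -1, presumably the version bump)
package/templates/commands/procedures.md
CHANGED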
|
@@ -23,9 +23,9 @@ If auth fails (login form still visible after following instructions), report al
|
|
|
23
23
|
|
|
24
24
|
## Execute
|
|
25
25
|
|
|
26
|
-
You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `
|
|
26
|
+
You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `credential_name`, `pages`, `tags`, `has_script`).
|
|
27
27
|
|
|
28
|
-
Note:
|
|
28
|
+
Note: The batch does not include `instructions` or `script` content. Use `get_test(test_id)` to fetch these when needed.
|
|
29
29
|
|
|
30
30
|
If `tests` is empty, tell the user no matching active tests were found and stop.
|
|
31
31
|
|
|
@@ -42,25 +42,17 @@ Split the batch into two groups:
|
|
|
42
42
|
|
|
43
43
|
If all tests are scripted, skip to Step 4.
|
|
44
44
|
|
|
45
|
-
### Step 3:
|
|
45
|
+
### Step 3: Generate scripts for unscripted tests
|
|
46
46
|
|
|
47
|
-
For each **unscripted** test,
|
|
47
|
+
For each **unscripted** test, one at a time:
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
|
|
58
|
-
|
|
59
|
-
1. Navigate to the test's starting page via `browser_navigate`
|
|
60
|
-
2. Take a `browser_snapshot` to see initial elements
|
|
61
|
-
3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
|
|
62
|
-
4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
|
|
63
|
-
5. Collect all observed elements and selectors as context
|
|
49
|
+
1. Call `get_test(test_id)` to fetch the full instructions
|
|
50
|
+
2. Do a **scouting pass** — follow the test instructions in the browser to observe all UI states:
|
|
51
|
+
- Navigate to the test's starting page via `browser_navigate`
|
|
52
|
+
- Take a `browser_snapshot` to see initial elements
|
|
53
|
+
- Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
|
|
54
|
+
- Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
|
|
55
|
+
- Collect all observed elements and selectors as context
|
|
64
56
|
|
|
65
57
|
#### Handling failures during scouting
|
|
66
58
|
|
|
@@ -113,21 +105,6 @@ test('{test_name}', async ({ page }) => {
|
|
|
113
105
|
|
|
114
106
|
Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
|
|
115
107
|
|
|
116
|
-
**Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for next run.
|
|
117
|
-
|
|
118
|
-
To launch the background generation agent:
|
|
119
|
-
|
|
120
|
-
```
|
|
121
|
-
Task tool with:
|
|
122
|
-
- subagent_type: "general-purpose"
|
|
123
|
-
- run_in_background: true
|
|
124
|
-
- max_turns: 50
|
|
125
|
-
- model: "sonnet"
|
|
126
|
-
- prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
|
|
130
|
-
|
|
131
108
|
### Step 4: Export auth state
|
|
132
109
|
|
|
133
110
|
If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
|
|
@@ -175,17 +152,9 @@ npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
|
|
|
175
152
|
|
|
176
153
|
5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
|
|
177
154
|
|
|
178
|
-
6. **Clean up
|
|
179
|
-
```bash
|
|
180
|
-
npx playwright test --config /tmp/greenrun-tests/playwright.config.ts --list 2>/dev/null; true
|
|
181
|
-
```
|
|
182
|
-
The Playwright Test runner normally cleans up after itself, but if tests crash or timeout, browser processes may linger. Also call `browser_close` to reset the MCP browser context before any subsequent AI fallback execution.
|
|
183
|
-
|
|
184
|
-
### Step 6: Handle unscripted tests without scripts
|
|
185
|
-
|
|
186
|
-
Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 7 for these tests.
|
|
155
|
+
6. **Clean up**: Call `browser_close` to reset the MCP browser context.
|
|
187
156
|
|
|
188
|
-
### Step
|
|
157
|
+
### Step 6: Circuit breaker
|
|
189
158
|
|
|
190
159
|
After parsing all native results, walk through them in completion order. Track consecutive failures:
|
|
191
160
|
|
|
@@ -194,170 +163,25 @@ After parsing all native results, walk through them in completion order. Track c
|
|
|
194
163
|
- Skip AI fallback for remaining tests
|
|
195
164
|
- The counter resets on any pass
|
|
196
165
|
|
|
197
|
-
### Step
|
|
166
|
+
### Step 7: AI fallback for native failures
|
|
198
167
|
|
|
199
|
-
For tests that **failed** in native execution (and circuit breaker has not tripped):
|
|
168
|
+
For tests that **failed** in native execution (and circuit breaker has not tripped), execute them one at a time using the AI agent approach:
|
|
200
169
|
|
|
201
170
|
1. Close the current browser context with `browser_close` so the fallback starts fresh
|
|
202
171
|
2. Re-authenticate by navigating to the login page and following the Authenticate procedure
|
|
203
|
-
3.
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
await newPage.goto(START_URL);
|
|
213
|
-
return { index: page.context().pages().length - 1, url: newPage.url() };
|
|
214
|
-
}
|
|
215
|
-
```
|
|
216
|
-
|
|
217
|
-
#### Launch agent
|
|
218
|
-
```
|
|
219
|
-
Task tool with:
|
|
220
|
-
- subagent_type: "general-purpose"
|
|
221
|
-
- run_in_background: true
|
|
222
|
-
- max_turns: 25
|
|
223
|
-
- model: "sonnet"
|
|
224
|
-
- prompt: (agent prompt below, including the native failure message for diagnosis)
|
|
225
|
-
```
|
|
226
|
-
|
|
227
|
-
#### Agent prompt
|
|
228
|
-
|
|
229
|
-
```
|
|
230
|
-
Greenrun browser test (AI fallback). Run ID: {run_id}
|
|
231
|
-
Tab index: {INDEX}
|
|
232
|
-
|
|
233
|
-
**{test_name}**
|
|
234
|
-
|
|
235
|
-
{paste the full test instructions here}
|
|
236
|
-
|
|
237
|
-
**Native execution failed with:** {failure_message}
|
|
238
|
-
|
|
239
|
-
Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.
|
|
240
|
-
|
|
241
|
-
## CRITICAL: Tab isolation
|
|
172
|
+
3. For each failed test:
|
|
173
|
+
- Call `get_test(test_id)` to fetch the full instructions
|
|
174
|
+
- Start a new run via `start_run(test_id)` (the original run was already completed in Step 5)
|
|
175
|
+
- Navigate to the test's starting page via `browser_navigate`
|
|
176
|
+
- Follow the test instructions step by step using Playwright MCP tools
|
|
177
|
+
- Determine if this is a stale script (UI changed) or an actual bug
|
|
178
|
+
- If the test passes manually, invalidate the cached script: `update_test(test_id, { script: null, script_generated_at: null })`
|
|
179
|
+
- Call `complete_run(run_id, status, brief_summary)`
|
|
180
|
+
- Call `browser_close` before the next test to reset state
|
|
242
181
|
|
|
243
|
-
|
|
182
|
+
### Step 8: Handle unscripted tests without scripts
|
|
244
183
|
|
|
245
|
-
|
|
246
|
-
```js
|
|
247
|
-
async (page) => {
|
|
248
|
-
const p = page.context().pages()[INDEX];
|
|
249
|
-
// ... your action here ...
|
|
250
|
-
}
|
|
251
|
-
```
|
|
252
|
-
|
|
253
|
-
## Auth
|
|
254
|
-
No authentication needed — the main page already authenticated and cookies are shared to your tab.
|
|
255
|
-
|
|
256
|
-
## Interaction patterns
|
|
257
|
-
|
|
258
|
-
**Navigate:**
|
|
259
|
-
```js
|
|
260
|
-
async (page) => {
|
|
261
|
-
const p = page.context().pages()[INDEX];
|
|
262
|
-
await p.goto('https://example.com/path');
|
|
263
|
-
return p.url();
|
|
264
|
-
}
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
**Read page state (replaces browser_snapshot):**
|
|
268
|
-
```js
|
|
269
|
-
async (page) => {
|
|
270
|
-
const p = page.context().pages()[INDEX];
|
|
271
|
-
const url = p.url();
|
|
272
|
-
const title = await p.title();
|
|
273
|
-
const text = await p.locator('body').innerText();
|
|
274
|
-
const headings = await p.getByRole('heading').allTextContents();
|
|
275
|
-
const buttons = await p.getByRole('button').allTextContents();
|
|
276
|
-
const links = await p.getByRole('link').allTextContents();
|
|
277
|
-
const textboxes = await p.getByRole('textbox').evaluateAll(els =>
|
|
278
|
-
els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
|
|
279
|
-
);
|
|
280
|
-
return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
|
|
281
|
-
}
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
**Click an element:**
|
|
285
|
-
```js
|
|
286
|
-
async (page) => {
|
|
287
|
-
const p = page.context().pages()[INDEX];
|
|
288
|
-
await p.getByRole('button', { name: 'Submit' }).click();
|
|
289
|
-
return p.url();
|
|
290
|
-
}
|
|
291
|
-
```
|
|
292
|
-
|
|
293
|
-
**Fill a form field:**
|
|
294
|
-
```js
|
|
295
|
-
async (page) => {
|
|
296
|
-
const p = page.context().pages()[INDEX];
|
|
297
|
-
await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
|
|
298
|
-
return 'filled';
|
|
299
|
-
}
|
|
300
|
-
```
|
|
301
|
-
|
|
302
|
-
**Handle a dialog:**
|
|
303
|
-
```js
|
|
304
|
-
async (page) => {
|
|
305
|
-
const p = page.context().pages()[INDEX];
|
|
306
|
-
p.once('dialog', d => d.accept());
|
|
307
|
-
await p.getByRole('button', { name: 'Delete' }).click();
|
|
308
|
-
return p.url();
|
|
309
|
-
}
|
|
310
|
-
```
|
|
311
|
-
|
|
312
|
-
**Check for specific text (verification):**
|
|
313
|
-
```js
|
|
314
|
-
async (page) => {
|
|
315
|
-
const p = page.context().pages()[INDEX];
|
|
316
|
-
const visible = await p.getByText('Success').isVisible();
|
|
317
|
-
return { found: visible };
|
|
318
|
-
}
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
## Rules
|
|
322
|
-
- ONLY use `browser_run_code` — no other browser tools
|
|
323
|
-
- Always scope to `page.context().pages()[INDEX]`
|
|
324
|
-
- Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
|
|
325
|
-
- Read page state to find elements before interacting
|
|
326
|
-
- Navigate with absolute URLs via `p.goto(url)` — never click nav links
|
|
327
|
-
|
|
328
|
-
## FORBIDDEN — never use these:
|
|
329
|
-
- `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
|
|
330
|
-
- `browser_wait` — NEVER call this
|
|
331
|
-
- `browser_screenshot` — NEVER use
|
|
332
|
-
|
|
333
|
-
## Error recovery
|
|
334
|
-
- On ANY failure: retry the failing step ONCE, then skip to Finish.
|
|
335
|
-
|
|
336
|
-
## Finish (MANDATORY — always reach this step)
|
|
337
|
-
1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
|
|
338
|
-
2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
|
|
339
|
-
3. Return: {test_name} | {status} | {summary}
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
#### Wait and clean up
|
|
343
|
-
|
|
344
|
-
Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
|
|
345
|
-
|
|
346
|
-
```js
|
|
347
|
-
async (page) => {
|
|
348
|
-
const pages = page.context().pages();
|
|
349
|
-
for (let i = pages.length - 1; i >= 1; i--) {
|
|
350
|
-
await pages[i].close();
|
|
351
|
-
}
|
|
352
|
-
return { remainingPages: page.context().pages().length };
|
|
353
|
-
}
|
|
354
|
-
```
|
|
355
|
-
|
|
356
|
-
Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
|
|
357
|
-
|
|
358
|
-
### Step 9: Wait for background generation
|
|
359
|
-
|
|
360
|
-
If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
|
|
184
|
+
Any tests that didn't get scripts generated in Step 3 (e.g. if script generation failed) need to be executed the same way as Step 7 — one at a time using the AI agent approach. Follow the same pattern: get instructions, start run, execute in browser, complete run, close browser.
|
|
361
185
|
|
|
362
186
|
## Summarize
|
|
363
187
|
|
|
@@ -376,6 +200,4 @@ Total: "X passed, Y failed, Z errors out of N tests"
|
|
|
376
200
|
|
|
377
201
|
If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
|
|
378
202
|
|
|
379
|
-
If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
|
|
380
|
-
|
|
381
203
|
If any tests failed, highlight what went wrong and suggest next steps.
|