greenrun-cli 0.1.5 → 0.2.0
- package/dist/api-client.d.ts +48 -2
- package/dist/api-client.js +66 -0
- package/dist/commands/init.js +47 -20
- package/dist/server.js +49 -1
- package/package.json +1 -1
- package/templates/claude-md.md +20 -10
- package/templates/commands/greenrun-sweep.md +3 -37
- package/templates/commands/greenrun.md +11 -86
- package/templates/commands/procedures.md +335 -0
package/dist/api-client.d.ts
CHANGED

@@ -12,14 +12,32 @@ export declare class ApiClient {
         name: string;
         base_url?: string;
         description?: string;
-
+        auth_mode?: string;
+        login_url?: string;
+        register_url?: string;
+        login_instructions?: string;
+        register_instructions?: string;
+        credentials?: {
+            name: string;
+            email: string;
+            password: string;
+        }[];
     }): Promise<unknown>;
     getProject(id: string): Promise<unknown>;
     updateProject(id: string, data: {
         name?: string;
         base_url?: string;
         description?: string;
-
+        auth_mode?: string;
+        login_url?: string;
+        register_url?: string;
+        login_instructions?: string;
+        register_instructions?: string;
+        credentials?: {
+            name: string;
+            email: string;
+            password: string;
+        }[];
     }): Promise<unknown>;
     deleteProject(id: string): Promise<unknown>;
     listPages(projectId: string): Promise<unknown>;

@@ -39,6 +57,7 @@ export declare class ApiClient {
         page_ids?: string[];
         status?: string;
         tags?: string[];
+        credential_name?: string;
     }): Promise<unknown>;
     getTest(id: string): Promise<unknown>;
     updateTest(id: string, data: {

@@ -47,6 +66,9 @@ export declare class ApiClient {
         page_ids?: string[];
         status?: string;
         tags?: string[];
+        credential_name?: string | null;
+        script?: string | null;
+        script_generated_at?: string | null;
     }): Promise<unknown>;
     deleteTest(id: string): Promise<unknown>;
     sweep(projectId: string, params: {

@@ -60,4 +82,28 @@ export declare class ApiClient {
     }): Promise<unknown>;
     getRun(runId: string): Promise<unknown>;
     listRuns(testId: string): Promise<unknown>;
+    prepareTestBatch(projectId: string, filter?: string, testIds?: string[]): Promise<{
+        project: {
+            id: any;
+            name: any;
+            base_url: any;
+            auth_mode: any;
+            login_url: any;
+            register_url: any;
+            login_instructions: any;
+            register_instructions: any;
+            credentials: any;
+        };
+        tests: {
+            test_id: any;
+            test_name: any;
+            run_id: any;
+            instructions: any;
+            credential_name: any;
+            pages: any;
+            tags: any;
+            script: any;
+            script_generated_at: any;
+        }[];
+    }>;
 }
package/dist/api-client.js
CHANGED

@@ -102,4 +102,70 @@ export class ApiClient {
     async listRuns(testId) {
         return this.request('GET', `/tests/${testId}/runs`);
     }
+    // Batch operations
+    async prepareTestBatch(projectId, filter, testIds) {
+        const [projectResult, testsResult] = await Promise.all([
+            this.getProject(projectId),
+            this.listTests(projectId),
+        ]);
+        const project = projectResult.project;
+        let tests = (testsResult.tests || []).filter((t) => t.status === 'active');
+        if (testIds && testIds.length > 0) {
+            const idSet = new Set(testIds);
+            tests = tests.filter((t) => idSet.has(t.id));
+        }
+        else if (filter) {
+            if (filter.startsWith('tag:')) {
+                const tag = filter.slice(4).toLowerCase();
+                tests = tests.filter((t) => (t.tags || []).some((tg) => (tg.name || tg).toLowerCase() === tag));
+            }
+            else if (filter.startsWith('/')) {
+                tests = tests.filter((t) => (t.pages || []).some((p) => (p.url || '').includes(filter)));
+            }
+            else {
+                const term = filter.toLowerCase();
+                tests = tests.filter((t) => (t.name || '').toLowerCase().includes(term));
+            }
+        }
+        if (tests.length === 0) {
+            return {
+                project: {
+                    id: project.id, name: project.name, base_url: project.base_url,
+                    auth_mode: project.auth_mode ?? 'none',
+                    login_url: project.login_url ?? null,
+                    register_url: project.register_url ?? null,
+                    login_instructions: project.login_instructions ?? null,
+                    register_instructions: project.register_instructions ?? null,
+                    credentials: project.credentials ?? null,
+                },
+                tests: [],
+            };
+        }
+        // Fetch full test details in parallel
+        const fullTests = await Promise.all(tests.map((t) => this.getTest(t.id)));
+        // Start runs in parallel
+        const runs = await Promise.all(tests.map((t) => this.startRun(t.id)));
+        return {
+            project: {
+                id: project.id, name: project.name, base_url: project.base_url,
+                auth_mode: project.auth_mode ?? 'none',
+                login_url: project.login_url ?? null,
+                register_url: project.register_url ?? null,
+                login_instructions: project.login_instructions ?? null,
+                register_instructions: project.register_instructions ?? null,
+                credentials: project.credentials ?? null,
+            },
+            tests: fullTests.map((ft, i) => ({
+                test_id: ft.test.id,
+                test_name: ft.test.name,
+                run_id: runs[i].run.id,
+                instructions: ft.test.instructions,
+                credential_name: ft.test.credential_name ?? null,
+                pages: ft.test.pages || [],
+                tags: ft.test.tags || [],
+                script: ft.test.script ?? null,
+                script_generated_at: ft.test.script_generated_at ?? null,
+            })),
+        };
+    }
 }
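For orientation, a minimal sketch of how a caller might drive the new batch helper. Constructing `ApiClient` directly is an assumption (its constructor is not shown in this diff); the method shape follows the declaration above.

```ts
import { ApiClient } from 'greenrun-cli/dist/api-client.js';

// Hypothetical setup: actual construction/configuration may differ.
const api = new ApiClient();

// One call lists active tests, applies the "tag:smoke" filter,
// fetches full details, and starts a run per test.
const batch = await api.prepareTestBatch('project-uuid', 'tag:smoke');
for (const t of batch.tests) {
    console.log(`${t.test_name} -> run ${t.run_id}`);
}
```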
package/dist/commands/init.js
CHANGED

@@ -44,7 +44,7 @@ function checkPrerequisites() {
     catch {
         // not installed
     }
-    return { claude
+    return { claude };
 }
 async function validateToken(token) {
     try {

@@ -74,6 +74,15 @@ function configureMcpLocal(token) {
         console.error(` claude mcp add greenrun --transport stdio -e GREENRUN_API_TOKEN=${token} -- npx -y greenrun-cli@latest\n`);
     }
 }
+function configurePlaywrightMcp() {
+    try {
+        execSync('claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile', { stdio: 'inherit' });
+    }
+    catch {
+        console.error('\nFailed to add Playwright MCP. You can add it manually:\n');
+        console.error(' claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile\n');
+    }
+}
 function configureMcpProject(token) {
     const mcpConfig = {
         mcpServers: {

@@ -127,7 +136,9 @@ function installClaudeMd() {
     if (existsSync(claudeMdPath)) {
         const existing = readFileSync(claudeMdPath, 'utf-8');
         if (existing.includes('## Greenrun')) {
-
+            const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
+            writeFileSync(claudeMdPath, updated.endsWith('\n') ? updated : updated + '\n');
+            console.log(' Replaced Greenrun section in CLAUDE.md');
             return;
         }
         appendFileSync(claudeMdPath, '\n' + snippet);

@@ -155,6 +166,7 @@ function installSettings() {
         'mcp__greenrun__list_projects',
         'mcp__greenrun__get_project',
         'mcp__greenrun__create_project',
+        'mcp__greenrun__update_project',
         'mcp__greenrun__list_pages',
         'mcp__greenrun__create_page',
         'mcp__greenrun__list_tests',

@@ -166,19 +178,34 @@ function installSettings() {
         'mcp__greenrun__get_run',
         'mcp__greenrun__list_runs',
         'mcp__greenrun__sweep',
+        'mcp__greenrun__prepare_test_batch',
     ];
     const browserTools = [
-        '
-        '
-        '
-        '
-        '
-        '
-        '
-        '
-        '
-        '
-        '
+        'mcp__playwright__browser_navigate',
+        'mcp__playwright__browser_snapshot',
+        'mcp__playwright__browser_click',
+        'mcp__playwright__browser_type',
+        'mcp__playwright__browser_handle_dialog',
+        'mcp__playwright__browser_tab_list',
+        'mcp__playwright__browser_tab_new',
+        'mcp__playwright__browser_tab_select',
+        'mcp__playwright__browser_tab_close',
+        'mcp__playwright__browser_select_option',
+        'mcp__playwright__browser_hover',
+        'mcp__playwright__browser_drag',
+        'mcp__playwright__browser_press_key',
+        'mcp__playwright__browser_screenshot',
+        'mcp__playwright__browser_wait',
+        'mcp__playwright__browser_file_upload',
+        'mcp__playwright__browser_pdf_save',
+        'mcp__playwright__browser_close',
+        'mcp__playwright__browser_console_messages',
+        'mcp__playwright__browser_resize',
+        'mcp__playwright__browser_run_code',
+        'mcp__playwright__browser_evaluate',
+        'mcp__playwright__browser_fill_form',
+        'mcp__playwright__browser_tabs',
+        'mcp__playwright__browser_network_requests',
     ];
     const requiredTools = [...greenrunTools, ...browserTools];
     existing.permissions = existing.permissions || {};

@@ -191,7 +218,7 @@ function installSettings() {
 function installCommands() {
     const commandsDir = join(process.cwd(), '.claude', 'commands');
     mkdirSync(commandsDir, { recursive: true });
-    const commands = ['greenrun.md', 'greenrun-sweep.md'];
+    const commands = ['greenrun.md', 'greenrun-sweep.md', 'procedures.md'];
     for (const cmd of commands) {
         const src = join(TEMPLATES_DIR, 'commands', cmd);
         if (!existsSync(src)) {

@@ -228,8 +255,7 @@ export async function runInit(args) {
             process.exit(1);
         }
     }
-    console.log(' [i]
-    console.log(' Get it at: https://chromewebstore.google.com/detail/claude-in-chrome\n');
+    console.log(' [i] Playwright MCP will be configured for browser test execution\n');
     let token = opts.token;
     let scope = opts.scope;
     if (interactive) {

@@ -283,14 +309,15 @@
         scope = scope || 'local';
     }
     // Configure MCP
-    console.log('Configuring MCP
+    console.log('Configuring MCP servers...');
     if (scope === 'project') {
         configureMcpProject(token);
     }
     else {
         configureMcpLocal(token);
     }
-
+    configurePlaywrightMcp();
+    console.log(' MCP servers configured.\n');
     // Install extras
     if (opts.claudeMd) {
         installClaudeMd();

@@ -302,7 +329,7 @@
     console.log(`
 Done! Restart Claude Code to connect.
 
-
-
+Playwright will launch a Chrome browser automatically when running tests.
+Run /greenrun to execute your test suite.
 `);
 }
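The new CLAUDE.md path replaces everything from the `## Greenrun` heading to end-of-file instead of bailing out, so re-running `init` refreshes a stale snippet rather than duplicating it. A standalone sketch of that replace behavior (the inputs here are invented):

```ts
// Mirrors the logic added to installClaudeMd() above.
const existing = '# My project\n\nNotes.\n\n## Greenrun\nold snippet\n';
const snippet = '## Greenrun\nnew snippet\n';

// Everything from the heading onward is swapped for the fresh snippet.
const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
console.log(updated.endsWith('\n') ? updated : updated + '\n');
// -> '# My project\n\nNotes.\n\n## Greenrun\nnew snippet\n'
```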
package/dist/server.js
CHANGED

@@ -26,7 +26,16 @@ export async function startServer() {
         name: z.string().describe('Project name'),
         base_url: z.string().optional().describe('Base URL of the site (e.g. https://myapp.com)'),
         description: z.string().optional().describe('Project description'),
-
+        auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests (default: none)'),
+        login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+        register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+        login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+        register_instructions: z.string().optional().describe('Steps to register a new user'),
+        credentials: z.array(z.object({
+            name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+            email: z.string().describe('Login email'),
+            password: z.string().describe('Login password'),
+        })).optional().describe('Named credential sets for test authentication (max 20)'),
     }, async (args) => {
         const result = await api.createProject(args);
         return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };

@@ -35,6 +44,26 @@ export async function startServer() {
         const result = await api.getProject(args.project_id);
         return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
     });
+    server.tool('update_project', 'Update project settings', {
+        project_id: z.string().describe('Project UUID'),
+        name: z.string().optional().describe('Updated project name'),
+        base_url: z.string().optional().describe('Updated base URL'),
+        description: z.string().optional().describe('Updated description'),
+        auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests'),
+        login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+        register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+        login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+        register_instructions: z.string().optional().describe('Steps to register a new user'),
+        credentials: z.array(z.object({
+            name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+            email: z.string().describe('Login email'),
+            password: z.string().describe('Login password'),
+        })).optional().describe('Named credential sets for test authentication (max 20)'),
+    }, async (args) => {
+        const { project_id, ...data } = args;
+        const result = await api.updateProject(project_id, data);
+        return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+    });
     // --- Pages ---
     server.tool('list_pages', 'List pages in a project', { project_id: z.string().describe('Project UUID') }, async (args) => {
         const result = await api.listPages(args.project_id);

@@ -64,6 +93,7 @@ export async function startServer() {
         page_ids: z.array(z.string()).optional().describe('UUIDs of pages this test covers'),
         status: z.enum(['draft', 'active', 'archived']).optional().describe('Test status (default: active)'),
         tags: z.array(z.string()).optional().describe('Tag names for organizing tests (e.g. ["smoke", "auth"])'),
+        credential_name: z.string().optional().describe('Name of a credential set from the project to use for authentication'),
     }, async (args) => {
         const result = await api.createTest(args.project_id, {
             name: args.name,

@@ -71,6 +101,7 @@ export async function startServer() {
             page_ids: args.page_ids,
             status: args.status,
             tags: args.tags,
+            credential_name: args.credential_name,
         });
         return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
     });

@@ -81,6 +112,9 @@ export async function startServer() {
         page_ids: z.array(z.string()).optional().describe('Updated page UUIDs (replaces existing)'),
         status: z.enum(['draft', 'active', 'archived']).optional().describe('Updated status'),
         tags: z.array(z.string()).optional().describe('Updated tag names (replaces existing tags)'),
+        credential_name: z.string().optional().nullable().describe('Name of a credential set from the project to use for authentication'),
+        script: z.string().optional().nullable().describe('Generated Playwright test script'),
+        script_generated_at: z.string().optional().nullable().describe('ISO timestamp when the script was generated'),
     }, async (args) => {
         const { test_id, ...data } = args;
         const result = await api.updateTest(test_id, data);

@@ -98,6 +132,20 @@ export async function startServer() {
     });
         return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
     });
+    // --- Batch ---
+    server.tool('prepare_test_batch', 'Prepare a batch of tests for execution: lists tests, filters, fetches full details, and starts runs — all in one call. Returns everything needed to execute tests.', {
+        project_id: z.string().describe('Project UUID'),
+        filter: z.string().optional().describe('Filter: "tag:xxx" for tag, "/path" for page URL, or text for name substring'),
+        test_ids: z.array(z.string()).optional().describe('Specific test UUIDs to run (overrides filter)'),
+    }, async (args) => {
+        try {
+            const result = await api.prepareTestBatch(args.project_id, args.filter, args.test_ids);
+            return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+        }
+        catch (error) {
+            return { content: [{ type: 'text', text: `Error: ${error.message}` }], isError: true };
+        }
+    });
     // --- Test Runs ---
     server.tool('start_run', 'Start a test run (sets status to running)', { test_id: z.string().describe('Test UUID') }, async (args) => {
         const result = await api.startRun(args.test_id);
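Note the `.optional().nullable()` pattern on the new `update_test` fields: omitting a field leaves the stored value unchanged, while passing an explicit `null` clears it (this is how a stale cached script gets invalidated in the procedures template). A small zod illustration:

```ts
import { z } from 'zod';

// Same shape as update_test's script field above.
const script = z.string().optional().nullable();

script.parse(undefined);       // field omitted -> leave stored value as-is
script.parse(null);            // explicit null -> clear the cached script
script.parse('// spec body');  // string -> replace the cached script
```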
package/package.json
CHANGED
package/templates/claude-md.md
CHANGED

@@ -2,34 +2,44 @@
 
 ### Prerequisites
 
-- **
+- **Playwright MCP** must be configured for browser test execution (`claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile`)
 - MCP server must be connected (check with `/mcp` in Claude Code)
 
 ### Available MCP Tools
 
 The Greenrun MCP server provides these tools:
 
-- **list_projects** / **get_project** / **create_project** - Manage projects
+- **list_projects** / **get_project** / **create_project** - Manage projects (includes auth configuration)
 - **list_pages** / **create_page** - Manage page URLs within a project
 - **list_tests** / **get_test** / **create_test** / **update_test** - Manage test cases
 - **start_run** / **complete_run** / **get_run** / **list_runs** - Execute and track test runs
 - **sweep** - Impact analysis: find tests affected by changed pages
+- **prepare_test_batch** - Batch prepare tests for execution (lists, filters, fetches details, starts runs in one call)
 
 ### Running Tests
 
-
+Use the `/greenrun` slash command to run all tests automatically, or `/greenrun tag:smoke` to filter.
 
-
-2. For each test, call `get_test` to retrieve the full instructions
-3. Call `start_run` to begin a run (returns a run ID)
-4. Execute the test instructions using browser automation (Claude in Chrome)
-5. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+To run tests manually:
 
-
+1. Use `list_projects` to find the project
+2. Call `prepare_test_batch` with the project ID (and optional filter) to get test details and run IDs
+3. Execute each test's instructions using Playwright browser automation tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`)
+4. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+
+### Auth Configuration
+
+Projects can be configured with authentication settings so tests auto-login before execution:
+
+- **`auth_mode: "none"`** - No authentication (default)
+- **`auth_mode: "existing_user"`** - Log in with existing credentials via `login_url` and `login_instructions`
+- **`auth_mode: "new_user"`** - Register a new account each run via `register_url` and `register_instructions`
+
+Projects can also store named **credentials** (name/email/password sets). Each test can reference a credential by `credential_name` to use specific login details during execution.
 
 ### Creating Tests
 
-1. Navigate to the page you want to test
+1. Navigate to the page you want to test using Playwright
 2. Write clear, step-by-step test instructions describing what to do and what to verify
 3. Use `create_page` to register the page URL if not already registered
 4. Use `create_test` with the instructions and page IDs
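To make the template's auth settings concrete, here is a hedged sketch of the arguments they imply for the `update_project` tool. Field names match the server.js schema above; every value is invented:

```ts
// Illustrative update_project arguments for an existing_user project.
const authConfig = {
    project_id: '00000000-0000-0000-0000-000000000000', // hypothetical UUID
    auth_mode: 'existing_user',
    login_url: 'https://myapp.com/login',
    login_instructions: 'Fill in email and password, then click "Sign in".',
    credentials: [
        { name: 'admin', email: 'admin@example.com', password: 'example-pass' },
        { name: 'viewer', email: 'viewer@example.com', password: 'example-pass' },
    ],
};
```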
package/templates/commands/greenrun-sweep.md
CHANGED

@@ -36,41 +36,7 @@ Present the affected tests:
 
 ### 6. Offer to run
 
-Ask the user if they want to run the affected tests. If yes
+Ask the user if they want to run the affected tests. If yes:
 
-
-
-For each test in a batch, launch a background agent with `max_turns: 30` and `model: "sonnet"`. Use this prompt:
-
-```
-You are executing a single Greenrun browser test. You have access to browser automation tools and Greenrun MCP tools.
-
-**Test: {test_name}** (ID: {test_id})
-
-Step 1: Call `get_test` with test_id "{test_id}" to get full instructions.
-Step 2: Call `start_run` with test_id "{test_id}" to begin - save the returned `run_id`.
-Step 3: Execute the test instructions using browser automation:
-- Call `tabs_context_mcp` then create a new browser tab for this test
-- Follow each instruction step exactly as written
-- The instructions will tell you where to navigate and what to do
-- Only take a screenshot when you need to verify a visual assertion — not for every navigation or click
-- When reading page content, prefer `find` or `read_page` with `filter: "interactive"` over full DOM reads
-- NEVER trigger JavaScript alerts, confirms, or prompts — they block the browser extension entirely. Before clicking delete buttons or other destructive actions, use `javascript_tool` to override: `window.alert = () => {}; window.confirm = () => true; window.prompt = () => null;`
-- If browser tools stop responding (no result or timeout), assume a dialog is blocking — report the error and stop. Do not keep retrying.
-- If you get stuck or a step fails, record the failure and move on — do not retry more than once
-Step 4: Call `complete_run` with:
-- run_id: the run ID from step 2
-- status: "passed" if all checks succeeded, "failed" if any check failed, "error" if execution was blocked
-- result: a brief summary of what happened (include the failure reason if failed/error)
-Step 5: Close the browser tab you created to clean up.
-
-Return a single line summary: {test_name} | {status} | {result_summary}
-```
-
-Wait for each batch to complete before launching the next. After all tests finish, present a summary table:
-
-| Test | Pages | Tags | Status | Result |
-|------|-------|------|--------|--------|
-| Test name | Affected page URLs | tag1, tag2 | passed/failed/error | Brief summary |
-
-Include the total count: "X passed, Y failed, Z errors out of N tests"
+1. Call `prepare_test_batch` with the project ID and `test_ids` set to the affected test IDs from the sweep results.
+2. Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
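The hand-off above is just the `test_ids` override in action: when explicit IDs are passed, `prepareTestBatch` ignores the filter entirely. A short sketch (IDs invented):

```ts
// Affected test UUIDs taken from the sweep result (invented values).
const affected = ['11111111-1111-1111-1111-111111111111'];

// test_ids takes precedence over any filter, per the implementation above.
const batch = await api.prepareTestBatch('project-uuid', undefined, affected);
```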
package/templates/commands/greenrun.md
CHANGED

@@ -2,100 +2,25 @@ Run Greenrun browser tests for this project in parallel.
 
 ## Instructions
 
-You are executing browser tests managed by Greenrun.
+You are executing browser tests managed by Greenrun. Follow these steps precisely:
 
 ### 1. Find the project
 
 Call `list_projects` to get all projects. Match the current project by name or base URL. If no match is found, tell the user and stop.
 
-
+### 2. Prepare test batch
 
-
+Call `prepare_test_batch` with the project ID.
 
-
+If the user specified an argument ("$ARGUMENTS"), pass it as the `filter` parameter:
+- `tag:smoke` → filters by tag
+- `/checkout` → filters by page URL
+- `login` → filters by test name
 
-If
-- If it starts with `/` (e.g. `/checkout`), filter to tests linked to a page matching that URL
-- If it starts with `tag:` (e.g. `tag:smoke`), filter to tests with that tag
-- Otherwise, treat it as a test name filter
+If no argument is given, omit the filter to run all active tests.
 
-If
+If the result has zero tests, tell the user and stop.
 
-
+### 3. Execute tests
 
-
-
-Call `get_test` for ALL matching tests **in parallel** (multiple tool calls in one message). This retrieves the full instructions for each test.
-
-Then call `start_run` for ALL tests **in parallel** to get run IDs.
-
-You now have everything needed to launch agents: test name, full instructions, and run_id for each test.
-
-### 4. Execute tests in parallel
-
-Split the test list into batches of size `concurrency` (from the project settings).
-
-For each batch, launch all tests simultaneously using the **Task tool** with `run_in_background: true`. Each background agent receives a prompt with the full instructions and run_id embedded — agents do NOT need to call `get_test` or `start_run`.
-
-```
-For each test in the current batch, call the Task tool with:
-- subagent_type: "general-purpose"
-- run_in_background: true
-- max_turns: 50
-- model: "sonnet"
-- prompt: (see below)
-```
-
-The prompt for each background agent should be:
-
-```
-You are executing a single Greenrun browser test using browser automation tools. Be efficient — minimize tool calls to complete the test as fast as possible.
-
-**Test: {test_name}**
-**Run ID: {run_id}**
-
-## Test Instructions
-
-{paste the full test instructions from get_test here}
-
-## Execution Steps
-
-1. Call `tabs_context_mcp` then create a new browser tab with `tabs_create_mcp`
-2. Follow each test instruction step exactly as written, using these rules to minimize tool calls:
-
-**Speed rules (critical):**
-- NEVER take screenshots. Use `read_page` or `find` for all assertions and to locate elements.
-- Navigate directly to URLs (e.g. `navigate` to `/tokens`) instead of clicking through nav links
-- Use `javascript_tool` for quick assertions: `document.querySelector('h1')?.textContent` is faster than `read_page` for checking a heading
-- Use `read_page` with `filter: "interactive"` to verify multiple things in one call rather than separate `find` calls
-- Use `form_input` with element refs for filling forms — avoid click-then-type sequences
-- When clicking elements, use `ref` parameter instead of coordinates to avoid needing screenshots
-- Combine verification: after a page loads, do ONE `read_page` call and check all assertions from that result
-
-**Reliability rules:**
-- NEVER trigger JavaScript alerts, confirms, or prompts — they block the browser extension entirely. Before clicking delete buttons or other destructive actions, use `javascript_tool` to override: `window.alert = () => {}; window.confirm = () => true; window.prompt = () => null;`
-- If browser tools stop responding (no result or timeout), assume a dialog is blocking — report the error and stop. Do not keep retrying.
-- If you get stuck or a step fails, record the failure and move on — do not retry more than once
-- If you are redirected to a login page, try using an existing logged-in tab from `tabs_context_mcp` instead of creating a new one
-
-3. Call `complete_run` with:
-- run_id: "{run_id}"
-- status: "passed" if all checks succeeded, "failed" if any check failed, "error" if execution was blocked
-- result: a brief summary of what happened (include the failure reason if failed/error)
-
-Return a single line summary: {test_name} | {status} | {result_summary}
-```
-
-After launching all agents in a batch, wait for them all to complete (use `TaskOutput` to collect results) before launching the next batch.
-
-### 5. Summarize results
-
-After all batches complete, collect results from all background agents and present a summary table:
-
-| Test | Pages | Tags | Status | Result |
-|------|-------|------|--------|--------|
-| Test name | /login, /dashboard | smoke, auth | passed/failed/error | Brief summary |
-
-Include the total count: "X passed, Y failed, Z errors out of N tests"
-
-If any tests failed, highlight what went wrong and suggest next steps.
+Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
package/templates/commands/procedures.md
ADDED

@@ -0,0 +1,335 @@
+Shared procedures for executing Greenrun browser tests. Referenced by `/greenrun` and `/greenrun-sweep`.
+
+## Authenticate
+
+Before executing tests, handle authentication based on the project's `auth_mode` from the batch result.
+
+- **`none`** (or missing): Skip authentication entirely.
+- **`existing_user`**: Navigate to the project's `login_url` and follow the `login_instructions` step by step. Use `browser_snapshot` after to verify the page shows an authenticated state (no login form visible).
+- **`new_user`**: Navigate to the project's `register_url` and follow the `register_instructions` step by step. Use `browser_snapshot` after to verify registration succeeded and the user is authenticated.
+
+### Credentials
+
+The project may include a `credentials` array — named credential sets with `name`, `email`, and `password`. Each test may have a `credential_name` field referencing one of these sets.
+
+When authenticating for a test with `credential_name`:
+- Find the matching credential in `project.credentials` by name
+- Use that credential's email and password to fill the login form at `login_url`
+- If no `credential_name` is set on a test, use the first credential in the array (or fall back to `login_instructions`)
+
+When authenticating once for a batch (Step 1 below), use the credential that appears most frequently across the batch's tests. If tests use different credentials, re-authenticate between tests as needed.
+
+If auth fails (login form still visible after following instructions), report all tests as error with "Authentication failed" and stop.
+
+## Execute
+
+You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `instructions`, `credential_name`, `pages`, `tags`, `script`, `script_generated_at`).
+
+If `tests` is empty, tell the user no matching active tests were found and stop.
+
+### Step 1: Authenticate on the main page
+
+Run the Authenticate procedure above once, using the standard Playwright tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`).
+
+### Step 2: Classify tests
+
+Split the batch into two groups:
+
+- **scripted**: tests where `script` is non-null (cached Playwright scripts ready to run)
+- **unscripted**: tests where `script` is null (need script generation)
+
+If all tests are scripted, skip to Step 4.
+
+### Step 3: Score and generate scripts (easy-first)
+
+For each **unscripted** test, assign a difficulty score based on the instructions:
+
+- **easy** (1): Single-page tests with simple actions — navigate, check text/headings, verify static content, click a link and check the URL. Typically 1-4 steps, no form submissions, no multi-step flows.
+- **medium** (2): Tests involving form input, button clicks that trigger state changes, checking error/success messages, or verifying a redirect after an action. Typically 3-8 steps.
+- **hard** (3): Multi-page flows, tests requiring specific sequences of actions (e.g. add to cart then checkout), tests with complex assertions (table data, dynamic content), or tests involving file uploads, modals, or dialogs.
+
+Sort unscripted tests by difficulty ascending (easy first). This ensures simple tests get scripts generated quickly so native execution can start sooner.
+
+#### Walk-through script generation
+
+For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
+
+1. Navigate to the test's starting page via `browser_navigate`
+2. Take a `browser_snapshot` to see initial elements
+3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
+4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
+5. Collect all observed elements and selectors as context
+
+Then generate a `.spec.ts` script using the observed elements:
+
+```ts
+import { test, expect } from '@playwright/test';
+test('{test_name}', async ({ page }) => {
+  // If the test has a credential_name, include login steps using the matching
+  // credential from project.credentials (email + password) at the login_url
+  await page.goto('{start_url}');
+  // Steps generated from scouting pass observations
+  // Use getByRole, getByText, getByLabel, getByPlaceholder for selectors
+});
+```
+
+Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
+
+**Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for next run.
+
+To launch the background generation agent:
+
+```
+Task tool with:
+- subagent_type: "general-purpose"
+- run_in_background: true
+- max_turns: 50
+- model: "sonnet"
+- prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
+```
+
+The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
+
+### Step 4: Export auth state
+
+If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
+
+```js
+async (page) => {
+  const state = await page.context().storageState();
+  require('fs').writeFileSync('/tmp/greenrun-auth-state.json', JSON.stringify(state));
+  return 'Auth state exported';
+}
+```
+
+Call this via `browser_run_code`. If `auth_mode` is `none`, skip this step.
+
+### Step 5: Write files and run natively
+
+Gather all tests that have scripts (previously scripted + newly generated from Step 3).
+
+1. **Write test files**: For each scripted test, write the script to `/tmp/greenrun-tests/{test_id}.spec.ts`
+
+2. **Write config**: Write `/tmp/greenrun-tests/playwright.config.ts`:
+
+```ts
+import { defineConfig } from '@playwright/test';
+export default defineConfig({
+  testDir: '.',
+  timeout: 30000,
+  workers: 20,
+  reporter: [['json', { outputFile: 'results.json' }]],
+  use: {
+    baseURL: '{base_url}',
+    storageState: '/tmp/greenrun-auth-state.json', // omit 'use.storageState' entirely if auth_mode is 'none'
+  },
+});
+```
+
+Replace `{base_url}` with the project's base_url.
+
+3. **Execute**: Run via Bash:
+```
+npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
+```
+
+4. **Parse results**: Read `/tmp/greenrun-tests/results.json`. Map each result back to a run ID via the filename: `{test_id}.spec.ts` → test_id → find the matching run_id from the batch.
+
+5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
+
+### Step 6: Handle unscripted tests without scripts
+
+Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 7 for these tests.
+
+### Step 7: Circuit breaker
+
+After parsing all native results, walk through them in completion order. Track consecutive failures:
+
+- If **3 or more consecutive failures** occur:
+  - Mark all remaining un-reported tests as error: "Circuit breaker: N consecutive failures detected"
+  - Skip AI fallback for remaining tests
+- The counter resets on any pass
+
+### Step 8: AI-agent fallback for native failures
+
+For tests that **failed** in native execution (and circuit breaker has not tripped):
+
+1. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
+2. Launch background Task agents using the tab-isolation pattern:
+
+Create tabs and launch agents in batches of 20:
+
+#### Create tab
+```js
+async (page) => {
+  const newPage = await page.context().newPage();
+  await newPage.goto(START_URL);
+  return { index: page.context().pages().length - 1, url: newPage.url() };
+}
+```
+
+#### Launch agent
+```
+Task tool with:
+- subagent_type: "general-purpose"
+- run_in_background: true
+- max_turns: 25
+- model: "sonnet"
+- prompt: (agent prompt below, including the native failure message for diagnosis)
+```
+
+#### Agent prompt
+
+```
+Greenrun browser test (AI fallback). Run ID: {run_id}
+Tab index: {INDEX}
+
+**{test_name}**
+
+{paste the full test instructions here}
+
+**Native execution failed with:** {failure_message}
+
+Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.
+
+## CRITICAL: Tab isolation
+
+You are assigned to tab index {INDEX}. You MUST use ONLY `browser_run_code` for ALL browser interactions. Do NOT use `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate`, or any other Playwright MCP tools. The only non-browser tool you may call is `complete_run`.
+
+Every `browser_run_code` call must scope to your tab:
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  // ... your action here ...
+}
+```
+
+## Auth
+No authentication needed — the main page already authenticated and cookies are shared to your tab.
+
+## Interaction patterns
+
+**Navigate:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.goto('https://example.com/path');
+  return p.url();
+}
+```
+
+**Read page state (replaces browser_snapshot):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const url = p.url();
+  const title = await p.title();
+  const text = await p.locator('body').innerText();
+  const headings = await p.getByRole('heading').allTextContents();
+  const buttons = await p.getByRole('button').allTextContents();
+  const links = await p.getByRole('link').allTextContents();
+  const textboxes = await p.getByRole('textbox').evaluateAll(els =>
+    els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
+  );
+  return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
+}
+```
+
+**Click an element:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('button', { name: 'Submit' }).click();
+  return p.url();
+}
+```
+
+**Fill a form field:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
+  return 'filled';
+}
+```
+
+**Handle a dialog:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  p.once('dialog', d => d.accept());
+  await p.getByRole('button', { name: 'Delete' }).click();
+  return p.url();
+}
+```
+
+**Check for specific text (verification):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const visible = await p.getByText('Success').isVisible();
+  return { found: visible };
+}
+```
+
+## Rules
+- ONLY use `browser_run_code` — no other browser tools
+- Always scope to `page.context().pages()[INDEX]`
+- Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
+- Read page state to find elements before interacting
+- Navigate with absolute URLs via `p.goto(url)` — never click nav links
+
+## FORBIDDEN — never use these:
+- `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
+- `browser_wait` — NEVER call this
+- `browser_screenshot` — NEVER use
+
+## Error recovery
+- On ANY failure: retry the failing step ONCE, then skip to Finish.
+
+## Finish (MANDATORY — always reach this step)
+1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
+2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
+3. Return: {test_name} | {status} | {summary}
+```
+
+#### Wait and clean up
+
+Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
+
+```js
+async (page) => {
+  const pages = page.context().pages();
+  for (let i = pages.length - 1; i >= 1; i--) {
+    await pages[i].close();
+  }
+  return { remainingPages: page.context().pages().length };
+}
+```
+
+Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
+
+### Step 9: Wait for background generation
+
+If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
+
+## Summarize
+
+Present a summary table with a Mode column showing how each test was executed:
+
+| Test | Pages | Tags | Mode | Status | Result |
+|------|-------|------|------|--------|--------|
+| Test name | /login, /dashboard | smoke, auth | native/agent/skipped | passed/failed/error | Brief summary |
+
+Mode values:
+- **native** — executed via `npx playwright test`
+- **agent** — executed via AI agent (fallback or no script available)
+- **skipped** — circuit breaker tripped, not executed
+
+Total: "X passed, Y failed, Z errors out of N tests"
+
+If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
+
+If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
+
+If any tests failed, highlight what went wrong and suggest next steps.
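As a sanity check on Step 5's result mapping, a hedged sketch of parsing the Playwright JSON reporter output and mapping each spec file back to its run. The nested-suite walk reflects the general shape of Playwright's `json` reporter; exact field access may need adjusting:

```ts
import { readFileSync } from 'node:fs';

type RunStatus = 'passed' | 'failed' | 'error';

// Step 5's mapping: passed -> passed, failed/timedOut -> failed, other -> error.
function toRunStatus(pw: string): RunStatus {
    if (pw === 'passed') return 'passed';
    if (pw === 'failed' || pw === 'timedOut') return 'failed';
    return 'error';
}

// runIdByTestId comes from the prepare_test_batch result (test_id -> run_id).
function collectResults(runIdByTestId: Map<string, string>) {
    const report = JSON.parse(
        readFileSync('/tmp/greenrun-tests/results.json', 'utf-8'),
    );
    const out: { runId: string; status: RunStatus }[] = [];
    const walk = (suite: any) => {
        for (const spec of suite.specs ?? []) {
            // Each file is named {test_id}.spec.ts, so the ID is recoverable.
            const testId = String(spec.file).replace(/\.spec\.ts$/, '');
            const runId = runIdByTestId.get(testId);
            if (!runId) continue;
            const status = spec.tests?.[0]?.results?.at(-1)?.status ?? 'unknown';
            out.push({ runId, status: toRunStatus(status) });
        }
        (suite.suites ?? []).forEach(walk);
    };
    (report.suites ?? []).forEach(walk);
    return out; // then call complete_run(runId, status, summary) per entry
}
```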