greenrun-cli 0.1.6 → 0.2.0

@@ -12,14 +12,32 @@ export declare class ApiClient {
         name: string;
         base_url?: string;
         description?: string;
-        concurrency?: number;
+        auth_mode?: string;
+        login_url?: string;
+        register_url?: string;
+        login_instructions?: string;
+        register_instructions?: string;
+        credentials?: {
+            name: string;
+            email: string;
+            password: string;
+        }[];
     }): Promise<unknown>;
     getProject(id: string): Promise<unknown>;
     updateProject(id: string, data: {
         name?: string;
         base_url?: string;
         description?: string;
-        concurrency?: number;
+        auth_mode?: string;
+        login_url?: string;
+        register_url?: string;
+        login_instructions?: string;
+        register_instructions?: string;
+        credentials?: {
+            name: string;
+            email: string;
+            password: string;
+        }[];
     }): Promise<unknown>;
     deleteProject(id: string): Promise<unknown>;
     listPages(projectId: string): Promise<unknown>;
@@ -39,6 +57,7 @@ export declare class ApiClient {
         page_ids?: string[];
         status?: string;
         tags?: string[];
+        credential_name?: string;
     }): Promise<unknown>;
     getTest(id: string): Promise<unknown>;
     updateTest(id: string, data: {
@@ -47,6 +66,9 @@ export declare class ApiClient {
         page_ids?: string[];
         status?: string;
         tags?: string[];
+        credential_name?: string | null;
+        script?: string | null;
+        script_generated_at?: string | null;
     }): Promise<unknown>;
     deleteTest(id: string): Promise<unknown>;
     sweep(projectId: string, params: {
@@ -60,4 +82,28 @@ export declare class ApiClient {
     }): Promise<unknown>;
     getRun(runId: string): Promise<unknown>;
     listRuns(testId: string): Promise<unknown>;
+    prepareTestBatch(projectId: string, filter?: string, testIds?: string[]): Promise<{
+        project: {
+            id: any;
+            name: any;
+            base_url: any;
+            auth_mode: any;
+            login_url: any;
+            register_url: any;
+            login_instructions: any;
+            register_instructions: any;
+            credentials: any;
+        };
+        tests: {
+            test_id: any;
+            test_name: any;
+            run_id: any;
+            instructions: any;
+            credential_name: any;
+            pages: any;
+            tags: any;
+            script: any;
+            script_generated_at: any;
+        }[];
+    }>;
 }
@@ -102,4 +102,70 @@ export class ApiClient {
     async listRuns(testId) {
         return this.request('GET', `/tests/${testId}/runs`);
     }
+    // Batch operations
+    async prepareTestBatch(projectId, filter, testIds) {
+        const [projectResult, testsResult] = await Promise.all([
+            this.getProject(projectId),
+            this.listTests(projectId),
+        ]);
+        const project = projectResult.project;
+        let tests = (testsResult.tests || []).filter((t) => t.status === 'active');
+        if (testIds && testIds.length > 0) {
+            const idSet = new Set(testIds);
+            tests = tests.filter((t) => idSet.has(t.id));
+        }
+        else if (filter) {
+            if (filter.startsWith('tag:')) {
+                const tag = filter.slice(4).toLowerCase();
+                tests = tests.filter((t) => (t.tags || []).some((tg) => (tg.name || tg).toLowerCase() === tag));
+            }
+            else if (filter.startsWith('/')) {
+                tests = tests.filter((t) => (t.pages || []).some((p) => (p.url || '').includes(filter)));
+            }
+            else {
+                const term = filter.toLowerCase();
+                tests = tests.filter((t) => (t.name || '').toLowerCase().includes(term));
+            }
+        }
+        if (tests.length === 0) {
+            return {
+                project: {
+                    id: project.id, name: project.name, base_url: project.base_url,
+                    auth_mode: project.auth_mode ?? 'none',
+                    login_url: project.login_url ?? null,
+                    register_url: project.register_url ?? null,
+                    login_instructions: project.login_instructions ?? null,
+                    register_instructions: project.register_instructions ?? null,
+                    credentials: project.credentials ?? null,
+                },
+                tests: [],
+            };
+        }
+        // Fetch full test details in parallel
+        const fullTests = await Promise.all(tests.map((t) => this.getTest(t.id)));
+        // Start runs in parallel
+        const runs = await Promise.all(tests.map((t) => this.startRun(t.id)));
+        return {
+            project: {
+                id: project.id, name: project.name, base_url: project.base_url,
+                auth_mode: project.auth_mode ?? 'none',
+                login_url: project.login_url ?? null,
+                register_url: project.register_url ?? null,
+                login_instructions: project.login_instructions ?? null,
+                register_instructions: project.register_instructions ?? null,
+                credentials: project.credentials ?? null,
+            },
+            tests: fullTests.map((ft, i) => ({
+                test_id: ft.test.id,
+                test_name: ft.test.name,
+                run_id: runs[i].run.id,
+                instructions: ft.test.instructions,
+                credential_name: ft.test.credential_name ?? null,
+                pages: ft.test.pages || [],
+                tags: ft.test.tags || [],
+                script: ft.test.script ?? null,
+                script_generated_at: ft.test.script_generated_at ?? null,
+            })),
+        };
+    }
 }
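Worth noting for consumers: `prepareTestBatch` is not read-only. It calls `startRun` for every matched test as a side effect, so calling it commits you to reporting those runs. A minimal sketch of the three selection modes, assuming an already-constructed `ApiClient` (its constructor is outside this diff, so it is elided):

```ts
// Sketch only: `api` stands in for a constructed ApiClient instance.
declare const api: {
  prepareTestBatch(projectId: string, filter?: string, testIds?: string[]): Promise<{
    tests: { test_id: string; test_name: string; run_id: string }[];
  }>;
};

const byTag = await api.prepareTestBatch('project-uuid', 'tag:smoke'); // tag filter
const byUrl = await api.prepareTestBatch('project-uuid', '/checkout'); // page-URL filter
const byName = await api.prepareTestBatch('project-uuid', 'login');    // name substring

// Explicit IDs take precedence over any filter:
const picked = await api.prepareTestBatch('project-uuid', undefined, ['test-uuid-1']);

// Every matched test comes back with a run already started:
for (const t of picked.tests) console.log(t.test_name, t.run_id);
```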
@@ -44,7 +44,7 @@ function checkPrerequisites() {
     catch {
         // not installed
     }
-    return { claude, chromeHint: true };
+    return { claude };
 }
 async function validateToken(token) {
     try {
@@ -74,6 +74,15 @@ function configureMcpLocal(token) {
         console.error(`  claude mcp add greenrun --transport stdio -e GREENRUN_API_TOKEN=${token} -- npx -y greenrun-cli@latest\n`);
     }
 }
+function configurePlaywrightMcp() {
+    try {
+        execSync('claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile', { stdio: 'inherit' });
+    }
+    catch {
+        console.error('\nFailed to add Playwright MCP. You can add it manually:\n');
+        console.error('  claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile\n');
+    }
+}
 function configureMcpProject(token) {
     const mcpConfig = {
         mcpServers: {
@@ -127,7 +136,9 @@ function installClaudeMd() {
     if (existsSync(claudeMdPath)) {
         const existing = readFileSync(claudeMdPath, 'utf-8');
         if (existing.includes('## Greenrun')) {
-            console.log('  CLAUDE.md already contains Greenrun section, skipping');
+            const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
+            writeFileSync(claudeMdPath, updated.endsWith('\n') ? updated : updated + '\n');
+            console.log('  Replaced Greenrun section in CLAUDE.md');
             return;
         }
         appendFileSync(claudeMdPath, '\n' + snippet);
@@ -155,6 +166,7 @@ function installSettings() {
         'mcp__greenrun__list_projects',
         'mcp__greenrun__get_project',
         'mcp__greenrun__create_project',
+        'mcp__greenrun__update_project',
         'mcp__greenrun__list_pages',
         'mcp__greenrun__create_page',
         'mcp__greenrun__list_tests',
@@ -166,19 +178,34 @@ function installSettings() {
         'mcp__greenrun__get_run',
         'mcp__greenrun__list_runs',
         'mcp__greenrun__sweep',
+        'mcp__greenrun__prepare_test_batch',
     ];
     const browserTools = [
-        'mcp__claude-in-chrome__tabs_context_mcp',
-        'mcp__claude-in-chrome__tabs_create_mcp',
-        'mcp__claude-in-chrome__navigate',
-        'mcp__claude-in-chrome__computer',
-        'mcp__claude-in-chrome__read_page',
-        'mcp__claude-in-chrome__find',
-        'mcp__claude-in-chrome__form_input',
-        'mcp__claude-in-chrome__javascript_tool',
-        'mcp__claude-in-chrome__get_page_text',
-        'mcp__claude-in-chrome__read_console_messages',
-        'mcp__claude-in-chrome__read_network_requests',
+        'mcp__playwright__browser_navigate',
+        'mcp__playwright__browser_snapshot',
+        'mcp__playwright__browser_click',
+        'mcp__playwright__browser_type',
+        'mcp__playwright__browser_handle_dialog',
+        'mcp__playwright__browser_tab_list',
+        'mcp__playwright__browser_tab_new',
+        'mcp__playwright__browser_tab_select',
+        'mcp__playwright__browser_tab_close',
+        'mcp__playwright__browser_select_option',
+        'mcp__playwright__browser_hover',
+        'mcp__playwright__browser_drag',
+        'mcp__playwright__browser_press_key',
+        'mcp__playwright__browser_screenshot',
+        'mcp__playwright__browser_wait',
+        'mcp__playwright__browser_file_upload',
+        'mcp__playwright__browser_pdf_save',
+        'mcp__playwright__browser_close',
+        'mcp__playwright__browser_console_messages',
+        'mcp__playwright__browser_resize',
+        'mcp__playwright__browser_run_code',
+        'mcp__playwright__browser_evaluate',
+        'mcp__playwright__browser_fill_form',
+        'mcp__playwright__browser_tabs',
+        'mcp__playwright__browser_network_requests',
     ];
     const requiredTools = [...greenrunTools, ...browserTools];
     existing.permissions = existing.permissions || {};
@@ -228,8 +255,7 @@ export async function runInit(args) {
             process.exit(1);
         }
     }
-    console.log('  [i] Claude in Chrome extension required for browser test execution');
-    console.log('      Get it at: https://chromewebstore.google.com/detail/claude-in-chrome\n');
+    console.log('  [i] Playwright MCP will be configured for browser test execution\n');
     let token = opts.token;
     let scope = opts.scope;
     if (interactive) {
@@ -283,14 +309,15 @@ export async function runInit(args) {
        scope = scope || 'local';
    }
    // Configure MCP
-    console.log('Configuring MCP server...');
+    console.log('Configuring MCP servers...');
    if (scope === 'project') {
        configureMcpProject(token);
    }
    else {
        configureMcpLocal(token);
    }
-    console.log('  MCP server configured.\n');
+    configurePlaywrightMcp();
+    console.log('  MCP servers configured.\n');
    // Install extras
    if (opts.claudeMd) {
        installClaudeMd();
@@ -302,7 +329,7 @@ export async function runInit(args) {
    console.log(`
Done! Restart Claude Code to connect.

-Make sure Chrome is open with the Claude in Chrome extension active
-before running /greenrun - Claude needs browser access to execute tests.
+Playwright will launch a Chrome browser automatically when running tests.
+Run /greenrun to execute your test suite.
`);
}
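One edge of the new `installClaudeMd` logic worth flagging: the replacement regex is anchored to end-of-file, so it assumes the Greenrun section is the last thing in CLAUDE.md. A quick illustration (file contents hypothetical):

```ts
// The regex from installClaudeMd above, applied to a file where another
// section follows the Greenrun one.
const existing = '# Project\n\n## Greenrun\nold content\n\n## Notes\nkeep me?\n';
const snippet = '## Greenrun\nnew content\n';

const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
console.log(updated);
// '# Project\n\n## Greenrun\nnew content'
// The trailing '## Notes' section is swallowed too, because [\s\S]*$ matches
// everything from the heading to the end of the file.
```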
package/dist/server.js CHANGED
@@ -26,7 +26,16 @@ export async function startServer() {
     name: z.string().describe('Project name'),
     base_url: z.string().optional().describe('Base URL of the site (e.g. https://myapp.com)'),
     description: z.string().optional().describe('Project description'),
-    concurrency: z.number().int().min(1).max(20).optional().describe('Number of tests to run in parallel (default: 5)'),
+    auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests (default: none)'),
+    login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+    register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+    login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+    register_instructions: z.string().optional().describe('Steps to register a new user'),
+    credentials: z.array(z.object({
+        name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+        email: z.string().describe('Login email'),
+        password: z.string().describe('Login password'),
+    })).optional().describe('Named credential sets for test authentication (max 20)'),
 }, async (args) => {
     const result = await api.createProject(args);
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
@@ -35,6 +44,26 @@ export async function startServer() {
     const result = await api.getProject(args.project_id);
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
 });
+server.tool('update_project', 'Update project settings', {
+    project_id: z.string().describe('Project UUID'),
+    name: z.string().optional().describe('Updated project name'),
+    base_url: z.string().optional().describe('Updated base URL'),
+    description: z.string().optional().describe('Updated description'),
+    auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests'),
+    login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+    register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+    login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+    register_instructions: z.string().optional().describe('Steps to register a new user'),
+    credentials: z.array(z.object({
+        name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+        email: z.string().describe('Login email'),
+        password: z.string().describe('Login password'),
+    })).optional().describe('Named credential sets for test authentication (max 20)'),
+}, async (args) => {
+    const { project_id, ...data } = args;
+    const result = await api.updateProject(project_id, data);
+    return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+});
 // --- Pages ---
 server.tool('list_pages', 'List pages in a project', { project_id: z.string().describe('Project UUID') }, async (args) => {
     const result = await api.listPages(args.project_id);
@@ -64,6 +93,7 @@ export async function startServer() {
     page_ids: z.array(z.string()).optional().describe('UUIDs of pages this test covers'),
     status: z.enum(['draft', 'active', 'archived']).optional().describe('Test status (default: active)'),
     tags: z.array(z.string()).optional().describe('Tag names for organizing tests (e.g. ["smoke", "auth"])'),
+    credential_name: z.string().optional().describe('Name of a credential set from the project to use for authentication'),
 }, async (args) => {
     const result = await api.createTest(args.project_id, {
         name: args.name,
@@ -71,6 +101,7 @@ export async function startServer() {
         page_ids: args.page_ids,
         status: args.status,
         tags: args.tags,
+        credential_name: args.credential_name,
     });
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
 });
@@ -81,6 +112,9 @@ export async function startServer() {
     page_ids: z.array(z.string()).optional().describe('Updated page UUIDs (replaces existing)'),
     status: z.enum(['draft', 'active', 'archived']).optional().describe('Updated status'),
     tags: z.array(z.string()).optional().describe('Updated tag names (replaces existing tags)'),
+    credential_name: z.string().optional().nullable().describe('Name of a credential set from the project to use for authentication'),
+    script: z.string().optional().nullable().describe('Generated Playwright test script'),
+    script_generated_at: z.string().optional().nullable().describe('ISO timestamp when the script was generated'),
 }, async (args) => {
     const { test_id, ...data } = args;
     const result = await api.updateTest(test_id, data);
@@ -98,6 +132,20 @@ export async function startServer() {
     });
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
 });
+// --- Batch ---
+server.tool('prepare_test_batch', 'Prepare a batch of tests for execution: lists tests, filters, fetches full details, and starts runs — all in one call. Returns everything needed to execute tests.', {
+    project_id: z.string().describe('Project UUID'),
+    filter: z.string().optional().describe('Filter: "tag:xxx" for tag, "/path" for page URL, or text for name substring'),
+    test_ids: z.array(z.string()).optional().describe('Specific test UUIDs to run (overrides filter)'),
+}, async (args) => {
+    try {
+        const result = await api.prepareTestBatch(args.project_id, args.filter, args.test_ids);
+        return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+    }
+    catch (error) {
+        return { content: [{ type: 'text', text: `Error: ${error.message}` }], isError: true };
+    }
+});
 // --- Test Runs ---
 server.tool('start_run', 'Start a test run (sets status to running)', { test_id: z.string().describe('Test UUID') }, async (args) => {
     const result = await api.startRun(args.test_id);
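For orientation, a hypothetical `update_project` invocation exercising the new auth schema (all values illustrative); the handler above strips `project_id` and forwards the rest to `api.updateProject`:

```ts
// Hypothetical tool arguments; the shape mirrors the zod schema registered above.
const updateProjectArgs = {
  project_id: 'project-uuid',
  auth_mode: 'existing_user' as const, // 'none' | 'existing_user' | 'new_user'
  login_url: 'https://myapp.com/login',
  login_instructions: 'Fill in email and password, then click "Sign in".',
  credentials: [
    { name: 'admin', email: 'admin@example.com', password: 'example-password' },
    { name: 'viewer', email: 'viewer@example.com', password: 'example-password' },
  ],
};
```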
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
     "name": "greenrun-cli",
-    "version": "0.1.6",
+    "version": "0.2.0",
     "description": "CLI and MCP server for Greenrun - browser test management for Claude Code",
     "type": "module",
     "main": "dist/server.js",
@@ -2,34 +2,44 @@

 ### Prerequisites

-- **Claude in Chrome extension** must be installed and active in your browser for test execution
+- **Playwright MCP** must be configured for browser test execution (`claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile`)
 - MCP server must be connected (check with `/mcp` in Claude Code)

 ### Available MCP Tools

 The Greenrun MCP server provides these tools:

-- **list_projects** / **get_project** / **create_project** - Manage projects
+- **list_projects** / **get_project** / **create_project** - Manage projects (includes auth configuration)
 - **list_pages** / **create_page** - Manage page URLs within a project
 - **list_tests** / **get_test** / **create_test** / **update_test** - Manage test cases
 - **start_run** / **complete_run** / **get_run** / **list_runs** - Execute and track test runs
 - **sweep** - Impact analysis: find tests affected by changed pages
+- **prepare_test_batch** - Batch prepare tests for execution (lists, filters, fetches details, starts runs in one call)

 ### Running Tests

-To run tests for this project:
+Use the `/greenrun` slash command to run all tests automatically, or `/greenrun tag:smoke` to filter.

-1. Use `list_projects` to find the project, then `list_tests` to get all tests
-2. For each test, call `get_test` to retrieve the full instructions
-3. Call `start_run` to begin a run (returns a run ID)
-4. Execute the test instructions using browser automation (Claude in Chrome)
-5. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+To run tests manually:

-Or use the `/greenrun` slash command to run all tests automatically.
+1. Use `list_projects` to find the project
+2. Call `prepare_test_batch` with the project ID (and optional filter) to get test details and run IDs
+3. Execute each test's instructions using Playwright browser automation tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`)
+4. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+
+### Auth Configuration
+
+Projects can be configured with authentication settings so tests auto-login before execution:
+
+- **`auth_mode: "none"`** - No authentication (default)
+- **`auth_mode: "existing_user"`** - Log in with existing credentials via `login_url` and `login_instructions`
+- **`auth_mode: "new_user"`** - Register a new account each run via `register_url` and `register_instructions`
+
+Projects can also store named **credentials** (name/email/password sets). Each test can reference a credential by `credential_name` to use specific login details during execution.

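To make the credential wiring concrete, a hypothetical pair of tool-argument payloads (names and values made up): the project stores the credential sets, and each test opts into one by name.

```ts
// Project-level: store named credential sets (via create_project or update_project).
const projectAuth = {
  auth_mode: 'existing_user',
  login_url: 'https://myapp.com/login',
  credentials: [{ name: 'admin', email: 'admin@example.com', password: 'example-password' }],
};

// Test-level: reference a credential set by name via create_test.
const adminTest = {
  project_id: 'project-uuid',
  name: 'Admin can open settings',
  instructions: '1. Go to /settings\n2. Verify the "Team" section is visible',
  credential_name: 'admin', // must match a name in the project's credentials array
};
```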
 ### Creating Tests

-1. Navigate to the page you want to test in Chrome
+1. Navigate to the page you want to test using Playwright
 2. Write clear, step-by-step test instructions describing what to do and what to verify
 3. Use `create_page` to register the page URL if not already registered
 4. Use `create_test` with the instructions and page IDs
@@ -36,4 +36,7 @@ Present the affected tests:

 ### 6. Offer to run

-Ask the user if they want to run the affected tests. If yes, read `.claude/commands/procedures.md` for the agent prompt template and execution procedures. Follow those procedures to pre-fetch test details, launch agents in batches, collect results, and summarize.
+Ask the user if they want to run the affected tests. If yes:
+
+1. Call `prepare_test_batch` with the project ID and `test_ids` set to the affected test IDs from the sweep results.
+2. Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
@@ -2,27 +2,25 @@ Run Greenrun browser tests for this project in parallel.

 ## Instructions

-You are executing browser tests managed by Greenrun. Tests run in parallel using background agents, each with its own browser tab. Follow these steps precisely:
+You are executing browser tests managed by Greenrun. Follow these steps precisely:

 ### 1. Find the project

 Call `list_projects` to get all projects. Match the current project by name or base URL. If no match is found, tell the user and stop.

-Note the project's `concurrency` value (default: 5). This controls how many tests run simultaneously.
+### 2. Prepare test batch

-### 2. Get tests
+Call `prepare_test_batch` with the project ID.

-Call `list_tests` with the project ID. Each test has associated pages and tags which are organizational metadata for filtering.
+If the user specified an argument ("$ARGUMENTS"), pass it as the `filter` parameter:
+- `tag:smoke` → filters by tag
+- `/checkout` → filters by page URL
+- `login` → filters by test name

-If the user specified an argument ("$ARGUMENTS"), use it to filter tests:
-- If it starts with `/` (e.g. `/checkout`), filter to tests linked to a page matching that URL
-- If it starts with `tag:` (e.g. `tag:smoke`), filter to tests with that tag
-- Otherwise, treat it as a test name filter
+If no argument is given, omit the filter to run all active tests.

-If no argument is given, run all active tests.
-
-If there are no matching active tests, tell the user and stop.
+If the result has zero tests, tell the user and stop.

 ### 3. Execute tests

-Read `.claude/commands/procedures.md` for the agent prompt template and execution procedures. Follow those procedures to pre-fetch test details, launch agents in batches, collect results, and summarize.
+Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
@@ -1,64 +1,335 @@
-Shared procedures for executing Greenrun browser tests in parallel. Referenced by `/greenrun` and `/greenrun-sweep`.
+Shared procedures for executing Greenrun browser tests. Referenced by `/greenrun` and `/greenrun-sweep`.

-## Pre-fetch
+## Authenticate

-Before launching agents, call `get_test` for ALL tests **in parallel** to get full instructions. Then call `start_run` for ALL tests **in parallel** to get run IDs.
+Before executing tests, handle authentication based on the project's `auth_mode` from the batch result.

-## Launch agents
+- **`none`** (or missing): Skip authentication entirely.
+- **`existing_user`**: Navigate to the project's `login_url` and follow the `login_instructions` step by step. Use `browser_snapshot` afterwards to verify the page shows an authenticated state (no login form visible).
+- **`new_user`**: Navigate to the project's `register_url` and follow the `register_instructions` step by step. Use `browser_snapshot` afterwards to verify registration succeeded and the user is authenticated.

-Split tests into batches of size `concurrency` (from project settings, default: 5).
+### Credentials

-For each batch, launch all tests simultaneously using the **Task tool** with `run_in_background: true`:
+The project may include a `credentials` array of named credential sets, each with `name`, `email`, and `password`. Each test may have a `credential_name` field referencing one of these sets.
+
+When authenticating for a test with `credential_name`:
+- Find the matching credential in `project.credentials` by name
+- Use that credential's email and password to fill the login form at `login_url`
+- If no `credential_name` is set on a test, use the first credential in the array (or fall back to `login_instructions`)
+
+When authenticating once for a batch (Step 1 below), use the credential that appears most frequently across the batch's tests. If tests use different credentials, re-authenticate between tests as needed.
+
+If auth fails (login form still visible after following instructions), report all tests as error with "Authentication failed" and stop.
+
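The selection rules in the added Credentials section reduce to a small lookup; a sketch (types illustrative, not part of the package):

```ts
type Credential = { name: string; email: string; password: string };
type BatchTest = { credential_name: string | null };

// Per-test rule: named credential if set, otherwise the first in the array.
function pickCredential(creds: Credential[], test: BatchTest): Credential | undefined {
  if (test.credential_name) {
    return creds.find((c) => c.name === test.credential_name);
  }
  return creds[0];
}

// Batch rule: log in once with the most frequently referenced credential.
function pickBatchCredential(creds: Credential[], tests: BatchTest[]): Credential | undefined {
  const counts = new Map<string, number>();
  for (const t of tests) {
    const name = t.credential_name ?? creds[0]?.name;
    if (name) counts.set(name, (counts.get(name) ?? 0) + 1);
  }
  const top = [...counts.entries()].sort((a, b) => b[1] - a[1])[0];
  return top ? creds.find((c) => c.name === top[0]) : undefined;
}
```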
+## Execute
+
+You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `instructions`, `credential_name`, `pages`, `tags`, `script`, `script_generated_at`).
+
+If `tests` is empty, tell the user no matching active tests were found and stop.
+
+### Step 1: Authenticate on the main page
+
+Run the Authenticate procedure above once, using the standard Playwright tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`).
+
+### Step 2: Classify tests
+
+Split the batch into two groups:
+
+- **scripted**: tests where `script` is non-null (cached Playwright scripts ready to run)
+- **unscripted**: tests where `script` is null (need script generation)
+
+If all tests are scripted, skip to Step 4.
+
+### Step 3: Score and generate scripts (easy-first)
+
+For each **unscripted** test, assign a difficulty score based on the instructions:
+
+- **easy** (1): Single-page tests with simple actions — navigate, check text/headings, verify static content, click a link and check the URL. Typically 1-4 steps, no form submissions, no multi-step flows.
+- **medium** (2): Tests involving form input, button clicks that trigger state changes, checking error/success messages, or verifying a redirect after an action. Typically 3-8 steps.
+- **hard** (3): Multi-page flows, tests requiring specific sequences of actions (e.g. add to cart then checkout), tests with complex assertions (table data, dynamic content), or tests involving file uploads, modals, or dialogs.
+
+Sort unscripted tests by difficulty ascending (easy first). This ensures simple tests get scripts generated quickly so native execution can start sooner.
+
+#### Walk-through script generation
+
+For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
+
+1. Navigate to the test's starting page via `browser_navigate`
+2. Take a `browser_snapshot` to see initial elements
+3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
+4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
+5. Collect all observed elements and selectors as context
+
+Then generate a `.spec.ts` script using the observed elements:
+
+```ts
+import { test, expect } from '@playwright/test';
+test('{test_name}', async ({ page }) => {
+  // If the test has a credential_name, include login steps using the matching
+  // credential from project.credentials (email + password) at the login_url
+  await page.goto('{start_url}');
+  // Steps generated from scouting pass observations
+  // Use getByRole, getByText, getByLabel, getByPlaceholder for selectors
+});
+```
+
+Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
+
+**Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for the next run.
+
+To launch the background generation agent:

 ```
-For each test in the current batch, call the Task tool with:
+Task tool with:
+- subagent_type: "general-purpose"
+- run_in_background: true
+- max_turns: 50
+- model: "sonnet"
+- prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
+```
+
+The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
+
+### Step 4: Export auth state
+
+If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
+
+```js
+async (page) => {
+  const state = await page.context().storageState();
+  require('fs').writeFileSync('/tmp/greenrun-auth-state.json', JSON.stringify(state));
+  return 'Auth state exported';
+}
+```
+
+Call this via `browser_run_code`. If `auth_mode` is `none`, skip this step.
+
+### Step 5: Write files and run natively
+
+Gather all tests that have scripts (previously scripted + newly generated from Step 3).
+
+1. **Write test files**: For each scripted test, write the script to `/tmp/greenrun-tests/{test_id}.spec.ts`
+
+2. **Write config**: Write `/tmp/greenrun-tests/playwright.config.ts`:
+
+```ts
+import { defineConfig } from '@playwright/test';
+export default defineConfig({
+  testDir: '.',
+  timeout: 30000,
+  workers: 20,
+  reporter: [['json', { outputFile: 'results.json' }]],
+  use: {
+    baseURL: '{base_url}',
+    storageState: '/tmp/greenrun-auth-state.json', // omit 'use.storageState' entirely if auth_mode is 'none'
+  },
+});
+```
+
+Replace `{base_url}` with the project's base_url.
+
+3. **Execute**: Run via Bash:
+```
+npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
+```
+
+4. **Parse results**: Read `/tmp/greenrun-tests/results.json`. Map each result back to a run ID via the filename: `{test_id}.spec.ts` → test_id → find the matching run_id from the batch.
+
+5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
+
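Steps 4 and 5 of the new procedure amount to a filename-keyed join between Playwright's JSON report and the batch. A sketch, under the assumptions that each spec file holds one test and that the reporter's suites/specs/tests/results nesting applies; `completeRun` stands in for the MCP `complete_run` call:

```ts
import { readFileSync } from 'node:fs';

type BatchTest = { test_id: string; run_id: string };

// Sketch only: report shape and completeRun signature are assumptions.
async function reportNativeResults(
  batch: { tests: BatchTest[] },
  completeRun: (runId: string, status: string, summary: string) => Promise<unknown>,
) {
  const report = JSON.parse(readFileSync('/tmp/greenrun-tests/results.json', 'utf-8'));
  for (const suite of report.suites ?? []) {
    // Filenames are {test_id}.spec.ts, so the file name recovers the test ID.
    const testId = String(suite.file).replace(/\.spec\.ts$/, '');
    const entry = batch.tests.find((t) => t.test_id === testId);
    if (!entry) continue;
    const status = suite.specs?.[0]?.tests?.[0]?.results?.[0]?.status ?? 'unknown';
    const mapped =
      status === 'passed' ? 'passed'
      : status === 'failed' || status === 'timedOut' ? 'failed'
      : 'error';
    await completeRun(entry.run_id, mapped, `Playwright status: ${status}`);
  }
}
```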
+### Step 6: Handle unscripted tests without scripts
+
+Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 8 for these tests.
+
+### Step 7: Circuit breaker
+
+After parsing all native results, walk through them in completion order. Track consecutive failures:
+
+- If **3 or more consecutive failures** occur:
+  - Mark all remaining un-reported tests as error: "Circuit breaker: N consecutive failures detected"
+  - Skip AI fallback for remaining tests
+- The counter resets on any pass
+
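The circuit-breaker rule is easy to get wrong by one; a compact sketch of the intended walk (illustrative helper, not in the package):

```ts
// Walk ordered results; everything after the tripping failure is skipped.
function applyCircuitBreaker(
  results: { runId: string; passed: boolean }[],
  threshold = 3,
): { reportable: { runId: string; passed: boolean }[]; tripped: boolean } {
  let consecutive = 0;
  for (let i = 0; i < results.length; i++) {
    consecutive = results[i].passed ? 0 : consecutive + 1; // counter resets on any pass
    if (consecutive >= threshold) {
      return { reportable: results.slice(0, i + 1), tripped: true };
    }
  }
  return { reportable: results, tripped: false };
}
```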
+### Step 8: AI-agent fallback for native failures
+
+For tests that **failed** in native execution (and the circuit breaker has not tripped):
+
+1. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
+2. Launch background Task agents using the tab-isolation pattern:
+
+Create tabs and launch agents in batches of 20:
+
+#### Create tab
+```js
+async (page) => {
+  const newPage = await page.context().newPage();
+  await newPage.goto(START_URL);
+  return { index: page.context().pages().length - 1, url: newPage.url() };
+}
+```
+
+#### Launch agent
+```
+Task tool with:
 - subagent_type: "general-purpose"
 - run_in_background: true
 - max_turns: 25
-- model: "haiku"
-- prompt: (see agent prompt below)
+- model: "sonnet"
+- prompt: (agent prompt below, including the native failure message for diagnosis)
 ```

-### Agent prompt
+#### Agent prompt

 ```
-Execute a Greenrun browser test. Run ID: {run_id}
+Greenrun browser test (AI fallback). Run ID: {run_id}
+Tab index: {INDEX}
+
+**{test_name}**

-**Test: {test_name}**
+{paste the full test instructions here}

-## Instructions
-{paste the full test instructions from get_test here}
+**Native execution failed with:** {failure_message}

-## Setup
-1. Call `tabs_context_mcp`, then `tabs_create_mcp` to create YOUR tab. Use ONLY this tabId — other tabs belong to parallel tests.
-2. Navigate to the first URL. Run `javascript_tool`: `window.location.pathname`. If it returns `/login`, call `complete_run` with status "error", result "Not authenticated", then `window.close()` and stop.
+Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.

-## Execution rules
-- Verify assertions with `screenshot` after actions that change the page. Do NOT use `read_page` for verification.
-- Use `find` to locate elements, then `ref` parameter on `computer` tool or `form_input` to interact.
-- Navigate with absolute URLs via `navigate` — don't click nav links.
-- Before destructive buttons: `window.alert = () => {}; window.confirm = () => true; window.prompt = () => null;`
-- On failure or timeout, retry ONCE then move on. Max 35 tool calls total.
+## CRITICAL: Tab isolation

-## Finish
-Call `complete_run` with run_id "{run_id}", status ("passed"/"failed"/"error"), and a brief result summary.
-Then run `javascript_tool`: `window.close()`.
+You are assigned to tab index {INDEX}. You MUST use ONLY `browser_run_code` for ALL browser interactions. Do NOT use `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate`, or any other Playwright MCP tools. The only non-browser tool you may call is `complete_run`.

-Return: {test_name} | {status} | {result_summary}
+Every `browser_run_code` call must scope to your tab:
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  // ... your action here ...
+}
 ```

-## Collect results
+## Auth
+No authentication needed — the main page already authenticated and cookies are shared to your tab.
+
+## Interaction patterns

-After launching all agents in a batch, wait for them all to complete (use `TaskOutput`) before launching the next batch.
+**Navigate:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.goto('https://example.com/path');
+  return p.url();
+}
+```
+
+**Read page state (replaces browser_snapshot):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const url = p.url();
+  const title = await p.title();
+  const text = await p.locator('body').innerText();
+  const headings = await p.getByRole('heading').allTextContents();
+  const buttons = await p.getByRole('button').allTextContents();
+  const links = await p.getByRole('link').allTextContents();
+  const textboxes = await p.getByRole('textbox').evaluateAll(els =>
+    els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
+  );
+  return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
+}
+```
+
+**Click an element:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('button', { name: 'Submit' }).click();
+  return p.url();
+}
+```
+
+**Fill a form field:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
+  return 'filled';
+}
+```
+
+**Handle a dialog:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  p.once('dialog', d => d.accept());
+  await p.getByRole('button', { name: 'Delete' }).click();
+  return p.url();
+}
+```
+
+**Check for specific text (verification):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const visible = await p.getByText('Success').isVisible();
+  return { found: visible };
+}
+```
+
+## Rules
+- ONLY use `browser_run_code` — no other browser tools
+- Always scope to `page.context().pages()[INDEX]`
+- Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
+- Read page state to find elements before interacting
+- Navigate with absolute URLs via `p.goto(url)` — never click nav links
+
+## FORBIDDEN — never use these:
+- `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
+- `browser_wait` — NEVER call this
+- `browser_screenshot` — NEVER use
+
+## Error recovery
+- On ANY failure: retry the failing step ONCE, then skip to Finish.
+
+## Finish (MANDATORY — always reach this step)
+1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
+2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
+3. Return: {test_name} | {status} | {summary}
+```
+
+#### Wait and clean up
+
+Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
+
+```js
+async (page) => {
+  const pages = page.context().pages();
+  for (let i = pages.length - 1; i >= 1; i--) {
+    await pages[i].close();
+  }
+  return { remainingPages: page.context().pages().length };
+}
+```
+
+Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
+
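Orphan detection here is just a set difference between the runs started in Step 8 and the runs agents actually completed; a sketch:

```ts
// Illustrative helper: keep the run IDs started in Step 8, collect the IDs
// that agents reported via complete_run, and diff them.
function findOrphanedRuns(startedRunIds: string[], completedRunIds: string[]): string[] {
  const completed = new Set(completedRunIds);
  return startedRunIds.filter((id) => !completed.has(id));
}

// Each orphan then gets: complete_run(runId, 'error', 'Agent crashed or timed out')
```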
+### Step 9: Wait for background generation
+
+If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.

 ## Summarize

-After all batches complete, present a summary table:
+Present a summary table with a Mode column showing how each test was executed:
+
+| Test | Pages | Tags | Mode | Status | Result |
+|------|-------|------|------|--------|--------|
+| Test name | /login, /dashboard | smoke, auth | native/agent/skipped | passed/failed/error | Brief summary |
+
+Mode values:
+- **native** — executed via `npx playwright test`
+- **agent** — executed via AI agent (fallback or no script available)
+- **skipped** — circuit breaker tripped, not executed
+
+Total: "X passed, Y failed, Z errors out of N tests"

-| Test | Pages | Tags | Status | Result |
-|------|-------|------|--------|--------|
-| Test name | /login, /dashboard | smoke, auth | passed/failed/error | Brief summary |
+If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."

-Include the total count: "X passed, Y failed, Z errors out of N tests"
+If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for the next run."

 If any tests failed, highlight what went wrong and suggest next steps.