greenrun-cli 0.1.5 → 0.2.0

@@ -12,14 +12,32 @@ export declare class ApiClient {
     name: string;
     base_url?: string;
     description?: string;
-    concurrency?: number;
+    auth_mode?: string;
+    login_url?: string;
+    register_url?: string;
+    login_instructions?: string;
+    register_instructions?: string;
+    credentials?: {
+      name: string;
+      email: string;
+      password: string;
+    }[];
   }): Promise<unknown>;
   getProject(id: string): Promise<unknown>;
   updateProject(id: string, data: {
     name?: string;
     base_url?: string;
     description?: string;
-    concurrency?: number;
+    auth_mode?: string;
+    login_url?: string;
+    register_url?: string;
+    login_instructions?: string;
+    register_instructions?: string;
+    credentials?: {
+      name: string;
+      email: string;
+      password: string;
+    }[];
   }): Promise<unknown>;
   deleteProject(id: string): Promise<unknown>;
   listPages(projectId: string): Promise<unknown>;
@@ -39,6 +57,7 @@ export declare class ApiClient {
     page_ids?: string[];
     status?: string;
     tags?: string[];
+    credential_name?: string;
   }): Promise<unknown>;
   getTest(id: string): Promise<unknown>;
   updateTest(id: string, data: {
@@ -47,6 +66,9 @@ export declare class ApiClient {
     page_ids?: string[];
    status?: string;
     tags?: string[];
+    credential_name?: string | null;
+    script?: string | null;
+    script_generated_at?: string | null;
   }): Promise<unknown>;
   deleteTest(id: string): Promise<unknown>;
   sweep(projectId: string, params: {
@@ -60,4 +82,28 @@ export declare class ApiClient {
   }): Promise<unknown>;
   getRun(runId: string): Promise<unknown>;
   listRuns(testId: string): Promise<unknown>;
+  prepareTestBatch(projectId: string, filter?: string, testIds?: string[]): Promise<{
+    project: {
+      id: any;
+      name: any;
+      base_url: any;
+      auth_mode: any;
+      login_url: any;
+      register_url: any;
+      login_instructions: any;
+      register_instructions: any;
+      credentials: any;
+    };
+    tests: {
+      test_id: any;
+      test_name: any;
+      run_id: any;
+      instructions: any;
+      credential_name: any;
+      pages: any;
+      tags: any;
+      script: any;
+      script_generated_at: any;
+    }[];
+  }>;
 }
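
Two things stand out in the new declarations: `concurrency` is gone from both project payloads, and `updateTest` gains nullable script-caching fields. A minimal sketch of the script-cache round trip, assuming a constructed `ApiClient` and a known test UUID (neither is shown in this hunk):

```ts
// Assumed setup: `api` is a constructed ApiClient, `testId` a known test UUID.
// Cache a generated Playwright script on a test:
await api.updateTest(testId, {
  script: "import { test } from '@playwright/test'; // ...",
  script_generated_at: new Date().toISOString(),
});

// Invalidate a stale script later with explicit nulls: the fields are
// `string | null`, so null means "clear the value", not "leave unchanged".
await api.updateTest(testId, { script: null, script_generated_at: null });
```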
@@ -102,4 +102,70 @@ export class ApiClient {
   async listRuns(testId) {
     return this.request('GET', `/tests/${testId}/runs`);
   }
+  // Batch operations
+  async prepareTestBatch(projectId, filter, testIds) {
+    const [projectResult, testsResult] = await Promise.all([
+      this.getProject(projectId),
+      this.listTests(projectId),
+    ]);
+    const project = projectResult.project;
+    let tests = (testsResult.tests || []).filter((t) => t.status === 'active');
+    if (testIds && testIds.length > 0) {
+      const idSet = new Set(testIds);
+      tests = tests.filter((t) => idSet.has(t.id));
+    }
+    else if (filter) {
+      if (filter.startsWith('tag:')) {
+        const tag = filter.slice(4).toLowerCase();
+        tests = tests.filter((t) => (t.tags || []).some((tg) => (tg.name || tg).toLowerCase() === tag));
+      }
+      else if (filter.startsWith('/')) {
+        tests = tests.filter((t) => (t.pages || []).some((p) => (p.url || '').includes(filter)));
+      }
+      else {
+        const term = filter.toLowerCase();
+        tests = tests.filter((t) => (t.name || '').toLowerCase().includes(term));
+      }
+    }
+    if (tests.length === 0) {
+      return {
+        project: {
+          id: project.id, name: project.name, base_url: project.base_url,
+          auth_mode: project.auth_mode ?? 'none',
+          login_url: project.login_url ?? null,
+          register_url: project.register_url ?? null,
+          login_instructions: project.login_instructions ?? null,
+          register_instructions: project.register_instructions ?? null,
+          credentials: project.credentials ?? null,
+        },
+        tests: [],
+      };
+    }
+    // Fetch full test details in parallel
+    const fullTests = await Promise.all(tests.map((t) => this.getTest(t.id)));
+    // Start runs in parallel
+    const runs = await Promise.all(tests.map((t) => this.startRun(t.id)));
+    return {
+      project: {
+        id: project.id, name: project.name, base_url: project.base_url,
+        auth_mode: project.auth_mode ?? 'none',
+        login_url: project.login_url ?? null,
+        register_url: project.register_url ?? null,
+        login_instructions: project.login_instructions ?? null,
+        register_instructions: project.register_instructions ?? null,
+        credentials: project.credentials ?? null,
+      },
+      tests: fullTests.map((ft, i) => ({
+        test_id: ft.test.id,
+        test_name: ft.test.name,
+        run_id: runs[i].run.id,
+        instructions: ft.test.instructions,
+        credential_name: ft.test.credential_name ?? null,
+        pages: ft.test.pages || [],
+        tags: ft.test.tags || [],
+        script: ft.test.script ?? null,
+        script_generated_at: ft.test.script_generated_at ?? null,
+      })),
+    };
+  }
 }
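
A usage sketch of the batch helper (IDs illustrative, `api` assumed constructed as above), showing the three filter forms the implementation dispatches on, plus the explicit-ID override:

```ts
const byTag  = await api.prepareTestBatch(projectId, 'tag:smoke'); // tag name match
const byPage = await api.prepareTestBatch(projectId, '/checkout'); // page-URL substring
const byName = await api.prepareTestBatch(projectId, 'login');     // test-name substring

// Explicit IDs take precedence over any filter.
const byIds = await api.prepareTestBatch(projectId, undefined, ['9a8b7c6d-…']);

// Every returned test already has a started run attached.
for (const t of byTag.tests) {
  console.log(t.test_name, t.run_id, t.script ? 'scripted' : 'unscripted');
}
```

The point of the method is round-trip reduction: what used to be one `list_tests` plus N `get_test` and N `start_run` calls collapses into a single call, with both per-test fan-outs batched through `Promise.all`.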
@@ -44,7 +44,7 @@ function checkPrerequisites() {
   catch {
     // not installed
   }
-  return { claude, chromeHint: true };
+  return { claude };
 }
 async function validateToken(token) {
   try {
@@ -74,6 +74,15 @@ function configureMcpLocal(token) {
     console.error(` claude mcp add greenrun --transport stdio -e GREENRUN_API_TOKEN=${token} -- npx -y greenrun-cli@latest\n`);
   }
 }
+function configurePlaywrightMcp() {
+  try {
+    execSync('claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile', { stdio: 'inherit' });
+  }
+  catch {
+    console.error('\nFailed to add Playwright MCP. You can add it manually:\n');
+    console.error(' claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile\n');
+  }
+}
 function configureMcpProject(token) {
   const mcpConfig = {
     mcpServers: {
@@ -127,7 +136,9 @@ function installClaudeMd() {
   if (existsSync(claudeMdPath)) {
     const existing = readFileSync(claudeMdPath, 'utf-8');
     if (existing.includes('## Greenrun')) {
-      console.log(' CLAUDE.md already contains Greenrun section, skipping');
+      const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
+      writeFileSync(claudeMdPath, updated.endsWith('\n') ? updated : updated + '\n');
+      console.log(' Replaced Greenrun section in CLAUDE.md');
       return;
     }
     appendFileSync(claudeMdPath, '\n' + snippet);
@@ -155,6 +166,7 @@ function installSettings() {
     'mcp__greenrun__list_projects',
     'mcp__greenrun__get_project',
     'mcp__greenrun__create_project',
+    'mcp__greenrun__update_project',
     'mcp__greenrun__list_pages',
     'mcp__greenrun__create_page',
     'mcp__greenrun__list_tests',
@@ -166,19 +178,34 @@ function installSettings() {
     'mcp__greenrun__get_run',
     'mcp__greenrun__list_runs',
     'mcp__greenrun__sweep',
+    'mcp__greenrun__prepare_test_batch',
   ];
   const browserTools = [
-    'mcp__claude-in-chrome__tabs_context_mcp',
-    'mcp__claude-in-chrome__tabs_create_mcp',
-    'mcp__claude-in-chrome__navigate',
-    'mcp__claude-in-chrome__computer',
-    'mcp__claude-in-chrome__read_page',
-    'mcp__claude-in-chrome__find',
-    'mcp__claude-in-chrome__form_input',
-    'mcp__claude-in-chrome__javascript_tool',
-    'mcp__claude-in-chrome__get_page_text',
-    'mcp__claude-in-chrome__read_console_messages',
-    'mcp__claude-in-chrome__read_network_requests',
+    'mcp__playwright__browser_navigate',
+    'mcp__playwright__browser_snapshot',
+    'mcp__playwright__browser_click',
+    'mcp__playwright__browser_type',
+    'mcp__playwright__browser_handle_dialog',
+    'mcp__playwright__browser_tab_list',
+    'mcp__playwright__browser_tab_new',
+    'mcp__playwright__browser_tab_select',
+    'mcp__playwright__browser_tab_close',
+    'mcp__playwright__browser_select_option',
+    'mcp__playwright__browser_hover',
+    'mcp__playwright__browser_drag',
+    'mcp__playwright__browser_press_key',
+    'mcp__playwright__browser_screenshot',
+    'mcp__playwright__browser_wait',
+    'mcp__playwright__browser_file_upload',
+    'mcp__playwright__browser_pdf_save',
+    'mcp__playwright__browser_close',
+    'mcp__playwright__browser_console_messages',
+    'mcp__playwright__browser_resize',
+    'mcp__playwright__browser_run_code',
+    'mcp__playwright__browser_evaluate',
+    'mcp__playwright__browser_fill_form',
+    'mcp__playwright__browser_tabs',
+    'mcp__playwright__browser_network_requests',
   ];
   const requiredTools = [...greenrunTools, ...browserTools];
   existing.permissions = existing.permissions || {};
@@ -191,7 +218,7 @@ function installSettings() {
 function installCommands() {
   const commandsDir = join(process.cwd(), '.claude', 'commands');
   mkdirSync(commandsDir, { recursive: true });
-  const commands = ['greenrun.md', 'greenrun-sweep.md'];
+  const commands = ['greenrun.md', 'greenrun-sweep.md', 'procedures.md'];
   for (const cmd of commands) {
     const src = join(TEMPLATES_DIR, 'commands', cmd);
     if (!existsSync(src)) {
@@ -228,8 +255,7 @@ export async function runInit(args) {
       process.exit(1);
     }
   }
-  console.log(' [i] Claude in Chrome extension required for browser test execution');
-  console.log(' Get it at: https://chromewebstore.google.com/detail/claude-in-chrome\n');
+  console.log(' [i] Playwright MCP will be configured for browser test execution\n');
   let token = opts.token;
   let scope = opts.scope;
   if (interactive) {
@@ -283,14 +309,15 @@ export async function runInit(args) {
     scope = scope || 'local';
   }
   // Configure MCP
-  console.log('Configuring MCP server...');
+  console.log('Configuring MCP servers...');
   if (scope === 'project') {
     configureMcpProject(token);
   }
   else {
     configureMcpLocal(token);
   }
-  console.log(' MCP server configured.\n');
+  configurePlaywrightMcp();
+  console.log(' MCP servers configured.\n');
   // Install extras
   if (opts.claudeMd) {
     installClaudeMd();
@@ -302,7 +329,7 @@ export async function runInit(args) {
   console.log(`
 Done! Restart Claude Code to connect.
 
-Make sure Chrome is open with the Claude in Chrome extension active
-before running /greenrun - Claude needs browser access to execute tests.
+Playwright will launch a Chrome browser automatically when running tests.
+Run /greenrun to execute your test suite.
 `);
 }
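
One behavioral nuance in the new `installClaudeMd`: the replacement regex matches from the `## Greenrun` heading to end-of-file, so it assumes the Greenrun section is the last thing in CLAUDE.md. A standalone sketch (sample strings invented) of the consequence when it is not:

```ts
// Mirrors the replace in installClaudeMd above.
const existing = '# My project\n\nNotes.\n\n## Greenrun\nold steps\n\n## My section\n';
const snippet = '## Greenrun\nnew steps\n';

// [\s\S]*$ spans newlines, so '## My section' is consumed along with
// the old Greenrun text and replaced by the fresh snippet.
const updated = existing.replace(/## Greenrun[\s\S]*$/, snippet.trimEnd());
console.log(updated.endsWith('\n') ? updated : updated + '\n');
```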
package/dist/server.js CHANGED
@@ -26,7 +26,16 @@ export async function startServer() {
     name: z.string().describe('Project name'),
     base_url: z.string().optional().describe('Base URL of the site (e.g. https://myapp.com)'),
     description: z.string().optional().describe('Project description'),
-    concurrency: z.number().int().min(1).max(20).optional().describe('Number of tests to run in parallel (default: 5)'),
+    auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests (default: none)'),
+    login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+    register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+    login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+    register_instructions: z.string().optional().describe('Steps to register a new user'),
+    credentials: z.array(z.object({
+      name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+      email: z.string().describe('Login email'),
+      password: z.string().describe('Login password'),
+    })).optional().describe('Named credential sets for test authentication (max 20)'),
   }, async (args) => {
     const result = await api.createProject(args);
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
@@ -35,6 +44,26 @@ export async function startServer() {
     const result = await api.getProject(args.project_id);
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
   });
+  server.tool('update_project', 'Update project settings', {
+    project_id: z.string().describe('Project UUID'),
+    name: z.string().optional().describe('Updated project name'),
+    base_url: z.string().optional().describe('Updated base URL'),
+    description: z.string().optional().describe('Updated description'),
+    auth_mode: z.enum(['none', 'existing_user', 'new_user']).optional().describe('How to authenticate before tests'),
+    login_url: z.string().optional().describe('URL of login page (for existing_user auth mode)'),
+    register_url: z.string().optional().describe('URL of registration page (for new_user auth mode)'),
+    login_instructions: z.string().optional().describe('Steps to log in with existing credentials'),
+    register_instructions: z.string().optional().describe('Steps to register a new user'),
+    credentials: z.array(z.object({
+      name: z.string().describe('Credential set name (e.g. "admin", "viewer")'),
+      email: z.string().describe('Login email'),
+      password: z.string().describe('Login password'),
+    })).optional().describe('Named credential sets for test authentication (max 20)'),
+  }, async (args) => {
+    const { project_id, ...data } = args;
+    const result = await api.updateProject(project_id, data);
+    return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+  });
   // --- Pages ---
   server.tool('list_pages', 'List pages in a project', { project_id: z.string().describe('Project UUID') }, async (args) => {
     const result = await api.listPages(args.project_id);
@@ -64,6 +93,7 @@ export async function startServer() {
     page_ids: z.array(z.string()).optional().describe('UUIDs of pages this test covers'),
     status: z.enum(['draft', 'active', 'archived']).optional().describe('Test status (default: active)'),
     tags: z.array(z.string()).optional().describe('Tag names for organizing tests (e.g. ["smoke", "auth"])'),
+    credential_name: z.string().optional().describe('Name of a credential set from the project to use for authentication'),
   }, async (args) => {
     const result = await api.createTest(args.project_id, {
       name: args.name,
@@ -71,6 +101,7 @@ export async function startServer() {
       page_ids: args.page_ids,
       status: args.status,
      tags: args.tags,
+      credential_name: args.credential_name,
     });
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
   });
@@ -81,6 +112,9 @@ export async function startServer() {
     page_ids: z.array(z.string()).optional().describe('Updated page UUIDs (replaces existing)'),
     status: z.enum(['draft', 'active', 'archived']).optional().describe('Updated status'),
     tags: z.array(z.string()).optional().describe('Updated tag names (replaces existing tags)'),
+    credential_name: z.string().optional().nullable().describe('Name of a credential set from the project to use for authentication'),
+    script: z.string().optional().nullable().describe('Generated Playwright test script'),
+    script_generated_at: z.string().optional().nullable().describe('ISO timestamp when the script was generated'),
   }, async (args) => {
     const { test_id, ...data } = args;
     const result = await api.updateTest(test_id, data);
@@ -98,6 +132,20 @@ export async function startServer() {
     });
     return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
   });
+  // --- Batch ---
+  server.tool('prepare_test_batch', 'Prepare a batch of tests for execution: lists tests, filters, fetches full details, and starts runs — all in one call. Returns everything needed to execute tests.', {
+    project_id: z.string().describe('Project UUID'),
+    filter: z.string().optional().describe('Filter: "tag:xxx" for tag, "/path" for page URL, or text for name substring'),
+    test_ids: z.array(z.string()).optional().describe('Specific test UUIDs to run (overrides filter)'),
+  }, async (args) => {
+    try {
+      const result = await api.prepareTestBatch(args.project_id, args.filter, args.test_ids);
+      return { content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] };
+    }
+    catch (error) {
+      return { content: [{ type: 'text', text: `Error: ${error.message}` }], isError: true };
+    }
+  });
   // --- Test Runs ---
   server.tool('start_run', 'Start a test run (sets status to running)', { test_id: z.string().describe('Test UUID') }, async (args) => {
     const result = await api.startRun(args.test_id);
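
For reference, a `prepare_test_batch` invocation carries arguments shaped like this (UUID invented); per the `prepareTestBatch` dispatch shown earlier, `test_ids` wins over `filter` when both are supplied:

```ts
// Illustrative arguments matching the zod schema above.
const args = {
  project_id: '6f1d2e3c-1111-2222-3333-444455556666',
  filter: 'tag:smoke',          // ignored when test_ids is present
  // test_ids: ['<test-uuid>'], // explicit selection overrides filter
};
```

On failure, the handler replies with a single text block of the form `Error: <message>` and `isError: true`, as in the catch branch above.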
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "greenrun-cli",
-  "version": "0.1.5",
+  "version": "0.2.0",
   "description": "CLI and MCP server for Greenrun - browser test management for Claude Code",
   "type": "module",
   "main": "dist/server.js",
@@ -2,34 +2,44 @@
 
 ### Prerequisites
 
-- **Claude in Chrome extension** must be installed and active in your browser for test execution
+- **Playwright MCP** must be configured for browser test execution (`claude mcp add playwright -- npx @playwright/mcp@latest --browser chrome --user-data-dir ~/.greenrun/browser-profile`)
 - MCP server must be connected (check with `/mcp` in Claude Code)
 
 ### Available MCP Tools
 
 The Greenrun MCP server provides these tools:
 
-- **list_projects** / **get_project** / **create_project** - Manage projects
+- **list_projects** / **get_project** / **create_project** - Manage projects (includes auth configuration)
 - **list_pages** / **create_page** - Manage page URLs within a project
 - **list_tests** / **get_test** / **create_test** / **update_test** - Manage test cases
 - **start_run** / **complete_run** / **get_run** / **list_runs** - Execute and track test runs
 - **sweep** - Impact analysis: find tests affected by changed pages
+- **prepare_test_batch** - Batch prepare tests for execution (lists, filters, fetches details, starts runs in one call)
 
 ### Running Tests
 
-To run tests for this project:
+Use the `/greenrun` slash command to run all tests automatically, or `/greenrun tag:smoke` to filter.
 
-1. Use `list_projects` to find the project, then `list_tests` to get all tests
-2. For each test, call `get_test` to retrieve the full instructions
-3. Call `start_run` to begin a run (returns a run ID)
-4. Execute the test instructions using browser automation (Claude in Chrome)
-5. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+To run tests manually:
 
-Or use the `/greenrun` slash command to run all tests automatically.
+1. Use `list_projects` to find the project
+2. Call `prepare_test_batch` with the project ID (and optional filter) to get test details and run IDs
+3. Execute each test's instructions using Playwright browser automation tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`)
+4. Call `complete_run` with the run ID, status (passed/failed/error), and a result summary
+
+### Auth Configuration
+
+Projects can be configured with authentication settings so tests auto-login before execution:
+
+- **`auth_mode: "none"`** - No authentication (default)
+- **`auth_mode: "existing_user"`** - Log in with existing credentials via `login_url` and `login_instructions`
+- **`auth_mode: "new_user"`** - Register a new account each run via `register_url` and `register_instructions`
+
+Projects can also store named **credentials** (name/email/password sets). Each test can reference a credential by `credential_name` to use specific login details during execution.
 
 ### Creating Tests
 
-1. Navigate to the page you want to test in Chrome
+1. Navigate to the page you want to test using Playwright
 2. Write clear, step-by-step test instructions describing what to do and what to verify
 3. Use `create_page` to register the page URL if not already registered
 4. Use `create_test` with the instructions and page IDs
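
To make the auth configuration concrete, an example `create_project` argument set for an existing-user project (all values invented):

```ts
const args = {
  name: 'Storefront',
  base_url: 'https://shop.example.com',
  auth_mode: 'existing_user',
  login_url: 'https://shop.example.com/login',
  login_instructions: 'Enter email and password, then click "Sign in".',
  credentials: [
    { name: 'admin',  email: 'admin@example.com',  password: 'example-admin-pw' },
    { name: 'viewer', email: 'viewer@example.com', password: 'example-viewer-pw' },
  ],
};
```

A test created with `credential_name: "viewer"` then authenticates with the viewer set instead of the default (first) credential.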
@@ -36,41 +36,7 @@ Present the affected tests:
 
 ### 6. Offer to run
 
-Ask the user if they want to run the affected tests. If yes, execute them **in parallel** using the same approach as the `/greenrun` command:
+Ask the user if they want to run the affected tests. If yes:
 
-Use the project's `concurrency` setting (default: 5) to determine batch size. Split affected tests into batches and launch each batch simultaneously using the **Task tool** with `run_in_background: true`.
-
-For each test in a batch, launch a background agent with `max_turns: 30` and `model: "sonnet"`. Use this prompt:
-
-```
-You are executing a single Greenrun browser test. You have access to browser automation tools and Greenrun MCP tools.
-
-**Test: {test_name}** (ID: {test_id})
-
-Step 1: Call `get_test` with test_id "{test_id}" to get full instructions.
-Step 2: Call `start_run` with test_id "{test_id}" to begin - save the returned `run_id`.
-Step 3: Execute the test instructions using browser automation:
-  - Call `tabs_context_mcp` then create a new browser tab for this test
-  - Follow each instruction step exactly as written
-  - The instructions will tell you where to navigate and what to do
-  - Only take a screenshot when you need to verify a visual assertion — not for every navigation or click
-  - When reading page content, prefer `find` or `read_page` with `filter: "interactive"` over full DOM reads
-  - NEVER trigger JavaScript alerts, confirms, or prompts — they block the browser extension entirely. Before clicking delete buttons or other destructive actions, use `javascript_tool` to override: `window.alert = () => {}; window.confirm = () => true; window.prompt = () => null;`
-  - If browser tools stop responding (no result or timeout), assume a dialog is blocking — report the error and stop. Do not keep retrying.
-  - If you get stuck or a step fails, record the failure and move on — do not retry more than once
-Step 4: Call `complete_run` with:
-  - run_id: the run ID from step 2
-  - status: "passed" if all checks succeeded, "failed" if any check failed, "error" if execution was blocked
-  - result: a brief summary of what happened (include the failure reason if failed/error)
-Step 5: Close the browser tab you created to clean up.
-
-Return a single line summary: {test_name} | {status} | {result_summary}
-```
-
-Wait for each batch to complete before launching the next. After all tests finish, present a summary table:
-
-| Test | Pages | Tags | Status | Result |
-|------|-------|------|--------|--------|
-| Test name | Affected page URLs | tag1, tag2 | passed/failed/error | Brief summary |
-
-Include the total count: "X passed, Y failed, Z errors out of N tests"
+1. Call `prepare_test_batch` with the project ID and `test_ids` set to the affected test IDs from the sweep results.
+2. Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
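
Concretely, the hand-off from sweep becomes a single call with arguments like these (UUIDs invented):

```ts
const args = {
  project_id: '6f1d2e3c-1111-2222-3333-444455556666',
  test_ids: [
    '0a1b2c3d-aaaa-bbbb-cccc-ddddeeeeffff', // affected test IDs from the sweep
    '9f8e7d6c-ffff-eeee-dddd-ccccbbbbaaaa',
  ],
};
```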
@@ -2,100 +2,25 @@ Run Greenrun browser tests for this project in parallel.
 
 ## Instructions
 
-You are executing browser tests managed by Greenrun. Tests run in parallel using background agents, each with its own browser tab. Follow these steps precisely:
+You are executing browser tests managed by Greenrun. Follow these steps precisely:
 
 ### 1. Find the project
 
 Call `list_projects` to get all projects. Match the current project by name or base URL. If no match is found, tell the user and stop.
 
-Note the project's `concurrency` value (default: 5). This controls how many tests run simultaneously.
+### 2. Prepare test batch
 
-### 2. Get tests
+Call `prepare_test_batch` with the project ID.
 
-Call `list_tests` with the project ID. Each test has associated pages and tags which are organizational metadata for filtering.
+If the user specified an argument ("$ARGUMENTS"), pass it as the `filter` parameter:
+- `tag:smoke` → filters by tag
+- `/checkout` → filters by page URL
+- `login` → filters by test name
 
-If the user specified an argument ("$ARGUMENTS"), use it to filter tests:
-- If it starts with `/` (e.g. `/checkout`), filter to tests linked to a page matching that URL
-- If it starts with `tag:` (e.g. `tag:smoke`), filter to tests with that tag
-- Otherwise, treat it as a test name filter
+If no argument is given, omit the filter to run all active tests.
 
-If no argument is given, run all active tests.
+If the result has zero tests, tell the user and stop.
 
-If there are no matching active tests, tell the user and stop.
+### 3. Execute tests
 
-### 3. Pre-fetch test details
-
-Call `get_test` for ALL matching tests **in parallel** (multiple tool calls in one message). This retrieves the full instructions for each test.
-
-Then call `start_run` for ALL tests **in parallel** to get run IDs.
-
-You now have everything needed to launch agents: test name, full instructions, and run_id for each test.
-
-### 4. Execute tests in parallel
-
-Split the test list into batches of size `concurrency` (from the project settings).
-
-For each batch, launch all tests simultaneously using the **Task tool** with `run_in_background: true`. Each background agent receives a prompt with the full instructions and run_id embedded — agents do NOT need to call `get_test` or `start_run`.
-
-```
-For each test in the current batch, call the Task tool with:
-- subagent_type: "general-purpose"
-- run_in_background: true
-- max_turns: 50
-- model: "sonnet"
-- prompt: (see below)
-```
-
-The prompt for each background agent should be:
-
-```
-You are executing a single Greenrun browser test using browser automation tools. Be efficient — minimize tool calls to complete the test as fast as possible.
-
-**Test: {test_name}**
-**Run ID: {run_id}**
-
-## Test Instructions
-
-{paste the full test instructions from get_test here}
-
-## Execution Steps
-
-1. Call `tabs_context_mcp` then create a new browser tab with `tabs_create_mcp`
-2. Follow each test instruction step exactly as written, using these rules to minimize tool calls:
-
-**Speed rules (critical):**
-- NEVER take screenshots. Use `read_page` or `find` for all assertions and to locate elements.
-- Navigate directly to URLs (e.g. `navigate` to `/tokens`) instead of clicking through nav links
-- Use `javascript_tool` for quick assertions: `document.querySelector('h1')?.textContent` is faster than `read_page` for checking a heading
-- Use `read_page` with `filter: "interactive"` to verify multiple things in one call rather than separate `find` calls
-- Use `form_input` with element refs for filling forms — avoid click-then-type sequences
-- When clicking elements, use `ref` parameter instead of coordinates to avoid needing screenshots
-- Combine verification: after a page loads, do ONE `read_page` call and check all assertions from that result
-
-**Reliability rules:**
-- NEVER trigger JavaScript alerts, confirms, or prompts — they block the browser extension entirely. Before clicking delete buttons or other destructive actions, use `javascript_tool` to override: `window.alert = () => {}; window.confirm = () => true; window.prompt = () => null;`
-- If browser tools stop responding (no result or timeout), assume a dialog is blocking — report the error and stop. Do not keep retrying.
-- If you get stuck or a step fails, record the failure and move on — do not retry more than once
-- If you are redirected to a login page, try using an existing logged-in tab from `tabs_context_mcp` instead of creating a new one
-
-3. Call `complete_run` with:
-   - run_id: "{run_id}"
-   - status: "passed" if all checks succeeded, "failed" if any check failed, "error" if execution was blocked
-   - result: a brief summary of what happened (include the failure reason if failed/error)
-
-Return a single line summary: {test_name} | {status} | {result_summary}
-```
-
-After launching all agents in a batch, wait for them all to complete (use `TaskOutput` to collect results) before launching the next batch.
-
-### 5. Summarize results
-
-After all batches complete, collect results from all background agents and present a summary table:
-
-| Test | Pages | Tags | Status | Result |
-|------|-------|------|--------|--------|
-| Test name | /login, /dashboard | smoke, auth | passed/failed/error | Brief summary |
-
-Include the total count: "X passed, Y failed, Z errors out of N tests"
-
-If any tests failed, highlight what went wrong and suggest next steps.
+Read `.claude/commands/procedures.md` and follow the Execute and Summarize procedures using the batch result.
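
The batch result the command hands to procedures.md mirrors the `prepareTestBatch` return type shown earlier. A trimmed sketch (values invented; the element shapes of `pages` and `tags` are assumptions inferred from the filter code):

```ts
const batch = {
  project: {
    id: '6f1d2e3c-…', name: 'Storefront', base_url: 'https://shop.example.com',
    auth_mode: 'existing_user',
    login_url: 'https://shop.example.com/login', register_url: null,
    login_instructions: 'Enter email and password, then click "Sign in".',
    register_instructions: null,
    credentials: [{ name: 'admin', email: 'admin@example.com', password: '…' }],
  },
  tests: [{
    test_id: '0a1b2c3d-…', test_name: 'Checkout happy path', run_id: '7c6d5e4f-…',
    instructions: '1. Go to /checkout …', credential_name: 'admin',
    pages: [{ url: '/checkout' }], tags: [{ name: 'smoke' }],
    script: null, script_generated_at: null,
  }],
};
```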
@@ -0,0 +1,335 @@
+Shared procedures for executing Greenrun browser tests. Referenced by `/greenrun` and `/greenrun-sweep`.
+
+## Authenticate
+
+Before executing tests, handle authentication based on the project's `auth_mode` from the batch result.
+
+- **`none`** (or missing): Skip authentication entirely.
+- **`existing_user`**: Navigate to the project's `login_url` and follow the `login_instructions` step by step. Use `browser_snapshot` after to verify the page shows an authenticated state (no login form visible).
+- **`new_user`**: Navigate to the project's `register_url` and follow the `register_instructions` step by step. Use `browser_snapshot` after to verify registration succeeded and the user is authenticated.
+
+### Credentials
+
+The project may include a `credentials` array — named credential sets with `name`, `email`, and `password`. Each test may have a `credential_name` field referencing one of these sets.
+
+When authenticating for a test with `credential_name`:
+- Find the matching credential in `project.credentials` by name
+- Use that credential's email and password to fill the login form at `login_url`
+- If no `credential_name` is set on a test, use the first credential in the array (or fall back to `login_instructions`)
+
+When authenticating once for a batch (Step 1 below), use the credential that appears most frequently across the batch's tests. If tests use different credentials, re-authenticate between tests as needed.
+
+If auth fails (login form still visible after following instructions), report all tests as error with "Authentication failed" and stop.
+
+## Execute
+
+You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `instructions`, `credential_name`, `pages`, `tags`, `script`, `script_generated_at`).
+
+If `tests` is empty, tell the user no matching active tests were found and stop.
+
+### Step 1: Authenticate on the main page
+
+Run the Authenticate procedure above once, using the standard Playwright tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`).
+
+### Step 2: Classify tests
+
+Split the batch into two groups:
+
+- **scripted**: tests where `script` is non-null (cached Playwright scripts ready to run)
+- **unscripted**: tests where `script` is null (need script generation)
+
+If all tests are scripted, skip to Step 4.
+
+### Step 3: Score and generate scripts (easy-first)
+
+For each **unscripted** test, assign a difficulty score based on the instructions:
+
+- **easy** (1): Single-page tests with simple actions — navigate, check text/headings, verify static content, click a link and check the URL. Typically 1-4 steps, no form submissions, no multi-step flows.
+- **medium** (2): Tests involving form input, button clicks that trigger state changes, checking error/success messages, or verifying a redirect after an action. Typically 3-8 steps.
+- **hard** (3): Multi-page flows, tests requiring specific sequences of actions (e.g. add to cart then checkout), tests with complex assertions (table data, dynamic content), or tests involving file uploads, modals, or dialogs.
+
+Sort unscripted tests by difficulty ascending (easy first). This ensures simple tests get scripts generated quickly so native execution can start sooner.
+
+#### Walk-through script generation
+
+For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
+
+1. Navigate to the test's starting page via `browser_navigate`
+2. Take a `browser_snapshot` to see initial elements
+3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
+4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
+5. Collect all observed elements and selectors as context
+
+Then generate a `.spec.ts` script using the observed elements:
+
+```ts
+import { test, expect } from '@playwright/test';
+test('{test_name}', async ({ page }) => {
+  // If the test has a credential_name, include login steps using the matching
+  // credential from project.credentials (email + password) at the login_url
+  await page.goto('{start_url}');
+  // Steps generated from scouting pass observations
+  // Use getByRole, getByText, getByLabel, getByPlaceholder for selectors
+});
+```
+
+Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
+
+**Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for next run.
+
+To launch the background generation agent:
+
+```
+Task tool with:
+- subagent_type: "general-purpose"
+- run_in_background: true
+- max_turns: 50
+- model: "sonnet"
+- prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
+```
+
+The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
+
+### Step 4: Export auth state
+
+If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
+
+```js
+async (page) => {
+  const state = await page.context().storageState();
+  require('fs').writeFileSync('/tmp/greenrun-auth-state.json', JSON.stringify(state));
+  return 'Auth state exported';
+}
+```
+
+Call this via `browser_run_code`. If `auth_mode` is `none`, skip this step.
+
+### Step 5: Write files and run natively
+
+Gather all tests that have scripts (previously scripted + newly generated from Step 3).
+
+1. **Write test files**: For each scripted test, write the script to `/tmp/greenrun-tests/{test_id}.spec.ts`
+
+2. **Write config**: Write `/tmp/greenrun-tests/playwright.config.ts`:
+
+```ts
+import { defineConfig } from '@playwright/test';
+export default defineConfig({
+  testDir: '.',
+  timeout: 30000,
+  workers: 20,
+  reporter: [['json', { outputFile: 'results.json' }]],
+  use: {
+    baseURL: '{base_url}',
+    storageState: '/tmp/greenrun-auth-state.json', // omit 'use.storageState' entirely if auth_mode is 'none'
+  },
+});
+```
+
+Replace `{base_url}` with the project's base_url.
+
+3. **Execute**: Run via Bash:
+```
+npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
+```
+
+4. **Parse results**: Read `/tmp/greenrun-tests/results.json`. Map each result back to a run ID via the filename: `{test_id}.spec.ts` → test_id → find the matching run_id from the batch.
+
+5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
+
+### Step 6: Handle unscripted tests without scripts
+
+Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 8 for these tests, reusing each test's existing `run_id` from the batch (those runs were never completed, so skip the `start_run` step).
+
+### Step 7: Circuit breaker
+
+After parsing all native results, walk through them in completion order. Track consecutive failures:
+
+- If **3 or more consecutive failures** occur:
+  - Mark all remaining un-reported tests as error: "Circuit breaker: N consecutive failures detected"
+  - Skip AI fallback for remaining tests
+- The counter resets on any pass
+
+### Step 8: AI-agent fallback for native failures
+
+For tests that **failed** in native execution (and circuit breaker has not tripped):
+
+1. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
+2. Launch background Task agents using the tab-isolation pattern:
+
+Create tabs and launch agents in batches of 20:
+
+#### Create tab
+```js
+async (page) => {
+  const newPage = await page.context().newPage();
+  await newPage.goto(START_URL);
+  return { index: page.context().pages().length - 1, url: newPage.url() };
+}
+```
+
+#### Launch agent
+```
+Task tool with:
+- subagent_type: "general-purpose"
+- run_in_background: true
+- max_turns: 25
+- model: "sonnet"
+- prompt: (agent prompt below, including the native failure message for diagnosis)
+```
+
+#### Agent prompt
+
+```
+Greenrun browser test (AI fallback). Run ID: {run_id}
+Tab index: {INDEX}
+
+**{test_name}**
+
+{paste the full test instructions here}
+
+**Native execution failed with:** {failure_message}
+
+Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.
+
+## CRITICAL: Tab isolation
+
+You are assigned to tab index {INDEX}. You MUST use ONLY `browser_run_code` for ALL browser interactions. Do NOT use `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate`, or any other Playwright MCP tools. The only non-browser tools you may call are `complete_run` and `update_test` (the latter only in the Finish step).
+
+Every `browser_run_code` call must scope to your tab:
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  // ... your action here ...
+}
+```
+
+## Auth
+No authentication needed — the main page already authenticated and cookies are shared to your tab.
+
+## Interaction patterns
+
+**Navigate:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.goto('https://example.com/path');
+  return p.url();
+}
+```
+
+**Read page state (replaces browser_snapshot):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const url = p.url();
+  const title = await p.title();
+  const text = await p.locator('body').innerText();
+  const headings = await p.getByRole('heading').allTextContents();
+  const buttons = await p.getByRole('button').allTextContents();
+  const links = await p.getByRole('link').allTextContents();
+  const textboxes = await p.getByRole('textbox').evaluateAll(els =>
+    els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
+  );
+  return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
+}
+```
+
+**Click an element:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('button', { name: 'Submit' }).click();
+  return p.url();
+}
+```
+
+**Fill a form field:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
+  return 'filled';
+}
+```
+
+**Handle a dialog:**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  p.once('dialog', d => d.accept());
+  await p.getByRole('button', { name: 'Delete' }).click();
+  return p.url();
+}
+```
+
+**Check for specific text (verification):**
+```js
+async (page) => {
+  const p = page.context().pages()[INDEX];
+  const visible = await p.getByText('Success').isVisible();
+  return { found: visible };
+}
+```
+
+## Rules
+- ONLY use `browser_run_code` — no other browser tools
+- Always scope to `page.context().pages()[INDEX]`
+- Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
+- Read page state to find elements before interacting
+- Navigate with absolute URLs via `p.goto(url)` — never click nav links
+
+## FORBIDDEN — never use these:
+- `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
+- `browser_wait` — NEVER call this
+- `browser_screenshot` — NEVER use
+
+## Error recovery
+- On ANY failure: retry the failing step ONCE, then skip to Finish.
+
+## Finish (MANDATORY — always reach this step)
+1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
+2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
+3. Return: {test_name} | {status} | {summary}
+```
+
+#### Wait and clean up
+
+Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
+
+```js
+async (page) => {
+  const pages = page.context().pages();
+  for (let i = pages.length - 1; i >= 1; i--) {
+    await pages[i].close();
+  }
+  return { remainingPages: page.context().pages().length };
+}
+```
+
+Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
+
+### Step 9: Wait for background generation
+
+If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
+
+## Summarize
+
+Present a summary table with a Mode column showing how each test was executed:
+
+| Test | Pages | Tags | Mode | Status | Result |
+|------|-------|------|------|--------|--------|
+| Test name | /login, /dashboard | smoke, auth | native/agent/skipped | passed/failed/error | Brief summary |
+
+Mode values:
+- **native** — executed via `npx playwright test`
+- **agent** — executed via AI agent (fallback or no script available)
+- **skipped** — circuit breaker tripped, not executed
+
+Total: "X passed, Y failed, Z errors out of N tests"
+
+If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
+
+If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
+
+If any tests failed, highlight what went wrong and suggest next steps.
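
To ground the generation template in Step 3, here is what a generated spec might look like for a hypothetical checkout test whose `credential_name` is "admin" (all selectors, URLs, and credentials invented; real output depends on the scouting pass):

```ts
import { test, expect } from '@playwright/test';

test('Checkout happy path', async ({ page }) => {
  // Login steps embedded because the test references credential "admin"
  await page.goto('https://shop.example.com/login');
  await page.getByLabel('Email').fill('admin@example.com');
  await page.getByLabel('Password').fill('example-admin-pw');
  await page.getByRole('button', { name: 'Sign in' }).click();

  // Steps derived from the scouting pass
  await page.goto('https://shop.example.com/checkout');
  await expect(page.getByRole('heading', { name: 'Checkout' })).toBeVisible();
  await page.getByRole('button', { name: 'Place order' }).click();
  await expect(page.getByText('Order confirmed')).toBeVisible();
});
```

Keeping the login inline is slightly redundant with the `storageState` injected by the Step 5 config, but it leaves each spec self-contained if the exported session expires.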