greenrun-cli 0.2.10 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/init.js +53 -0
- package/package.json +1 -1
- package/templates/commands/procedures.md +66 -222
package/dist/commands/init.js
CHANGED
|
@@ -314,8 +314,61 @@ function installCommands() {
|
|
|
314
314
|
console.log(` Installed /${cmd.replace('.md', '')}`);
|
|
315
315
|
}
|
|
316
316
|
}
|
|
317
|
+
function checkDependencies() {
|
|
318
|
+
console.log('Checking dependencies...');
|
|
319
|
+
let allGood = true;
|
|
320
|
+
// Node version
|
|
321
|
+
if (checkNodeVersion()) {
|
|
322
|
+
console.log(` [x] Node.js ${process.version}`);
|
|
323
|
+
}
|
|
324
|
+
else {
|
|
325
|
+
console.log(` [ ] Node.js ${process.version} (18+ required)`);
|
|
326
|
+
allGood = false;
|
|
327
|
+
}
|
|
328
|
+
// Claude Code
|
|
329
|
+
const prereqs = checkPrerequisites();
|
|
330
|
+
if (prereqs.claude) {
|
|
331
|
+
console.log(' [x] Claude Code CLI');
|
|
332
|
+
}
|
|
333
|
+
else {
|
|
334
|
+
console.log(' [ ] Claude Code CLI not found');
|
|
335
|
+
allGood = false;
|
|
336
|
+
}
|
|
337
|
+
// @playwright/test
|
|
338
|
+
try {
|
|
339
|
+
execSync('npx playwright --version', { stdio: 'pipe' });
|
|
340
|
+
console.log(' [x] @playwright/test');
|
|
341
|
+
}
|
|
342
|
+
catch {
|
|
343
|
+
console.log(' [ ] @playwright/test not installed');
|
|
344
|
+
console.log(' Run: npm install -g @playwright/test@latest');
|
|
345
|
+
allGood = false;
|
|
346
|
+
}
|
|
347
|
+
// Browser (Chrome or Chromium)
|
|
348
|
+
if (detectSystemChrome()) {
|
|
349
|
+
console.log(' [x] Chrome detected');
|
|
350
|
+
}
|
|
351
|
+
else {
|
|
352
|
+
try {
|
|
353
|
+
execSync('npx playwright install --dry-run chromium', { stdio: 'pipe' });
|
|
354
|
+
console.log(' [x] Playwright Chromium');
|
|
355
|
+
}
|
|
356
|
+
catch {
|
|
357
|
+
console.log(' [ ] No browser detected (Chrome or Playwright Chromium)');
|
|
358
|
+
console.log(' Run: npx playwright install --with-deps chromium');
|
|
359
|
+
allGood = false;
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
if (allGood) {
|
|
363
|
+
console.log(' All dependencies installed.\n');
|
|
364
|
+
}
|
|
365
|
+
else {
|
|
366
|
+
console.log('\n Some dependencies are missing. Install them and run again.\n');
|
|
367
|
+
}
|
|
368
|
+
}
|
|
317
369
|
export function runUpdate() {
|
|
318
370
|
console.log('\nGreenrun - Updating templates\n');
|
|
371
|
+
checkDependencies();
|
|
319
372
|
installCommands();
|
|
320
373
|
installSettings();
|
|
321
374
|
installClaudeMd();
|
package/package.json
CHANGED
(+1 -1; hunk not captured in this extract)
package/templates/commands/procedures.md
CHANGED
|
@@ -23,9 +23,9 @@ If auth fails (login form still visible after following instructions), report al
|
|
|
23
23
|
|
|
24
24
|
## Execute
|
|
25
25
|
|
|
26
|
-
You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `
|
|
26
|
+
You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `credential_name`, `pages`, `tags`, `has_script`).
|
|
27
27
|
|
|
28
|
-
Note:
|
|
28
|
+
Note: The batch does not include `instructions` or `script` content. Use `get_test(test_id)` to fetch these when needed.
|
|
29
29
|
|
|
30
30
|
If `tests` is empty, tell the user no matching active tests were found and stop.
|
|
31
31
|
|
|
@@ -42,91 +42,63 @@ Split the batch into two groups:
|
|
|
42
42
|
|
|
43
43
|
If all tests are scripted, skip to Step 4.
|
|
44
44
|
|
|
45
|
-
### Step 3:
|
|
45
|
+
### Step 3: Generate scripts for unscripted tests
|
|
46
46
|
|
|
47
|
-
For each **unscripted** test,
|
|
47
|
+
For each **unscripted** test, launch a Task agent sequentially (one at a time, wait for each to complete before starting the next). This keeps browser snapshot data out of the parent context.
|
|
48
48
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
|
|
58
|
-
|
|
59
|
-
1. Navigate to the test's starting page via `browser_navigate`
|
|
60
|
-
2. Take a `browser_snapshot` to see initial elements
|
|
61
|
-
3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
|
|
62
|
-
4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
|
|
63
|
-
5. Collect all observed elements and selectors as context
|
|
64
|
-
|
|
65
|
-
#### Handling failures during scouting
|
|
66
|
-
|
|
67
|
-
If a step doesn't work as expected during the scouting pass, investigate before moving on:
|
|
68
|
-
|
|
69
|
-
1. **Determine the cause**: Is it a test problem (wrong instructions, bad selectors, missing prerequisite) or an application bug (form won't submit, unexpected error, broken functionality)?
|
|
49
|
+
```
|
|
50
|
+
Task tool with:
|
|
51
|
+
- subagent_type: "general-purpose"
|
|
52
|
+
- max_turns: 30
|
|
53
|
+
- model: "sonnet"
|
|
54
|
+
- prompt: (see agent prompt below)
|
|
55
|
+
```
|
|
70
56
|
|
|
71
|
-
|
|
72
|
-
- Adjust the instructions to match what the UI actually requires (e.g. a required field the instructions missed, a different button label, an extra confirmation step)
|
|
73
|
-
- Update the test via `update_test` with corrected instructions
|
|
74
|
-
- Retry the failing step
|
|
57
|
+
#### Script generation agent prompt
|
|
75
58
|
|
|
76
|
-
|
|
77
|
-
- Find a way to make the original test pass by avoiding the broken path (e.g. if a discount code field breaks form submission, leave it blank)
|
|
78
|
-
- Update the original test instructions if needed to use the working path
|
|
79
|
-
- Create a **new bug test** that reproduces the specific failure:
|
|
80
|
-
```
|
|
81
|
-
create_test(project_id, {
|
|
82
|
-
name: "BUG: [description of the failure]",
|
|
83
|
-
instructions: "[steps that reproduce the bug, ending with the expected vs actual behaviour]",
|
|
84
|
-
tags: ["bug"],
|
|
85
|
-
page_ids: [relevant page IDs],
|
|
86
|
-
credential_name: same as original test
|
|
87
|
-
})
|
|
88
|
-
```
|
|
89
|
-
- Start a run for the bug test and immediately complete it as failed:
|
|
90
|
-
```
|
|
91
|
-
start_run(bug_test_id) → complete_run(run_id, "failed", "description of what went wrong")
|
|
92
|
-
```
|
|
93
|
-
- Continue scouting the original test with the workaround
|
|
59
|
+
Include the following in the prompt, substituting the actual values:
|
|
94
60
|
|
|
95
|
-
|
|
61
|
+
```
|
|
62
|
+
Greenrun script generation for test: {test_name}
|
|
63
|
+
Test ID: {test_id}
|
|
64
|
+
Project ID: {project_id}
|
|
96
65
|
|
|
97
|
-
|
|
66
|
+
Project auth: {auth_mode}, login_url: {login_url}
|
|
67
|
+
Credentials: {credential_name} — email: {email}, password: {password}
|
|
98
68
|
|
|
99
|
-
|
|
69
|
+
## Task
|
|
100
70
|
|
|
101
|
-
|
|
71
|
+
1. Call `get_test("{test_id}")` to fetch the full test instructions
|
|
72
|
+
2. Authenticate: navigate to {login_url} and log in with the credential above using `browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`
|
|
73
|
+
3. Do a scouting pass — follow the test instructions step by step in the browser:
|
|
74
|
+
- Navigate to the test's starting page via `browser_navigate`
|
|
75
|
+
- Take a `browser_snapshot` to see initial elements
|
|
76
|
+
- Follow each instruction using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
|
|
77
|
+
- Snapshot after each state change to capture selectors, validation errors, success banners, modal dialogs, redirected pages
|
|
78
|
+
4. Handle failures:
|
|
79
|
+
- If a step fails because the test instructions are wrong (wrong field name, missing step, bad selector), fix the instructions and retry. Update the test via `update_test` with corrected instructions.
|
|
80
|
+
- If a step fails because of an application bug, work around it for the main test and create a new bug test:
|
|
81
|
+
`create_test({project_id}, { name: "BUG: [description]", instructions: "[repro steps]", tags: ["bug"], page_ids: [...], credential_name: "{credential_name}" })`
|
|
82
|
+
Then: `start_run(bug_test_id)` → `complete_run(run_id, "failed", "description")`
|
|
83
|
+
5. After scouting, generate a Playwright `.spec.ts` script:
|
|
102
84
|
|
|
103
|
-
```ts
|
|
104
85
|
import { test, expect } from '@playwright/test';
|
|
105
86
|
test('{test_name}', async ({ page }) => {
|
|
106
|
-
//
|
|
107
|
-
// credential from project.credentials (email + password) at the login_url
|
|
87
|
+
// Include login steps using the credential email + password at login_url
|
|
108
88
|
await page.goto('{start_url}');
|
|
109
|
-
// Steps
|
|
89
|
+
// Steps from scouting observations
|
|
110
90
|
// Use getByRole, getByText, getByLabel, getByPlaceholder for selectors
|
|
111
91
|
});
|
|
112
|
-
```
|
|
113
92
|
|
|
114
|
-
Save
|
|
93
|
+
6. Save: `update_test("{test_id}", { script: <generated_script>, script_generated_at: "<ISO_now>" })`
|
|
94
|
+
7. Close browser: `browser_close`
|
|
115
95
|
|
|
116
|
-
|
|
96
|
+
## Return
|
|
117
97
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
```
|
|
121
|
-
Task tool with:
|
|
122
|
-
- subagent_type: "general-purpose"
|
|
123
|
-
- run_in_background: true
|
|
124
|
-
- max_turns: 50
|
|
125
|
-
- model: "sonnet"
|
|
126
|
-
- prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
|
|
98
|
+
Return a one-line summary, either `{test_name} | script generated` or `{test_name} | failed | {reason}`
|
|
127
99
|
```
|
|
128
100
|
|
|
129
|
-
|
|
101
|
+
After each agent completes, note the result and proceed to the next unscripted test.
|
|
130
102
|
|
|
131
103
|
### Step 4: Export auth state
|
|
132
104
|
|
|
@@ -175,17 +147,9 @@ npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
|
|
|
175
147
|
|
|
176
148
|
5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
|
|
177
149
|
|
|
178
|
-
6. **Clean up
|
|
179
|
-
```bash
|
|
180
|
-
npx playwright test --config /tmp/greenrun-tests/playwright.config.ts --list 2>/dev/null; true
|
|
181
|
-
```
|
|
182
|
-
The Playwright Test runner normally cleans up after itself, but if tests crash or timeout, browser processes may linger. Also call `browser_close` to reset the MCP browser context before any subsequent AI fallback execution.
|
|
183
|
-
|
|
184
|
-
### Step 6: Handle unscripted tests without scripts
|
|
150
|
+
6. **Clean up**: Call `browser_close` to reset the MCP browser context.
|
|
185
151
|
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
### Step 7: Circuit breaker
|
|
152
|
+
### Step 6: Circuit breaker
|
|
189
153
|
|
|
190
154
|
After parsing all native results, walk through them in completion order. Track consecutive failures:
|
|
191
155
|
|
|
@@ -194,170 +158,52 @@ After parsing all native results, walk through them in completion order. Track c
|
|
|
194
158
|
- Skip AI fallback for remaining tests
|
|
195
159
|
- The counter resets on any pass
|
|
196
160
|
|
|
197
|
-
### Step
|
|
198
|
-
|
|
199
|
-
For tests that **failed** in native execution (and circuit breaker has not tripped):
|
|
161
|
+
### Step 7: AI fallback for native failures
|
|
200
162
|
|
|
201
|
-
|
|
202
|
-
2. Re-authenticate by navigating to the login page and following the Authenticate procedure
|
|
203
|
-
3. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
|
|
204
|
-
4. Launch background Task agents using the tab-isolation pattern:
|
|
163
|
+
For tests that **failed** in native execution (and circuit breaker has not tripped), execute them one at a time via Task agents. This keeps snapshot data out of the parent context.
|
|
205
164
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
#### Create tab
|
|
209
|
-
```js
|
|
210
|
-
async (page) => {
|
|
211
|
-
const newPage = await page.context().newPage();
|
|
212
|
-
await newPage.goto(START_URL);
|
|
213
|
-
return { index: page.context().pages().length - 1, url: newPage.url() };
|
|
214
|
-
}
|
|
215
|
-
```
|
|
165
|
+
For each failed test, launch a Task agent sequentially (wait for each to complete before the next):
|
|
216
166
|
|
|
217
|
-
#### Launch agent
|
|
218
167
|
```
|
|
219
168
|
Task tool with:
|
|
220
169
|
- subagent_type: "general-purpose"
|
|
221
|
-
- run_in_background: true
|
|
222
170
|
- max_turns: 25
|
|
223
171
|
- model: "sonnet"
|
|
224
|
-
- prompt: (agent prompt below
|
|
172
|
+
- prompt: (see agent prompt below)
|
|
225
173
|
```
|
|
226
174
|
|
|
227
|
-
####
|
|
175
|
+
#### AI fallback agent prompt
|
|
228
176
|
|
|
229
177
|
```
|
|
230
|
-
Greenrun
|
|
231
|
-
|
|
178
|
+
Greenrun AI fallback test. Test: {test_name}
|
|
179
|
+
Test ID: {test_id}
|
|
232
180
|
|
|
233
|
-
|
|
181
|
+
Project auth: {auth_mode}, login_url: {login_url}
|
|
182
|
+
Credentials: {credential_name} — email: {email}, password: {password}
|
|
234
183
|
|
|
235
|
-
|
|
184
|
+
Native execution failed with: {failure_message}
|
|
236
185
|
|
|
237
|
-
|
|
186
|
+
## Task
|
|
238
187
|
|
|
239
|
-
|
|
188
|
+
1. Call `get_test("{test_id}")` to fetch the full test instructions
|
|
189
|
+
2. Start a new run: `start_run("{test_id}")` — note the run_id
|
|
190
|
+
3. Authenticate: navigate to {login_url} and log in with the credential above
|
|
191
|
+
4. Follow the test instructions step by step using Playwright MCP tools (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`)
|
|
192
|
+
5. Determine if the native failure was a stale script (UI changed) or an actual application bug
|
|
193
|
+
6. If the test passes manually, invalidate the stale cached script: `update_test("{test_id}", { script: null, script_generated_at: null })`
|
|
194
|
+
7. Call `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error
|
|
195
|
+
8. Call `browser_close`
|
|
240
196
|
|
|
241
|
-
##
|
|
197
|
+
## Return
|
|
242
198
|
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
Every `browser_run_code` call must scope to your tab:
|
|
246
|
-
```js
|
|
247
|
-
async (page) => {
|
|
248
|
-
const p = page.context().pages()[INDEX];
|
|
249
|
-
// ... your action here ...
|
|
250
|
-
}
|
|
199
|
+
Return: {test_name} | {status} | {summary}
|
|
251
200
|
```
|
|
252
201
|
|
|
253
|
-
|
|
254
|
-
No authentication needed — the main page already authenticated and cookies are shared to your tab.
|
|
202
|
+
After each agent completes, note the result. If the agent fails to call `complete_run`, call it yourself with status "error".
|
|
255
203
|
|
|
256
|
-
|
|
204
|
+
### Step 8: Handle tests that still have no script
|
|
257
205
|
|
|
258
|
-
|
|
259
|
-
```js
|
|
260
|
-
async (page) => {
|
|
261
|
-
const p = page.context().pages()[INDEX];
|
|
262
|
-
await p.goto('https://example.com/path');
|
|
263
|
-
return p.url();
|
|
264
|
-
}
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
**Read page state (replaces browser_snapshot):**
|
|
268
|
-
```js
|
|
269
|
-
async (page) => {
|
|
270
|
-
const p = page.context().pages()[INDEX];
|
|
271
|
-
const url = p.url();
|
|
272
|
-
const title = await p.title();
|
|
273
|
-
const text = await p.locator('body').innerText();
|
|
274
|
-
const headings = await p.getByRole('heading').allTextContents();
|
|
275
|
-
const buttons = await p.getByRole('button').allTextContents();
|
|
276
|
-
const links = await p.getByRole('link').allTextContents();
|
|
277
|
-
const textboxes = await p.getByRole('textbox').evaluateAll(els =>
|
|
278
|
-
els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
|
|
279
|
-
);
|
|
280
|
-
return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
|
|
281
|
-
}
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
**Click an element:**
|
|
285
|
-
```js
|
|
286
|
-
async (page) => {
|
|
287
|
-
const p = page.context().pages()[INDEX];
|
|
288
|
-
await p.getByRole('button', { name: 'Submit' }).click();
|
|
289
|
-
return p.url();
|
|
290
|
-
}
|
|
291
|
-
```
|
|
292
|
-
|
|
293
|
-
**Fill a form field:**
|
|
294
|
-
```js
|
|
295
|
-
async (page) => {
|
|
296
|
-
const p = page.context().pages()[INDEX];
|
|
297
|
-
await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
|
|
298
|
-
return 'filled';
|
|
299
|
-
}
|
|
300
|
-
```
|
|
301
|
-
|
|
302
|
-
**Handle a dialog:**
|
|
303
|
-
```js
|
|
304
|
-
async (page) => {
|
|
305
|
-
const p = page.context().pages()[INDEX];
|
|
306
|
-
p.once('dialog', d => d.accept());
|
|
307
|
-
await p.getByRole('button', { name: 'Delete' }).click();
|
|
308
|
-
return p.url();
|
|
309
|
-
}
|
|
310
|
-
```
|
|
311
|
-
|
|
312
|
-
**Check for specific text (verification):**
|
|
313
|
-
```js
|
|
314
|
-
async (page) => {
|
|
315
|
-
const p = page.context().pages()[INDEX];
|
|
316
|
-
const visible = await p.getByText('Success').isVisible();
|
|
317
|
-
return { found: visible };
|
|
318
|
-
}
|
|
319
|
-
```
|
|
320
|
-
|
|
321
|
-
## Rules
|
|
322
|
-
- ONLY use `browser_run_code` — no other browser tools
|
|
323
|
-
- Always scope to `page.context().pages()[INDEX]`
|
|
324
|
-
- Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
|
|
325
|
-
- Read page state to find elements before interacting
|
|
326
|
-
- Navigate with absolute URLs via `p.goto(url)` — never click nav links
|
|
327
|
-
|
|
328
|
-
## FORBIDDEN — never use these:
|
|
329
|
-
- `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
|
|
330
|
-
- `browser_wait` — NEVER call this
|
|
331
|
-
- `browser_screenshot` — NEVER use
|
|
332
|
-
|
|
333
|
-
## Error recovery
|
|
334
|
-
- On ANY failure: retry the failing step ONCE, then skip to Finish.
|
|
335
|
-
|
|
336
|
-
## Finish (MANDATORY — always reach this step)
|
|
337
|
-
1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
|
|
338
|
-
2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
|
|
339
|
-
3. Return: {test_name} | {status} | {summary}
|
|
340
|
-
```
|
|
341
|
-
|
|
342
|
-
#### Wait and clean up
|
|
343
|
-
|
|
344
|
-
Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
|
|
345
|
-
|
|
346
|
-
```js
|
|
347
|
-
async (page) => {
|
|
348
|
-
const pages = page.context().pages();
|
|
349
|
-
for (let i = pages.length - 1; i >= 1; i--) {
|
|
350
|
-
await pages[i].close();
|
|
351
|
-
}
|
|
352
|
-
return { remainingPages: page.context().pages().length };
|
|
353
|
-
}
|
|
354
|
-
```
|
|
355
|
-
|
|
356
|
-
Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
|
|
357
|
-
|
|
358
|
-
### Step 9: Wait for background generation
|
|
359
|
-
|
|
360
|
-
If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
|
|
206
|
+
Any tests that didn't get scripts generated in Step 3 (e.g. if script generation failed) need to be executed the same way as Step 7 — launch a Task agent for each one sequentially using the AI fallback agent prompt above (omit the "Native execution failed with" line).
|
|
361
207
|
|
|
362
208
|
## Summarize
|
|
363
209
|
|
|
@@ -376,6 +222,4 @@ Total: "X passed, Y failed, Z errors out of N tests"
|
|
|
376
222
|
|
|
377
223
|
If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
|
|
378
224
|
|
|
379
|
-
If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
|
|
380
|
-
|
|
381
225
|
If any tests failed, highlight what went wrong and suggest next steps.
|