greenrun-cli 0.2.10 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -314,8 +314,61 @@ function installCommands() {
    console.log(` Installed /${cmd.replace('.md', '')}`);
  }
}
+ function checkDependencies() {
+   console.log('Checking dependencies...');
+   let allGood = true;
+   // Node version
+   if (checkNodeVersion()) {
+     console.log(` [x] Node.js ${process.version}`);
+   }
+   else {
+     console.log(` [ ] Node.js ${process.version} (18+ required)`);
+     allGood = false;
+   }
+   // Claude Code
+   const prereqs = checkPrerequisites();
+   if (prereqs.claude) {
+     console.log(' [x] Claude Code CLI');
+   }
+   else {
+     console.log(' [ ] Claude Code CLI not found');
+     allGood = false;
+   }
+   // @playwright/test
+   try {
+     execSync('npx playwright --version', { stdio: 'pipe' });
+     console.log(' [x] @playwright/test');
+   }
+   catch {
+     console.log(' [ ] @playwright/test not installed');
+     console.log('     Run: npm install -g @playwright/test@latest');
+     allGood = false;
+   }
+   // Browser (Chrome or Chromium)
+   if (detectSystemChrome()) {
+     console.log(' [x] Chrome detected');
+   }
+   else {
+     try {
+       execSync('npx playwright install --dry-run chromium', { stdio: 'pipe' });
+       console.log(' [x] Playwright Chromium');
+     }
+     catch {
+       console.log(' [ ] No browser detected (Chrome or Playwright Chromium)');
+       console.log('     Run: npx playwright install --with-deps chromium');
+       allGood = false;
+     }
+   }
+   if (allGood) {
+     console.log(' All dependencies installed.\n');
+   }
+   else {
+     console.log('\n Some dependencies are missing. Install them and run again.\n');
+   }
+ }
 export function runUpdate() {
   console.log('\nGreenrun - Updating templates\n');
+   checkDependencies();
   installCommands();
   installSettings();
   installClaudeMd();
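The new `checkDependencies()` relies on a `checkNodeVersion()` helper that this hunk does not show. A minimal hypothetical sketch, assuming it only enforces the "18+ required" rule printed above:

```javascript
// Hypothetical sketch: checkNodeVersion() is called by the diff above
// but its implementation is not shown. Assumes it only enforces the
// Node.js 18+ major-version requirement.
function checkNodeVersion(version = process.version) {
  // process.version looks like "v20.11.1"; compare the major component
  const major = parseInt(version.slice(1).split('.')[0], 10);
  return Number.isInteger(major) && major >= 18;
}

console.log(checkNodeVersion('v20.11.1')); // true
console.log(checkNodeVersion('v16.20.0')); // false
```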
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "greenrun-cli",
-   "version": "0.2.10",
+   "version": "0.2.11",
    "description": "CLI and MCP server for Greenrun - browser test management for Claude Code",
    "type": "module",
    "main": "dist/server.js",
@@ -23,9 +23,9 @@ If auth fails (login form still visible after following instructions), report al
 
  ## Execute
 
- You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `instructions`, `credential_name`, `pages`, `tags`, `has_script`).
+ You have a batch result from `prepare_test_batch` containing `project` (with `credentials` array) and `tests[]` (each with `test_id`, `test_name`, `run_id`, `credential_name`, `pages`, `tags`, `has_script`).
 
- Note: `has_script` is a boolean indicating whether a cached Playwright script exists. To fetch the actual script content, call `get_test(test_id)`; only do this when you need the script (e.g. in Step 5 when writing test files).
+ Note: The batch does not include `instructions` or `script` content. Use `get_test(test_id)` to fetch these when needed.
 
  If `tests` is empty, tell the user no matching active tests were found and stop.
 
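For illustration, the batch result described above might look like this; only the field names come from the document, and the example values and value types are assumptions:

```javascript
// Illustrative shape of a prepare_test_batch result. Field names are
// from the document; example values and value types are assumptions.
const batch = {
  project: {
    credentials: [{ name: 'admin' }] // credential entry shape assumed
  },
  tests: [
    {
      test_id: 't_123',
      test_name: 'Login works',
      run_id: 'r_456',
      credential_name: 'admin',
      pages: ['/login'],
      tags: ['auth'],
      has_script: true // whether a cached Playwright script exists
    }
  ]
};

console.log(batch.tests.length); // 1
```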
@@ -42,25 +42,17 @@ Split the batch into two groups:
 
  If all tests are scripted, skip to Step 4.
 
- ### Step 3: Score and generate scripts (easy-first)
+ ### Step 3: Generate scripts for unscripted tests
 
- For each **unscripted** test, assign a difficulty score based on the instructions:
+ For each **unscripted** test, one at a time:
 
- - **easy** (1): Single-page tests with simple actions — navigate, check text/headings, verify static content, click a link and check the URL. Typically 1-4 steps, no form submissions, no multi-step flows.
- - **medium** (2): Tests involving form input, button clicks that trigger state changes, checking error/success messages, or verifying a redirect after an action. Typically 3-8 steps.
- - **hard** (3): Multi-page flows, tests requiring specific sequences of actions (e.g. add to cart then checkout), tests with complex assertions (table data, dynamic content), or tests involving file uploads, modals, or dialogs.
-
- Sort unscripted tests by difficulty ascending (easy first). This ensures simple tests get scripts generated quickly so native execution can start sooner.
-
- #### Walk-through script generation
-
- For each unscripted test (in difficulty order), do a **scouting pass** — actually follow the test instructions in the browser to observe all UI states:
-
- 1. Navigate to the test's starting page via `browser_navigate`
- 2. Take a `browser_snapshot` to see initial elements
- 3. Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
- 4. Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
- 5. Collect all observed elements and selectors as context
+ 1. Call `get_test(test_id)` to fetch the full instructions
+ 2. Do a **scouting pass**: follow the test instructions in the browser to observe all UI states:
+    - Navigate to the test's starting page via `browser_navigate`
+    - Take a `browser_snapshot` to see initial elements
+    - Follow the test instructions step by step using Playwright MCP tools (`browser_click`, `browser_type`, `browser_snapshot` after each action)
+    - Snapshot after each state change to capture: validation errors, success banners, modal dialogs, redirected pages, dynamically loaded content
+    - Collect all observed elements and selectors as context
 
  #### Handling failures during scouting
 
@@ -113,21 +105,6 @@ test('{test_name}', async ({ page }) => {
 
  Save via `update_test(test_id, { script: <generated_script>, script_generated_at: <ISO_now> })`.
 
- **Pipeline optimisation**: After finishing all **easy** tests, if there are medium/hard tests remaining, proceed to Step 4 immediately with whatever scripts are ready (scripted + newly generated easy tests). Continue generating medium/hard scripts in parallel by launching a background Task agent for the remaining generation work. When those scripts are ready, they'll be saved to the API for next run.
-
- To launch the background generation agent:
-
- ```
- Task tool with:
- - subagent_type: "general-purpose"
- - run_in_background: true
- - max_turns: 50
- - model: "sonnet"
- - prompt: (include project details, remaining unscripted tests with instructions, and the scouting+generation procedure above)
- ```
-
- The background agent should: for each remaining test, do the scouting pass, generate the script, and call `update_test` to save it. It does NOT need to call `complete_run` — that happens in the native execution step.
-
  ### Step 4: Export auth state
 
  If `auth_mode` is not `none`, export the browser session so native Playwright inherits it:
@@ -175,17 +152,9 @@ npx playwright test --config /tmp/greenrun-tests/playwright.config.ts
 
  5. **Report results**: Call `complete_run(run_id, status, result_summary)` for each test. Map Playwright statuses: `passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`.
 
- 6. **Clean up browsers**: After native execution completes, close any browsers left behind by the test runner:
- ```bash
- npx playwright test --config /tmp/greenrun-tests/playwright.config.ts --list 2>/dev/null; true
- ```
- The Playwright Test runner normally cleans up after itself, but if tests crash or timeout, browser processes may linger. Also call `browser_close` to reset the MCP browser context before any subsequent AI fallback execution.
-
- ### Step 6: Handle unscripted tests without scripts
-
- Any tests that still don't have scripts (e.g. because the background agent hasn't finished, or script generation failed) need to be executed via AI agents using the legacy approach. Follow Step 7 for these tests.
+ 6. **Clean up**: Call `browser_close` to reset the MCP browser context.
 
- ### Step 7: Circuit breaker
+ ### Step 6: Circuit breaker
 
  After parsing all native results, walk through them in completion order. Track consecutive failures:
 
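A minimal sketch of the Step 5 status mapping (`passed` → `passed`, `failed`/`timedOut` → `failed`, other → `error`); this is an illustration, not code from the package:

```javascript
// Minimal sketch of the Step 5 status mapping described above:
// Playwright's reported status -> Greenrun run status.
// Illustration only; not code shipped in greenrun-cli.
function mapStatus(playwrightStatus) {
  if (playwrightStatus === 'passed') return 'passed';
  if (playwrightStatus === 'failed' || playwrightStatus === 'timedOut') return 'failed';
  return 'error'; // interrupted, skipped, etc.
}

console.log(mapStatus('timedOut')); // failed
```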
@@ -194,170 +163,25 @@ After parsing all native results, walk through them in completion order. Track c
  - Skip AI fallback for remaining tests
  - The counter resets on any pass
 
- ### Step 8: AI-agent fallback for native failures
+ ### Step 7: AI fallback for native failures
 
- For tests that **failed** in native execution (and circuit breaker has not tripped):
+ For tests that **failed** in native execution (and circuit breaker has not tripped), execute them one at a time using the AI agent approach:
 
  1. Close the current browser context with `browser_close` so the fallback starts fresh
  2. Re-authenticate by navigating to the login page and following the Authenticate procedure
- 3. Start new runs via `start_run(test_id)` (the original runs were already completed in Step 5)
- 4. Launch background Task agents using the tab-isolation pattern:
-
- Create tabs and launch agents in batches of 20:
-
- #### Create tab
- ```js
- async (page) => {
-   const newPage = await page.context().newPage();
-   await newPage.goto(START_URL);
-   return { index: page.context().pages().length - 1, url: newPage.url() };
- }
- ```
-
- #### Launch agent
- ```
- Task tool with:
- - subagent_type: "general-purpose"
- - run_in_background: true
- - max_turns: 25
- - model: "sonnet"
- - prompt: (agent prompt below, including the native failure message for diagnosis)
- ```
-
- #### Agent prompt
-
- ```
- Greenrun browser test (AI fallback). Run ID: {run_id}
- Tab index: {INDEX}
-
- **{test_name}**
-
- {paste the full test instructions here}
-
- **Native execution failed with:** {failure_message}
-
- Determine if this is a stale script (UI changed) or an actual bug. If the script is stale, the test may still pass when executed manually.
-
- ## CRITICAL: Tab isolation
+ 3. For each failed test:
+    - Call `get_test(test_id)` to fetch the full instructions
+    - Start a new run via `start_run(test_id)` (the original run was already completed in Step 5)
+    - Navigate to the test's starting page via `browser_navigate`
+    - Follow the test instructions step by step using Playwright MCP tools
+    - Determine if this is a stale script (UI changed) or an actual bug
+    - If the test passes manually, invalidate the cached script: `update_test(test_id, { script: null, script_generated_at: null })`
+    - Call `complete_run(run_id, status, brief_summary)`
+    - Call `browser_close` before the next test to reset state
 
- You are assigned to tab index {INDEX}. You MUST use ONLY `browser_run_code` for ALL browser interactions. Do NOT use `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate`, or any other Playwright MCP tools. The only non-browser tool you may call is `complete_run`.
+ ### Step 8: Handle unscripted tests without scripts
 
- Every `browser_run_code` call must scope to your tab:
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   // ... your action here ...
- }
- ```
-
- ## Auth
- No authentication needed — the main page already authenticated and cookies are shared to your tab.
-
- ## Interaction patterns
-
- **Navigate:**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   await p.goto('https://example.com/path');
-   return p.url();
- }
- ```
-
- **Read page state (replaces browser_snapshot):**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   const url = p.url();
-   const title = await p.title();
-   const text = await p.locator('body').innerText();
-   const headings = await p.getByRole('heading').allTextContents();
-   const buttons = await p.getByRole('button').allTextContents();
-   const links = await p.getByRole('link').allTextContents();
-   const textboxes = await p.getByRole('textbox').evaluateAll(els =>
-     els.map(e => ({ name: e.getAttribute('name') || e.getAttribute('aria-label') || e.placeholder, value: e.value }))
-   );
-   return { url, title, headings, buttons, links, textboxes, text: text.substring(0, 2000) };
- }
- ```
-
- **Click an element:**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   await p.getByRole('button', { name: 'Submit' }).click();
-   return p.url();
- }
- ```
-
- **Fill a form field:**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   await p.getByRole('textbox', { name: 'Email' }).fill('test@example.com');
-   return 'filled';
- }
- ```
-
- **Handle a dialog:**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   p.once('dialog', d => d.accept());
-   await p.getByRole('button', { name: 'Delete' }).click();
-   return p.url();
- }
- ```
-
- **Check for specific text (verification):**
- ```js
- async (page) => {
-   const p = page.context().pages()[INDEX];
-   const visible = await p.getByText('Success').isVisible();
-   return { found: visible };
- }
- ```
-
- ## Rules
- - ONLY use `browser_run_code` — no other browser tools
- - Always scope to `page.context().pages()[INDEX]`
- - Use Playwright locators: `getByRole`, `getByText`, `getByLabel`, `getByPlaceholder`, `locator`
- - Read page state to find elements before interacting
- - Navigate with absolute URLs via `p.goto(url)` — never click nav links
-
- ## FORBIDDEN — never use these:
- - `browser_snapshot`, `browser_click`, `browser_type`, `browser_navigate` — these operate on the MAIN page and will interfere with other tests
- - `browser_wait` — NEVER call this
- - `browser_screenshot` — NEVER use
-
- ## Error recovery
- - On ANY failure: retry the failing step ONCE, then skip to Finish.
-
- ## Finish (MANDATORY — always reach this step)
- 1. If the test passes on manual execution, call `update_test(test_id, { script: null, script_generated_at: null })` to invalidate the stale cached script.
- 2. `complete_run(run_id, status, brief_summary)` — ALWAYS call this, even on error.
- 3. Return: {test_name} | {status} | {summary}
- ```
-
- #### Wait and clean up
-
- Wait for all agents to complete via `TaskOutput`. Then close extra tabs (newest first):
-
- ```js
- async (page) => {
-   const pages = page.context().pages();
-   for (let i = pages.length - 1; i >= 1; i--) {
-     await pages[i].close();
-   }
-   return { remainingPages: page.context().pages().length };
- }
- ```
-
- Check for orphaned runs (agents that crashed without calling `complete_run`). For any orphaned run IDs, call `complete_run(run_id, "error", "Agent crashed or timed out")`.
-
- ### Step 9: Wait for background generation
-
- If a background generation agent was launched in Step 3, check if it has completed via `TaskOutput` with `block: false`. If still running, note this in the summary. The generated scripts will be available on the next run.
+ Any tests that didn't get scripts generated in Step 3 (e.g. if script generation failed) need to be executed the same way as Step 7 — one at a time using the AI agent approach. Follow the same pattern: get instructions, start run, execute in browser, complete run, close browser.
 
  ## Summarize
 
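The circuit-breaker walk described in Step 6 above can be sketched as follows; the trip threshold is an assumption, since the diff does not state the value the package uses:

```javascript
// Sketch of the Step 6 circuit-breaker walk: iterate results in
// completion order, count consecutive failures, reset on any pass,
// and trip (skip AI fallback) at a threshold. TRIP_THRESHOLD is an
// assumption -- the real value is not shown in this diff.
const TRIP_THRESHOLD = 3;

function applyCircuitBreaker(resultsInCompletionOrder) {
  let consecutive = 0;
  for (const result of resultsInCompletionOrder) {
    if (result.status === 'passed') {
      consecutive = 0; // the counter resets on any pass
    } else {
      consecutive += 1;
      if (consecutive >= TRIP_THRESHOLD) {
        return { tripped: true, consecutiveFailures: consecutive };
      }
    }
  }
  return { tripped: false, consecutiveFailures: consecutive };
}
```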
@@ -376,6 +200,4 @@ Total: "X passed, Y failed, Z errors out of N tests"
 
  If the circuit breaker tripped, note: "Circuit breaker tripped after N consecutive failures. M tests skipped."
 
- If background script generation is still running, note: "Script generation in progress for N tests. Scripts will be cached for next run."
-
  If any tests failed, highlight what went wrong and suggest next steps.
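For illustration, the total line quoted in the hunk header above could be assembled with a small helper like this; the result objects' field names are assumptions:

```javascript
// Hypothetical helper for the summary line
// "X passed, Y failed, Z errors out of N tests".
// The `status` field name is an assumption, not part of the package.
function summarize(results) {
  const count = (s) => results.filter((r) => r.status === s).length;
  return `${count('passed')} passed, ${count('failed')} failed, ` +
    `${count('error')} errors out of ${results.length} tests`;
}

console.log(summarize([
  { status: 'passed' },
  { status: 'failed' },
  { status: 'passed' },
]));
// 2 passed, 1 failed, 0 errors out of 3 tests
```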