npm - @matware/e2e-runner - Version diff - 1.0.3 → 1.1.1 - Mend

@matware/e2e-runner 1.0.3 → 1.1.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (18)

package/README.md +150 -8
package/bin/cli.js +242 -2
package/package.json +6 -3
package/src/actions.js +28 -4
package/src/ai-generate.js +216 -0
package/src/config.js +44 -0
package/src/dashboard.js +559 -0
package/src/db.js +387 -0
package/src/index.js +5 -1
package/src/issues.js +152 -0
package/src/mcp-server.js +8 -337
package/src/mcp-tools.js +656 -0
package/src/reporter.js +85 -2
package/src/runner.js +119 -9
package/src/verify.js +65 -0
package/src/websocket.js +177 -0
package/templates/dashboard.html +1281 -0
package/templates/e2e.config.js +3 -0

package/README.md CHANGED

@@ -37,18 +37,38 @@ JSON-driven E2E test runner. Define browser tests as simple JSON action arrays,
 ## Quick Start
+**One-liner** (requires Node.js >= 20 and Docker):
 ```bash
-# Install
-npm install @matware/e2e-runner
+curl -fsSL https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/scripts/quickstart.sh | bash
+```
+This checks prerequisites, installs the package, scaffolds the project, starts the Chrome pool, and runs the sample tests.
-# Scaffold project structure
+**Step by step:**
+```bash
+# 1. Install
+npm install --save-dev @matware/e2e-runner
+# 2. Scaffold project structure
 npx e2e-runner init
-# Start Chrome pool (requires Docker)
+# 3. Start Chrome pool (requires Docker)
 npx e2e-runner pool start
-# Run all tests
+# 4. Run all tests
 npx e2e-runner run --all
+# 5. Open the dashboard
+npx e2e-runner dashboard
+```
+**Add to Claude Code** (once, available in all projects):
+```bash
+claude mcp add --transport stdio --scope user e2e-runner \
+  -- npx -y -p @matware/e2e-runner e2e-runner-mcp
 ```
 The `init` command creates:
@@ -128,6 +148,14 @@ npx e2e-runner pool start                 # Start Chrome container
 npx e2e-runner pool stop                  # Stop Chrome container
 npx e2e-runner pool status                # Check pool health
+# Issue-to-test
+npx e2e-runner issue <url>                # Fetch issue details
+npx e2e-runner issue <url> --generate     # Generate test file via AI
+npx e2e-runner issue <url> --verify       # Generate + run + report
+# Dashboard
+npx e2e-runner dashboard                  # Start web dashboard
 # Other
 npx e2e-runner list                       # List available suites
 npx e2e-runner init                       # Scaffold project
@@ -150,6 +178,7 @@ npx e2e-runner init                       # Scaffold project
 | `--env <name>` | `default` | Environment profile |
 | `--pool-port <port>` | `3333` | Chrome pool port |
 | `--max-sessions <n>` | `10` | Max concurrent Chrome sessions |
+| `--project-name <name>` | dir name | Project display name for dashboard |
 ## Configuration
@@ -201,6 +230,9 @@ When `--env <name>` is set, the matching profile from `environments` overrides e
 | `TEST_TIMEOUT` | `testTimeout` |
 | `OUTPUT_FORMAT` | `outputFormat` |
 | `E2E_ENV` | `env` |
+| `PROJECT_NAME` | `projectName` |
+| `ANTHROPIC_API_KEY` | `anthropicApiKey` |
+| `ANTHROPIC_MODEL` | `anthropicModel` |
 ## Hooks
@@ -342,12 +374,14 @@ claude mcp add --transport stdio --scope user e2e-runner \
 | `e2e_list` | List available test suites with test names and counts |
 | `e2e_create_test` | Create a new test JSON file |
 | `e2e_pool_status` | Check Chrome pool availability and capacity |
-| `e2e_pool_start` | Start the Chrome pool Docker container |
-| `e2e_pool_stop` | Stop the Chrome pool |
+| `e2e_screenshot` | Retrieve a screenshot by its hash (e.g. `ss:a3f2b1c9`) |
+| `e2e_issue` | Fetch a GitHub/GitLab issue and generate E2E tests |
+> **Note:** Pool start/stop are only available via CLI (`e2e-runner pool start|stop`), not via MCP — restarting the pool kills all active sessions from other clients.
 All tools accept an optional `cwd` parameter (absolute path to the project root). Claude Code passes its current working directory so the MCP server resolves `e2e/tests/`, `e2e.config.js`, and `.e2e-pool/` relative to the correct project — even when switching between multiple projects in the same session.
-Once installed, Claude Code can run tests, analyze failures, create new test files, and manage the Chrome pool as part of its normal workflow. Just ask:
+Once installed, Claude Code can run tests, analyze failures, and create new test files as part of its normal workflow. Just ask:
 > "Run all E2E tests"
 > "Create a test that verifies the checkout flow"
@@ -360,6 +394,108 @@ claude mcp list
 # e2e-runner: ... - Connected
 ```
+## Issue-to-Test
+Turn GitHub and GitLab issues into executable E2E tests. Paste an issue URL and get runnable tests -- automatically.
+### How It Works
+1. **Fetch** -- Pulls issue details (title, body, labels) via `gh` or `glab` CLI
+2. **Generate** -- AI creates JSON test actions based on the issue description
+3. **Run** -- Optionally executes the tests immediately to verify if a bug is reproducible
+### Two Modes
+**Prompt mode** (default, no API key): Returns issue data + a structured prompt. Claude Code uses its own intelligence to create tests via `e2e_create_test` and run them.
+**Verify mode** (requires `ANTHROPIC_API_KEY`): Calls Claude API directly, generates tests, runs them, and reports whether the bug is confirmed or not reproducible.
+### CLI
+```bash
+# Fetch and display issue details
+e2e-runner issue https://github.com/owner/repo/issues/42
+# Generate a test file via Claude API
+e2e-runner issue https://github.com/owner/repo/issues/42 --generate
+# -> Creates e2e/tests/issue-42.json
+# Generate + run + report bug status
+e2e-runner issue https://github.com/owner/repo/issues/42 --verify
+# -> "BUG CONFIRMED" or "NOT REPRODUCIBLE"
+# Output AI prompt as JSON (for piping)
+e2e-runner issue https://github.com/owner/repo/issues/42 --prompt
+```
+### MCP
+In Claude Code, the `e2e_issue` tool handles everything:
+> "Fetch issue https://github.com/owner/repo/issues/42 and create E2E tests for it"
+Claude Code receives the issue data, generates appropriate test actions, saves them via `e2e_create_test`, and runs them with `e2e_run`.
+### Auth Requirements
+- **GitHub**: `gh` CLI authenticated (`gh auth login`)
+- **GitLab**: `glab` CLI authenticated (`glab auth login`)
+Provider is auto-detected from the URL. Self-hosted GitLab is supported via `glab` config.
+### Bug Verification Logic
+Generated tests assert the **correct** behavior. If the tests fail, the correct behavior doesn't work -- bug confirmed. If all tests pass, the bug is not reproducible.
+## Web Dashboard
+Real-time UI for running tests, viewing results, screenshots, and run history.
+```bash
+e2e-runner dashboard                  # Start on default port 8484
+e2e-runner dashboard --port 9090      # Custom port
+```
+### Live Execution
+Monitor tests in real-time as they run. Each test shows its steps with individual durations, pass/fail status, and active connection count.
+<p align="center">
+  <img src="https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/docs/screenshots/blog-dashboard-live-running.png" alt="Dashboard - Live test execution" width="900" />
+</p>
+### Test Suites
+Browse all test suites across multiple projects. Run a single suite or all tests with one click.
+<p align="center">
+  <img src="https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/docs/screenshots/blog-dashboard-suites.png" alt="Dashboard - Test suites grid" width="900" />
+</p>
+### Run History
+Track pass rate trends over time with the bar chart. Click any row to expand the full run detail with per-test results, screenshots, and console errors.
+<p align="center">
+  <img src="https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/docs/screenshots/blog-dashboard-runs.png" alt="Dashboard - Run history with trend chart" width="900" />
+</p>
+### Run Detail
+Expanded view shows each test with PASS/FAIL badge, screenshot thumbnails with copyable hashes (`ss:77c28b5a`), and formatted console errors.
+<p align="center">
+  <img src="https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/docs/screenshots/blog-dashboard-run-detail.png" alt="Dashboard - Run detail with screenshot hashes" width="900" />
+</p>
+### Screenshot Gallery
+Browse all captured screenshots per project. Includes both manual captures and error screenshots.
+<p align="center">
+  <img src="https://raw.githubusercontent.com/fastslack/mtw-e2e-runner/main/docs/screenshots/blog-dashboard-screenshots-gallery.png" alt="Dashboard - Screenshot gallery" width="900" />
+</p>
 ## Architecture
 ```
@@ -371,6 +507,12 @@ src/runner.js         Parallel test executor with retries and timeouts
 src/actions.js        Action engine: maps JSON actions to Puppeteer calls
 src/reporter.js       JSON reports, JUnit XML, console output
 src/mcp-server.js     MCP server: exposes tools for Claude Code
+src/mcp-tools.js      Shared MCP tool definitions and handlers
+src/dashboard.js      Web dashboard: HTTP server, REST API, WebSocket
+src/db.js             SQLite multi-project database
+src/issues.js         GitHub/GitLab issue fetching (gh/glab CLI)
+src/ai-generate.js    AI test generation (prompt builder + Claude API)
+src/verify.js         Bug verification orchestrator
 src/logger.js         ANSI colored logger
 src/index.js          Programmatic API (createRunner)
 templates/            Scaffolding templates for init command

package/bin/cli.js CHANGED

@@ -13,6 +13,12 @@
  *   e2e-runner pool stop                  Stop the Chrome Pool
  *   e2e-runner pool status                Show pool status
  *   e2e-runner pool restart               Restart the pool
+ *   e2e-runner dashboard                   Start the web dashboard
+ *   e2e-runner capture <url>              Capture a screenshot of any URL
+ *   e2e-runner issue <url>                Fetch issue and show details
+ *   e2e-runner issue <url> --generate     Generate test file via Claude API
+ *   e2e-runner issue <url> --verify       Generate + run + report bug status
+ *   e2e-runner issue <url> --prompt       Output the AI prompt (for piping)
  *   e2e-runner init                       Scaffold e2e/ in the current project
  *   e2e-runner --help                     Show help
  *   e2e-runner --version                  Show version
@@ -20,11 +26,17 @@
 import fs from 'fs';
 import path from 'path';
+import http from 'http';
 import { fileURLToPath } from 'url';
 import { loadConfig } from '../src/config.js';
-import { startPool, stopPool, restartPool, getPoolStatus, waitForPool } from '../src/pool.js';
+import { startPool, stopPool, restartPool, getPoolStatus, waitForPool, connectToPool } from '../src/pool.js';
 import { runTestsParallel, loadTestFile, loadTestSuite, loadAllSuites, listSuites } from '../src/runner.js';
-import { generateReport, saveReport, printReport } from '../src/reporter.js';
+import { generateReport, saveReport, printReport, persistRun } from '../src/reporter.js';
+import { startDashboard } from '../src/dashboard.js';
+import { fetchIssue } from '../src/issues.js';
+import { buildPrompt, generateTests, hasApiKey } from '../src/ai-generate.js';
+import { verifyIssue } from '../src/verify.js';
+import { ensureProject, computeScreenshotHash, registerScreenshotHash } from '../src/db.js';
 import { log, colors as C } from '../src/logger.js';
 const __filename = fileURLToPath(import.meta.url);
@@ -59,6 +71,12 @@ function parseCLIConfig() {
   if (getFlag('--test-timeout')) cliArgs.testTimeout = parseInt(getFlag('--test-timeout'));
   if (getFlag('--output')) cliArgs.outputFormat = getFlag('--output');
   if (getFlag('--env')) cliArgs.env = getFlag('--env');
+  if (getFlag('--port')) cliArgs.dashboardPort = parseInt(getFlag('--port'));
+  if (getFlag('--dashboard-port')) cliArgs.dashboardPort = parseInt(getFlag('--dashboard-port'));
+  if (getFlag('--project-name')) cliArgs.projectName = getFlag('--project-name');
+  if (hasFlag('--fail-on-network-error')) cliArgs.failOnNetworkError = true;
+  if (getFlag('--auth-token')) cliArgs.authToken = getFlag('--auth-token');
+  if (getFlag('--auth-storage-key')) cliArgs.authStorageKey = getFlag('--auth-storage-key');
   return cliArgs;
 }
@@ -75,6 +93,20 @@ ${C.bold}Usage:${C.reset}
   e2e-runner list                       List available suites
+  e2e-runner dashboard                  Start the web dashboard
+  e2e-runner dashboard --port <port>    Custom port (default: 8484)
+  e2e-runner capture <url>               Capture a screenshot of any URL
+  e2e-runner capture <url> --full-page  Capture full scrollable page
+  e2e-runner capture <url> --selector <sel>  Wait for selector before capture
+  e2e-runner capture <url> --delay <ms> Wait before capturing
+  e2e-runner capture <url> --filename <name> Custom filename
+  e2e-runner issue <url>                Fetch issue and show details
+  e2e-runner issue <url> --generate     Generate test file via Claude API
+  e2e-runner issue <url> --verify       Generate + run + report bug status
+  e2e-runner issue <url> --prompt       Output the AI prompt (for piping)
   e2e-runner pool start                 Start the Chrome Pool
   e2e-runner pool stop                  Stop the Chrome Pool
   e2e-runner pool status                Show pool status
@@ -96,6 +128,8 @@ ${C.bold}Options:${C.reset}
   --test-timeout <ms>      Per-test timeout (default: 60000)
   --output <format>        Report format: json, junit, both (default: json)
   --env <name>             Environment profile from config (default: default)
+  --project-name <name>    Project display name for dashboard (default: directory name)
+  --fail-on-network-error  Fail tests when network requests fail (e.g. ERR_CONNECTION_REFUSED)
 ${C.bold}Config:${C.reset}
   Looks for e2e.config.js or e2e.config.json in the current directory.
@@ -106,6 +140,7 @@ ${C.bold}Config:${C.reset}
 async function cmdRun() {
   const cliArgs = parseCLIConfig();
   const config = await loadConfig(cliArgs);
+  config.triggeredBy = 'cli';
   let tests = [];
   let hooks = {};
@@ -151,13 +186,35 @@ async function cmdRun() {
   const pressure = await waitForPool(config.poolUrl);
   log('✅', `Pool ready (${pressure.running}/${pressure.maxConcurrent} sessions, queued: ${pressure.queued})`);
+  // Wire up live progress to dashboard if running
+  let _lastBroadcast = null;
+  try {
+    const res = await fetch('http://127.0.0.1:' + (config.dashboardPort || 8484) + '/api/status');
+    if (res.ok) {
+      const dp = config.dashboardPort || 8484;
+      config.onProgress = (data) => {
+        const body = JSON.stringify(data);
+        _lastBroadcast = new Promise((resolve) => {
+          const req = http.request({ hostname: '127.0.0.1', port: dp, path: '/api/broadcast', method: 'POST', headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) }, timeout: 1000 });
+          req.on('error', () => resolve());
+          req.on('close', () => resolve());
+          req.end(body);
+        });
+      };
+    }
+  } catch { /* dashboard not running */ }
   // Execute tests
   console.log('');
+  const suiteName = getFlag('--suite') || (hasFlag('--all') ? null : null);
   const results = await runTestsParallel(tests, config, hooks);
   const report = generateReport(results);
   saveReport(report, config.screenshotsDir, config);
+  persistRun(report, config, suiteName);
   printReport(report, config.screenshotsDir);
+  // Wait for the last dashboard broadcast (run:complete) to flush before exiting
+  if (_lastBroadcast) await _lastBroadcast;
   process.exit(report.summary.failed > 0 ? 1 : 0);
 }
@@ -288,6 +345,177 @@ ${C.bold}Next steps:${C.reset}
 `);
 }
+async function cmdDashboard() {
+  const cliArgs = parseCLIConfig();
+  const config = await loadConfig(cliArgs);
+  console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
+  console.log(`${C.dim}Starting dashboard on port ${config.dashboardPort}...${C.reset}\n`);
+  const handle = await startDashboard(config);
+  // Keep process alive until SIGINT/SIGTERM
+  const shutdown = () => {
+    console.log(`\n${C.dim}Shutting down dashboard...${C.reset}`);
+    handle.close();
+    process.exit(0);
+  };
+  process.on('SIGINT', shutdown);
+  process.on('SIGTERM', shutdown);
+}
+async function cmdCapture() {
+  const url = args[1];
+  if (!url || url.startsWith('--')) {
+    console.error(`${C.red}Usage: e2e-runner capture <url> [--filename <name>] [--full-page] [--selector <sel>] [--delay <ms>]${C.reset}`);
+    process.exit(1);
+  }
+  const cliArgs = parseCLIConfig();
+  const config = await loadConfig(cliArgs);
+  console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
+  log('🔌', 'Checking Chrome Pool...');
+  await waitForPool(config.poolUrl);
+  let browser;
+  try {
+    browser = await connectToPool(config.poolUrl);
+    const page = await browser.newPage();
+    await page.setViewport(config.viewport);
+    log('📸', `Navigating to ${C.cyan}${url}${C.reset}`);
+    await page.goto(url, { waitUntil: 'networkidle2', timeout: 30000 });
+    const selector = getFlag('--selector');
+    if (selector) {
+      log('⏳', `Waiting for selector: ${C.dim}${selector}${C.reset}`);
+      await page.waitForSelector(selector, { timeout: 10000 });
+    }
+    const delay = getFlag('--delay');
+    if (delay) {
+      await new Promise(r => setTimeout(r, parseInt(delay)));
+    }
+    // Build filename
+    let filename = getFlag('--filename') || `capture-${Date.now()}.png`;
+    filename = path.basename(filename);
+    if (!filename.endsWith('.png')) filename += '.png';
+    if (!fs.existsSync(config.screenshotsDir)) {
+      fs.mkdirSync(config.screenshotsDir, { recursive: true });
+    }
+    const screenshotPath = path.join(config.screenshotsDir, filename);
+    const fullPage = hasFlag('--full-page');
+    await page.screenshot({ path: screenshotPath, fullPage });
+    // Register hash in SQLite
+    const cwd = process.cwd();
+    const projectName = config.projectName || path.basename(cwd);
+    const projectId = ensureProject(cwd, projectName, config.screenshotsDir, config.testsDir);
+    const hash = computeScreenshotHash(screenshotPath);
+    registerScreenshotHash(hash, screenshotPath, projectId, null);
+    log('✅', `Saved: ${C.cyan}${screenshotPath}${C.reset}`);
+    log('🏷️', `Hash:  ${C.bold}ss:${hash}${C.reset}`);
+    console.log('');
+  } finally {
+    if (browser) browser.disconnect();
+  }
+}
+async function cmdIssue() {
+  const url = args[1];
+  if (!url || url.startsWith('--')) {
+    console.error(`${C.red}Usage: e2e-runner issue <url> [--generate|--verify|--prompt]${C.reset}`);
+    process.exit(1);
+  }
+  const cliArgs = parseCLIConfig();
+  const config = await loadConfig(cliArgs);
+  if (hasFlag('--prompt')) {
+    // Output AI prompt as JSON to stdout
+    const issue = fetchIssue(url);
+    const promptData = buildPrompt(issue, config);
+    console.log(JSON.stringify(promptData, null, 2));
+    return;
+  }
+  if (hasFlag('--verify')) {
+    // Generate + run + report
+    if (!hasApiKey(config)) {
+      console.error(`${C.red}ANTHROPIC_API_KEY is required for --verify mode.${C.reset}`);
+      process.exit(1);
+    }
+    console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
+    log('🔍', 'Fetching issue...');
+    const result = await verifyIssue(url, config);
+    const { issue, report, bugConfirmed } = result;
+    console.log('');
+    if (bugConfirmed) {
+      log('🐛', `${C.red}${C.bold}BUG CONFIRMED${C.reset} — ${issue.title}`);
+      log('', `${C.dim}${report.summary.failed} of ${report.summary.total} tests failed${C.reset}`);
+    } else {
+      log('✅', `${C.green}${C.bold}NOT REPRODUCIBLE${C.reset} — ${issue.title}`);
+      log('', `${C.dim}All ${report.summary.total} tests passed${C.reset}`);
+    }
+    console.log(`${C.dim}Issue: ${issue.url}${C.reset}\n`);
+    process.exit(bugConfirmed ? 1 : 0);
+  }
+  if (hasFlag('--generate')) {
+    // Generate test file via Claude API
+    if (!hasApiKey(config)) {
+      console.error(`${C.red}ANTHROPIC_API_KEY is required for --generate mode.${C.reset}`);
+      process.exit(1);
+    }
+    console.log(`\n${C.bold}${C.cyan}@matware/e2e-runner${C.reset} v${pkg.version}`);
+    log('🔍', 'Fetching issue...');
+    const issue = fetchIssue(url);
+    log('📋', `${C.cyan}${issue.title}${C.reset}`);
+    log('🤖', 'Generating tests via Claude API...');
+    const { tests, suiteName } = await generateTests(issue, config);
+    if (!fs.existsSync(config.testsDir)) {
+      fs.mkdirSync(config.testsDir, { recursive: true });
+    }
+    const filePath = path.join(config.testsDir, `${suiteName}.json`);
+    fs.writeFileSync(filePath, JSON.stringify(tests, null, 2) + '\n');
+    log('✅', `Created ${C.cyan}${filePath}${C.reset} (${tests.length} tests)`);
+    console.log(`${C.dim}Run with: e2e-runner run --suite ${suiteName}${C.reset}\n`);
+    return;
+  }
+  // Default: fetch and display issue
+  log('🔍', 'Fetching issue...');
+  const issue = fetchIssue(url);
+  console.log(`\n${C.bold}${issue.title}${C.reset}`);
+  console.log(`${C.dim}${'─'.repeat(50)}${C.reset}`);
+  console.log(`  Repo:    ${C.cyan}${issue.repo}${C.reset}`);
+  console.log(`  Number:  #${issue.number}`);
+  console.log(`  State:   ${issue.state === 'open' ? C.green : C.red}${issue.state}${C.reset}`);
+  console.log(`  Labels:  ${issue.labels.length ? issue.labels.join(', ') : C.dim + 'none' + C.reset}`);
+  console.log(`  URL:     ${C.dim}${issue.url}${C.reset}`);
+  if (issue.body) {
+    console.log(`\n${C.bold}Description:${C.reset}`);
+    console.log(issue.body.length > 500 ? issue.body.substring(0, 500) + '...' : issue.body);
+  }
+  console.log('');
+}
 // ==================== Main ====================
 async function main() {
@@ -316,6 +544,18 @@ async function main() {
       await cmdPool();
       break;
+    case 'dashboard':
+      await cmdDashboard();
+      break;
+    case 'capture':
+      await cmdCapture();
+      break;
+    case 'issue':
+      await cmdIssue();
+      break;
     case 'init':
       cmdInit();
       break;

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@matware/e2e-runner",
-  "version": "1.0.3",
+  "version": "1.1.1",
   "mcpName": "io.github.fastslack/e2e-runner",
   "description": "E2E test runner using Chrome Pool (browserless/chrome) with parallel execution",
   "type": "module",
@@ -25,17 +25,20 @@
     "browserless",
     "parallel",
     "mcp",
-    "claude-code"
+    "claude-code",
+    "github-issues",
+    "ai-testing"
   ],
   "author": "Matware",
   "license": "Apache-2.0",
   "repository": {
     "type": "git",
-    "url": "https://github.com/fastslack/mtw-e2e-runner.git"
+    "url": "git+https://github.com/fastslack/mtw-e2e-runner.git"
   },
   "homepage": "https://github.com/fastslack/mtw-e2e-runner#readme",
   "dependencies": {
     "@modelcontextprotocol/sdk": "^1.12.1",
+    "better-sqlite3": "^11.0.0",
     "puppeteer-core": "^24.0.0"
   },
   "engines": {

package/src/actions.js CHANGED

@@ -7,6 +7,7 @@
  * The JS comes from team-authored JSON test files.
  */
+import path from 'path';
 import { log } from './logger.js';
 function sleep(ms) {
@@ -70,7 +71,9 @@ export async function executeAction(page, action, config) {
       if (!/\.(png|jpg|jpeg|webp)$/i.test(filename)) {
         filename += '.png';
       }
-      const filepath = `${screenshotsDir}/${filename}`;
+      // Sanitize: use only the basename to prevent path traversal
+      filename = path.basename(filename);
+      const filepath = path.join(screenshotsDir, filename);
       await page.screenshot({ path: filepath, fullPage: action.fullPage || false });
       return { screenshot: filepath };
     }
@@ -146,10 +149,31 @@ export async function executeAction(page, action, config) {
       await page.hover(selector);
       break;
-    case 'evaluate':
-      // Intentional: runs JS in browser page context (from test JSON files)
-      await page.evaluate(value);
+    case 'navigate': {
+      const navUrl = value.startsWith('http') ? value : `${baseUrl}${value}`;
+      // Navigate with a race: try page.goto but don't block more than 5s
+      // This handles SPAs where domcontentloaded may not fire on client-side routing
+      try {
+        await Promise.race([
+          page.goto(navUrl, { waitUntil: 'load', timeout: 30000 }),
+          sleep(5000),
+        ]);
+      } catch { /* navigation may still be loading */ }
       break;
+    }
+    case 'evaluate': {
+      // Intentional: runs JS in browser page context (from test JSON files)
+      const evalResult = await page.evaluate(value);
+      // Check return value for failure signals
+      if (typeof evalResult === 'string' && /^(FAIL|ERROR|FAILED)[\s:]/i.test(evalResult)) {
+        throw new Error(`evaluate failed: ${evalResult}`);
+      }
+      if (evalResult === false) {
+        throw new Error('evaluate returned false');
+      }
+      return evalResult !== undefined && evalResult !== null ? { value: evalResult } : null;
+    }
     default:
       log('⚠️', `Unknown action: ${type}`);