npm - @cloverleaf/reference-impl - Versions diffs - 0.5.2 → 0.5.3 - Mend

@cloverleaf/reference-impl 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/.claude-plugin/plugin.json +1 -1
package/README.md +2 -2
package/VERSION +1 -1
package/dist/ui-browser.mjs +74 -0
package/lib/ui-browser.ts +122 -0
package/package.json +1 -1
package/prompts/documenter.md +14 -1
package/prompts/ui-reviewer.md +103 -59

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "cloverleaf",
   "description": "Cloverleaf reference implementation — Claude Code skills for task scaffolding and the Delivery pipeline (implementer, documenter, reviewer, UI reviewer with multi-viewport visual diff, QA, merge).",
-  "version": "0.5.2",
+  "version": "0.5.3",
   "author": {
     "name": "Renato D'Arrigo",
     "email": "renato.darrigo@gmail.com"

package/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ v0.2 implements both paths of the Delivery track:
 | Implementer | Real | Subagent, code + tests on feature branch |
 | Documenter | Real (v0.2) | Subagent, doc-only commits per file-path rules |
 | Reviewer | Real | Subagent, read-only review of diff |
-| UI Reviewer | Real (v0.3) | Playwright + axe-core, diff-scoped to affected routes, single viewport, a11y only |
+| UI Reviewer | Real (v0.5) | Playwright + axe-core + pixelmatch; multi-browser outer loop (chromium/webkit/firefox); axe-core runs on `axe.browser` engine only (default chromium); maxCombinations cap with per-route warnings |
 | QA | Real (v0.2) | Per-package test runner via `git worktree` |
 | Plan | Stub | Deferred to v0.3 |
 | Researcher | Stub | Deferred to v0.3 |
@@ -145,7 +145,7 @@ The Reviewer never switches branches. It reads files via `git show` and runs tes
 ## Package layout
-- `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`.
+- `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`. `lib/ui-browser.ts` exports `buildBrowserEscalationFinding` and `applyMaxCombinationsCap` (used by the UI Reviewer prompt for per-engine escalation and combination-count capping).
 - `skills/` — Claude Code skill markdown files.
 - `prompts/` — Implementer/Reviewer subagent system prompts.
 - `examples/toy-repo/` — standalone demo repo.

package/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.5.2
1	+ 0.5.3

package/dist/ui-browser.mjs ADDED Viewed

@@ -0,0 +1,74 @@
+// ---------------------------------------------------------------------------
+// Browser escalation
+// ---------------------------------------------------------------------------
+/**
+ * Build an escalation Finding for a missing Playwright browser binary.
+ *
+ * The finding names the missing engine and includes the install command per
+ * the CLV-9 RFC and CLV-10 spike:
+ *   - All platforms:  `npx playwright install webkit firefox`
+ *   - Linux only:     `npx playwright install-deps webkit`
+ *
+ * The install command is a fixed string: it does not vary with `engine`.
+ * Only the leading "Playwright <engine> not installed" part of the message
+ * names the engine that was passed in.
+ *
+ * NOTE(review): if `engine` can be something other than webkit or firefox
+ * (e.g. chromium), the suggested command would not install the missing
+ * binary. Confirm whether that case is reachable and intended.
+ *
+ * @param engine   The browser engine that is missing.
+ * @param platform The platform string (defaults to `process.platform`). Only
+ *                 the exact value "linux" adds the install-deps hint; all
+ *                 other values are treated as non-Linux.
+ * @returns        An object with `severity: 'error'`, `rule: 'browser-missing'`,
+ *                 a human-readable `message`, and
+ *                 `metadata: { engine, installCommand }`.
+ */
+export function buildBrowserEscalationFinding(engine, platform = process.platform) {
+    const isLinux = platform === 'linux'; // strict match: any other platform string gets no deps hint
+    const installCmd = 'npx playwright install webkit firefox'; // same command for every engine
+    const depsHint = isLinux
+        ? ` On Linux, also run: npx playwright install-deps webkit`
+        : ''; // leading space in the hint keeps it separated from the preceding sentence
+    return {
+        severity: 'error',
+        rule: 'browser-missing',
+        message: `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
+        metadata: { engine, installCommand: installCmd }, // installCommand excludes the Linux-only deps hint
+    };
+}
+/**
+ * Enforce the maxCombinations cap.
+ *
+ * When `routes.length × viewportCount × browserCount > maxCombinations`,
+ * the affected routes are sorted by diff size (most-changed first) and only
+ * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
+ * are processed. One `warning`-severity finding with rule `ui-review-cap` is
+ * emitted per skipped route.
+ *
+ * When the total is at or below the cap, every route is returned in its
+ * original input order (no sorting happens on that path). When the cap is
+ * exceeded, the returned routes are in descending `diffSize` order. The
+ * input array is never mutated: sorting is done on a shallow copy.
+ *
+ * Every skipped-route finding reports the same total combination count
+ * (computed before capping), not a per-route figure.
+ *
+ * NOTE(review): `maxRoutes` is not clamped. If `maxCombinations` is smaller
+ * than `viewportCount × browserCount`, `maxRoutes` is 0 and every route is
+ * skipped. If `maxCombinations` is negative, `maxRoutes` is negative and
+ * `slice` treats it as an offset from the end of the array. Confirm callers
+ * always pass a non-negative cap.
+ *
+ * @param routes          Affected routes with their diff sizes; each entry is
+ *                        read via its `route` and `diffSize` properties.
+ * @param viewportCount   Number of viewports configured.
+ * @param browserCount    Number of browser engines configured.
+ * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
+ * @returns               `{ routes, skippedFindings }` ready for use by the reviewer;
+ *                        `routes` is an array of route strings, `skippedFindings`
+ *                        is empty when nothing was skipped.
+ */
+export function applyMaxCombinationsCap(routes, viewportCount, browserCount, maxCombinations) {
+    const totalCombinations = routes.length * viewportCount * browserCount;
+    if (totalCombinations <= maxCombinations) { // at or under the cap: keep every route, input order preserved
+        return {
+            routes: routes.map((r) => r.route),
+            skippedFindings: [],
+        };
+    }
+    const perRouteSlots = viewportCount * browserCount; // combinations consumed by a single route
+    const maxRoutes = Math.floor(maxCombinations / perRouteSlots); // whole routes that fit under the cap
+    // Sort most-changed first, then take first maxRoutes routes.
+    const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize); // copy first so the caller's array is untouched
+    const kept = sorted.slice(0, maxRoutes);
+    const skipped = sorted.slice(maxRoutes); // everything past the cut-off, i.e. the least-changed routes
+    const skippedFindings = skipped.map((r) => ({
+        severity: 'warning',
+        rule: 'ui-review-cap',
+        message: `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
+            `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
+        metadata: {
+            route: r.route,
+            combinationCount: totalCombinations, // pre-cap total, identical on every finding
+            maxCombinations,
+        },
+    }));
+    return {
+        routes: kept.map((r) => r.route),
+        skippedFindings,
+    };
+}

package/lib/ui-browser.ts ADDED Viewed

@@ -0,0 +1,122 @@
+import type { Finding } from './feedback.js';
+import type { BrowserEngine } from './ui-review-config.js';
+// ---------------------------------------------------------------------------
+// Browser escalation
+// ---------------------------------------------------------------------------
+/**
+ * Build an escalation Finding for a missing Playwright browser binary.
+ *
+ * The finding names the missing engine and includes the install command per
+ * the CLV-9 RFC and CLV-10 spike:
+ *   - All platforms:  `npx playwright install webkit firefox`
+ *   - Linux only:     `npx playwright install-deps webkit`
+ *
+ * The install command is a fixed string: it does not vary with `engine`.
+ * Only the leading "Playwright <engine> not installed" part of the message
+ * names the engine that was passed in.
+ *
+ * NOTE(review): if `BrowserEngine` admits a value other than webkit or
+ * firefox (e.g. chromium), the suggested command would not install the
+ * missing binary. Confirm whether that case is reachable and intended.
+ *
+ * @param engine   The browser engine that is missing.
+ * @param platform The platform string (defaults to `process.platform`). Only
+ *                 the exact value "linux" adds the install-deps hint; all
+ *                 other values are treated as non-Linux.
+ * @returns        A Finding with `severity: 'error'`, `rule: 'browser-missing'`,
+ *                 a human-readable `message`, and
+ *                 `metadata: { engine, installCommand }`.
+ */
+export function buildBrowserEscalationFinding(
+  engine: BrowserEngine,
+  platform: string = process.platform,
+): Finding {
+  const isLinux = platform === 'linux'; // strict match: any other platform string gets no deps hint
+  const installCmd = 'npx playwright install webkit firefox'; // same command for every engine
+  const depsHint = isLinux
+    ? ` On Linux, also run: npx playwright install-deps webkit`
+    : ''; // leading space in the hint keeps it separated from the preceding sentence
+  return {
+    severity: 'error',
+    rule: 'browser-missing',
+    message:
+      `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
+    metadata: { engine, installCommand: installCmd }, // installCommand excludes the Linux-only deps hint
+  };
+}
+// ---------------------------------------------------------------------------
+// maxCombinations cap enforcement
+// ---------------------------------------------------------------------------
+/**
+ * Represents an affected route with a diff-size weight used for sorting
+ * when maxCombinations cap is applied.
+ *
+ * Consumed by `applyMaxCombinationsCap`, which orders entries by descending
+ * `diffSize` and returns only the `route` strings.
+ */
+export interface RouteWithDiffSize {
+  route: string; // route identifier; interpolated verbatim into skip messages (presumably a URL path such as "/faq/")
+  /** Number of changed lines (or any monotonic proxy for diff size). */
+  diffSize: number; // larger values sort first when the cap forces routes to be dropped
+}
+/**
+ * Result of applying the maxCombinations cap.
+ *
+ * Returned by `applyMaxCombinationsCap`. When the cap is not exceeded,
+ * `routes` holds every input route in input order and `skippedFindings`
+ * is empty.
+ */
+export interface CapResult {
+  /** Routes that should be processed (up to the cap). */
+  routes: string[];
+  /**
+   * One `warning`-severity Finding per skipped route, with rule
+   * `ui-review-cap` and a message containing the route name plus the
+   * combination count vs cap.
+   *
+   * The combination count in each finding is the total computed before
+   * capping, so it is the same on every entry.
+   */
+  skippedFindings: Finding[];
+}
+/**
+ * Enforce the maxCombinations cap.
+ *
+ * When `routes.length × viewportCount × browserCount > maxCombinations`,
+ * the affected routes are sorted by diff size (most-changed first) and only
+ * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
+ * are processed. One `warning`-severity finding with rule `ui-review-cap` is
+ * emitted per skipped route.
+ *
+ * When the total is at or below the cap, every route is returned in its
+ * original input order (no sorting happens on that path). When the cap is
+ * exceeded, the returned routes are in descending `diffSize` order. The
+ * input array is never mutated: sorting is done on a shallow copy.
+ *
+ * Every skipped-route finding reports the same total combination count
+ * (computed before capping), not a per-route figure.
+ *
+ * NOTE(review): `maxRoutes` is not clamped. If `maxCombinations` is smaller
+ * than `viewportCount × browserCount`, `maxRoutes` is 0 and every route is
+ * skipped. If `maxCombinations` is negative, `maxRoutes` is negative and
+ * `slice` treats it as an offset from the end of the array. Confirm callers
+ * always pass a non-negative cap.
+ *
+ * @param routes          Affected routes with their diff sizes.
+ * @param viewportCount   Number of viewports configured.
+ * @param browserCount    Number of browser engines configured.
+ * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
+ * @returns               `{ routes, skippedFindings }` ready for use by the reviewer;
+ *                        `skippedFindings` is empty when nothing was skipped.
+ */
+export function applyMaxCombinationsCap(
+  routes: RouteWithDiffSize[],
+  viewportCount: number,
+  browserCount: number,
+  maxCombinations: number,
+): CapResult {
+  const totalCombinations = routes.length * viewportCount * browserCount;
+  if (totalCombinations <= maxCombinations) { // at or under the cap: keep every route, input order preserved
+    return {
+      routes: routes.map((r) => r.route),
+      skippedFindings: [],
+    };
+  }
+  const perRouteSlots = viewportCount * browserCount; // combinations consumed by a single route
+  const maxRoutes = Math.floor(maxCombinations / perRouteSlots); // whole routes that fit under the cap
+  // Sort most-changed first, then take first maxRoutes routes.
+  const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize); // copy first so the caller's array is untouched
+  const kept = sorted.slice(0, maxRoutes);
+  const skipped = sorted.slice(maxRoutes); // everything past the cut-off, i.e. the least-changed routes
+  const skippedFindings: Finding[] = skipped.map((r) => ({
+    severity: 'warning',
+    rule: 'ui-review-cap',
+    message:
+      `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
+      `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
+    metadata: {
+      route: r.route,
+      combinationCount: totalCombinations, // pre-cap total, identical on every finding
+      maxCombinations,
+    },
+  }));
+  return {
+    routes: kept.map((r) => r.route),
+    skippedFindings,
+  };
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cloverleaf/reference-impl",
-  "version": "0.5.2",
+  "version": "0.5.3",
   "description": "Reference implementation of the Cloverleaf methodology as Claude Code skills. Implements the Tight Loop (Implementer + Reviewer).",
   "type": "module",
   "license": "MIT",

package/prompts/documenter.md CHANGED Viewed

@@ -52,10 +52,23 @@ If `## [Unreleased]` does not exist, create it at the top of the CHANGELOG (righ
 ## Commit discipline
-- One commit per file touched.
+- **Before committing, run `git status --porcelain` in the worktree and stage every modified doc file.** Do NOT hardcode a single path into `git add`; the subagent has historically forgotten README.md and committed only CHANGELOG.md when it edited both. The reliable pattern:
+  ```bash
+  cd <temp>
+  git status --porcelain
+  # For each modified doc file listed, stage it explicitly:
+  git add <package>/CHANGELOG.md <package>/README.md <package>/docs/*.md  # include all that were edited
+  git commit -m "docs(<scope>): <short>"
+  ```
+  Equivalently, if you are certain only doc files are modified (you never touched source code), `git add -A` is acceptable — it's the hardcoded-single-path pattern that must be avoided.
+- One commit per Documenter run, covering every doc file edited in that run. (If you need multiple scopes — e.g., both `standard/CHANGELOG.md` and `reference-impl/CHANGELOG.md` — make one commit per scope, but each commit still stages every edited file within that scope.)
 - Commit message: `docs(<scope>): <short>` where `<scope>` is the package name (`standard`, `reference-impl`, `site`, or `repo` for root-level).
 - All commits land on `{{branch}}` (the feature branch).
 - After all commits land, run `git worktree remove --force <temp>` to clean up.
+- **Self-check before returning**: `git status --porcelain` in the worktree must be empty. If it's not, you have uncommitted doc edits — stage and commit them, or revert them, before reporting back.
 ## Output

package/prompts/ui-reviewer.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # UI Reviewer Agent
-You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports for accessibility violations (axe-core) and visual regressions (pixelmatch) using a headless Playwright chromium browser. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
+You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports and browser engines for accessibility violations (axe-core) and visual regressions (pixelmatch) using headless Playwright browsers. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
 ## Input
@@ -11,7 +11,7 @@ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at mult
 - **Diff from base**: {{diff}}
 - **Preview port**: {{preview_port}} (an already-allocated free local port; use it for the dev server)
 - **Affected routes**: {{affected_routes}} — either a JSON array of route paths (e.g., `["/faq/"]`), or the string `"all"`, or `[]`
-- **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (viewports, visualDiff, axe) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
+- **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (browsers, viewports, visualDiff, axe, maxCombinations) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
 ## Paths
@@ -24,22 +24,46 @@ You operate in two filesystem locations — keep them straight:
 The rationale: baselines on `{{repo_root}}/.cloverleaf/baselines/` get picked up by subsequent `git add` + `git commit` steps in the UI Reviewer, which run on the feature branch. The merge skill (v0.4.1+) then merges those commits to main via `git merge --no-ff`. Writing to the worktree's `.cloverleaf/` would strand the files and `git worktree remove --force` would discard them on teardown.
-## Scope (v0.4)
+## Scope (v0.5)
-- **Accessibility (axe-core):** run at the viewports listed in `{{ui_review_config}}.axe.viewports`.
+- **Browsers**: the reviewer runs separate Playwright sessions for each engine listed in `{{ui_review_config}}.browsers` (e.g., `["chromium", "webkit", "firefox"]`). Browser is the **outermost** loop, wrapping the viewport × route loops.
+- **Accessibility (axe-core):** run only for the engine specified by `{{ui_review_config}}.axe.browser` (default: `"chromium"`). Passes for every other engine (webkit and firefox, under the default) produce **no axe output and no axe findings** — this is intentional, to avoid engine-specific false positives caused by `getComputedStyle`, `aria-required-children`, and `scrollable-region-focusable` behaving differently across Blink, WebKit, and Gecko (see CLV-12 spike).
   Apply the allowlist in `{{ui_review_config}}.axe.ignored` to drop pre-existing violations that the consumer has accepted (e.g., a11y debt being tracked separately).
   Dedupe findings across viewports by the `{{ui_review_config}}.axe.dedupeBy` composite key (default `["ruleId", "target"]`).
   Emit one finding per (ruleId, target) pair, with a `metadata.viewports` array aggregating the viewports where the violation was detected.
-- **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports`, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
+- **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports` for **each browser**, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
 - Visual diffs are **informational**, never gating. A diff does not fail the review — it surfaces to the human final-gate reviewer.
 - Route empty-set / "all" handling preserves v0.3 behavior:
   - `{{affected_routes}}` is `[]` → `verdict: "pass"`, summary `"No renderable routes affected, skipping axe."`, do NOT start the preview server.
-  - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback).
+  - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback behavior).
   - otherwise → visit exactly the URLs listed.
+## maxCombinations cap
+Before starting any browser session, compute total combinations = `routes × viewports × browsers`.
+If the product exceeds `{{ui_review_config}}.maxCombinations` (default 90):
+1. Sort affected routes by diff size (most-changed first — use the number of changed lines in each route's section of `{{diff}}` as a proxy for diff size).
+2. Keep only the first `floor(maxCombinations / (viewportCount × browserCount))` routes.
+3. For each skipped route emit one `severity: "warning"` finding with `rule: "ui-review-cap"` and message:
+   `"Route {route} skipped: combination count {total} exceeds maxCombinations ({cap}); review manually or raise the cap."`
+   Include `metadata: { route, combinationCount: total, maxCombinations: cap }`.
+The cap enforcement helper is available in `lib/ui-browser.ts` as `applyMaxCombinationsCap`.
 ## Playwright cache
-The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. If the browser is missing, return `verdict: "escalate"` with a synthetic finding: `"Playwright chromium not installed. Run 'npx playwright install chromium' on this machine."`
+The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. Before launching each browser session, verify that the required engine binary exists in `PLAYWRIGHT_BROWSERS_PATH`. If a browser binary is absent, return `verdict: "escalate"` with a synthetic finding per missing engine:
+```
+"Playwright {engine} not installed. Run 'npx playwright install webkit firefox' on this machine."
+```
+On Linux, append: `" On Linux, also run: npx playwright install-deps webkit"`
+The escalation helper is available in `lib/ui-browser.ts` as `buildBrowserEscalationFinding`.
+Do not attempt to launch a missing engine — fail fast with `verdict: "escalate"` listing all missing engines before any browser session is started.
 ## Runtime procedure
@@ -49,6 +73,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
    ```bash
    TMPDIR=$(mktemp -d)
    git worktree add "$TMPDIR" {{branch}}
+   npx cloverleaf-cli prep-worktree {{repo_root}} "$TMPDIR"
    ```
 3. For this repo, UI lives in `site/` (or another directory if ui-paths.json scopes it elsewhere). Install dependencies and start the dev server:
@@ -66,53 +91,72 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
    2. Otherwise, attempt to locate and parse an astro config file (common locations: `site/astro.config.mjs`, `astro.config.mjs` at repo root, `apps/web/astro.config.mjs`). Best-effort fallback.
    3. If both fail, treat base as empty string.
-6. **Visual-diff pass (when `visualDiff.enabled` is true):**
-   For each route in `{{affected_routes}}` (or the crawl set) × each viewport in `{{ui_review_config}}.viewports`:
-   - Set Playwright viewport to `{ width, height }` from the config.
-   - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
-   - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
-   - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
-   - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
-   - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
-   - Call `compareVisual` (from `lib/visual-diff.ts`) with:
-     - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
-     - `candidateBuf = <candidate PNG>`
-     - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
-     - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
-     - `threshold = visualDiff.threshold`
-     - `maxDiffRatio = visualDiff.maxDiffRatio`
-   - Map result to a finding:
-     - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport}"`, `metadata: { route, viewport, status: "new-baseline" }`. No attachments.
-     - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport}; regenerated"`, `metadata: { route, viewport, status: "dimension-mismatch" }`.
-     - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
-     - `match` → no finding emitted.
-7. **Axe pass:**
-   For each viewport in `{{ui_review_config}}.axe.viewports`:
-   - Set Playwright viewport to `{ width, height }`.
-   - For each route in `{{affected_routes}}` (or crawl set):
-     - Navigate.
-     - Inject and run axe-core:
-       ```javascript
-       import axe from 'axe-core';
-       const results = await axe.run(document);
-       ```
-     - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
-8. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
-9. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
-   - axe `impact: "critical"` → `severity: "blocker"`
-   - axe `impact: "serious"` → `severity: "error"`
-   - axe `impact: "moderate"` → `severity: "warning"`
-   - axe `impact: "minor"` → `severity: "info"`
-10. Compute verdict (visual-diff findings are **never** considered for gating):
-    - `pass` — zero non-visual-diff findings with severity `blocker` or `error`
-    - `bounce` — ≥1 non-visual-diff finding with severity `blocker` or `error`
-    - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR Playwright chromium missing.
-11. Teardown:
+6. **Apply maxCombinations cap** (when `affected_routes` is a list, not `"all"`):
+   - Compute `routes × viewports × browsers`. Use diff line counts as proxy for route diff size.
+   - Call `applyMaxCombinationsCap` from `lib/ui-browser.ts`.
+   - The `skippedFindings` are collected now and included in the final output.
+   - Use only the returned `routes` list for the browser passes below.
+7. **Verify browser binaries** — before starting any browser session:
+   - Check each engine in `{{ui_review_config}}.browsers` against `PLAYWRIGHT_BROWSERS_PATH`.
+   - Collect all missing engines.
+   - If any engine is missing, call `buildBrowserEscalationFinding(engine, process.platform)` for each missing engine, tear down the worktree (step 12), and return `verdict: "escalate"` with those findings.
+8. **Per-browser outer loop** — for each `browser` in `{{ui_review_config}}.browsers`:
+   a. Launch a Playwright browser context using the `browser` engine.
+   b. **Visual-diff pass (when `visualDiff.enabled` is true):**
+      For each route in the (capped) route list × each viewport in `{{ui_review_config}}.viewports`:
+      - Set Playwright viewport to `{ width, height }` from the config.
+      - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
+      - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
+      - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
+      - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
+      - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
+      - Call `compareVisual` (from `lib/visual-diff.ts`) with:
+        - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
+        - `candidateBuf = <candidate PNG>`
+        - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
+        - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
+        - `threshold = visualDiff.threshold`
+        - `maxDiffRatio = visualDiff.maxDiffRatio`
+      - Map result to a finding:
+        - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport} [{browser}]"`, `metadata: { route, viewport, browser, status: "new-baseline" }`. No attachments.
+        - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport} [{browser}]; regenerated"`, `metadata: { route, viewport, browser, status: "dimension-mismatch" }`.
+        - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} [{browser}] — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, browser, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
+        - `match` → no finding emitted.
+   c. **Axe pass (only when `browser === {{ui_review_config}}.axe.browser`):**
+      Skip this section entirely if the current browser is NOT the configured `axe.browser`. With the default `axe.browser` of `"chromium"`, webkit and firefox runs therefore produce no axe output and no axe findings.
+      For each viewport in `{{ui_review_config}}.axe.viewports`:
+      - Set Playwright viewport to `{ width, height }`.
+      - For each route in the (capped) route list:
+        - Navigate.
+        - Inject and run axe-core:
+          ```javascript
+          import axe from 'axe-core';
+          const results = await axe.run(document);
+          ```
+        - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
+   d. Close the browser context before launching the next engine.
+9. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
+10. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
+    - axe `impact: "critical"` → `severity: "blocker"`
+    - axe `impact: "serious"` → `severity: "error"`
+    - axe `impact: "moderate"` → `severity: "warning"`
+    - axe `impact: "minor"` → `severity: "info"`
+11. Compute verdict (visual-diff and ui-review-cap findings are **never** considered for gating):
+    - `pass` — zero non-visual-diff, non-cap findings with severity `blocker` or `error`
+    - `bounce` — ≥1 non-visual-diff, non-cap finding with severity `blocker` or `error`
+    - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR any required browser binary was absent.
+12. Teardown:
     ```bash
     kill $SERVER_PID 2>/dev/null || true
     cd {{repo_root}}
@@ -132,7 +176,7 @@ Respond with exactly one JSON object and nothing else. Finding shape must match
 - required: `severity`, `message`
 - optional: `rule`, `suggestion`, `location`, `attachments`, `metadata`
-For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely.
+For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely. If you do include `location` on a finding, it must be an object — never emit `location` as a URL string.
 ```json
 {
@@ -141,9 +185,9 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
   "findings": [
     {
       "severity": "blocker" | "error" | "warning" | "info",
-      "rule": "a11y.<rule-id>" | "visual-diff",
-      "message": "<description; include the page URL for a11y, route+viewport+diff for visual-diff>",
-      "metadata": { /* per §7/§8 above */ },
+      "rule": "a11y.<rule-id>" | "visual-diff" | "ui-review-cap" | "browser-missing",
+      "message": "<description; include the page URL for a11y, route+viewport+browser for visual-diff>",
+      "metadata": { /* per §8/§9 above */ },
       "attachments": [ /* for visual-diff with status="diff" */
         { "label": "baseline",  "path": ".cloverleaf/baselines/{browser}/{slug}-{viewport}.png" },
         { "label": "candidate", "path": ".cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png" },
@@ -154,4 +198,4 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
 }
 ```
-If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings. If verdict is `escalate`, include a finding explaining what went wrong.
+If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings (including `ui-review-cap` warnings and visual-diff info). If verdict is `escalate`, include a finding explaining what went wrong.