@cloverleaf/reference-impl 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "cloverleaf",
3
3
  "description": "Cloverleaf reference implementation — Claude Code skills for task scaffolding and the Delivery pipeline (implementer, documenter, reviewer, UI reviewer with multi-viewport visual diff, QA, merge).",
4
- "version": "0.5.2",
4
+ "version": "0.5.3",
5
5
  "author": {
6
6
  "name": "Renato D'Arrigo",
7
7
  "email": "renato.darrigo@gmail.com"
package/README.md CHANGED
@@ -29,7 +29,7 @@ v0.2 implements both paths of the Delivery track:
29
29
  | Implementer | Real | Subagent, code + tests on feature branch |
30
30
  | Documenter | Real (v0.2) | Subagent, doc-only commits per file-path rules |
31
31
  | Reviewer | Real | Subagent, read-only review of diff |
32
- | UI Reviewer | Real (v0.3) | Playwright + axe-core, diff-scoped to affected routes, single viewport, a11y only |
32
+ | UI Reviewer | Real (v0.5) | Playwright + axe-core + pixelmatch; multi-browser outer loop (chromium/webkit/firefox); axe-core runs on `axe.browser` engine only (default chromium); maxCombinations cap with per-route warnings |
33
33
  | QA | Real (v0.2) | Per-package test runner via `git worktree` |
34
34
  | Plan | Stub | Deferred to v0.3 |
35
35
  | Researcher | Stub | Deferred to v0.3 |
@@ -145,7 +145,7 @@ The Reviewer never switches branches. It reads files via `git show` and runs tes
145
145
 
146
146
  ## Package layout
147
147
 
148
- - `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`.
148
+ - `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`. `lib/ui-browser.ts` exports `buildBrowserEscalationFinding` and `applyMaxCombinationsCap` (used by the UI Reviewer prompt for per-engine escalation and combination-count capping).
149
149
  - `skills/` — Claude Code skill markdown files.
150
150
  - `prompts/` — Implementer/Reviewer subagent system prompts.
151
151
  - `examples/toy-repo/` — standalone demo repo.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.2
1
+ 0.5.3
@@ -0,0 +1,74 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Browser escalation
3
+ // ---------------------------------------------------------------------------
4
/**
 * Build an escalation Finding for a missing Playwright browser binary.
 *
 * The finding names the missing engine and includes an install command that
 * is guaranteed to cover that engine:
 *   - webkit / firefox: `npx playwright install webkit firefox` (the command
 *     from the CLV-9 RFC and CLV-10 spike), plus on Linux the hint
 *     `npx playwright install-deps webkit`.
 *   - any other engine (e.g. chromium): `npx playwright install <engine>`,
 *     plus on Linux the hint `npx playwright install-deps <engine>`.
 *     Previously the webkit/firefox command was emitted for every engine, so
 *     a missing chromium produced instructions that did not install it.
 *
 * @param {string} engine The browser engine that is missing.
 * @param {string} [platform=process.platform] The platform string. Pass
 *   "linux" explicitly to include the install-deps hint; all other values are
 *   treated as non-Linux.
 * @returns {{severity: string, rule: string, message: string, metadata: {engine: string, installCommand: string}}}
 *   An `error`-severity finding with rule `browser-missing`.
 */
export function buildBrowserEscalationFinding(engine, platform = process.platform) {
    // Engines that the documented default install command already covers.
    const coveredByDefault = ['webkit', 'firefox'];
    const usesDefaultCommand = coveredByDefault.includes(engine);
    const installCmd = usesDefaultCommand
        ? 'npx playwright install webkit firefox'
        : `npx playwright install ${engine}`;
    // The documented Linux hint targets webkit; other engines get their own deps hint.
    const depsTarget = usesDefaultCommand ? 'webkit' : engine;
    const depsHint = platform === 'linux'
        ? ` On Linux, also run: npx playwright install-deps ${depsTarget}`
        : '';
    return {
        severity: 'error',
        rule: 'browser-missing',
        message: `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
        metadata: { engine, installCommand: installCmd },
    };
}
30
/**
 * Enforce the maxCombinations cap.
 *
 * When `routes.length × viewportCount × browserCount > maxCombinations`,
 * the affected routes are sorted by diff size (most-changed first) and only
 * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
 * are processed. One `warning`-severity finding with rule `ui-review-cap` is
 * emitted per skipped route.
 *
 * When the cap is not exceeded, all routes are returned in their input order.
 * The input array is never mutated.
 *
 * The number of kept routes is clamped to zero: a negative or non-finite
 * budget keeps nothing and skips every route. Without the clamp, a negative
 * count passed to `slice()` would index from the end of the array and keep
 * routes that exceed the cap.
 *
 * @param {Array<{route: string, diffSize: number}>} routes Affected routes with their diff sizes.
 * @param {number} viewportCount Number of viewports configured.
 * @param {number} browserCount Number of browser engines configured.
 * @param {number} maxCombinations Maximum allowed combinations (routes × viewports × browsers).
 * @returns {{routes: string[], skippedFindings: object[]}} `{ routes, skippedFindings }` ready for use by the reviewer.
 */
export function applyMaxCombinationsCap(routes, viewportCount, browserCount, maxCombinations) {
    const totalCombinations = routes.length * viewportCount * browserCount;
    if (totalCombinations <= maxCombinations) {
        return {
            routes: routes.map((r) => r.route),
            skippedFindings: [],
        };
    }
    const perRouteSlots = viewportCount * browserCount;
    const rawMaxRoutes = Math.floor(maxCombinations / perRouteSlots);
    // Clamp so slice() never receives a negative (from-the-end) or non-finite count.
    const maxRoutes = Number.isFinite(rawMaxRoutes) ? Math.max(0, rawMaxRoutes) : 0;
    // Sort a copy most-changed first, then take the first maxRoutes routes.
    const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
    const kept = sorted.slice(0, maxRoutes);
    const skipped = sorted.slice(maxRoutes);
    const skippedFindings = skipped.map((r) => ({
        severity: 'warning',
        rule: 'ui-review-cap',
        message: `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
            `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
        metadata: {
            route: r.route,
            combinationCount: totalCombinations,
            maxCombinations,
        },
    }));
    return {
        routes: kept.map((r) => r.route),
        skippedFindings,
    };
}
@@ -0,0 +1,122 @@
1
+ import type { Finding } from './feedback.js';
2
+ import type { BrowserEngine } from './ui-review-config.js';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Browser escalation
6
+ // ---------------------------------------------------------------------------
7
+
8
/**
 * Build an escalation Finding for a missing Playwright browser binary.
 *
 * The finding names the missing engine and includes an install command that
 * is guaranteed to cover that engine:
 *   - webkit / firefox: `npx playwright install webkit firefox` (the command
 *     from the CLV-9 RFC and CLV-10 spike), plus on Linux the hint
 *     `npx playwright install-deps webkit`.
 *   - any other engine (e.g. chromium): `npx playwright install <engine>`,
 *     plus on Linux the hint `npx playwright install-deps <engine>`.
 *     Previously the webkit/firefox command was emitted for every engine, so
 *     a missing chromium produced instructions that did not install it.
 *
 * @param engine   The browser engine that is missing.
 * @param platform The platform string (defaults to `process.platform`). Pass
 *                 "linux" explicitly to include the install-deps hint; all
 *                 other values are treated as non-Linux.
 * @returns An `error`-severity Finding with rule `browser-missing`; its
 *          `metadata` carries the engine and the exact install command.
 */
export function buildBrowserEscalationFinding(
  engine: BrowserEngine,
  platform: string = process.platform,
): Finding {
  // Engines that the documented default install command already covers.
  const coveredByDefault: readonly string[] = ['webkit', 'firefox'];
  const usesDefaultCommand = coveredByDefault.includes(engine);
  const installCmd = usesDefaultCommand
    ? 'npx playwright install webkit firefox'
    : `npx playwright install ${engine}`;
  // The documented Linux hint targets webkit; other engines get their own deps hint.
  const depsTarget = usesDefaultCommand ? 'webkit' : engine;
  const depsHint =
    platform === 'linux'
      ? ` On Linux, also run: npx playwright install-deps ${depsTarget}`
      : '';
  return {
    severity: 'error',
    rule: 'browser-missing',
    message:
      `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
    metadata: { engine, installCommand: installCmd },
  };
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // maxCombinations cap enforcement
41
+ // ---------------------------------------------------------------------------
42
+
43
+ /**
44
+ * An affected route paired with a diff-size weight; `applyMaxCombinationsCap`
45
+ * sorts on this weight (largest first) when the maxCombinations cap is exceeded.
46
+ */
47
+ export interface RouteWithDiffSize {
48
+ route: string; // Route identifier; copied unchanged into `CapResult.routes` and skipped-route findings.
49
+ /** Number of changed lines (or any monotonic proxy for diff size); larger values are kept first. */
50
+ diffSize: number;
51
+ }
52
+
53
+ /**
54
+ * Result of applying the maxCombinations cap (returned by `applyMaxCombinationsCap`).
55
+ */
56
+ export interface CapResult {
57
+ /** Routes to process: input order when under the cap, otherwise most-changed first and truncated to the cap. */
58
+ routes: string[];
59
+ /**
60
+ * One `warning`-severity Finding per skipped route, with rule
61
+ * `ui-review-cap` and a message containing the route name plus the
62
+ * combination count vs cap. Empty when the cap is not exceeded.
63
+ */
64
+ skippedFindings: Finding[];
65
+ }
66
+
67
/**
 * Enforce the maxCombinations cap.
 *
 * When `routes.length × viewportCount × browserCount > maxCombinations`,
 * the affected routes are sorted by diff size (most-changed first) and only
 * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
 * are processed. One `warning`-severity finding with rule `ui-review-cap` is
 * emitted per skipped route.
 *
 * When the cap is not exceeded, all routes are returned in their input order.
 * The input array is never mutated.
 *
 * The number of kept routes is clamped to zero: a negative or non-finite
 * budget keeps nothing and skips every route. Without the clamp, a negative
 * count passed to `slice()` would index from the end of the array and keep
 * routes that exceed the cap.
 *
 * @param routes          Affected routes with their diff sizes.
 * @param viewportCount   Number of viewports configured.
 * @param browserCount    Number of browser engines configured.
 * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
 * @returns `{ routes, skippedFindings }` ready for use by the reviewer.
 */
export function applyMaxCombinationsCap(
  routes: RouteWithDiffSize[],
  viewportCount: number,
  browserCount: number,
  maxCombinations: number,
): CapResult {
  const totalCombinations = routes.length * viewportCount * browserCount;

  if (totalCombinations <= maxCombinations) {
    return {
      routes: routes.map((r) => r.route),
      skippedFindings: [],
    };
  }

  const perRouteSlots = viewportCount * browserCount;
  const rawMaxRoutes = Math.floor(maxCombinations / perRouteSlots);
  // Clamp so slice() never receives a negative (from-the-end) or non-finite count.
  const maxRoutes = Number.isFinite(rawMaxRoutes) ? Math.max(0, rawMaxRoutes) : 0;

  // Sort a copy most-changed first, then take the first maxRoutes routes.
  const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
  const kept = sorted.slice(0, maxRoutes);
  const skipped = sorted.slice(maxRoutes);

  const skippedFindings: Finding[] = skipped.map((r) => ({
    severity: 'warning',
    rule: 'ui-review-cap',
    message:
      `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
      `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
    metadata: {
      route: r.route,
      combinationCount: totalCombinations,
      maxCombinations,
    },
  }));

  return {
    routes: kept.map((r) => r.route),
    skippedFindings,
  };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cloverleaf/reference-impl",
3
- "version": "0.5.2",
3
+ "version": "0.5.3",
4
4
  "description": "Reference implementation of the Cloverleaf methodology as Claude Code skills. Implements the Tight Loop (Implementer + Reviewer).",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -52,10 +52,23 @@ If `## [Unreleased]` does not exist, create it at the top of the CHANGELOG (righ
52
52
 
53
53
  ## Commit discipline
54
54
 
55
- - One commit per file touched.
55
+ - **Before committing, run `git status --porcelain` in the worktree and stage every modified doc file.** Do NOT hardcode a single path into `git add`; the subagent has historically forgotten README.md and committed only CHANGELOG.md when it edited both. The reliable pattern:
56
+
57
+ ```bash
58
+ cd <temp>
59
+ git status --porcelain
60
+ # For each modified doc file listed, stage it explicitly:
61
+ git add <package>/CHANGELOG.md <package>/README.md <package>/docs/*.md # include all that were edited
62
+ git commit -m "docs(<scope>): <short>"
63
+ ```
64
+
65
+ Equivalently, if you are certain only doc files are modified (you never touched source code), `git add -A` is acceptable — it's the hardcoded-single-path pattern that must be avoided.
66
+
67
+ - One commit per Documenter run, covering every doc file edited in that run. (If you need multiple scopes — e.g., both `standard/CHANGELOG.md` and `reference-impl/CHANGELOG.md` — make one commit per scope, but each commit still stages every edited file within that scope.)
56
68
  - Commit message: `docs(<scope>): <short>` where `<scope>` is the package name (`standard`, `reference-impl`, `site`, or `repo` for root-level).
57
69
  - All commits land on `{{branch}}` (the feature branch).
58
70
  - After all commits land, run `git worktree remove --force <temp>` to clean up.
71
+ - **Self-check before returning**: `git status --porcelain` in the worktree must be empty. If it's not, you have uncommitted doc edits — stage and commit them, or revert them, before reporting back.
59
72
 
60
73
  ## Output
61
74
 
@@ -1,6 +1,6 @@
1
1
  # UI Reviewer Agent
2
2
 
3
- You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports for accessibility violations (axe-core) and visual regressions (pixelmatch) using a headless Playwright chromium browser. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
3
+ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports and browser engines for accessibility violations (axe-core) and visual regressions (pixelmatch) using headless Playwright browsers. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
4
4
 
5
5
  ## Input
6
6
 
@@ -11,7 +11,7 @@ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at mult
11
11
  - **Diff from base**: {{diff}}
12
12
  - **Preview port**: {{preview_port}} (an already-allocated free local port; use it for the dev server)
13
13
  - **Affected routes**: {{affected_routes}} — either a JSON array of route paths (e.g., `["/faq/"]`), or the string `"all"`, or `[]`
14
- - **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (viewports, visualDiff, axe) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
14
+ - **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (browsers, viewports, visualDiff, axe, maxCombinations) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
15
15
 
16
16
  ## Paths
17
17
 
@@ -24,22 +24,46 @@ You operate in two filesystem locations — keep them straight:
24
24
 
25
25
  The rationale: baselines on `{{repo_root}}/.cloverleaf/baselines/` get picked up by subsequent `git add` + `git commit` steps in the UI Reviewer, which run on the feature branch. The merge skill (v0.4.1+) then merges those commits to main via `git merge --no-ff`. Writing to the worktree's `.cloverleaf/` would strand the files and `git worktree remove --force` would discard them on teardown.
26
26
 
27
- ## Scope (v0.4)
27
+ ## Scope (v0.5)
28
28
 
29
- - **Accessibility (axe-core):** run at the viewports listed in `{{ui_review_config}}.axe.viewports`.
29
+ - **Browsers**: the reviewer runs separate Playwright sessions for each engine listed in `{{ui_review_config}}.browsers` (e.g., `["chromium", "webkit", "firefox"]`). Browser is the **outermost** loop, wrapping the viewport × route loops.
30
+ - **Accessibility (axe-core):** run only for the engine specified by `{{ui_review_config}}.axe.browser` (default: `"chromium"`). webkit and firefox browser passes produce **no axe output and no axe findings** — this is intentional, to avoid engine-specific false positives from getComputedStyle, aria-required-children, and scrollable-region-focusable divergence across Blink, WebKit, and Gecko (see CLV-12 spike).
30
31
  Apply the allowlist in `{{ui_review_config}}.axe.ignored` to drop pre-existing violations that the consumer has accepted (e.g., a11y debt being tracked separately).
31
32
  Dedupe findings across viewports by the `{{ui_review_config}}.axe.dedupeBy` composite key (default `["ruleId", "target"]`).
32
33
  Emit one finding per (ruleId, target) pair, with a `metadata.viewports` array aggregating the viewports where the violation was detected.
33
- - **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports`, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
34
+ - **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports` for **each browser**, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
34
35
  - Visual diffs are **informational**, never gating. A diff does not fail the review — it surfaces to the human final-gate reviewer.
35
36
  - Route empty-set / "all" handling preserves v0.3 behavior:
36
37
  - `{{affected_routes}}` is `[]` → `verdict: "pass"`, summary `"No renderable routes affected, skipping axe."`, do NOT start the preview server.
37
- - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback).
38
+ - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback behavior).
38
39
  - otherwise → visit exactly the URLs listed.
39
40
 
41
+ ## maxCombinations cap
42
+
43
+ Before starting any browser session, compute total combinations = `routes × viewports × browsers`.
44
+
45
+ If the product exceeds `{{ui_review_config}}.maxCombinations` (default 90):
46
+ 1. Sort affected routes by diff size (most-changed first — use the character count of each route's section in `{{diff}}` as a proxy for diff size).
47
+ 2. Keep only the first `floor(maxCombinations / (viewportCount × browserCount))` routes.
48
+ 3. For each skipped route emit one `severity: "warning"` finding with `rule: "ui-review-cap"` and message:
49
+ `"Route {route} skipped: combination count {total} exceeds maxCombinations ({cap}); review manually or raise the cap."`
50
+ Include `metadata: { route, combinationCount: total, maxCombinations: cap }`.
51
+
52
+ The cap enforcement helper is available in `lib/ui-browser.ts` as `applyMaxCombinationsCap`.
53
+
40
54
  ## Playwright cache
41
55
 
42
- The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. If the browser is missing, return `verdict: "escalate"` with a synthetic finding: `"Playwright chromium not installed. Run 'npx playwright install chromium' on this machine."`
56
+ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. Before launching each browser session, verify that the required engine binary exists in `PLAYWRIGHT_BROWSERS_PATH`. If a browser binary is absent, return `verdict: "escalate"` with a synthetic finding per missing engine:
57
+
58
+ ```
59
+ "Playwright {engine} not installed. Run 'npx playwright install webkit firefox' on this machine."
60
+ ```
61
+
62
+ On Linux, append: `" On Linux, also run: npx playwright install-deps webkit"`
63
+
64
+ The escalation helper is available in `lib/ui-browser.ts` as `buildBrowserEscalationFinding`.
65
+
66
+ Do not attempt to launch a missing engine — fail fast with `verdict: "escalate"` listing all missing engines before any browser session is started.
43
67
 
44
68
  ## Runtime procedure
45
69
 
@@ -49,6 +73,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
49
73
  ```bash
50
74
  TMPDIR=$(mktemp -d)
51
75
  git worktree add "$TMPDIR" {{branch}}
76
+ npx cloverleaf-cli prep-worktree {{repo_root}} "$TMPDIR"
52
77
  ```
53
78
 
54
79
  3. For this repo, UI lives in `site/` (or another directory if ui-paths.json scopes it elsewhere). Install dependencies and start the dev server:
@@ -66,53 +91,72 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
66
91
  2. Otherwise, attempt to locate and parse an astro config file (common locations: `site/astro.config.mjs`, `astro.config.mjs` at repo root, `apps/web/astro.config.mjs`). Best-effort fallback.
67
92
  3. If both fail, treat base as empty string.
68
93
 
69
- 6. **Visual-diff pass (when `visualDiff.enabled` is true):**
70
- For each route in `{{affected_routes}}` (or the crawl set) × each viewport in `{{ui_review_config}}.viewports`:
71
- - Set Playwright viewport to `{ width, height }` from the config.
72
- - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
73
- - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
74
- - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
75
- - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
76
- - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
77
- - Call `compareVisual` (from `lib/visual-diff.ts`) with:
78
- - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
79
- - `candidateBuf = <candidate PNG>`
80
- - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
81
- - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
82
- - `threshold = visualDiff.threshold`
83
- - `maxDiffRatio = visualDiff.maxDiffRatio`
84
- - Map result to a finding:
85
- - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport}"`, `metadata: { route, viewport, status: "new-baseline" }`. No attachments.
86
- - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport}; regenerated"`, `metadata: { route, viewport, status: "dimension-mismatch" }`.
87
- - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
88
- - `match` → no finding emitted.
89
-
90
- 7. **Axe pass:**
91
- For each viewport in `{{ui_review_config}}.axe.viewports`:
92
- - Set Playwright viewport to `{ width, height }`.
93
- - For each route in `{{affected_routes}}` (or crawl set):
94
- - Navigate.
95
- - Inject and run axe-core:
96
- ```javascript
97
- import axe from 'axe-core';
98
- const results = await axe.run(document);
99
- ```
100
- - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
101
-
102
- 8. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
103
-
104
- 9. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
105
- - axe `impact: "critical"` → `severity: "blocker"`
106
- - axe `impact: "serious"` → `severity: "error"`
107
- - axe `impact: "moderate"` → `severity: "warning"`
108
- - axe `impact: "minor"` → `severity: "info"`
109
-
110
- 10. Compute verdict (visual-diff findings are **never** considered for gating):
111
- - `pass` — zero non-visual-diff findings with severity `blocker` or `error`
112
- - `bounce` — ≥1 non-visual-diff finding with severity `blocker` or `error`
113
- - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR Playwright chromium missing.
114
-
115
- 11. Teardown:
94
+ 6. **Apply maxCombinations cap** (when `affected_routes` is a list, not `"all"`):
95
+ - Compute `routes × viewports × browsers`. Use diff line counts as proxy for route diff size.
96
+ - Call `applyMaxCombinationsCap` from `lib/ui-browser.ts`.
97
+ - The `skippedFindings` are collected now and included in the final output.
98
+ - Use only the returned `routes` list for the browser passes below.
99
+
100
+ 7. **Verify browser binaries** before starting any browser session:
101
+ - Check each engine in `{{ui_review_config}}.browsers` against `PLAYWRIGHT_BROWSERS_PATH`.
102
+ - Collect all missing engines.
103
+ - If any engine is missing, call `buildBrowserEscalationFinding(engine, process.platform)` for each, tear down the worktree (step 12), and return `verdict: "escalate"` with those findings.
104
+
105
+ 8. **Per-browser outer loop** — for each `browser` in `{{ui_review_config}}.browsers`:
106
+
107
+ a. Launch a Playwright browser context using the `browser` engine.
108
+
109
+ b. **Visual-diff pass (when `visualDiff.enabled` is true):**
110
+ For each route in the (capped) route list × each viewport in `{{ui_review_config}}.viewports`:
111
+ - Set Playwright viewport to `{ width, height }` from the config.
112
+ - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
113
+ - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
114
+ - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
115
+ - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
116
+ - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
117
+ - Call `compareVisual` (from `lib/visual-diff.ts`) with:
118
+ - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
119
+ - `candidateBuf = <candidate PNG>`
120
+ - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
121
+ - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
122
+ - `threshold = visualDiff.threshold`
123
+ - `maxDiffRatio = visualDiff.maxDiffRatio`
124
+ - Map result to a finding:
125
+ - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport} [{browser}]"`, `metadata: { route, viewport, browser, status: "new-baseline" }`. No attachments.
126
+ - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport} [{browser}]; regenerated"`, `metadata: { route, viewport, browser, status: "dimension-mismatch" }`.
127
+ - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} [{browser}] — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, browser, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
128
+ - `match` → no finding emitted.
129
+
130
+ c. **Axe pass (only when `browser === {{ui_review_config}}.axe.browser`):**
131
+ Skip this section entirely if the current browser is NOT the configured `axe.browser`. webkit and firefox runs produce no axe output and no axe findings.
132
+
133
+ For each viewport in `{{ui_review_config}}.axe.viewports`:
134
+ - Set Playwright viewport to `{ width, height }`.
135
+ - For each route in the (capped) route list:
136
+ - Navigate.
137
+ - Inject and run axe-core:
138
+ ```javascript
139
+ import axe from 'axe-core';
140
+ const results = await axe.run(document);
141
+ ```
142
+ - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
143
+
144
+ d. Close the browser context before launching the next engine.
145
+
146
+ 9. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
147
+
148
+ 10. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
149
+ - axe `impact: "critical"` → `severity: "blocker"`
150
+ - axe `impact: "serious"` → `severity: "error"`
151
+ - axe `impact: "moderate"` → `severity: "warning"`
152
+ - axe `impact: "minor"` → `severity: "info"`
153
+
154
+ 11. Compute verdict (visual-diff and ui-review-cap findings are **never** considered for gating):
155
+ - `pass` — zero non-visual-diff, non-cap findings with severity `blocker` or `error`
156
+ - `bounce` — ≥1 non-visual-diff, non-cap finding with severity `blocker` or `error`
157
+ - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR any required browser binary was absent.
158
+
159
+ 12. Teardown:
116
160
  ```bash
117
161
  kill $SERVER_PID 2>/dev/null || true
118
162
  cd {{repo_root}}
@@ -132,7 +176,7 @@ Respond with exactly one JSON object and nothing else. Finding shape must match
132
176
  - required: `severity`, `message`
133
177
  - optional: `rule`, `suggestion`, `location`, `attachments`, `metadata`
134
178
 
135
- For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely.
179
+ For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely. For `location`, use an object shape when present — do not emit `location` as a URL string.
136
180
 
137
181
  ```json
138
182
  {
@@ -141,9 +185,9 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
141
185
  "findings": [
142
186
  {
143
187
  "severity": "blocker" | "error" | "warning" | "info",
144
- "rule": "a11y.<rule-id>" | "visual-diff",
145
- "message": "<description; include the page URL for a11y, route+viewport+diff for visual-diff>",
146
- "metadata": { /* per §78 above */ },
188
+ "rule": "a11y.<rule-id>" | "visual-diff" | "ui-review-cap" | "browser-missing",
189
+ "message": "<description; include the page URL for a11y, route+viewport+browser for visual-diff>",
190
+ "metadata": { /* per steps 8–9 of the runtime procedure above */ },
147
191
  "attachments": [ /* for visual-diff with status="diff" */
148
192
  { "label": "baseline", "path": ".cloverleaf/baselines/{browser}/{slug}-{viewport}.png" },
149
193
  { "label": "candidate", "path": ".cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png" },
@@ -154,4 +198,4 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
154
198
  }
155
199
  ```
156
200
 
157
- If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings. If verdict is `escalate`, include a finding explaining what went wrong.
201
+ If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings (including `ui-review-cap` warnings and visual-diff info). If verdict is `escalate`, include a finding explaining what went wrong.