npm - @cloverleaf/reference-impl - Versions diffs - 0.5.2 → 0.5.4 - Mend

@cloverleaf/reference-impl 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/.claude-plugin/plugin.json +1 -1
package/README.md +3 -2
package/VERSION +1 -1
package/dist/cli.mjs +21 -0
package/dist/index.mjs +1 -0
package/dist/paths.mjs +6 -0
package/dist/ui-browser.mjs +74 -0
package/dist/ui-review-state.mjs +40 -0
package/lib/cli.ts +22 -0
package/lib/index.ts +1 -0
package/lib/paths.ts +8 -0
package/lib/ui-browser.ts +122 -0
package/lib/ui-review-state.ts +52 -0
package/package.json +1 -1
package/prompts/documenter.md +14 -1
package/prompts/ui-reviewer.md +118 -60
package/skills/cloverleaf-approve-baselines/SKILL.md +85 -0
package/skills/cloverleaf-ui-review/SKILL.md +28 -6

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "cloverleaf",
   "description": "Cloverleaf reference implementation — Claude Code skills for task scaffolding and the Delivery pipeline (implementer, documenter, reviewer, UI reviewer with multi-viewport visual diff, QA, merge).",
-  "version": "0.5.2",
+  "version": "0.5.4",
   "author": {
     "name": "Renato D'Arrigo",
     "email": "renato.darrigo@gmail.com"

package/README.md CHANGED Viewed

@@ -29,7 +29,7 @@ v0.2 implements both paths of the Delivery track:
 | Implementer | Real | Subagent, code + tests on feature branch |
 | Documenter | Real (v0.2) | Subagent, doc-only commits per file-path rules |
 | Reviewer | Real | Subagent, read-only review of diff |
-| UI Reviewer | Real (v0.3) | Playwright + axe-core, diff-scoped to affected routes, single viewport, a11y only |
+| UI Reviewer | Real (v0.5) | Playwright + axe-core + pixelmatch; multi-browser outer loop (chromium/webkit/firefox); axe-core runs on `axe.browser` engine only (default chromium); maxCombinations cap with per-route warnings |
 | QA | Real (v0.2) | Per-package test runner via `git worktree` |
 | Plan | Stub | Deferred to v0.3 |
 | Researcher | Stub | Deferred to v0.3 |
@@ -41,6 +41,7 @@ v0.2 implements both paths of the Delivery track:
 - `/cloverleaf-document` — run Documenter *(new in v0.2)*
 - `/cloverleaf-review` — run Reviewer
 - `/cloverleaf-ui-review` — run UI Reviewer *(new in v0.2)*
+- `/cloverleaf-approve-baselines` — human baseline-approval gate; clears `baselines_pending` and advances `ui-review → qa` *(new in CLV-19)*
 - `/cloverleaf-qa` — run QA *(new in v0.2)*
 - `/cloverleaf-merge` — human gate (branches on state)
 - `/cloverleaf-run` — orchestrator (dispatches by `risk_class`)
@@ -145,7 +146,7 @@ The Reviewer never switches branches. It reads files via `git show` and runs tes
 ## Package layout
-- `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`.
+- `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`. `lib/ui-browser.ts` exports `buildBrowserEscalationFinding` and `applyMaxCombinationsCap` (used by the UI Reviewer prompt for per-engine escalation and combination-count capping). `lib/ui-review-state.ts` exports `readUiReviewState`, `writeUiReviewState`, and `uiReviewStatePath` — the baseline-approval sidecar API for `.cloverleaf/runs/{taskId}/ui-review/state.json`.
 - `skills/` — Claude Code skill markdown files.
 - `prompts/` — Implementer/Reviewer subagent system prompts.
 - `examples/toy-repo/` — standalone demo repo.

package/VERSION CHANGED Viewed

@@ -1 +1 @@
-0.5.2
+0.5.4

package/dist/cli.mjs CHANGED Viewed

@@ -13,6 +13,8 @@
  *   latest-feedback <repoRoot> <taskId>
  *   emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]
  *   ui-review-config --repo-root <repoRoot>
+ *   read-ui-review-state <repoRoot> <taskId>
+ *   write-ui-review-state <repoRoot> <taskId> <baselines_pending>
  *   plugin-root
  *   load-rfc <repoRoot> <id>
  *   save-rfc <repoRoot> <filePath>
@@ -46,6 +48,7 @@ import { loadSpike, saveSpike, advanceSpikeStatus } from './spike.mjs';
 import { loadPlan, savePlan, advancePlanStatus, materialiseTasksFromPlan } from './plan.mjs';
 import { loadDiscoveryConfig } from './discovery-config.mjs';
 import { prepWorktree } from './prep-worktree.mjs';
+import { readUiReviewState, writeUiReviewState } from './ui-review-state.mjs';
 function die(msg, code = 1) {
     process.stderr.write(msg + '\n');
     process.exit(code);
@@ -63,6 +66,8 @@ function usage(msg) {
         '  latest-feedback <repoRoot> <taskId>\n' +
         '  emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]\n' +
         '  ui-review-config --repo-root <repoRoot>\n' +
+        '  read-ui-review-state <repoRoot> <taskId>\n' +
+        '  write-ui-review-state <repoRoot> <taskId> <baselines_pending>\n' +
         '  plugin-root\n' +
         '  load-rfc <repoRoot> <id>\n' +
         '  save-rfc <repoRoot> <filePath>\n' +
@@ -269,6 +274,22 @@ try {
             process.stdout.write(JSON.stringify(config, null, 2));
             process.exit(0);
         }
+        case 'read-ui-review-state': {
+            const [repoRoot, taskId] = rest;
+            if (!repoRoot || !taskId)
+                usage('read-ui-review-state requires <repoRoot> <taskId>');
+            const state = readUiReviewState(repoRoot, taskId);
+            process.stdout.write(JSON.stringify(state, null, 2) + '\n');
+            break;
+        }
+        case 'write-ui-review-state': {
+            const [repoRoot, taskId, pendingArg] = rest;
+            if (!repoRoot || !taskId || pendingArg === undefined)
+                usage('write-ui-review-state requires <repoRoot> <taskId> <baselines_pending>');
+            const baselines_pending = pendingArg === 'true' || pendingArg === '1';
+            writeUiReviewState(repoRoot, taskId, { baselines_pending });
+            break;
+        }
         case 'plugin-root': {
             process.stdout.write(getPluginRoot());
             process.exit(0);

package/dist/index.mjs CHANGED Viewed

@@ -4,3 +4,4 @@ export * from './task.mjs';
 export * from './events.mjs';
 export * from './feedback.mjs';
 export * from './validate.mjs';
+export * from './ui-review-state.mjs';

package/dist/paths.mjs CHANGED Viewed

@@ -24,3 +24,9 @@ export function spikesDir(repoRoot) {
 export function plansDir(repoRoot) {
     return resolve(cloverleafDir(repoRoot), 'plans');
 }
+export function runsDir(repoRoot) {
+    return resolve(cloverleafDir(repoRoot), 'runs');
+}
+export function uiReviewRunDir(repoRoot, taskId) {
+    return resolve(runsDir(repoRoot), taskId, 'ui-review');
+}

package/dist/ui-browser.mjs ADDED Viewed

@@ -0,0 +1,74 @@
+// ---------------------------------------------------------------------------
+// Browser escalation
+// ---------------------------------------------------------------------------
+/**
+ * Build an escalation Finding for a missing Playwright browser binary.
+ *
+ * The finding names the missing engine and includes the install command per
+ * the CLV-9 RFC and CLV-10 spike:
+ *   - All platforms:  `npx playwright install webkit firefox`
+ *   - Linux only:     `npx playwright install-deps webkit`
+ *
+ * @param engine   The browser engine that is missing.
+ * @param platform The platform string (defaults to `process.platform`). Pass
+ *                 "linux" explicitly to include the install-deps hint; all
+ *                 other values are treated as non-Linux.
+ */
+export function buildBrowserEscalationFinding(engine, platform = process.platform) {
+    const isLinux = platform === 'linux';
+    const installCmd = 'npx playwright install webkit firefox';
+    const depsHint = isLinux
+        ? ` On Linux, also run: npx playwright install-deps webkit`
+        : '';
+    return {
+        severity: 'error',
+        rule: 'browser-missing',
+        message: `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
+        metadata: { engine, installCommand: installCmd },
+    };
+}
+/**
+ * Enforce the maxCombinations cap.
+ *
+ * When `routes.length × viewportCount × browserCount > maxCombinations`,
+ * the affected routes are sorted by diff size (most-changed first) and only
+ * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
+ * are processed. One `warning`-severity finding with rule `ui-review-cap` is
+ * emitted per skipped route.
+ *
+ * @param routes          Affected routes with their diff sizes.
+ * @param viewportCount   Number of viewports configured.
+ * @param browserCount    Number of browser engines configured.
+ * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
+ * @returns               `{ routes, skippedFindings }` ready for use by the reviewer.
+ */
+export function applyMaxCombinationsCap(routes, viewportCount, browserCount, maxCombinations) {
+    const totalCombinations = routes.length * viewportCount * browserCount;
+    if (totalCombinations <= maxCombinations) {
+        return {
+            routes: routes.map((r) => r.route),
+            skippedFindings: [],
+        };
+    }
+    const perRouteSlots = viewportCount * browserCount;
+    const maxRoutes = Math.floor(maxCombinations / perRouteSlots);
+    // Sort most-changed first, then take first maxRoutes routes.
+    const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
+    const kept = sorted.slice(0, maxRoutes);
+    const skipped = sorted.slice(maxRoutes);
+    const skippedFindings = skipped.map((r) => ({
+        severity: 'warning',
+        rule: 'ui-review-cap',
+        message: `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
+            `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
+        metadata: {
+            route: r.route,
+            combinationCount: totalCombinations,
+            maxCombinations,
+        },
+    }));
+    return {
+        routes: kept.map((r) => r.route),
+        skippedFindings,
+    };
+}

package/dist/ui-review-state.mjs ADDED Viewed

@@ -0,0 +1,40 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { uiReviewRunDir } from './paths.mjs';
+const STATE_FILENAME = 'state.json';
+/**
+ * Returns the canonical path for the ui-review sidecar state file:
+ *   .cloverleaf/runs/{taskId}/ui-review/state.json
+ */
+export function uiReviewStatePath(repoRoot, taskId) {
+    return join(uiReviewRunDir(repoRoot, taskId), STATE_FILENAME);
+}
+/**
+ * Reads the ui-review state sidecar from disk.
+ *
+ * Returns `{ baselines_pending: false }` when the file is absent — the
+ * absence of the file is treated as "no pending baselines", which lets the
+ * ui-review → qa transition proceed normally.
+ */
+export function readUiReviewState(repoRoot, taskId) {
+    const path = uiReviewStatePath(repoRoot, taskId);
+    if (!existsSync(path)) {
+        return { baselines_pending: false };
+    }
+    const raw = JSON.parse(readFileSync(path, 'utf-8'));
+    return { baselines_pending: Boolean(raw.baselines_pending) };
+}
+/**
+ * Writes the ui-review state sidecar to disk, creating intermediate directories
+ * as needed.
+ *
+ * @param repoRoot  Absolute path to the repository root.
+ * @param taskId    Task identifier (e.g. "CLV-42").
+ * @param state     The state to persist.
+ */
+export function writeUiReviewState(repoRoot, taskId, state) {
+    const dir = uiReviewRunDir(repoRoot, taskId);
+    mkdirSync(dir, { recursive: true });
+    const path = join(dir, STATE_FILENAME);
+    writeFileSync(path, JSON.stringify(state, null, 2) + '\n');
+}

package/lib/cli.ts CHANGED Viewed

@@ -13,6 +13,8 @@
  *   latest-feedback <repoRoot> <taskId>
  *   emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]
  *   ui-review-config --repo-root <repoRoot>
+ *   read-ui-review-state <repoRoot> <taskId>
+ *   write-ui-review-state <repoRoot> <taskId> <baselines_pending>
  *   plugin-root
  *   load-rfc <repoRoot> <id>
  *   save-rfc <repoRoot> <filePath>
@@ -48,6 +50,7 @@ import { loadSpike, saveSpike, advanceSpikeStatus, type SpikeDoc } from './spike
 import { loadPlan, savePlan, advancePlanStatus, materialiseTasksFromPlan, type PlanDoc } from './plan.js';
 import { loadDiscoveryConfig } from './discovery-config.js';
 import { prepWorktree } from './prep-worktree.js';
+import { readUiReviewState, writeUiReviewState } from './ui-review-state.js';
 function die(msg: string, code = 1): never {
   process.stderr.write(msg + '\n');
@@ -67,6 +70,8 @@ function usage(msg?: string): never {
       '  latest-feedback <repoRoot> <taskId>\n' +
       '  emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]\n' +
       '  ui-review-config --repo-root <repoRoot>\n' +
+      '  read-ui-review-state <repoRoot> <taskId>\n' +
+      '  write-ui-review-state <repoRoot> <taskId> <baselines_pending>\n' +
       '  plugin-root\n' +
       '  load-rfc <repoRoot> <id>\n' +
       '  save-rfc <repoRoot> <filePath>\n' +
@@ -278,6 +283,23 @@ try {
       process.exit(0);
     }
+    case 'read-ui-review-state': {
+      const [repoRoot, taskId] = rest;
+      if (!repoRoot || !taskId) usage('read-ui-review-state requires <repoRoot> <taskId>');
+      const state = readUiReviewState(repoRoot, taskId);
+      process.stdout.write(JSON.stringify(state, null, 2) + '\n');
+      break;
+    }
+    case 'write-ui-review-state': {
+      const [repoRoot, taskId, pendingArg] = rest;
+      if (!repoRoot || !taskId || pendingArg === undefined)
+        usage('write-ui-review-state requires <repoRoot> <taskId> <baselines_pending>');
+      const baselines_pending = pendingArg === 'true' || pendingArg === '1';
+      writeUiReviewState(repoRoot, taskId, { baselines_pending });
+      break;
+    }
     case 'plugin-root': {
       process.stdout.write(getPluginRoot());
       process.exit(0);

package/lib/index.ts CHANGED Viewed

@@ -4,3 +4,4 @@ export * from './task.js';
 export * from './events.js';
 export * from './feedback.js';
 export * from './validate.js';
+export * from './ui-review-state.js';

package/lib/paths.ts CHANGED Viewed

@@ -33,3 +33,11 @@ export function spikesDir(repoRoot: string): string {
 export function plansDir(repoRoot: string): string {
   return resolve(cloverleafDir(repoRoot), 'plans');
 }
+export function runsDir(repoRoot: string): string {
+  return resolve(cloverleafDir(repoRoot), 'runs');
+}
+export function uiReviewRunDir(repoRoot: string, taskId: string): string {
+  return resolve(runsDir(repoRoot), taskId, 'ui-review');
+}

package/lib/ui-browser.ts ADDED Viewed

@@ -0,0 +1,122 @@
+import type { Finding } from './feedback.js';
+import type { BrowserEngine } from './ui-review-config.js';
+// ---------------------------------------------------------------------------
+// Browser escalation
+// ---------------------------------------------------------------------------
+/**
+ * Build an escalation Finding for a missing Playwright browser binary.
+ *
+ * The finding names the missing engine and includes the install command per
+ * the CLV-9 RFC and CLV-10 spike:
+ *   - All platforms:  `npx playwright install webkit firefox`
+ *   - Linux only:     `npx playwright install-deps webkit`
+ *
+ * @param engine   The browser engine that is missing.
+ * @param platform The platform string (defaults to `process.platform`). Pass
+ *                 "linux" explicitly to include the install-deps hint; all
+ *                 other values are treated as non-Linux.
+ */
+export function buildBrowserEscalationFinding(
+  engine: BrowserEngine,
+  platform: string = process.platform,
+): Finding {
+  const isLinux = platform === 'linux';
+  const installCmd = 'npx playwright install webkit firefox';
+  const depsHint = isLinux
+    ? ` On Linux, also run: npx playwright install-deps webkit`
+    : '';
+  return {
+    severity: 'error',
+    rule: 'browser-missing',
+    message:
+      `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
+    metadata: { engine, installCommand: installCmd },
+  };
+}
+// ---------------------------------------------------------------------------
+// maxCombinations cap enforcement
+// ---------------------------------------------------------------------------
+/**
+ * Represents an affected route with a diff-size weight used for sorting
+ * when maxCombinations cap is applied.
+ */
+export interface RouteWithDiffSize {
+  route: string;
+  /** Number of changed lines (or any monotonic proxy for diff size). */
+  diffSize: number;
+}
+/**
+ * Result of applying the maxCombinations cap.
+ */
+export interface CapResult {
+  /** Routes that should be processed (up to the cap). */
+  routes: string[];
+  /**
+   * One `warning`-severity Finding per skipped route, with rule
+   * `ui-review-cap` and a message containing the route name plus the
+   * combination count vs cap.
+   */
+  skippedFindings: Finding[];
+}
+/**
+ * Enforce the maxCombinations cap.
+ *
+ * When `routes.length × viewportCount × browserCount > maxCombinations`,
+ * the affected routes are sorted by diff size (most-changed first) and only
+ * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
+ * are processed. One `warning`-severity finding with rule `ui-review-cap` is
+ * emitted per skipped route.
+ *
+ * @param routes          Affected routes with their diff sizes.
+ * @param viewportCount   Number of viewports configured.
+ * @param browserCount    Number of browser engines configured.
+ * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
+ * @returns               `{ routes, skippedFindings }` ready for use by the reviewer.
+ */
+export function applyMaxCombinationsCap(
+  routes: RouteWithDiffSize[],
+  viewportCount: number,
+  browserCount: number,
+  maxCombinations: number,
+): CapResult {
+  const totalCombinations = routes.length * viewportCount * browserCount;
+  if (totalCombinations <= maxCombinations) {
+    return {
+      routes: routes.map((r) => r.route),
+      skippedFindings: [],
+    };
+  }
+  const perRouteSlots = viewportCount * browserCount;
+  const maxRoutes = Math.floor(maxCombinations / perRouteSlots);
+  // Sort most-changed first, then take first maxRoutes routes.
+  const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
+  const kept = sorted.slice(0, maxRoutes);
+  const skipped = sorted.slice(maxRoutes);
+  const skippedFindings: Finding[] = skipped.map((r) => ({
+    severity: 'warning',
+    rule: 'ui-review-cap',
+    message:
+      `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
+      `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
+    metadata: {
+      route: r.route,
+      combinationCount: totalCombinations,
+      maxCombinations,
+    },
+  }));
+  return {
+    routes: kept.map((r) => r.route),
+    skippedFindings,
+  };
+}

package/lib/ui-review-state.ts ADDED Viewed

@@ -0,0 +1,52 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { uiReviewRunDir } from './paths.js';
+export interface UiReviewState {
+  baselines_pending: boolean;
+}
+const STATE_FILENAME = 'state.json';
+/**
+ * Returns the canonical path for the ui-review sidecar state file:
+ *   .cloverleaf/runs/{taskId}/ui-review/state.json
+ */
+export function uiReviewStatePath(repoRoot: string, taskId: string): string {
+  return join(uiReviewRunDir(repoRoot, taskId), STATE_FILENAME);
+}
+/**
+ * Reads the ui-review state sidecar from disk.
+ *
+ * Returns `{ baselines_pending: false }` when the file is absent — the
+ * absence of the file is treated as "no pending baselines", which lets the
+ * ui-review → qa transition proceed normally.
+ */
+export function readUiReviewState(repoRoot: string, taskId: string): UiReviewState {
+  const path = uiReviewStatePath(repoRoot, taskId);
+  if (!existsSync(path)) {
+    return { baselines_pending: false };
+  }
+  const raw = JSON.parse(readFileSync(path, 'utf-8')) as UiReviewState;
+  return { baselines_pending: Boolean(raw.baselines_pending) };
+}
+/**
+ * Writes the ui-review state sidecar to disk, creating intermediate directories
+ * as needed.
+ *
+ * @param repoRoot  Absolute path to the repository root.
+ * @param taskId    Task identifier (e.g. "CLV-42").
+ * @param state     The state to persist.
+ */
+export function writeUiReviewState(
+  repoRoot: string,
+  taskId: string,
+  state: UiReviewState,
+): void {
+  const dir = uiReviewRunDir(repoRoot, taskId);
+  mkdirSync(dir, { recursive: true });
+  const path = join(dir, STATE_FILENAME);
+  writeFileSync(path, JSON.stringify(state, null, 2) + '\n');
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cloverleaf/reference-impl",
-  "version": "0.5.2",
+  "version": "0.5.4",
   "description": "Reference implementation of the Cloverleaf methodology as Claude Code skills. Implements the Tight Loop (Implementer + Reviewer).",
   "type": "module",
   "license": "MIT",

package/prompts/documenter.md CHANGED Viewed

@@ -52,10 +52,23 @@ If `## [Unreleased]` does not exist, create it at the top of the CHANGELOG (righ
 ## Commit discipline
-- One commit per file touched.
+- **Before committing, run `git status --porcelain` in the worktree and stage every modified doc file.** Do NOT hardcode a single path into `git add`; the subagent has historically forgotten README.md and committed only CHANGELOG.md when it edited both. The reliable pattern:
+  ```bash
+  cd <temp>
+  git status --porcelain
+  # For each modified doc file listed, stage it explicitly:
+  git add <package>/CHANGELOG.md <package>/README.md <package>/docs/*.md  # include all that were edited
+  git commit -m "docs(<scope>): <short>"
+  ```
+  Alternatively, if you are certain that only doc files are modified (you never touched source code), `git add -A` is acceptable — it is the hardcoded-single-path pattern that must be avoided.
+- One commit per Documenter run, covering every doc file edited in that run. (If you need multiple scopes — e.g., both `standard/CHANGELOG.md` and `reference-impl/CHANGELOG.md` — make one commit per scope, but each commit still stages every edited file within that scope.)
 - Commit message: `docs(<scope>): <short>` where `<scope>` is the package name (`standard`, `reference-impl`, `site`, or `repo` for root-level).
 - All commits land on `{{branch}}` (the feature branch).
 - After all commits land, run `git worktree remove --force <temp>` to clean up.
+- **Self-check before returning**: `git status --porcelain` in the worktree must be empty. If it's not, you have uncommitted doc edits — stage and commit them, or revert them, before reporting back.
 ## Output

package/prompts/ui-reviewer.md CHANGED Viewed

@@ -1,6 +1,6 @@
 # UI Reviewer Agent
-You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports for accessibility violations (axe-core) and visual regressions (pixelmatch) using a headless Playwright chromium browser. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
+You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports and browser engines for accessibility violations (axe-core) and visual regressions (pixelmatch) using headless Playwright browsers. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
 ## Input
@@ -11,7 +11,7 @@ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at mult
 - **Diff from base**: {{diff}}
 - **Preview port**: {{preview_port}} (an already-allocated free local port; use it for the dev server)
 - **Affected routes**: {{affected_routes}} — either a JSON array of route paths (e.g., `["/faq/"]`), or the string `"all"`, or `[]`
-- **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (viewports, visualDiff, axe) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
+- **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (browsers, viewports, visualDiff, axe, maxCombinations) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
 ## Paths
@@ -24,22 +24,46 @@ You operate in two filesystem locations — keep them straight:
 The rationale: baselines on `{{repo_root}}/.cloverleaf/baselines/` get picked up by subsequent `git add` + `git commit` steps in the UI Reviewer, which run on the feature branch. The merge skill (v0.4.1+) then merges those commits to main via `git merge --no-ff`. Writing to the worktree's `.cloverleaf/` would strand the files and `git worktree remove --force` would discard them on teardown.
-## Scope (v0.4)
+## Scope (v0.5)
-- **Accessibility (axe-core):** run at the viewports listed in `{{ui_review_config}}.axe.viewports`.
+- **Browsers**: the reviewer runs separate Playwright sessions for each engine listed in `{{ui_review_config}}.browsers` (e.g., `["chromium", "webkit", "firefox"]`). Browser is the **outermost** loop, wrapping the viewport × route loops.
+- **Accessibility (axe-core):** run only for the engine specified by `{{ui_review_config}}.axe.browser` (default: `"chromium"`). Passes for every other engine (webkit and firefox under the default) produce **no axe output and no axe findings** — this is intentional, to avoid engine-specific false positives caused by `getComputedStyle`, `aria-required-children`, and `scrollable-region-focusable` divergence across Blink, WebKit, and Gecko (see CLV-12 spike).
   Apply the allowlist in `{{ui_review_config}}.axe.ignored` to drop pre-existing violations that the consumer has accepted (e.g., a11y debt being tracked separately).
   Dedupe findings across viewports by the `{{ui_review_config}}.axe.dedupeBy` composite key (default `["ruleId", "target"]`).
   Emit one finding per (ruleId, target) pair, with a `metadata.viewports` array aggregating the viewports where the violation was detected.
-- **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports`, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
+- **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports` for **each browser**, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
 - Visual diffs are **informational**, never gating. A diff does not fail the review — it surfaces to the human final-gate reviewer.
 - Route empty-set / "all" handling preserves v0.3 behavior:
   - `{{affected_routes}}` is `[]` → `verdict: "pass"`, summary `"No renderable routes affected, skipping axe."`, do NOT start the preview server.
-  - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback).
+  - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback behavior).
   - otherwise → visit exactly the URLs listed.
+## maxCombinations cap
+Before starting any browser session, compute the total combination count: `routeCount × viewportCount × browserCount`.
+If the product exceeds `{{ui_review_config}}.maxCombinations` (default 90):
+1. Sort affected routes by diff size (most-changed first — use the character count of each route's section in `{{diff}}` as a proxy for diff size).
+2. Keep only the first `floor(maxCombinations / (viewportCount × browserCount))` routes.
+3. For each skipped route emit one `severity: "warning"` finding with `rule: "ui-review-cap"` and message:
+   `"Route {route} skipped: combination count {total} exceeds maxCombinations ({cap}); review manually or raise the cap."`
+   Include `metadata: { route, combinationCount: total, maxCombinations: cap }`.
+The cap enforcement helper is available in `lib/ui-browser.ts` as `applyMaxCombinationsCap`.
 ## Playwright cache
-The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. If the browser is missing, return `verdict: "escalate"` with a synthetic finding: `"Playwright chromium not installed. Run 'npx playwright install chromium' on this machine."`
+The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. Before launching any browser session, verify that the binary for every engine listed in `{{ui_review_config}}.browsers` exists under `PLAYWRIGHT_BROWSERS_PATH`. If any binary is absent, return `verdict: "escalate"` with one synthetic finding per missing engine:
+```
+"Playwright {engine} not installed. Run 'npx playwright install webkit firefox' on this machine."
+```
+On Linux, append: `" On Linux, also run: npx playwright install-deps webkit"`
+The escalation helper is available in `lib/ui-browser.ts` as `buildBrowserEscalationFinding`.
+Do not attempt to launch a missing engine — fail fast with `verdict: "escalate"` listing all missing engines before any browser session is started.
 ## Runtime procedure
@@ -49,6 +73,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
    ```bash
    TMPDIR=$(mktemp -d)
    git worktree add "$TMPDIR" {{branch}}
+   npx cloverleaf-cli prep-worktree {{repo_root}} "$TMPDIR"
    ```
 3. For this repo, UI lives in `site/` (or another directory if ui-paths.json scopes it elsewhere). Install dependencies and start the dev server:
@@ -66,53 +91,86 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
    2. Otherwise, attempt to locate and parse an astro config file (common locations: `site/astro.config.mjs`, `astro.config.mjs` at repo root, `apps/web/astro.config.mjs`). Best-effort fallback.
    3. If both fail, treat base as empty string.
-6. **Visual-diff pass (when `visualDiff.enabled` is true):**
-   For each route in `{{affected_routes}}` (or the crawl set) × each viewport in `{{ui_review_config}}.viewports`:
-   - Set Playwright viewport to `{ width, height }` from the config.
-   - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
-   - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
-   - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
-   - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
-   - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
-   - Call `compareVisual` (from `lib/visual-diff.ts`) with:
-     - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
-     - `candidateBuf = <candidate PNG>`
-     - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
-     - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
-     - `threshold = visualDiff.threshold`
-     - `maxDiffRatio = visualDiff.maxDiffRatio`
-   - Map result to a finding:
-     - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport}"`, `metadata: { route, viewport, status: "new-baseline" }`. No attachments.
-     - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport}; regenerated"`, `metadata: { route, viewport, status: "dimension-mismatch" }`.
-     - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
-     - `match` → no finding emitted.
-7. **Axe pass:**
-   For each viewport in `{{ui_review_config}}.axe.viewports`:
-   - Set Playwright viewport to `{ width, height }`.
-   - For each route in `{{affected_routes}}` (or crawl set):
-     - Navigate.
-     - Inject and run axe-core:
-       ```javascript
-       import axe from 'axe-core';
-       const results = await axe.run(document);
-       ```
-     - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
-8. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
-9. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
-   - axe `impact: "critical"` → `severity: "blocker"`
-   - axe `impact: "serious"` → `severity: "error"`
-   - axe `impact: "moderate"` → `severity: "warning"`
-   - axe `impact: "minor"` → `severity: "info"`
-10. Compute verdict (visual-diff findings are **never** considered for gating):
-    - `pass` — zero non-visual-diff findings with severity `blocker` or `error`
-    - `bounce` — ≥1 non-visual-diff finding with severity `blocker` or `error`
-    - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR Playwright chromium missing.
-11. Teardown:
+6. **Apply maxCombinations cap** (when `affected_routes` is a list, not `"all"`):
+   - Compute the combination count `routes × viewports × browsers`. As the proxy for route diff size, use the character count of each route's section in `{{diff}}`.
+   - Call `applyMaxCombinationsCap` from `lib/ui-browser.ts`.
+   - The `skippedFindings` are collected now and included in the final output.
+   - Use only the returned `routes` list for the browser passes below.
+7. **Verify browser binaries** — before starting any browser session:
+   - Check each engine in `{{ui_review_config}}.browsers` against `PLAYWRIGHT_BROWSERS_PATH`.
+   - Collect all missing engines.
+   - If any engine is missing, call `buildBrowserEscalationFinding(engine, process.platform)` for each missing engine, run the teardown in step 13, and return `verdict: "escalate"` with those findings.
+8. **Per-browser outer loop** — for each `browser` in `{{ui_review_config}}.browsers`:
+   a. Launch a Playwright browser context using the `browser` engine.
+   b. **Visual-diff pass (when `visualDiff.enabled` is true):**
+      For each route in the (capped) route list × each viewport in `{{ui_review_config}}.viewports`:
+      - Set Playwright viewport to `{ width, height }` from the config.
+      - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
+      - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
+      - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
+      - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
+      - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
+      - Call `compareVisual` (from `lib/visual-diff.ts`) with:
+        - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
+        - `candidateBuf = <candidate PNG>`
+        - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
+        - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
+        - `threshold = visualDiff.threshold`
+        - `maxDiffRatio = visualDiff.maxDiffRatio`
+      - Map result to a finding:
+        - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport} [{browser}]"`, `metadata: { route, viewport, browser, status: "new-baseline" }`. No attachments.
+        - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport} [{browser}]; regenerated"`, `metadata: { route, viewport, browser, status: "dimension-mismatch" }`.
+        - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} [{browser}] — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, browser, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
+        - `match` → no finding emitted.
+   c. **Axe pass (only when `browser === {{ui_review_config}}.axe.browser`):**
+      Skip this section entirely if the current browser is NOT the configured `axe.browser`. Runs on any other engine (webkit and firefox when `axe.browser` is the default chromium) produce no axe output and no axe findings.
+      For each viewport in `{{ui_review_config}}.axe.viewports`:
+      - Set Playwright viewport to `{ width, height }`.
+      - For each route in the (capped) route list:
+        - Navigate.
+        - Inject and run axe-core:
+          ```javascript
+          import axe from 'axe-core';
+          const results = await axe.run(document);
+          ```
+        - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
+   d. Close the browser context before launching the next engine.
+9. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
+10. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
+    - axe `impact: "critical"` → `severity: "blocker"`
+    - axe `impact: "serious"` → `severity: "error"`
+    - axe `impact: "moderate"` → `severity: "warning"`
+    - axe `impact: "minor"` → `severity: "info"`
+11. Compute verdict (visual-diff and ui-review-cap findings are **never** considered for gating):
+    - `pass` — zero non-visual-diff, non-cap findings with severity `blocker` or `error`
+    - `bounce` — ≥1 non-visual-diff, non-cap finding with severity `blocker` or `error`
+    - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR any required browser binary was absent.
+12. **Write ui-review state sidecar** — after all browser passes complete and before teardown, determine whether any `compareVisual` call returned `new-baseline` or `dimension-mismatch` across all routes, viewports, and browsers in this run.
+    - If **yes**: write `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/state.json` containing:
+      ```json
+      {"baselines_pending": true}
+      ```
+      (Create intermediate directories as needed.)
+    - If **no**: write `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/state.json` containing:
+      ```json
+      {"baselines_pending": false}
+      ```
+    This sidecar is the baseline-approval gate read by the `cloverleaf-ui-review` skill. Writing `baselines_pending: false` explicitly (rather than omitting the file) lets the skill distinguish "no new baselines" from "reviewer did not run at all".
+13. Teardown:
     ```bash
     kill $SERVER_PID 2>/dev/null || true
     cd {{repo_root}}
@@ -122,7 +180,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
 ## Tool constraints
 - Read-only for source files and tests.
-- You MAY write under `{{repo_root}}/.cloverleaf/baselines/` and `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/` on the feature branch — these are the baselines and artifacts.
+- You MAY write under `{{repo_root}}/.cloverleaf/baselines/` and `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/` on the feature branch — these are the baselines, artifacts, and the `state.json` sidecar.
 - Use `git worktree`: do NOT `git checkout` in the main working directory.
 - Always tear down the server and worktree, even on error.
@@ -132,7 +190,7 @@ Respond with exactly one JSON object and nothing else. Finding shape must match
 - required: `severity`, `message`
 - optional: `rule`, `suggestion`, `location`, `attachments`, `metadata`
-For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely.
+For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely. Whenever `location` is present on a finding, it must be an object — never a URL string.
 ```json
 {
@@ -141,9 +199,9 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
   "findings": [
     {
       "severity": "blocker" | "error" | "warning" | "info",
-      "rule": "a11y.<rule-id>" | "visual-diff",
-      "message": "<description; include the page URL for a11y, route+viewport+diff for visual-diff>",
-      "metadata": { /* per §7/§8 above */ },
+      "rule": "a11y.<rule-id>" | "visual-diff" | "ui-review-cap" | "browser-missing",
+      "message": "<description; include the page URL for a11y, route+viewport+browser for visual-diff>",
+      "metadata": { /* per §8/§9 above */ },
       "attachments": [ /* for visual-diff with status="diff" */
         { "label": "baseline",  "path": ".cloverleaf/baselines/{browser}/{slug}-{viewport}.png" },
         { "label": "candidate", "path": ".cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png" },
@@ -154,4 +212,4 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
 }
 ```
-If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings. If verdict is `escalate`, include a finding explaining what went wrong.
+If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings (including `ui-review-cap` warnings and visual-diff info). If verdict is `escalate`, include a finding explaining what went wrong.

package/skills/cloverleaf-approve-baselines/SKILL.md ADDED Viewed

@@ -0,0 +1,85 @@
+---
+name: cloverleaf-approve-baselines
+description: Human baseline-approval gate for the Cloverleaf UI Review pipeline. When the UI Reviewer captures new or resized visual baselines it sets baselines_pending=true in .cloverleaf/runs/{taskId}/ui-review/state.json and blocks the ui-review → qa transition. Run this skill after inspecting the new baseline images to approve them and allow the task to advance to qa. Usage — /cloverleaf-approve-baselines <TASK-ID>.
+---
+# Cloverleaf — approve-baselines
+## Trigger condition
+This skill is invoked **only** when the `cloverleaf-ui-review` skill reports that `baselines_pending` is `true` — i.e., the UI Reviewer captured at least one `new-baseline` or `dimension-mismatch` result during its run, meaning one or more baseline PNGs under `.cloverleaf/baselines/{browser}/` were created or replaced.
+Do not run this skill if the task is not in `ui-review` status or if `state.json` already has `baselines_pending: false`.
+## Effect
+1. Writes `baselines_pending: false` to `.cloverleaf/runs/{taskId}/ui-review/state.json`.
+2. Advances the task from `ui-review` → `qa` via the normal agent transition.
+3. Commits the updated state and status to the feature branch.
+---
+## Steps
+0. Pre-flight:
+   ```bash
+   cd <repo_root>
+   current=$(git rev-parse --abbrev-ref HEAD)
+   if [ "$current" != "main" ]; then git checkout main; fi
+   ```
+   If main has uncommitted changes, stop and report.
+1. Capture the TASK-ID argument.
+2. Load the task and verify status:
+   ```bash
+   cloverleaf-cli load-task <repo_root> <TASK-ID>
+   ```
+   Verify `status === "ui-review"`. If not, report and stop.
+3. Read the current ui-review state:
+   ```bash
+   cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID>
+   ```
+   If `baselines_pending` is already `false` (or the file is absent), report that no approval is needed and stop.
+4. Present the new baseline images to the human for review. The baselines live at:
+   ```
+   <repo_root>/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png
+   ```
+   List the baseline files that differ between `main` and the feature branch:
+   ```bash
+   git diff --name-only main..cloverleaf/<TASK-ID> -- .cloverleaf/baselines/
+   ```
+   Display the list. Ask the human to confirm they have reviewed the images and approve the baselines before proceeding.
+5. Once approved, write `baselines_pending: false`:
+   ```bash
+   cloverleaf-cli write-ui-review-state <repo_root> <TASK-ID> false
+   ```
+6. Advance the task to qa:
+   ```bash
+   cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
+   ```
+7. Commit the changes to the feature branch:
+   ```bash
+   cd <repo_root>
+   git add .cloverleaf/
+   git commit -m "cloverleaf: <TASK-ID> baselines approved → qa"
+   ```
+8. Report:
+   > "✓ Baselines approved. `baselines_pending` cleared. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
+---
+## Rules
+- Never push.
+- Do not modify source code or test files.
+- Do not skip step 4 — the human must acknowledge the baseline images before approval is recorded.
+- On illegal state transition, report and stop without partial commits.

package/skills/cloverleaf-ui-review/SKILL.md CHANGED Viewed

@@ -74,14 +74,36 @@ description: Run the UI Reviewer agent on a task in the `ui-review` state (full
 11. Parse the subagent's response. Expect `{"verdict": "pass"|"bounce"|"escalate", "summary": "...", "findings": [...]}`.
-12. Branch on verdict:
-    **Pass:**
+12. **Read the baseline-approval sidecar** (after the subagent completes, regardless of verdict):
+    ```bash
+    UI_STATE=$(cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID>)
+    BASELINES_PENDING=$(echo "$UI_STATE" | node -e "process.stdout.write(JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')).baselines_pending ? 'true' : 'false')")
     ```
-    cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
+    Or, equivalently, as a single pipeline (run one form, not both):
+    ```bash
+    BASELINES_PENDING=$(cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID> | node -e "const s=require('fs').readFileSync('/dev/stdin','utf-8'); process.stdout.write(JSON.parse(s).baselines_pending?'true':'false')")
     ```
-    Commit: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed → qa"`.
-    Report: "✓ UI Review passed. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
+13. Branch on verdict:
+    **Pass:**
+    Check `BASELINES_PENDING`:
+    - If `BASELINES_PENDING` is `true`:
+      - Do NOT advance to `qa`.
+      - Commit artifacts: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed (baselines pending approval)"`.
+      - Report:
+        > "✓ UI Review passed (no a11y errors), but **baselines_pending** is true: one or more new or resized visual baselines were captured and require human approval before advancing to qa.
+        > Run `/cloverleaf-approve-baselines <TASK-ID>` to review the new baseline images and approve them, which will clear the flag and advance the task to qa."
+      - Stop here (task remains in `ui-review` status).
+    - If `BASELINES_PENDING` is `false` (or state.json is absent):
+      ```
+      cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
+      ```
+      Commit: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed → qa"`.
+      Report: "✓ UI Review passed. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
     **Bounce:**
     1. Write feedback: `echo '<envelope-json>' > /tmp/cloverleaf-fb-u.json`