@cloverleaf/reference-impl 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "cloverleaf",
3
3
  "description": "Cloverleaf reference implementation — Claude Code skills for task scaffolding and the Delivery pipeline (implementer, documenter, reviewer, UI reviewer with multi-viewport visual diff, QA, merge).",
4
- "version": "0.5.2",
4
+ "version": "0.5.4",
5
5
  "author": {
6
6
  "name": "Renato D'Arrigo",
7
7
  "email": "renato.darrigo@gmail.com"
package/README.md CHANGED
@@ -29,7 +29,7 @@ v0.2 implements both paths of the Delivery track:
29
29
  | Implementer | Real | Subagent, code + tests on feature branch |
30
30
  | Documenter | Real (v0.2) | Subagent, doc-only commits per file-path rules |
31
31
  | Reviewer | Real | Subagent, read-only review of diff |
32
- | UI Reviewer | Real (v0.3) | Playwright + axe-core, diff-scoped to affected routes, single viewport, a11y only |
32
+ | UI Reviewer | Real (v0.5) | Playwright + axe-core + pixelmatch; multi-browser outer loop (chromium/webkit/firefox); axe-core runs on `axe.browser` engine only (default chromium); maxCombinations cap with per-route warnings |
33
33
  | QA | Real (v0.2) | Per-package test runner via `git worktree` |
34
34
  | Plan | Stub | Deferred to v0.3 |
35
35
  | Researcher | Stub | Deferred to v0.3 |
@@ -41,6 +41,7 @@ v0.2 implements both paths of the Delivery track:
41
41
  - `/cloverleaf-document` — run Documenter *(new in v0.2)*
42
42
  - `/cloverleaf-review` — run Reviewer
43
43
  - `/cloverleaf-ui-review` — run UI Reviewer *(new in v0.2)*
44
+ - `/cloverleaf-approve-baselines` — human baseline-approval gate; clears `baselines_pending` and advances `ui-review → qa` *(new in CLV-19)*
44
45
  - `/cloverleaf-qa` — run QA *(new in v0.2)*
45
46
  - `/cloverleaf-merge` — human gate (branches on state)
46
47
  - `/cloverleaf-run` — orchestrator (dispatches by `risk_class`)
@@ -145,7 +146,7 @@ The Reviewer never switches branches. It reads files via `git show` and runs tes
145
146
 
146
147
  ## Package layout
147
148
 
148
- - `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`.
149
+ - `lib/` — TypeScript library used by the CLI. State, events, feedback, IDs, paths. Includes `buildBaselinePath(repoRoot, browser, slug, viewport)` (`lib/visual-diff.ts`) for constructing canonical baseline paths under `.cloverleaf/baselines/{browser}/`. `lib/ui-browser.ts` exports `buildBrowserEscalationFinding` and `applyMaxCombinationsCap` (used by the UI Reviewer prompt for per-engine escalation and combination-count capping). `lib/ui-review-state.ts` exports `readUiReviewState`, `writeUiReviewState`, and `uiReviewStatePath` — the baseline-approval sidecar API for `.cloverleaf/runs/{taskId}/ui-review/state.json`.
149
150
  - `skills/` — Claude Code skill markdown files.
150
151
  - `prompts/` — Implementer/Reviewer subagent system prompts.
151
152
  - `examples/toy-repo/` — standalone demo repo.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.2
1
+ 0.5.4
package/dist/cli.mjs CHANGED
@@ -13,6 +13,8 @@
13
13
  * latest-feedback <repoRoot> <taskId>
14
14
  * emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]
15
15
  * ui-review-config --repo-root <repoRoot>
16
+ * read-ui-review-state <repoRoot> <taskId>
17
+ * write-ui-review-state <repoRoot> <taskId> <baselines_pending>
16
18
  * plugin-root
17
19
  * load-rfc <repoRoot> <id>
18
20
  * save-rfc <repoRoot> <filePath>
@@ -46,6 +48,7 @@ import { loadSpike, saveSpike, advanceSpikeStatus } from './spike.mjs';
46
48
  import { loadPlan, savePlan, advancePlanStatus, materialiseTasksFromPlan } from './plan.mjs';
47
49
  import { loadDiscoveryConfig } from './discovery-config.mjs';
48
50
  import { prepWorktree } from './prep-worktree.mjs';
51
+ import { readUiReviewState, writeUiReviewState } from './ui-review-state.mjs';
49
52
  function die(msg, code = 1) {
50
53
  process.stderr.write(msg + '\n');
51
54
  process.exit(code);
@@ -63,6 +66,8 @@ function usage(msg) {
63
66
  ' latest-feedback <repoRoot> <taskId>\n' +
64
67
  ' emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]\n' +
65
68
  ' ui-review-config --repo-root <repoRoot>\n' +
69
+ ' read-ui-review-state <repoRoot> <taskId>\n' +
70
+ ' write-ui-review-state <repoRoot> <taskId> <baselines_pending>\n' +
66
71
  ' plugin-root\n' +
67
72
  ' load-rfc <repoRoot> <id>\n' +
68
73
  ' save-rfc <repoRoot> <filePath>\n' +
@@ -269,6 +274,22 @@ try {
269
274
  process.stdout.write(JSON.stringify(config, null, 2));
270
275
  process.exit(0);
271
276
  }
277
+ case 'read-ui-review-state': {
278
+ const [repoRoot, taskId] = rest;
279
+ if (!repoRoot || !taskId)
280
+ usage('read-ui-review-state requires <repoRoot> <taskId>');
281
+ const state = readUiReviewState(repoRoot, taskId);
282
+ process.stdout.write(JSON.stringify(state, null, 2) + '\n');
283
+ break;
284
+ }
285
+ case 'write-ui-review-state': {
286
+ const [repoRoot, taskId, pendingArg] = rest;
287
+ if (!repoRoot || !taskId || pendingArg === undefined)
288
+ usage('write-ui-review-state requires <repoRoot> <taskId> <baselines_pending>');
289
+ const baselines_pending = pendingArg === 'true' || pendingArg === '1';
290
+ writeUiReviewState(repoRoot, taskId, { baselines_pending });
291
+ break;
292
+ }
272
293
  case 'plugin-root': {
273
294
  process.stdout.write(getPluginRoot());
274
295
  process.exit(0);
package/dist/index.mjs CHANGED
@@ -4,3 +4,4 @@ export * from './task.mjs';
4
4
  export * from './events.mjs';
5
5
  export * from './feedback.mjs';
6
6
  export * from './validate.mjs';
7
+ export * from './ui-review-state.mjs';
package/dist/paths.mjs CHANGED
@@ -24,3 +24,9 @@ export function spikesDir(repoRoot) {
24
24
  export function plansDir(repoRoot) {
25
25
  return resolve(cloverleafDir(repoRoot), 'plans');
26
26
  }
27
/**
 * Resolve the `runs` directory located under the directory returned by
 * `cloverleafDir(repoRoot)`.
 *
 * @param repoRoot Repository root handed to `cloverleafDir`.
 * @returns The resolved path of the `runs` directory.
 */
export function runsDir(repoRoot) {
  const baseDir = cloverleafDir(repoRoot);
  return resolve(baseDir, 'runs');
}
30
/**
 * Resolve the per-task `ui-review` directory: `<runsDir>/<taskId>/ui-review`.
 *
 * @param repoRoot Repository root handed to `runsDir`.
 * @param taskId   Task identifier used as the directory name under `runs`.
 * @returns The resolved path of the task's `ui-review` directory.
 */
export function uiReviewRunDir(repoRoot, taskId) {
  const runsRoot = runsDir(repoRoot);
  return resolve(runsRoot, taskId, 'ui-review');
}
@@ -0,0 +1,74 @@
1
+ // ---------------------------------------------------------------------------
2
+ // Browser escalation
3
+ // ---------------------------------------------------------------------------
4
/**
 * Build an escalation Finding for a missing Playwright browser binary.
 *
 * The finding names the missing engine and includes an install command:
 *   - webkit / firefox: `npx playwright install webkit firefox` (the combined
 *     command from the CLV-9 RFC and CLV-10 spike, unchanged).
 *   - any other engine (e.g. chromium): `npx playwright install <engine>`, so
 *     the suggested command actually installs the engine reported as missing.
 *   - Linux only: an extra `npx playwright install-deps webkit` hint.
 *
 * @param engine   The browser engine that is missing.
 * @param platform The platform string (defaults to `process.platform`). Pass
 *                 "linux" explicitly to include the install-deps hint; all
 *                 other values are treated as non-Linux.
 * @returns A finding object with `severity: 'error'`, `rule: 'browser-missing'`,
 *          a human-readable `message`, and `metadata: { engine, installCommand }`.
 */
export function buildBrowserEscalationFinding(engine, platform = process.platform) {
  // The combined command only covers webkit and firefox; for any other engine
  // it would leave the missing binary uninstalled, so name that engine instead.
  const installCmd =
    engine === 'webkit' || engine === 'firefox'
      ? 'npx playwright install webkit firefox'
      : `npx playwright install ${engine}`;
  const depsHint =
    platform === 'linux'
      ? ' On Linux, also run: npx playwright install-deps webkit'
      : '';
  return {
    severity: 'error',
    rule: 'browser-missing',
    message: `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
    metadata: { engine, installCommand: installCmd },
  };
}
30
/**
 * Enforce the maxCombinations cap.
 *
 * When `routes.length × viewportCount × browserCount > maxCombinations`,
 * the affected routes are sorted by diff size (most-changed first) and only
 * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
 * are processed. One `warning`-severity finding with rule `ui-review-cap` is
 * emitted per skipped route. The input array is not mutated.
 *
 * A negative cap is treated as "keep no routes": every route is skipped and
 * reported, rather than being partially kept through a negative slice index.
 *
 * @param routes          Affected routes with their diff sizes.
 * @param viewportCount   Number of viewports configured.
 * @param browserCount    Number of browser engines configured.
 * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
 * @returns `{ routes, skippedFindings }` ready for use by the reviewer.
 */
export function applyMaxCombinationsCap(routes, viewportCount, browserCount, maxCombinations) {
  const totalCombinations = routes.length * viewportCount * browserCount;
  if (totalCombinations <= maxCombinations) {
    return {
      routes: routes.map((r) => r.route),
      skippedFindings: [],
    };
  }
  const perRouteSlots = viewportCount * browserCount;
  // Clamp at zero: Array.prototype.slice interprets a negative end index as
  // "count from the end", which would keep routes when the cap is negative.
  const maxRoutes = Math.max(0, Math.floor(maxCombinations / perRouteSlots));
  // Sort a copy most-changed first, then split into kept and skipped routes.
  const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
  const kept = sorted.slice(0, maxRoutes);
  const skipped = sorted.slice(maxRoutes);
  const skippedFindings = skipped.map((r) => ({
    severity: 'warning',
    rule: 'ui-review-cap',
    message: `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
      `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
    metadata: {
      route: r.route,
      combinationCount: totalCombinations,
      maxCombinations,
    },
  }));
  return {
    routes: kept.map((r) => r.route),
    skippedFindings,
  };
}
@@ -0,0 +1,40 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { uiReviewRunDir } from './paths.mjs';
4
+ const STATE_FILENAME = 'state.json';
5
/**
 * Compute where the ui-review sidecar state file lives for a task:
 *   .cloverleaf/runs/{taskId}/ui-review/state.json
 *
 * @param repoRoot Repository root.
 * @param taskId   Task identifier.
 * @returns The state file path inside the task's ui-review run directory.
 */
export function uiReviewStatePath(repoRoot, taskId) {
  const runDir = uiReviewRunDir(repoRoot, taskId);
  return join(runDir, STATE_FILENAME);
}
12
/**
 * Reads the ui-review state sidecar from disk.
 *
 * Returns `{ baselines_pending: false }` when the file is absent — the
 * absence of the file is treated as "no pending baselines", which lets the
 * ui-review → qa transition proceed normally.
 *
 * A file whose JSON payload is not an object (for example the literal `null`)
 * carries no `baselines_pending` field and is likewise read as not pending.
 * Malformed JSON still throws from `JSON.parse`.
 *
 * @param repoRoot Repository root.
 * @param taskId   Task identifier.
 * @returns `{ baselines_pending }` with the stored value coerced to a boolean.
 */
export function readUiReviewState(repoRoot, taskId) {
  const path = uiReviewStatePath(repoRoot, taskId);
  if (!existsSync(path)) {
    return { baselines_pending: false };
  }
  const raw = JSON.parse(readFileSync(path, 'utf-8'));
  // Guard the property access: `null.baselines_pending` would throw a
  // TypeError, while every other JSON value simply lacks the field.
  const pending = raw !== null && typeof raw === 'object' ? raw.baselines_pending : undefined;
  return { baselines_pending: Boolean(pending) };
}
27
/**
 * Persist the ui-review state sidecar, creating the task's ui-review run
 * directory (and any missing parents) first.
 *
 * The state is written as 2-space-indented JSON followed by a newline.
 *
 * @param repoRoot Absolute path to the repository root.
 * @param taskId   Task identifier (e.g. "CLV-42").
 * @param state    The state to persist.
 */
export function writeUiReviewState(repoRoot, taskId, state) {
  const targetDir = uiReviewRunDir(repoRoot, taskId);
  mkdirSync(targetDir, { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(join(targetDir, STATE_FILENAME), serialized);
}
package/lib/cli.ts CHANGED
@@ -13,6 +13,8 @@
13
13
  * latest-feedback <repoRoot> <taskId>
14
14
  * emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]
15
15
  * ui-review-config --repo-root <repoRoot>
16
+ * read-ui-review-state <repoRoot> <taskId>
17
+ * write-ui-review-state <repoRoot> <taskId> <baselines_pending>
16
18
  * plugin-root
17
19
  * load-rfc <repoRoot> <id>
18
20
  * save-rfc <repoRoot> <filePath>
@@ -48,6 +50,7 @@ import { loadSpike, saveSpike, advanceSpikeStatus, type SpikeDoc } from './spike
48
50
  import { loadPlan, savePlan, advancePlanStatus, materialiseTasksFromPlan, type PlanDoc } from './plan.js';
49
51
  import { loadDiscoveryConfig } from './discovery-config.js';
50
52
  import { prepWorktree } from './prep-worktree.js';
53
+ import { readUiReviewState, writeUiReviewState } from './ui-review-state.js';
51
54
 
52
55
  function die(msg: string, code = 1): never {
53
56
  process.stderr.write(msg + '\n');
@@ -67,6 +70,8 @@ function usage(msg?: string): never {
67
70
  ' latest-feedback <repoRoot> <taskId>\n' +
68
71
  ' emit-gate-decision <repoRoot> <workItemId> <gate> <decision> <actor> [--comment=<str>]\n' +
69
72
  ' ui-review-config --repo-root <repoRoot>\n' +
73
+ ' read-ui-review-state <repoRoot> <taskId>\n' +
74
+ ' write-ui-review-state <repoRoot> <taskId> <baselines_pending>\n' +
70
75
  ' plugin-root\n' +
71
76
  ' load-rfc <repoRoot> <id>\n' +
72
77
  ' save-rfc <repoRoot> <filePath>\n' +
@@ -278,6 +283,23 @@ try {
278
283
  process.exit(0);
279
284
  }
280
285
 
286
+ case 'read-ui-review-state': {
287
+ const [repoRoot, taskId] = rest;
288
+ if (!repoRoot || !taskId) usage('read-ui-review-state requires <repoRoot> <taskId>');
289
+ const state = readUiReviewState(repoRoot, taskId);
290
+ process.stdout.write(JSON.stringify(state, null, 2) + '\n');
291
+ break;
292
+ }
293
+
294
+ case 'write-ui-review-state': {
295
+ const [repoRoot, taskId, pendingArg] = rest;
296
+ if (!repoRoot || !taskId || pendingArg === undefined)
297
+ usage('write-ui-review-state requires <repoRoot> <taskId> <baselines_pending>');
298
+ const baselines_pending = pendingArg === 'true' || pendingArg === '1';
299
+ writeUiReviewState(repoRoot, taskId, { baselines_pending });
300
+ break;
301
+ }
302
+
281
303
  case 'plugin-root': {
282
304
  process.stdout.write(getPluginRoot());
283
305
  process.exit(0);
package/lib/index.ts CHANGED
@@ -4,3 +4,4 @@ export * from './task.js';
4
4
  export * from './events.js';
5
5
  export * from './feedback.js';
6
6
  export * from './validate.js';
7
+ export * from './ui-review-state.js';
package/lib/paths.ts CHANGED
@@ -33,3 +33,11 @@ export function spikesDir(repoRoot: string): string {
33
33
  export function plansDir(repoRoot: string): string {
34
34
  return resolve(cloverleafDir(repoRoot), 'plans');
35
35
  }
36
+
37
/**
 * Resolve the `runs` directory located under the directory returned by
 * `cloverleafDir(repoRoot)`.
 *
 * @param repoRoot Repository root handed to `cloverleafDir`.
 * @returns The resolved path of the `runs` directory.
 */
export function runsDir(repoRoot: string): string {
  const baseDir = cloverleafDir(repoRoot);
  return resolve(baseDir, 'runs');
}
40
+
41
/**
 * Resolve the per-task `ui-review` directory: `<runsDir>/<taskId>/ui-review`.
 *
 * @param repoRoot Repository root handed to `runsDir`.
 * @param taskId   Task identifier used as the directory name under `runs`.
 * @returns The resolved path of the task's `ui-review` directory.
 */
export function uiReviewRunDir(repoRoot: string, taskId: string): string {
  const runsRoot = runsDir(repoRoot);
  return resolve(runsRoot, taskId, 'ui-review');
}
@@ -0,0 +1,122 @@
1
+ import type { Finding } from './feedback.js';
2
+ import type { BrowserEngine } from './ui-review-config.js';
3
+
4
+ // ---------------------------------------------------------------------------
5
+ // Browser escalation
6
+ // ---------------------------------------------------------------------------
7
+
8
/**
 * Build an escalation Finding for a missing Playwright browser binary.
 *
 * The finding names the missing engine and includes an install command:
 *   - webkit / firefox: `npx playwright install webkit firefox` (the combined
 *     command from the CLV-9 RFC and CLV-10 spike, unchanged).
 *   - any other engine (e.g. chromium): `npx playwright install <engine>`, so
 *     the suggested command actually installs the engine reported as missing.
 *   - Linux only: an extra `npx playwright install-deps webkit` hint.
 *
 * @param engine   The browser engine that is missing.
 * @param platform The platform string (defaults to `process.platform`). Pass
 *                 "linux" explicitly to include the install-deps hint; all
 *                 other values are treated as non-Linux.
 * @returns A Finding with `severity: 'error'`, `rule: 'browser-missing'`,
 *          a human-readable `message`, and `metadata: { engine, installCommand }`.
 */
export function buildBrowserEscalationFinding(
  engine: BrowserEngine,
  platform: string = process.platform,
): Finding {
  // The combined command only covers webkit and firefox; for any other engine
  // it would leave the missing binary uninstalled, so name that engine instead.
  const installCmd =
    engine === 'webkit' || engine === 'firefox'
      ? 'npx playwright install webkit firefox'
      : `npx playwright install ${engine}`;
  const depsHint =
    platform === 'linux'
      ? ' On Linux, also run: npx playwright install-deps webkit'
      : '';
  return {
    severity: 'error',
    rule: 'browser-missing',
    message:
      `Playwright ${engine} not installed. Run '${installCmd}' on this machine.${depsHint}`,
    metadata: { engine, installCommand: installCmd },
  };
}
38
+
39
+ // ---------------------------------------------------------------------------
40
+ // maxCombinations cap enforcement
41
+ // ---------------------------------------------------------------------------
42
+
43
/**
 * Represents an affected route with a diff-size weight used for sorting
 * when maxCombinations cap is applied.
 */
export interface RouteWithDiffSize {
  /** Route path; copied verbatim into the cap result and skip findings. */
  route: string;
  /** Number of changed lines (or any monotonic proxy for diff size). */
  diffSize: number;
}
52
+
53
/**
 * Result of applying the maxCombinations cap.
 */
export interface CapResult {
  /** Routes that should be processed (up to the cap). */
  routes: string[];
  /**
   * One `warning`-severity Finding per skipped route, with rule
   * `ui-review-cap` and a message containing the route name plus the
   * combination count vs cap. Empty when the cap is not exceeded.
   */
  skippedFindings: Finding[];
}
66
+
67
/**
 * Enforce the maxCombinations cap.
 *
 * When `routes.length × viewportCount × browserCount > maxCombinations`,
 * the affected routes are sorted by diff size (most-changed first) and only
 * the first `floor(maxCombinations / (viewportCount × browserCount))` routes
 * are processed. One `warning`-severity finding with rule `ui-review-cap` is
 * emitted per skipped route. The input array is not mutated.
 *
 * A negative cap is treated as "keep no routes": every route is skipped and
 * reported, rather than being partially kept through a negative slice index.
 *
 * @param routes          Affected routes with their diff sizes.
 * @param viewportCount   Number of viewports configured.
 * @param browserCount    Number of browser engines configured.
 * @param maxCombinations Maximum allowed combinations (routes × viewports × browsers).
 * @returns `{ routes, skippedFindings }` ready for use by the reviewer.
 */
export function applyMaxCombinationsCap(
  routes: RouteWithDiffSize[],
  viewportCount: number,
  browserCount: number,
  maxCombinations: number,
): CapResult {
  const totalCombinations = routes.length * viewportCount * browserCount;

  if (totalCombinations <= maxCombinations) {
    return {
      routes: routes.map((r) => r.route),
      skippedFindings: [],
    };
  }

  const perRouteSlots = viewportCount * browserCount;
  // Clamp at zero: Array.prototype.slice interprets a negative end index as
  // "count from the end", which would keep routes when the cap is negative.
  const maxRoutes = Math.max(0, Math.floor(maxCombinations / perRouteSlots));

  // Sort a copy most-changed first, then split into kept and skipped routes.
  const sorted = [...routes].sort((a, b) => b.diffSize - a.diffSize);
  const kept = sorted.slice(0, maxRoutes);
  const skipped = sorted.slice(maxRoutes);

  const skippedFindings: Finding[] = skipped.map((r) => ({
    severity: 'warning',
    rule: 'ui-review-cap',
    message:
      `Route ${r.route} skipped: combination count ${totalCombinations} exceeds ` +
      `maxCombinations (${maxCombinations}); review manually or raise the cap.`,
    metadata: {
      route: r.route,
      combinationCount: totalCombinations,
      maxCombinations,
    },
  }));

  return {
    routes: kept.map((r) => r.route),
    skippedFindings,
  };
}
@@ -0,0 +1,52 @@
1
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2
+ import { join } from 'node:path';
3
+ import { uiReviewRunDir } from './paths.js';
4
+
5
/**
 * Shape of the ui-review sidecar state persisted as `state.json`.
 */
export interface UiReviewState {
  /** Whether baselines are pending; reads report `false` when no state file exists. */
  baselines_pending: boolean;
}
8
+
9
+ const STATE_FILENAME = 'state.json';
10
+
11
/**
 * Compute where the ui-review sidecar state file lives for a task:
 *   .cloverleaf/runs/{taskId}/ui-review/state.json
 *
 * @param repoRoot Repository root.
 * @param taskId   Task identifier.
 * @returns The state file path inside the task's ui-review run directory.
 */
export function uiReviewStatePath(repoRoot: string, taskId: string): string {
  const runDir = uiReviewRunDir(repoRoot, taskId);
  return join(runDir, STATE_FILENAME);
}
18
+
19
/**
 * Reads the ui-review state sidecar from disk.
 *
 * Returns `{ baselines_pending: false }` when the file is absent — the
 * absence of the file is treated as "no pending baselines", which lets the
 * ui-review → qa transition proceed normally.
 *
 * A file whose JSON payload is not an object (for example the literal `null`)
 * carries no `baselines_pending` field and is likewise read as not pending.
 * Malformed JSON still throws from `JSON.parse`.
 *
 * @param repoRoot Repository root.
 * @param taskId   Task identifier.
 * @returns `{ baselines_pending }` with the stored value coerced to a boolean.
 */
export function readUiReviewState(repoRoot: string, taskId: string): UiReviewState {
  const path = uiReviewStatePath(repoRoot, taskId);
  if (!existsSync(path)) {
    return { baselines_pending: false };
  }
  const raw: unknown = JSON.parse(readFileSync(path, 'utf-8'));
  // Guard the property access: `null.baselines_pending` would throw a
  // TypeError, while every other JSON value simply lacks the field.
  const pending =
    raw !== null && typeof raw === 'object'
      ? (raw as Partial<UiReviewState>).baselines_pending
      : undefined;
  return { baselines_pending: Boolean(pending) };
}
34
+
35
/**
 * Persist the ui-review state sidecar, creating the task's ui-review run
 * directory (and any missing parents) first.
 *
 * The state is written as 2-space-indented JSON followed by a newline.
 *
 * @param repoRoot Absolute path to the repository root.
 * @param taskId   Task identifier (e.g. "CLV-42").
 * @param state    The state to persist.
 */
export function writeUiReviewState(
  repoRoot: string,
  taskId: string,
  state: UiReviewState,
): void {
  const targetDir = uiReviewRunDir(repoRoot, taskId);
  mkdirSync(targetDir, { recursive: true });
  const serialized = `${JSON.stringify(state, null, 2)}\n`;
  writeFileSync(join(targetDir, STATE_FILENAME), serialized);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cloverleaf/reference-impl",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "description": "Reference implementation of the Cloverleaf methodology as Claude Code skills. Implements the Tight Loop (Implementer + Reviewer).",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -52,10 +52,23 @@ If `## [Unreleased]` does not exist, create it at the top of the CHANGELOG (righ
52
52
 
53
53
  ## Commit discipline
54
54
 
55
- - One commit per file touched.
55
+ - **Before committing, run `git status --porcelain` in the worktree and stage every modified doc file.** Do NOT hardcode a single path into `git add`; the subagent has historically forgotten README.md and committed only CHANGELOG.md when it edited both. The reliable pattern:
56
+
57
+ ```bash
58
+ cd <temp>
59
+ git status --porcelain
60
+ # For each modified doc file listed, stage it explicitly:
61
+ git add <package>/CHANGELOG.md <package>/README.md <package>/docs/*.md # include all that were edited
62
+ git commit -m "docs(<scope>): <short>"
63
+ ```
64
+
65
+ Equivalently, if you are certain only doc files are modified (you never touched source code), `git add -A` is acceptable — it's the hardcoded-single-path pattern that must be avoided.
66
+
67
+ - One commit per Documenter run, covering every doc file edited in that run. (If you need multiple scopes — e.g., both `standard/CHANGELOG.md` and `reference-impl/CHANGELOG.md` — make one commit per scope, but each commit still stages every edited file within that scope.)
56
68
  - Commit message: `docs(<scope>): <short>` where `<scope>` is the package name (`standard`, `reference-impl`, `site`, or `repo` for root-level).
57
69
  - All commits land on `{{branch}}` (the feature branch).
58
70
  - After all commits land, run `git worktree remove --force <temp>` to clean up.
71
+ - **Self-check before returning**: `git status --porcelain` in the worktree must be empty. If it's not, you have uncommitted doc edits — stage and commit them, or revert them, before reporting back.
59
72
 
60
73
  ## Output
61
74
 
@@ -1,6 +1,6 @@
1
1
  # UI Reviewer Agent
2
2
 
3
- You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports for accessibility violations (axe-core) and visual regressions (pixelmatch) using a headless Playwright chromium browser. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
3
+ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at multiple viewports and browser engines for accessibility violations (axe-core) and visual regressions (pixelmatch) using headless Playwright browsers. You are read-only for source code and tests — but you DO write baseline/diff artifacts under `.cloverleaf/` on the feature branch.
4
4
 
5
5
  ## Input
6
6
 
@@ -11,7 +11,7 @@ You are the Cloverleaf UI Reviewer. Your job: review a task's UI changes at mult
11
11
  - **Diff from base**: {{diff}}
12
12
  - **Preview port**: {{preview_port}} (an already-allocated free local port; use it for the dev server)
13
13
  - **Affected routes**: {{affected_routes}} — either a JSON array of route paths (e.g., `["/faq/"]`), or the string `"all"`, or `[]`
14
- - **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (viewports, visualDiff, axe) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
14
+ - **UI review config**: {{ui_review_config}} — the loaded `UiReviewConfig` object (browsers, viewports, visualDiff, axe, maxCombinations) as JSON. The `viewports` array contains named entries such as `mobile`, `tablet`, and `desktop` with their respective `{ width, height }` dimensions.
15
15
 
16
16
  ## Paths
17
17
 
@@ -24,22 +24,46 @@ You operate in two filesystem locations — keep them straight:
24
24
 
25
25
  The rationale: baselines on `{{repo_root}}/.cloverleaf/baselines/` get picked up by subsequent `git add` + `git commit` steps in the UI Reviewer, which run on the feature branch. The merge skill (v0.4.1+) then merges those commits to main via `git merge --no-ff`. Writing to the worktree's `.cloverleaf/` would strand the files and `git worktree remove --force` would discard them on teardown.
26
26
 
27
- ## Scope (v0.4)
27
+ ## Scope (v0.5)
28
28
 
29
- - **Accessibility (axe-core):** run at the viewports listed in `{{ui_review_config}}.axe.viewports`.
29
+ - **Browsers**: the reviewer runs separate Playwright sessions for each engine listed in `{{ui_review_config}}.browsers` (e.g., `["chromium", "webkit", "firefox"]`). Browser is the **outermost** loop, wrapping the viewport × route loops.
30
+ - **Accessibility (axe-core):** run only for the engine specified by `{{ui_review_config}}.axe.browser` (default: `"chromium"`). webkit and firefox browser passes produce **no axe output and no axe findings** — this is intentional, to avoid engine-specific false positives from getComputedStyle, aria-required-children, and scrollable-region-focusable divergence across Blink, WebKit, and Gecko (see CLV-12 spike).
30
31
  Apply the allowlist in `{{ui_review_config}}.axe.ignored` to drop pre-existing violations that the consumer has accepted (e.g., a11y debt being tracked separately).
31
32
  Dedupe findings across viewports by the `{{ui_review_config}}.axe.dedupeBy` composite key (default `["ruleId", "target"]`).
32
33
  Emit one finding per (ruleId, target) pair, with a `metadata.viewports` array aggregating the viewports where the violation was detected.
33
- - **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports`, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
34
+ - **Visual diff (pixelmatch):** when `{{ui_review_config}}.visualDiff.enabled` is true, screenshot each route at each viewport in `{{ui_review_config}}.viewports` for **each browser**, compare to `.cloverleaf/baselines/{browser}/{route-slug}-{viewport}.png`, emit `severity: "info"` findings with baseline/candidate/diff attachments when the diff ratio exceeds `maxDiffRatio`.
34
35
  - Visual diffs are **informational**, never gating. A diff does not fail the review — it surfaces to the human final-gate reviewer.
35
36
  - Route empty-set / "all" handling preserves v0.3 behavior:
36
37
  - `{{affected_routes}}` is `[]` → `verdict: "pass"`, summary `"No renderable routes affected, skipping axe."`, do NOT start the preview server.
37
- - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback).
38
+ - `{{affected_routes}}` is `"all"` → crawl up to 20 pages reachable from `/` via same-origin link discovery (v0.2 fallback behavior).
38
39
  - otherwise → visit exactly the URLs listed.
39
40
 
41
+ ## maxCombinations cap
42
+
43
+ Before starting any browser session, compute total combinations = `routes × viewports × browsers`.
44
+
45
+ If the product exceeds `{{ui_review_config}}.maxCombinations` (default 90):
46
+ 1. Sort affected routes by diff size (most-changed first — use the character count of each route's section in `{{diff}}` as a proxy for diff size).
47
+ 2. Keep only the first `floor(maxCombinations / (viewportCount × browserCount))` routes.
48
+ 3. For each skipped route emit one `severity: "warning"` finding with `rule: "ui-review-cap"` and message:
49
+ `"Route {route} skipped: combination count {total} exceeds maxCombinations ({cap}); review manually or raise the cap."`
50
+ Include `metadata: { route, combinationCount: total, maxCombinations: cap }`.
51
+
52
+ The cap enforcement helper is available in `lib/ui-browser.ts` as `applyMaxCombinationsCap`.
53
+
40
54
  ## Playwright cache
41
55
 
42
- The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. If the browser is missing, return `verdict: "escalate"` with a synthetic finding: `"Playwright chromium not installed. Run 'npx playwright install chromium' on this machine."`
56
+ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playwright` before you are invoked. Before launching each browser session, verify that the required engine binary exists in `PLAYWRIGHT_BROWSERS_PATH`. If a browser binary is absent, return `verdict: "escalate"` with a synthetic finding per missing engine:
57
+
58
+ ```
59
+ "Playwright {engine} not installed. Run 'npx playwright install webkit firefox' on this machine."
60
+ ```
61
+
62
+ On Linux, append: `" On Linux, also run: npx playwright install-deps webkit"`
63
+
64
+ The escalation helper is available in `lib/ui-browser.ts` as `buildBrowserEscalationFinding`.
65
+
66
+ Do not attempt to launch a missing engine — fail fast with `verdict: "escalate"` listing all missing engines before any browser session is started.
43
67
 
44
68
  ## Runtime procedure
45
69
 
@@ -49,6 +73,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
49
73
  ```bash
50
74
  TMPDIR=$(mktemp -d)
51
75
  git worktree add "$TMPDIR" {{branch}}
76
+ npx cloverleaf-cli prep-worktree {{repo_root}} "$TMPDIR"
52
77
  ```
53
78
 
54
79
  3. For this repo, UI lives in `site/` (or another directory if ui-paths.json scopes it elsewhere). Install dependencies and start the dev server:
@@ -66,53 +91,86 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
66
91
  2. Otherwise, attempt to locate and parse an astro config file (common locations: `site/astro.config.mjs`, `astro.config.mjs` at repo root, `apps/web/astro.config.mjs`). Best-effort fallback.
67
92
  3. If both fail, treat base as empty string.
68
93
 
69
- 6. **Visual-diff pass (when `visualDiff.enabled` is true):**
70
- For each route in `{{affected_routes}}` (or the crawl set) × each viewport in `{{ui_review_config}}.viewports`:
71
- - Set Playwright viewport to `{ width, height }` from the config.
72
- - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
73
- - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
74
- - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
75
- - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
76
- - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
77
- - Call `compareVisual` (from `lib/visual-diff.ts`) with:
78
- - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
79
- - `candidateBuf = <candidate PNG>`
80
- - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
81
- - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
82
- - `threshold = visualDiff.threshold`
83
- - `maxDiffRatio = visualDiff.maxDiffRatio`
84
- - Map result to a finding:
85
- - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport}"`, `metadata: { route, viewport, status: "new-baseline" }`. No attachments.
86
- - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport}; regenerated"`, `metadata: { route, viewport, status: "dimension-mismatch" }`.
87
- - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
88
- - `match` → no finding emitted.
89
-
90
- 7. **Axe pass:**
91
- For each viewport in `{{ui_review_config}}.axe.viewports`:
92
- - Set Playwright viewport to `{ width, height }`.
93
- - For each route in `{{affected_routes}}` (or crawl set):
94
- - Navigate.
95
- - Inject and run axe-core:
96
- ```javascript
97
- import axe from 'axe-core';
98
- const results = await axe.run(document);
99
- ```
100
- - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
101
-
102
- 8. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
103
-
104
- 9. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
105
- - axe `impact: "critical"` → `severity: "blocker"`
106
- - axe `impact: "serious"` → `severity: "error"`
107
- - axe `impact: "moderate"` → `severity: "warning"`
108
- - axe `impact: "minor"` → `severity: "info"`
109
-
110
- 10. Compute verdict (visual-diff findings are **never** considered for gating):
111
- - `pass` — zero non-visual-diff findings with severity `blocker` or `error`
112
- - `bounce` — ≥1 non-visual-diff finding with severity `blocker` or `error`
113
- - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR Playwright chromium missing.
114
-
115
- 11. Teardown:
94
+ 6. **Apply maxCombinations cap** (when `affected_routes` is a list, not `"all"`):
95
+ - Compute `routes × viewports × browsers`. Use diff line counts as proxy for route diff size.
96
+ - Call `applyMaxCombinationsCap` from `lib/ui-browser.ts`.
97
+ - The `skippedFindings` are collected now and included in the final output.
98
+ - Use only the returned `routes` list for the browser passes below.
99
+
100
+ 7. **Verify browser binaries** before starting any browser session:
101
+ - Check each engine in `{{ui_review_config}}.browsers` against `PLAYWRIGHT_BROWSERS_PATH`.
102
+ - Collect all missing engines.
103
+ - If any engine is missing, call `buildBrowserEscalationFinding(engine, process.platform)` for each missing engine, tear down the worktree (step 13), and return `verdict: "escalate"` with those findings.
104
+
105
+ 8. **Per-browser outer loop** — for each `browser` in `{{ui_review_config}}.browsers`:
106
+
107
+ a. Launch a Playwright browser context using the `browser` engine.
108
+
109
+ b. **Visual-diff pass (when `visualDiff.enabled` is true):**
110
+ For each route in the (capped) route list × each viewport in `{{ui_review_config}}.viewports`:
111
+ - Set Playwright viewport to `{ width, height }` from the config.
112
+ - Apply mask CSS — inject a style that sets `visibility: hidden` on any selector in `visualDiff.mask`.
113
+ - Navigate to `http://localhost:{{preview_port}}<base><route>`. If 404, retry without the base.
114
+ - `page.screenshot({ fullPage: false })` → candidate PNG buffer.
115
+ - Compute slug for the route (lowercase, strip leading/trailing slashes, replace slashes with hyphens; `/` → `index`).
116
+ - Note: use `{{repo_root}}` (the absolute main-repo path), NOT `$TMPDIR` or the worktree. See the "Paths" section.
117
+ - Call `compareVisual` (from `lib/visual-diff.ts`) with:
118
+ - `baselinePath = {{repo_root}}/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png`
119
+ - `candidateBuf = <candidate PNG>`
120
+ - `diffPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/diff-{slug}-{viewport}.png`
121
+ - `candidateOutPath = {{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png`
122
+ - `threshold = visualDiff.threshold`
123
+ - `maxDiffRatio = visualDiff.maxDiffRatio`
124
+ - Map result to a finding:
125
+ - `new-baseline` → `severity: "info"`, `rule: "visual-diff"`, `message: "new baseline established for {route} @ {viewport} [{browser}]"`, `metadata: { route, viewport, browser, status: "new-baseline" }`. No attachments.
126
+ - `dimension-mismatch` → `severity: "info"`, `rule: "visual-diff"`, `message: "baseline dimensions changed for {route} @ {viewport} [{browser}]; regenerated"`, `metadata: { route, viewport, browser, status: "dimension-mismatch" }`.
127
+ - `diff` → `severity: "info"`, `rule: "visual-diff"`, `message: "visual diff: {route} @ {viewport} [{browser}] — {diffRatio*100}% pixels differ"`, `metadata: { route, viewport, browser, diffRatio, status: "diff" }`, `attachments: [baseline, candidate, diff]`.
128
+ - `match` → no finding emitted.
129
+
130
+ c. **Axe pass (only when `browser === {{ui_review_config}}.axe.browser`):**
131
+ Skip this section entirely if the current browser is NOT the configured `axe.browser`. Runs on any other engine (webkit and firefox, when `axe.browser` is the default chromium) produce no axe output and no axe findings.
132
+
133
+ For each viewport in `{{ui_review_config}}.axe.viewports`:
134
+ - Set Playwright viewport to `{ width, height }`.
135
+ - For each route in the (capped) route list:
136
+ - Navigate.
137
+ - Inject and run axe-core:
138
+ ```javascript
139
+ import axe from 'axe-core';
140
+ const results = await axe.run(document);
141
+ ```
142
+ - Collect each violation as a raw tuple: `{ viewport, ruleId, target, impact, message, helpUrl }` (from `axe.run` output).
143
+
144
+ d. Close the browser context before launching the next engine.
145
+
146
+ 9. Dedupe raw axe findings via `dedupeAxeFindings(raws, {{ui_review_config}}.axe.dedupeBy, {{ui_review_config}}.axe.ignored)` (from `lib/axe-dedupe.ts`). The `ignored` parameter drops any finding whose `(ruleId, target)` exactly matches an allowlist entry BEFORE dedupe/grouping. Emit the returned `Finding[]`.
147
+
148
+ 10. Severity mapping (preserved from v0.3 via `dedupeAxeFindings`):
149
+ - axe `impact: "critical"` → `severity: "blocker"`
150
+ - axe `impact: "serious"` → `severity: "error"`
151
+ - axe `impact: "moderate"` → `severity: "warning"`
152
+ - axe `impact: "minor"` → `severity: "info"`
153
+
154
+ 11. Compute verdict (visual-diff and ui-review-cap findings are **never** considered for gating):
155
+ - `pass` — zero non-visual-diff, non-cap findings with severity `blocker` or `error`
156
+ - `bounce` — ≥1 non-visual-diff, non-cap finding with severity `blocker` or `error`
157
+ - `escalate` — preview server failed to start, OR axe threw ≥3 consecutive times, OR any required browser binary was absent.
158
+
159
+ 12. **Write ui-review state sidecar** — after all browser passes complete and before teardown, determine whether any `compareVisual` call returned `new-baseline` or `dimension-mismatch` across all routes, viewports, and browsers in this run.
160
+
161
+ - If **yes**: write `{{repo_root}}/.cloverleaf/runs/{{taskId}}/ui-review/state.json` containing:
162
+ ```json
163
+ {"baselines_pending": true}
164
+ ```
165
+ (Create intermediate directories as needed.)
166
+ - If **no**: write `{{repo_root}}/.cloverleaf/runs/{{taskId}}/ui-review/state.json` containing:
167
+ ```json
168
+ {"baselines_pending": false}
169
+ ```
170
+
171
+ This sidecar is the baseline-approval gate read by the `cloverleaf-ui-review` skill. Writing `baselines_pending: false` explicitly (rather than omitting the file) lets the skill distinguish "no new baselines" from "reviewer did not run at all".
172
+
173
+ 13. Teardown:
116
174
  ```bash
117
175
  kill $SERVER_PID 2>/dev/null || true
118
176
  cd {{repo_root}}
@@ -122,7 +180,7 @@ The `PLAYWRIGHT_BROWSERS_PATH` environment variable is set to `~/.cache/ms-playw
122
180
  ## Tool constraints
123
181
 
124
182
  - Read-only for source files and tests.
125
- - You MAY write under `{{repo_root}}/.cloverleaf/baselines/` and `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/` on the feature branch — these are the baselines and artifacts.
183
+ - You MAY write under `{{repo_root}}/.cloverleaf/baselines/` and `{{repo_root}}/.cloverleaf/runs/{taskId}/ui-review/` on the feature branch — these are the baselines, artifacts, and the `state.json` sidecar.
126
184
  - Use `git worktree`: do NOT `git checkout` in the main working directory.
127
185
  - Always teardown the server and worktree, even on error.
128
186
 
@@ -132,7 +190,7 @@ Respond with exactly one JSON object and nothing else. Finding shape must match
132
190
  - required: `severity`, `message`
133
191
  - optional: `rule`, `suggestion`, `location`, `attachments`, `metadata`
134
192
 
135
- For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely.
193
+ For a11y findings there is usually no meaningful file/line, so OMIT `location` entirely. For `location`, use an object shape when present — do not emit `location` as a URL string.
136
194
 
137
195
  ```json
138
196
  {
@@ -141,9 +199,9 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
141
199
  "findings": [
142
200
  {
143
201
  "severity": "blocker" | "error" | "warning" | "info",
144
- "rule": "a11y.<rule-id>" | "visual-diff",
145
- "message": "<description; include the page URL for a11y, route+viewport+diff for visual-diff>",
146
- "metadata": { /* per §78 above */ },
202
+ "rule": "a11y.<rule-id>" | "visual-diff" | "ui-review-cap" | "browser-missing",
203
+ "message": "<description; include the page URL for a11y, route+viewport+browser for visual-diff>",
204
+ "metadata": { /* per §89 above */ },
147
205
  "attachments": [ /* for visual-diff with status="diff" */
148
206
  { "label": "baseline", "path": ".cloverleaf/baselines/{browser}/{slug}-{viewport}.png" },
149
207
  { "label": "candidate", "path": ".cloverleaf/runs/{taskId}/ui-review/candidate-{slug}-{viewport}.png" },
@@ -154,4 +212,4 @@ For a11y findings there is usually no meaningful file/line, so OMIT `location` e
154
212
  }
155
213
  ```
156
214
 
157
- If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings. If verdict is `escalate`, include a finding explaining what went wrong.
215
+ If verdict is `pass`, `findings` may be empty or include only `warning`/`info`-level findings (including `ui-review-cap` warnings and visual-diff info). If verdict is `escalate`, include a finding explaining what went wrong.
@@ -0,0 +1,85 @@
1
+ ---
2
+ name: cloverleaf-approve-baselines
3
+ description: Human baseline-approval gate for the Cloverleaf UI Review pipeline. When the UI Reviewer captures new or resized visual baselines it sets baselines_pending=true in .cloverleaf/runs/{taskId}/ui-review/state.json and blocks the ui-review → qa transition. Run this skill after inspecting the new baseline images to approve them and allow the task to advance to qa. Usage — /cloverleaf-approve-baselines <TASK-ID>.
4
+ ---
5
+
6
+ # Cloverleaf — approve-baselines
7
+
8
+ ## Trigger condition
9
+
10
+ This skill is invoked **only** when the `cloverleaf-ui-review` skill reports that `baselines_pending` is `true` — i.e., the UI Reviewer captured at least one `new-baseline` or `dimension-mismatch` result during its run, meaning one or more baseline PNGs under `.cloverleaf/baselines/{browser}/` were created or replaced.
11
+
12
+ Do not run this skill if the task is not in `ui-review` status or if `state.json` already has `baselines_pending: false`.
13
+
14
+ ## Effect
15
+
16
+ 1. Writes `baselines_pending: false` to `.cloverleaf/runs/{taskId}/ui-review/state.json`.
17
+ 2. Advances the task from `ui-review` → `qa` via the normal agent transition.
18
+ 3. Commits the updated state and status to the feature branch.
19
+
20
+ ---
21
+
22
+ ## Steps
23
+
24
+ 0. Pre-flight:
25
+
26
+ ```bash
27
+ cd <repo_root>
28
+ current=$(git rev-parse --abbrev-ref HEAD)
29
+ if [ "$current" != "main" ]; then git checkout main; fi
30
+ ```
31
+
32
+ If main has uncommitted changes, stop and report.
33
+
34
+ 1. Capture the TASK-ID argument.
35
+
36
+ 2. Load the task and verify status:
37
+ ```bash
38
+ cloverleaf-cli load-task <repo_root> <TASK-ID>
39
+ ```
40
+ Verify `status === "ui-review"`. If not, report and stop.
41
+
42
+ 3. Read the current ui-review state:
43
+ ```bash
44
+ cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID>
45
+ ```
46
+ If `baselines_pending` is already `false` (or the file is absent), report that no approval is needed and stop.
47
+
48
+ 4. Present the new baseline images to the human for review. The baselines live at:
49
+ ```
50
+ <repo_root>/.cloverleaf/baselines/{browser}/{slug}-{viewport}.png
51
+ ```
52
+ List the baseline files that differ between `main` and the feature branch:
53
+ ```bash
54
+ git diff --name-only main..cloverleaf/<TASK-ID> -- .cloverleaf/baselines/
55
+ ```
56
+ Display the list. Ask the human to confirm they have reviewed the images and approve the baselines before proceeding.
57
+
58
+ 5. Once approved, write `baselines_pending: false`:
59
+ ```bash
60
+ cloverleaf-cli write-ui-review-state <repo_root> <TASK-ID> false
61
+ ```
62
+
63
+ 6. Advance the task to qa:
64
+ ```bash
65
+ cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
66
+ ```
67
+
68
+ 7. Commit the changes to the feature branch:
69
+ ```bash
70
+ cd <repo_root>
71
+ git add .cloverleaf/
72
+ git commit -m "cloverleaf: <TASK-ID> baselines approved → qa"
73
+ ```
74
+
75
+ 8. Report:
76
+ > "✓ Baselines approved. `baselines_pending` cleared. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
77
+
78
+ ---
79
+
80
+ ## Rules
81
+
82
+ - Never push.
83
+ - Do not modify source code or test files.
84
+ - Do not skip step 4 — the human must acknowledge the baseline images before approval is recorded.
85
+ - On illegal state transition, report and stop without partial commits.
@@ -74,14 +74,36 @@ description: Run the UI Reviewer agent on a task in the `ui-review` state (full
74
74
 
75
75
  11. Parse the subagent's response. Expect `{"verdict": "pass"|"bounce"|"escalate", "summary": "...", "findings": [...]}`.
76
76
 
77
- 12. Branch on verdict:
78
-
79
- **Pass:**
77
+ 12. **Read the baseline-approval sidecar** (after the subagent completes, regardless of verdict):
78
+ ```bash
79
+ UI_STATE=$(cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID>)
80
+ BASELINES_PENDING=$(echo "$UI_STATE" | node -e "process.stdout.write(JSON.parse(require('fs').readFileSync('/dev/stdin','utf-8')).baselines_pending ? 'true' : 'false')")
80
81
  ```
81
- cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
82
+ Or more concisely:
83
+ ```bash
84
+ BASELINES_PENDING=$(cloverleaf-cli read-ui-review-state <repo_root> <TASK-ID> | node -e "const s=require('fs').readFileSync('/dev/stdin','utf-8'); process.stdout.write(JSON.parse(s).baselines_pending?'true':'false')")
82
85
  ```
83
- Commit: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed → qa"`.
84
- Report: "✓ UI Review passed. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
86
+
87
+ 13. Branch on verdict:
88
+
89
+ **Pass:**
90
+
91
+ Check `BASELINES_PENDING`:
92
+
93
+ - If `BASELINES_PENDING` is `true`:
94
+ - Do NOT advance to `qa`.
95
+ - Commit artifacts: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed (baselines pending approval)"`.
96
+ - Report:
97
+ > "✓ UI Review passed (no a11y errors), but **baselines_pending** is true: one or more new or resized visual baselines were captured and require human approval before advancing to qa.
98
+ > Run `/cloverleaf-approve-baselines <TASK-ID>` to review the new baseline images and approve them, which will clear the flag and advance the task to qa."
99
+ - Stop here (task remains in `ui-review` status).
100
+
101
+ - If `BASELINES_PENDING` is `false` (or state.json is absent):
102
+ ```
103
+ cloverleaf-cli advance-status <repo_root> <TASK-ID> qa agent '' full_pipeline
104
+ ```
105
+ Commit: `git add .cloverleaf/ && git commit -m "cloverleaf: <TASK-ID> ui-review passed → qa"`.
106
+ Report: "✓ UI Review passed. State → qa. Next: `/cloverleaf-qa <TASK-ID>`."
85
107
 
86
108
  **Bounce:**
87
109
  1. Write feedback: `echo '<envelope-json>' > /tmp/cloverleaf-fb-u.json`