start-vibing 4.3.0 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/package.json +2 -2
  2. package/template/.claude/agents/sd-audit.md +121 -2
  3. package/template/.claude/agents/sd-fix.md +11 -0
  4. package/template/.claude/agents/sd-research.md +49 -2
  5. package/template/.claude/skills/super-design/.schema-version +1 -0
  6. package/template/.claude/skills/super-design/SKILL.md +94 -2
  7. package/template/.claude/skills/super-design/audit-state.schema.json +226 -0
  8. package/template/.claude/skills/super-design/references/audit-methodology.md +118 -0
  9. package/template/.claude/skills/super-design/references/design-intelligence-rubric.md +92 -11
  10. package/template/.claude/skills/super-design/references/design-skills-catalog.md +31 -0
  11. package/template/.claude/skills/super-design/scripts/build-import-graph.sh +208 -0
  12. package/template/.claude/skills/super-design/scripts/detect-apps.sh +180 -0
  13. package/template/.claude/skills/super-design/scripts/detect-changes.sh +177 -21
  14. package/template/.claude/skills/super-design/scripts/discover-routes.sh +120 -6
  15. package/template/.claude/skills/super-design/scripts/extract-tokens.mjs +165 -4
  16. package/template/.claude/skills/super-design/scripts/hash-pages.sh +209 -23
  17. package/template/.claude/skills/super-design/scripts/setup-git-notes.sh +21 -0
  18. package/template/.claude/skills/super-design/scripts/validate-state.sh +74 -11
  19. package/template/.claude/skills/super-design/scripts/verify-audit.sh +62 -9
  20. package/template/.claude/skills/super-design/scripts/visual-regression.sh +275 -0
  21. package/template/.claude/skills/super-design/scripts/write-state.sh +53 -0
  22. package/template/.claude/skills/super-design/templates/audit-state.schema.json +0 -57
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "start-vibing",
3
- "version": "4.3.0",
4
- "description": "Setup Claude Code with 9 plugins, 6 community skills, and 8 MCP servers. Parallel install, auto-accept, superpowers + ralph-loop. super-design 0.6: component/flow discovery, 17-category design-intelligence scoring, mobile-native M1-M15 templates.",
3
+ "version": "4.3.2",
4
+ "description": "Setup Claude Code with 9 plugins, 6 community skills, and 8 MCP servers. Parallel install, auto-accept, superpowers + ralph-loop. super-design 0.6.2: canonical audit-state.schema.json + verify --strict, per-viewport {sha256,phash,png_path} hashes with sharp/fpr fallback + MASK_SELECTORS, visual-regression.sh (pixelmatch→odiff→sha256-fallback), DTCG *.tokens.json + Tokens Studio aliases, @fixture-<id> dynamic routes + madge import-graph N=3, per-app monorepo state (pnpm/npm/yarn/Bun/Nx/Turbo).",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "start-vibing": "./dist/cli.js"
@@ -150,6 +150,22 @@ Phase E — Form state coverage
150
150
  all valid, simulated 500, offline, paste into password, autocomplete
151
151
  tokens, Tab order vs visual order, Enter submits, mobile input zoom
152
152
  (font-size < 16px on iOS Safari).
153
+
154
+ Postel's Law robustness check (artifact Part 1 law table; "be liberal in
155
+ what you accept, conservative in what you send"). Per text/tel/email/date
156
+ input, verify the field is liberal on input:
157
+ - trims leading/trailing whitespace before validation;
158
+ - accepts the common format variants users actually type (phone:
159
+ "+55 (11) 9 9999-9999", "5511999999999", "11999999999"; date:
160
+ "2026-04-19", "19/04/2026", "Apr 19 2026"; email: case-insensitive
161
+ local-part where the provider allows it);
162
+ - accepts pasted values with mixed whitespace / soft hyphens / unicode
163
+ thin spaces without rejecting.
164
+ And strict on output: the value submitted to the backend and the value
165
+ re-rendered to the user are canonicalized (E.164 phone, ISO-8601 date,
166
+ trimmed). Record any field that rejects a legitimate variant that a
167
+ reasonable user would type as finding code `form-postel-<slug>`
168
+ (severity MEDIUM unless blocking primary conversion → HIGH).
153
169
  ```
154
170
 
155
171
  **Budget rule:** On large apps, cap to top 5 triggers per page (ranked by
@@ -172,11 +188,79 @@ For each of 10 heuristics (methodology §1), work audit questions. Score 0–4 v
172
188
  ### 3c. WCAG 2.2 AA manual pass
173
189
  Items NOT covered by axe (methodology §2.3): keyboard traps, focus-order-matches-visual-order, `:focus-visible` quality, reflow at 320px, text-spacing override, `prefers-reduced-motion`, alt text quality, link/button text adequacy.
174
190
 
191
+ **WCAG 2.2 new Success Criteria — explicit checks (finding code prefix `a11y-wcag22-<sc>`):**
192
+
193
+ - **2.4.11 Focus Not Obscured (Minimum) — AA** → `a11y-wcag22-2.4.11`. Tab/Shift+Tab through every page at all 3 viewports; verify every focused control is at least partly visible. Common fail: sticky headers/footers (`position:fixed`) covering focused links/inputs. Fix pattern: `html { scroll-padding-top: <header-h>; scroll-padding-bottom: <footer-h>; }`.
194
+ - **2.5.7 Dragging Movements — AA** → `a11y-wcag22-2.5.7`. Enumerate every `draggable="true"`, drag-to-reorder list, range slider, kanban column, canvas drag. Each must expose a single-pointer non-dragging alternative (up/down buttons, ± steppers, numeric input, menu action). Keyboard-only is NOT sufficient (touch-only users).
195
+ - **3.2.6 Consistent Help — A** → `a11y-wcag22-3.2.6`. If help mechanisms exist (contact, chat, self-help link, support email), verify they appear in the same relative DOM order across every page they occur on. Record snapshot quote per page, diff order, file finding if inconsistent.
196
+ - **3.3.7 Redundant Entry — A** → `a11y-wcag22-3.3.7`. In any multi-step process (checkout, onboarding, registration), verify information previously entered is auto-populated or available for selection (e.g., "billing same as shipping" prefilled). Exceptions: essential re-entry (password confirmation), security-related, stale data. Browser autocomplete does not satisfy — the site must provide the value.
197
+ - **3.3.8 Accessible Authentication (Minimum) — AA** → `a11y-wcag22-3.3.8`. On every auth surface (login, re-auth, 2FA, password reset), confirm no cognitive-function test (memorize password, transcribe OTP, puzzle CAPTCHA) is required unless an alternative exists (passkey, magic link), a mechanism helps (paste allowed + `autocomplete="username | current-password | one-time-code"`), or an object/personal-content exception applies. Fail pattern: `onpaste="return false"` or `autocomplete="off"` on password.
198
+ - **3.3.9 Accessible Authentication (Enhanced) — AAA** → `a11y-wcag22-3.3.9` (advisory only, not required for AA audits). Flag as an advisory finding when AA passes only via the object-recognition or personal-content exception (e.g., "select all crosswalks" CAPTCHA). Passkeys / WebAuthn / biometrics / magic links clear this bar.
199
+
175
200
  ### 3d. Baymard (if e-commerce detected)
176
201
  If `package.json` has stripe/shopify/medusajs/saleor OR routes include /checkout /cart /products: checkout-flow + form-design + filter + PDP checklist (methodology §3).
177
202
 
203
+ ### 3e.0 Phase 0 — CrUX field data (MUST run before 3e synthetic lab audit)
204
+
205
+ Lab numbers (Lighthouse / Playwright / web-vitals injected at audit time)
206
+ are deterministic but reflect a single throttled machine. Google ranks on
207
+ **field** data — Chrome User Experience Report (CrUX), 28-day p75 over real
208
+ users. A site can score 95 in the lab and "Poor" in field due to device
209
+ diversity. Field is authoritative; lab is indicative only.
210
+
211
+ Before the synthetic pass in 3e, fetch CrUX for the origin (and for each
212
+ templated page type if a key is configured):
213
+
214
+ ```bash
215
+ # CrUX API (Chrome UX Report) — requires $CRUX_KEY or PageSpeed Insights key
216
+ curl -s "https://chromeuxreport.googleapis.com/v1/records:queryRecord?key=$CRUX_KEY" \
217
+ -H 'Content-Type: application/json' \
218
+ -d "{\"origin\":\"<site-origin>\",\"formFactor\":\"PHONE\"}" \
219
+ > "$SESSION_DIR/vitals/crux_origin_mobile.json"
220
+
221
+ curl -s "https://chromeuxreport.googleapis.com/v1/records:queryRecord?key=$CRUX_KEY" \
222
+ -H 'Content-Type: application/json' \
223
+ -d "{\"origin\":\"<site-origin>\",\"formFactor\":\"DESKTOP\"}" \
224
+ > "$SESSION_DIR/vitals/crux_origin_desktop.json"
225
+
226
+ # Optional: per-URL record (only if URL has sufficient traffic)
227
+ curl -s "https://chromeuxreport.googleapis.com/v1/records:queryRecord?key=$CRUX_KEY" \
228
+ -H 'Content-Type: application/json' \
229
+ -d "{\"url\":\"<full-url>\",\"formFactor\":\"PHONE\"}" \
230
+ > "$SESSION_DIR/vitals/crux_<slug>_mobile.json"
231
+ ```
232
+
233
+ Capture field `p75` for LCP / INP / CLS (and FCP / TTFB when present).
234
+ Outcomes:
235
+ - **CrUX present + sufficient traffic** → field values are the verdict; lab
236
+ values annotate drill-down only.
237
+ - **CrUX absent or insufficient traffic** → record the gap, fall back to
238
+ lab, and tag every performance finding as `source: "lab"`.
239
+
178
240
  ### 3e. Core Web Vitals
179
- Parse `session_dir/vitals/<page>.json`. LCP/INP/CLS/FCP/TTFB/TBT against thresholds (methodology §4). Doherty: interactions <400ms feedback.
241
+ Parse `session_dir/vitals/<page>.json` (lab) AND `crux_*_mobile.json` /
242
+ `crux_*_desktop.json` (field). LCP/INP/CLS/FCP/TTFB/TBT against thresholds
243
+ (methodology §4). Doherty: interactions <400ms feedback.
244
+
245
+ **Tag every performance finding with a `source` field:**
246
+
247
+ ```json
248
+ {
249
+ "rule": "cwv-lcp",
250
+ "source": "lab | field | both",
251
+ "lab_value_ms": 3200,
252
+ "field_p75_ms": 4100,
253
+ "field_sample": "CrUX 28-day p75, PHONE",
254
+ "verdict": "poor"
255
+ }
256
+ ```
257
+
258
+ - `source: "both"` when lab + CrUX agree → highest confidence; proceed to fix.
259
+ - `source: "field"` when CrUX fails but lab passes → real users hit it; still real.
260
+ - `source: "lab"` when CrUX is absent / insufficient → note the gap and
261
+ surface as "unverified by field data" in the executive summary.
262
+ - If lab and field disagree by > 30%, file a meta-finding
263
+ (`rule: perf-lab-field-divergence`) with both numbers and `source: "both"`.
180
264
 
181
265
  ### 3f. Implicit criteria (methodology §5)
182
266
  60+ checks: empty/loading/error states, focus restoration after modals, aria-live for toasts, password affordances, autocomplete tokens, touch target spacing, deep linking, back-button in SPAs, scroll restoration, copy-paste tolerance, timeout/offline/5xx, session expiration, i18n edges, print stylesheet. pass/fail/n-a with evidence.
@@ -265,6 +349,37 @@ mobile-native. Run the 21-item checklist verbatim against each mobile page:
265
349
  Each failed item → finding with `rule: mobile-pattern-M<N>`, evidence from
266
350
  Step 2.5 artifacts (NOT a fresh snapshot), `template_id: M<N>`.
267
351
 
352
+ **Real-device vs emulation disclaimer (MANDATORY).** Playwright MCP drives
353
+ Blink/Chromium in a resized viewport — it is NOT real iOS Safari (WebKit),
354
+ Android Chrome on a low-end device, or any in-app WebView. Emulation can
355
+ confirm layout, DOM, a11y tree, tab order, reduced-motion / forced-colors,
356
+ computed CSS. It CANNOT confirm touch haptics, iOS safe-area rendering,
357
+ iOS Safari font rasterization, PWA install/add-to-home-screen, iOS keyboard
358
+ overlap via `visualViewport`, viewport-zoom quirks under pinch, Samsung
359
+ Internet auto-dark, real Pointer Event latency, or hover-only fallbacks on
360
+ real touch (iOS "sticky hover"). See methodology §9 for the full list.
361
+
362
+ Any mobile finding whose verdict would require iOS Safari or Android Chrome
363
+ on a real device to confirm — touch haptics, iOS safe-area insets, PWA
364
+ install, pinch-zoom quirks, `@media (hover: hover)` behavior on real touch,
365
+ payment sheet (Apple Pay / Google Pay), biometrics, push — MUST be tagged
366
+ in the finding JSON as:
367
+
368
+ ```json
369
+ {
370
+ "category": "real-device-required",
371
+ "real_device_required": true,
372
+ "emulation_verdict": "likely_fail | likely_pass | indeterminate",
373
+ "requires": ["ios-safari", "android-chrome"],
374
+ "rationale": "Playwright runs Blink; iOS Safari uses WebKit; cannot confirm X on emulation."
375
+ }
376
+ ```
377
+
378
+ `sd-synthesis` MUST surface a "real-device verification needed" banner at
379
+ the top of the executive summary listing every finding where
380
+ `real_device_required=true`, grouped by `requires` platform, so the human
381
+ reviewer books a BrowserStack / Sauce / LambdaTest session before sign-off.
382
+
268
383
  Cross-reference the competitor component vocabulary from
269
384
  `.cache/evidence/component-comparison.md` — if every competitor uses bottom
270
385
  tabs on mobile and the product uses hamburger-only, density score drops AND
@@ -328,7 +443,11 @@ this to produce the executive DIS summary.
328
443
  document.head.appendChild(s);
329
444
  });
330
445
  window.__axe = await window.axe.run(document, {
331
- runOnly: { type: 'tag', values: ['wcag2a','wcag2aa','wcag21a','wcag21aa','wcag22aa','best-practice'] }
446
+ runOnly: { type: 'tag', values: ['wcag2a','wcag2aa','wcag21a','wcag21aa','wcag22aa','best-practice'] },
447
+ // WCAG 2.2 rules (e.g., focus-not-obscured) ship under axe-core's
448
+ // experimental flag — without this, SC 2.4.11 / 2.5.7 / 2.5.8 etc.
449
+ // simply will NOT execute. Always enable for super-design audits.
450
+ experimental: true
332
451
  });
333
452
  })();
334
453
  ```
@@ -195,6 +195,17 @@ Source of truth: `references/fix-agent-playbook.md` §7.
195
195
  - Swap color used in >5 files → needs_human (too broad for single fix)
196
196
  - Convert design token itself → MEDIUM, escalate
197
197
 
198
+ **Color-space rule (V4 and any new token):** When emitting new color tokens
199
+ (V4 snap-to-nearest and any fresh tokens proposed by V-templates), express
200
+ them in **OKLCH** — the perceptually uniform color space used by modern
201
+ design systems (Tailwind v4, shadcn 2024+, Radix Colors). Hex / RGB are
202
+ accepted ONLY when they match the existing codebase convention (e.g., the
203
+ project's `tokens.css` / `globals.css` already defines all colors as hex).
204
+ Mixing OKLCH tokens into a hex-only codebase requires a separate
205
+ token-migration finding and is `needs_human`. Format:
206
+ `--color-primary-500: oklch(0.65 0.20 265);` (lightness 0-1, chroma 0+,
207
+ hue 0-360).
208
+
198
209
  ## ux templates (U1–U10)
199
210
 
200
211
  | ID | Fix |
@@ -22,8 +22,29 @@ Output: exactly one file `docs/super-design/market-analysis.md` + evidence under
22
22
 
23
23
  3. **Detect niche.** Apply 8-signal scoring (playbook §1). Confidence = top / (top + second). If <0.55, use AskUserQuestion with 3 options from top verticals. Record reasoning to `.cache/evidence/niche.md`.
24
24
 
25
+ **Regulated-niche always-confirm rule.** In regulated niches, compliance-driven design choices override aesthetic preference, so the niche must always be confirmed with the user. If the detected niche falls into any of the following — **fintech, healthtech, legaltech, gambling, crypto, insurance, children's-app** — ALWAYS fire `AskUserQuestion` to confirm niche + regulatory scope even when detector confidence is ≥0.95. These niches carry compliance implications (SOC2, HIPAA, PCI-DSS, GDPR, PSD2, COPPA, KYC/AML, age-gating, disclosure-mandated copy) that design directly affects — getting the niche wrong wastes the audit. Record the confirmation (selected scope, applicable regulations) to `.cache/evidence/niche.md` under `regulatory_scope:`.
26
+
25
27
  4. **Discover competitors.** 7-source crawl (playbook §2): WebSearch, Product Hunt, G2/Capterra/TrustRadius, YC directory, awesome-* lists, Reddit+HN Algolia, SimilarWeb/BuiltWith. Dedupe by domain. Rank fame × similarity × design-signal. Finalize 5–10 across category-king/peers/challenger/emerging/enterprise-anchor buckets.
26
28
 
29
+ **4a. Neumeier insertion test during discovery (per candidate).** For every candidate competitor considered for the final 5–10, apply Neumeier's insertion test (playbook §5.4): *"If this competitor's brand mark were swapped with the project's, would users notice?"* Score each on a 0–5 scale:
30
+
31
+ | Score | Meaning |
32
+ |---|---|
33
+ | 0 | Fully swappable — no brand equity, pure commodity visual language |
34
+ | 1 | Mostly swappable — generic category codes only |
35
+ | 2–3 | Partially distinct — some ownable elements but weak |
36
+ | 4 | Strong distinct identity — clear ownable signals |
37
+ | 5 | Instantly distinct — singular, unmistakable brand mark |
38
+
39
+ Competitors scoring ≤1 are **commodity benchmarks** (show what the category looks like by default); competitors scoring ≥4 **reveal defensible territory** (show what ownable positioning looks like). Include a healthy mix of both. Record the score and one-line justification per competitor in `market-analysis.md` (competitor table) and the per-competitor row in `.cache/evidence/<slug>/component-catalog.md` under a new `Insertion-test score:` field.
40
+
41
+ **4b. Vibe-quadrant final gate (self-check before step 5).** Before moving to step 5, plot the project draft position and each finalized competitor on a 2-axis vibe quadrant:
42
+
43
+ - **X axis:** serious ↔ playful
44
+ - **Y axis:** minimal ↔ expressive
45
+
46
+ If the project lands in the **same quadrant as ≥3 competitors**, surface a warning in `market-analysis.md` under a `## Positioning risk` section — exact text: `crowded quadrant — positioning risk` — and recommend **one axis shift** (per Kapferer prism §4.3 / Aaker dimensions §4.2) that would move the project into a less-occupied quadrant. **Do not auto-decide the shift**; document the warning and the recommended axis for synthesis (step 8) to reconcile with the user. Save the quadrant plot data (project + competitor coordinates) to `.cache/evidence/vibe-quadrant.md`.
47
+
27
48
  5. **Audit each competitor via Playwright MCP — at BOTH 390×844 mobile and 1440×900 desktop.** Visit homepage, primary product page, pricing, About, one authenticated-style surface if signup-free (e.g., docs, app tour). Per playbook §3 PLUS component-level extraction per §3bis below. Save to `.cache/evidence/<slug>/<viewport>/`.
28
49
 
29
50
  ### §3bis. Component-level extraction (mandatory, not optional)
@@ -97,11 +118,37 @@ and sd-fix use to recommend aesthetic direction.
97
118
 
98
119
  6. **Classify each.** Archetype (§4.1), Aaker peak (§4.2), vibe class, NN/g 4D tone (§7.1), hero-pattern.
99
120
 
121
+ **6a. Voice/tone capture — 8–12 copy sample rule (mandatory).** For each competitor, collect **8–12 distinct copy samples** (≥8 minimum; fewer = insufficient signal), one per surface where available:
122
+
123
+ - Hero headline
124
+ - Primary CTA label
125
+ - Error message
126
+ - Empty state
127
+ - 404 page
128
+ - Onboarding step 1
129
+ - Pricing caption / plan blurb
130
+ - Footer blurb
131
+ - ToS / legal excerpt
132
+ - (Optional extras: subhead, feature card, support article opener, confirmation toast)
133
+
134
+ Grade **each sample** on the NN/g 4D tone dimensions (playbook §7.1) using integers in {−1, 0, +1}:
135
+
136
+ - formal ↔ casual
137
+ - funny ↔ serious
138
+ - respectful ↔ irreverent
139
+ - enthusiastic ↔ matter-of-fact
140
+
141
+ Report per-sample scores + verbatim quote + source URL in `.cache/evidence/<slug>/copy-samples.md`, and the **mean + variance** per axis in `market-analysis.md` (tone row per competitor). Healthy brands are constant on voice, variable on tone.
142
+
143
+ **Insufficient-signal rule.** If fewer than 8 distinct samples can be collected (static site, gated app, locale blockers), **do not compute a tone profile** — flag the competitor as `tone-inconclusive` in `market-analysis.md` with a note listing which surfaces were missing. Never fabricate samples or scores to reach the threshold.
144
+
100
145
  7. **Build category-code matrix.** Tabulate dimensions (§5.1). Frequency per column. Classify codes obey/extend/subvert/open (§5.2).
101
146
 
102
- 8. **Synthesize.** Archetype in whitespace via Neumeier insertion test (§5.4). Palette, typography, tone, audience, JTBD. Draft onliness statement.
147
+ 8. **Synthesize.** Archetype in whitespace via Neumeier insertion test (§5.4). Palette, typography, tone, audience, JTBD.
148
+
149
+ 8b. **Three-territories pitch (Q7 — Part 7 of `docs/compass_artifact_wf-2e33af6e-127f-402e-8ce6-cb506fc91b94_text_markdown.md` lines 515–519, 652–653).** Before drafting the onliness statement, produce THREE parallel variants of the design direction — **safe** (conforms to category codes), **expected** (the obvious evolution of category codes), **edgy** (the considered provocation / subversion). Each variant MUST include: palette strip (3–6 tokens), type specimen (primary + optional display), motion character (duration + easing archetype), one-line rationale tying back to archetype + category-code matrix from step 7. Build them in parallel — never serialize, or you will anchor to the first. Save to `.cache/evidence/territories/{safe,expected,edgy}.md` and include a summary table in the brief. The user chooses the primary territory (optionally stealing one detail from another) BEFORE the onliness statement lands. "Presenting one direction looks like opinion; presenting three looks like strategy" (artifact line 519).
103
150
 
104
- 9. **Write `market-analysis.md`** per playbook §8 schema.
151
+ 9. **Draft onliness statement** against the chosen territory, then **write `market-analysis.md`** per playbook §8 schema (include the three-territories summary + chosen primary).
105
152
 
106
153
  10. **Self-check.** Fix gaps before returning.
107
154
 
@@ -8,7 +8,7 @@ description: >
8
8
  UX audit (WCAG 2.2 AA, Nielsen heuristics, Baymard, CWV), and synthesized
9
9
  overview. Re-audits only what changed since last run. On explicit user request,
10
10
  applies surgical fixes with full rollback.
11
- version: 0.6.0
11
+ version: 0.6.2
12
12
  ---
13
13
 
14
14
  # super-design
@@ -86,9 +86,14 @@ Pass findings via files under `.super-design/sessions/<id>/`, not chat.
86
86
 
87
87
  ### Step 4: Write state + history
88
88
 
89
- - Atomic write `.audit-state.json` (.tmp then rename).
89
+ - Atomic write `.audit-state.json` via `scripts/write-state.sh` (takes JSON
90
+ on stdin, writes `.tmp`, validates with `jq`, then renames). Do NOT write
91
+ the state file directly.
90
92
  - Append session to `audit-history.md`.
91
93
  - `git notes --ref=super-design add -f -m <json> HEAD`.
94
+ - First-time notes setup (run once per clone, also in `setup-git-notes.sh`
95
+ if you extract it): `git config --add remote.origin.fetch '+refs/notes/super-design:refs/notes/super-design'`
96
+ — without this, notes don't round-trip across clones (artifact §7).
92
97
 
93
98
  ### Step 5: Return summary (≤5 sentences)
94
99
 
@@ -100,9 +105,96 @@ Do NOT paste overview into chat.
100
105
  - `--refresh-research` — rerun sd-research
101
106
  - `--only <cat>` — a11y | design | ux | perf | research
102
107
  - `--scope <url>` — specific route
108
+ - `--app <name>` — scope the entire run to one monorepo app (matches a
109
+ `name` entry from `scripts/detect-apps.sh`). Required when `--scope <url>`
110
+ is ambiguous between multiple apps.
103
111
  - `--fix` — run sd-fix after audit
104
112
  - `--dry-run` — artifacts without committing state
105
113
  - `--ci` — non-interactive, create PR, exit non-zero on blockers
114
+ - `--update-baselines` — Re-hash pages and tokens without re-auditing (use after accepted cosmetic drift). Also accepted by `scripts/visual-regression.sh` to overwrite `.super-design/baselines/*.png` with the current capture.
115
+ - `--visual-regression` — Run `scripts/visual-regression.sh` after hashing. Reads the `visual_regression` block from `.audit-state.json` (engine: pixelmatch | odiff | sha256-fallback; threshold 0.1; max_diff_pixel_ratio 0.01). See artifact §16.
116
+ - `MASK_SELECTORS=<sel,sel,...>` (env) — Extra CSS selectors masked in every screenshot captured by `scripts/hash-pages.sh`. Artifact §3.4 defaults (`[data-timestamp], .relative-time, [data-react-hydration], video, canvas`) are always applied.
117
+
118
+ ## Monorepo support
119
+
120
+ Audit state is per-app (artifact §11 line 902) so independent deploys
121
+ carry independent freshness, `git_sha_at_audit`, and tool results. Layout
122
+ is auto-detected; nothing else to configure.
123
+
124
+ ### Detection
125
+
126
+ `scripts/detect-apps.sh` reads the first workspace manifest it finds:
127
+
128
+ | Manifest | Source of globs |
129
+ |----------|-----------------|
130
+ | `pnpm-workspace.yaml` | `packages:` list |
131
+ | `package.json` | `workspaces: [...]` or `workspaces.packages: [...]` (npm, yarn, Bun) |
132
+ | `turbo.json` | Presence → uses pnpm/npm/yarn workspaces; falls back to `apps/*` + `packages/*` if none |
133
+ | `nx.json` | `workspaceLayout.appsDir` / `libsDir` (default `apps/*`, `libs/*`) |
134
+ | `bunfig.toml` | Presence → falls back to `apps/*` + `packages/*` if package.json has no workspaces |
135
+
136
+ Each matched directory that also has a `package.json` becomes an app
137
+ with `name` taken from `package.json#name` (scope stripped), `path` the
138
+ directory, and `state_path` = `<path>/docs/super-design/.audit-state.json`.
139
+ If nothing matches, `detect-apps.sh` emits a `single` layout with
140
+ `path: "."` and the repo-root state path — preserving existing single-app
141
+ behavior.
142
+
143
+ ### Per-app pipeline
144
+
145
+ - **Preflight**: per app, read `<app>/docs/super-design/.audit-state.json`
146
+ via `validate-state.sh <app_path>`.
147
+ - **Change detection**: `scripts/detect-changes.sh --all-apps` loops over
148
+ every app and narrows `git diff` to `-- <app_path>/` so each app's
149
+ scope decision sees only its own files. Single-app shape is preserved
150
+ with `detect-changes.sh <last_sha>`.
151
+ - **Write state**: `scripts/write-state.sh <app_path>` derives the target
152
+ path; for single-app repos pass `.` or omit.
153
+
154
+ ### URL → app disambiguation
155
+
156
+ `--scope <url>` still targets one URL. When the URL maps cleanly to a
157
+ single app (e.g. `apps/admin` serves `https://admin.example.com`), the
158
+ pipeline picks that app automatically. When mapping is ambiguous
159
+ (multiple apps serve overlapping hostnames, or URL patterns cross apps),
160
+ the user MUST pass `--app <name>` — otherwise the skill aborts with a
161
+ `{"error":"ambiguous-app","candidates":[...]}` verdict instead of
162
+ guessing.
163
+
164
+ ## Scripts
165
+
166
+ Reusable shell helpers under `scripts/`. All POSIX/bash, tested on
167
+ Windows git-bash + Linux.
168
+
169
+ - `discover-routes.sh` — emits `route_map` as a JSON array. Dynamic
170
+ segments (`[slug]`, `[[...all]]`, `$id`, `:uid`) are suffixed with
171
+ `@fixture-<id>` (artifact §2.7). Fixtures resolved from sibling
172
+ `*.fixture.json`, `fixtures/<name>.json`, or `$SUPER_DESIGN_FIXTURES`
173
+ env JSON; falls back to `@fixture-default` with a warning. Consumers
174
+ (hash-pages, sd-audit) MUST strip the suffix before navigating.
175
+ - `build-import-graph.sh` — builds `.super-design/import-graph.json`
176
+ (`{nodes, edges, hash, backend}`) and persists `import_graph_sha` to
177
+ state. Prefers `npx madge --json <roots>`; falls back to a regex
178
+ scanner (JS/TS only, no alias resolution) if madge is missing.
179
+ - Query: `bash .../build-import-graph.sh importers <file> --hops 3`
180
+ → BFS over reversed edges; `detect-changes.sh` uses this to close
181
+ the component→pages gap when only components changed (Step 2 scope
182
+ decision: "Only components changed → re-audit pages importing them
183
+ (N=3 hops via madge)").
184
+ - `hash-pages.sh` — captures 3 viewports per URL (mobile_375, tablet_768,
185
+ desktop_1280), emits `{html_hash, dom_structure_hash, viewport_hashes:
186
+ {<vp>: {sha256, phash, png_path}}}` per page to
187
+ `docs/super-design/.cache/hashes/hashes.json` and persists each PNG to
188
+ `<cache>/screenshots/<url-enc>/<vp>.png`. Applies artifact §3.4 mask
189
+ defaults plus `MASK_SELECTORS`; `phash` uses `sharp` when available
190
+ (tagged `phash:`) or a deterministic PNG fingerprint otherwise
191
+ (tagged `fpr:`, only useful for exact-match comparison).
192
+ - `visual-regression.sh [--update-baselines] [<state>]` — reads the
193
+ `visual_regression` block from `.audit-state.json` and diffs current
194
+ screenshots against `.super-design/baselines/`. Engine chain:
195
+ `pixelmatch` → `odiff` → `sha256-fallback`. Emits
196
+ `{page, viewport, diff_ratio, threshold, pass, diff_image_path}` to
197
+ `<diff_dir>/results.json`. Exits non-zero if any page fails.
106
198
 
107
199
  ## References (Read on demand)
108
200
 
@@ -0,0 +1,226 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://hakutaku.ai/schemas/super-design/audit-state.schema.json",
4
+ "title": "super-design audit state",
5
+ "description": "Canonical schema for docs/super-design/.audit-state.json — the state file that super-design reads on every run to decide what to re-audit. Derived from docs/compass_artifact_wf-f52f98c2-73c9-4483-b49c-6b080ef7dc92_text_markdown.md sections 5, 10, and 16.",
6
+ "type": "object",
7
+ "additionalProperties": true,
8
+ "required": [
9
+ "schema_version",
10
+ "skill_version",
11
+ "last_audit_at",
12
+ "git_sha_at_audit"
13
+ ],
14
+ "properties": {
15
+ "schema_version": {
16
+ "type": "string",
17
+ "description": "Semver string for this state file's schema. Major bumps force a full re-audit (see §12 line 934, §10).",
18
+ "pattern": "^\\d+\\.\\d+\\.\\d+(?:[-+].+)?$"
19
+ },
20
+ "skill_version": {
21
+ "type": "string",
22
+ "description": "Semver of the super-design skill that wrote this state.",
23
+ "pattern": "^\\d+\\.\\d+\\.\\d+(?:[-+].+)?$"
24
+ },
25
+ "last_audit_at": {
26
+ "type": "string",
27
+ "description": "ISO-8601 timestamp of the last completed audit (used for freshness cascade: 90d soft, 180d hard).",
28
+ "format": "date-time"
29
+ },
30
+ "git_sha_at_audit": {
31
+ "type": "string",
32
+ "description": "Git SHA (7–64 hex chars) that HEAD pointed to at audit time. Used as the range-start for next audit's git log (§2.1).",
33
+ "pattern": "^[0-9a-f]{7,64}$"
34
+ },
35
+ "git_branch": {
36
+ "type": "string",
37
+ "description": "Branch name at audit time (informational)."
38
+ },
39
+ "is_shallow_clone": {
40
+ "type": "boolean",
41
+ "description": "True if the repo was a shallow clone at audit time. Signals that __MISSING__ fallback (§6) may need git fetch --unshallow."
42
+ },
43
+
44
+ "theory_doc_sha": {
45
+ "type": "string",
46
+ "description": "`sha256:`-prefixed hex digest of references/design-theory.md. A mismatch forces a full re-audit (§9.1).",
47
+ "pattern": "^sha256:[0-9a-f]{4,64}$"
48
+ },
49
+ "market_analysis_sha": {
50
+ "type": "string",
51
+ "description": "`sha256:`-prefixed hex digest of the current market-analysis.md output.",
52
+ "pattern": "^sha256:[0-9a-f]{4,64}$"
53
+ },
54
+
55
+ "tools": {
56
+ "type": "object",
57
+ "description": "Versions of the audit toolchain at audit time. Major bumps can invalidate prior findings.",
58
+ "additionalProperties": true,
59
+ "properties": {
60
+ "axe-core": { "type": "string" },
61
+ "lighthouse": { "type": "string" },
62
+ "playwright": { "type": "string" },
63
+ "playwright-mcp": { "type": "string" },
64
+ "pa11y": { "type": "string" }
65
+ }
66
+ },
67
+
68
+ "framework": {
69
+ "type": "object",
70
+ "description": "Detected framework info. Router/version migration triggers full re-audit (§7).",
71
+ "additionalProperties": true,
72
+ "properties": {
73
+ "name": { "type": "string" },
74
+ "router": { "type": "string" },
75
+ "version": { "type": "string" }
76
+ },
77
+ "required": ["name"]
78
+ },
79
+
80
+ "route_map": {
81
+ "type": "array",
82
+ "description": "Canonical route list at audit time. Diff against prior route_map produces new/deleted routes (§7).",
83
+ "items": { "type": "string" }
84
+ },
85
+
86
+ "pages_audited": {
87
+ "type": "array",
88
+ "description": "Per-page hashes + findings (§3, §5). Incremental audits compare these to decide which pages to re-visit.",
89
+ "items": {
90
+ "type": "object",
91
+ "additionalProperties": true,
92
+ "required": ["url"],
93
+ "properties": {
94
+ "url": { "type": "string" },
95
+ "route_file": { "type": "string" },
96
+ "html_hash": { "type": "string", "pattern": "^sha256:[0-9a-f]{4,64}$" },
97
+ "dom_structure_hash": { "type": "string", "pattern": "^sha256:[0-9a-f]{4,64}$" },
98
+ "screenshot_hash": { "type": "string", "pattern": "^(sha256|phash):[0-9a-f]{4,64}$" },
99
+ "viewport_hashes": {
100
+ "type": "object",
101
+ "description": "Per-viewport hashes (mobile_375/tablet_768/desktop_1280). Each entry may be a bare phash string (back-compat with artifact §10 line 492) or the extended {sha256,phash,png_path} object emitted by hash-pages.sh. Any viewport drift -> re-audit that page.",
102
+ "additionalProperties": {
103
+ "oneOf": [
104
+ { "type": "string" },
105
+ {
106
+ "type": "object",
107
+ "properties": {
108
+ "sha256": { "type": "string" },
109
+ "phash": { "type": "string" },
110
+ "png_path": { "type": "string" }
111
+ }
112
+ }
113
+ ]
114
+ }
115
+ },
116
+ "mask_selectors": {
117
+ "type": "array",
118
+ "description": "CSS selectors masked in this page's screenshots (artifact §3.4). Merged from hash-pages.sh defaults + MASK_SELECTORS env.",
119
+ "items": { "type": "string" }
120
+ },
121
+ "phash_engine": {
122
+ "type": "string",
123
+ "description": "Which engine produced the phash values. `phash:` = sharp-based aHash. `fpr:` = zero-dep PNG fingerprint fallback (exact-match only)."
124
+ },
125
+ "last_audited": { "type": "string", "format": "date-time" },
126
+ "findings_ids": {
127
+ "type": "array",
128
+ "items": { "type": "string" }
129
+ }
130
+ }
131
+ }
132
+ },
133
+
134
+ "components": {
135
+ "type": "object",
136
+ "description": "Map of component path -> xxh3 hash (§8). Source of truth for component-level change detection.",
137
+ "additionalProperties": { "type": "string" }
138
+ },
139
+
140
+ "token_hash": {
141
+ "type": "string",
142
+ "description": "sha256: hash of the canonicalized design-token map. Mismatch invalidates every page (§9).",
143
+ "pattern": "^sha256:[0-9a-f]{4,64}$"
144
+ },
145
+
146
+ "import_graph_sha": {
147
+ "type": "string",
148
+ "description": "`sha256:`-prefixed hex digest of the serialized import graph (built by madge, or by the regex-scanner fallback when madge is missing). Used to compute the blast radius of component changes.",
149
+ "pattern": "^sha256:[0-9a-f]{4,64}$"
150
+ },
151
+
152
+ "findings_counts": {
153
+ "type": "object",
154
+ "description": "Aggregate tally after last audit (§5).",
155
+ "additionalProperties": false,
156
+ "properties": {
157
+ "blockers": { "type": "integer", "minimum": 0 },
158
+ "high": { "type": "integer", "minimum": 0 },
159
+ "medium": { "type": "integer", "minimum": 0 },
160
+ "nitpicks": { "type": "integer", "minimum": 0 }
161
+ }
162
+ },
163
+
164
+ "research_at": {
165
+ "type": "string",
166
+ "description": "ISO-8601 timestamp of the last sd-research run. Used to decide whether to rerun research (>90d -> refresh).",
167
+ "format": "date-time"
168
+ },
169
+
170
+ "ignored_paths": {
171
+ "type": "array",
172
+ "description": "Globs excluded from design-relevance classification (§2.3).",
173
+ "items": { "type": "string" }
174
+ },
175
+
176
+ "visual_regression": {
177
+ "type": "object",
178
+ "description": "Optional visual-regression config (§16). Absent when user hasn't opted in. Consumed by scripts/visual-regression.sh.",
179
+ "additionalProperties": true,
180
+ "properties": {
181
+ "enabled": { "type": "boolean", "default": false },
182
+ "engine": {
183
+ "type": "string",
184
+ "description": "Engine chain: scripts/visual-regression.sh tries engines in order (`pixelmatch` → `odiff` → `sha256-fallback`) and falls back to the next. `sha256-fallback` = exact-match only, always available.",
185
+ "enum": ["pixelmatch", "odiff", "resemble", "looks-same", "playwright", "sha256-fallback"],
186
+ "default": "pixelmatch"
187
+ },
188
+ "threshold": { "type": "number", "minimum": 0, "default": 0.1 },
189
+ "max_diff_pixel_ratio": { "type": "number", "minimum": 0, "maximum": 1, "default": 0.01 },
190
+ "antialiasing": { "type": "boolean", "default": true },
191
+ "viewports": {
192
+ "type": "array",
193
+ "items": {
194
+ "type": "object",
195
+ "required": ["label", "width", "height"],
196
+ "properties": {
197
+ "label": { "type": "string" },
198
+ "width": { "type": "integer", "minimum": 1 },
199
+ "height": { "type": "integer", "minimum": 1 }
200
+ }
201
+ }
202
+ },
203
+ "mask_selectors": {
204
+ "type": "array",
205
+ "items": { "type": "string" }
206
+ },
207
+ "baseline_dir": {
208
+ "type": "string",
209
+ "description": "Where accepted baseline PNGs live. Committed to git (small repos) or stored via LFS/artifacts (large).",
210
+ "default": ".super-design/baselines"
211
+ },
212
+ "current_dir": {
213
+ "type": "string",
214
+ "description": "Where hash-pages.sh writes the current run's PNGs. Usually gitignored.",
215
+ "default": "docs/super-design/.cache/hashes/screenshots"
216
+ },
217
+ "diff_dir": {
218
+ "type": "string",
219
+ "description": "Where diff images + results.json are emitted by visual-regression.sh.",
220
+ "default": "docs/super-design/.cache/hashes/diffs"
221
+ },
222
+ "docker_image": { "type": "string" }
223
+ }
224
+ }
225
+ }
226
+ }