start-vibing 4.3.0 → 4.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/package.json +2 -2
  2. package/template/.claude/agents/sd-audit.md +121 -2
  3. package/template/.claude/agents/sd-fix.md +11 -0
  4. package/template/.claude/agents/sd-research.md +49 -2
  5. package/template/.claude/skills/super-design/.schema-version +1 -0
  6. package/template/.claude/skills/super-design/SKILL.md +94 -2
  7. package/template/.claude/skills/super-design/audit-state.schema.json +226 -0
  8. package/template/.claude/skills/super-design/references/audit-methodology.md +118 -0
  9. package/template/.claude/skills/super-design/references/design-intelligence-rubric.md +92 -11
  10. package/template/.claude/skills/super-design/references/design-skills-catalog.md +31 -0
  11. package/template/.claude/skills/super-design/scripts/build-import-graph.sh +208 -0
  12. package/template/.claude/skills/super-design/scripts/detect-apps.sh +180 -0
  13. package/template/.claude/skills/super-design/scripts/detect-changes.sh +177 -21
  14. package/template/.claude/skills/super-design/scripts/discover-routes.sh +120 -6
  15. package/template/.claude/skills/super-design/scripts/extract-tokens.mjs +165 -4
  16. package/template/.claude/skills/super-design/scripts/hash-pages.sh +209 -23
  17. package/template/.claude/skills/super-design/scripts/setup-git-notes.sh +21 -0
  18. package/template/.claude/skills/super-design/scripts/validate-state.sh +74 -11
  19. package/template/.claude/skills/super-design/scripts/verify-audit.sh +62 -9
  20. package/template/.claude/skills/super-design/scripts/visual-regression.sh +275 -0
  21. package/template/.claude/skills/super-design/scripts/write-state.sh +53 -0
  22. package/template/.claude/skills/super-design/templates/audit-state.schema.json +0 -57
@@ -187,6 +187,66 @@ The 2026 WebAIM Million report (released ~30 March 2026, https://webaim.org/proj
187
187
 
188
188
  **What automation CANNOT catch** (the manual 40–70%): keyboard trap detection beyond trivial cases; screen-reader announcement quality; meaningful alt text (tools detect missing, not quality); logical focus order when CSS reorders; error message clarity; appropriate heading structure (presence vs meaning); context-appropriate link text; video caption accuracy/sync; color meaning (1.4.1); reading order under CSS transforms; reflow at 320px (no tool); text-spacing override; focus-indicator quality (2.4.11 / 2.4.13); `prefers-reduced-motion` honoring; target size / spacing (partial); autocomplete correctness; captcha alternatives; content-on-hover persistence (1.4.13); page title descriptiveness; consistent navigation/identification (3.2.3 / 3.2.4).
189
189
 
190
+ ### 2.5 Tool triangulation for automated a11y (MANDATORY)
191
+
192
+ axe-core alone catches only ~57% of WCAG issues by volume (Deque's own
193
+ coverage study) and ~40% by Success Criterion count. Running a single
194
+ engine guarantees blind spots. sd-audit MUST run axe **plus** at least two
195
+ more engines and dedupe the merged results by `{rule, selector}`.
196
+
197
+ **Engines to run in parallel (don't serialize — they're cheap):**
198
+
199
+ ```bash
200
+ # 1. axe-core — inject via Playwright MCP (already done in sd-audit Step 2,
201
+ # with `experimental: true` so WCAG 2.2 rules fire).
202
+
203
+ # 2. Pa11y — runs both htmlcs and axe runners for broader coverage
204
+ pa11y "$URL" \
205
+ --standard WCAG2AA \
206
+ --runner axe \
207
+ --runner htmlcs \
208
+ --reporter json \
209
+ > "$SESSION_DIR/a11y/pa11y_<slug>_<vp>.json"
210
+
211
+ # 3. WAVE API — WebAIM's detector, overlay-oriented, strong on contrast and ARIA
212
+ curl -s "https://wave.webaim.org/api/request?key=$WAVE_KEY&url=$(printf %s "$URL" | jq -sRr @uri)&reporttype=4" \
213
+ > "$SESSION_DIR/a11y/wave_<slug>_<vp>.json"
214
+
215
+ # 4. IBM Equal Access Accessibility Checker — ACT-rule aligned, ~140 rules
216
+ achecker "$URL" \
217
+ --reportLevel violation,potentialviolation \
218
+ --outputFormat json \
219
+ --outputFolder "$SESSION_DIR/a11y/achecker_<slug>_<vp>/"
220
+ ```
221
+
222
+ **Dedup and severity normalization:**
223
+
224
+ ```js
225
+ // Pseudocode — run in sd-audit Step 3a after parsing all engine outputs.
226
+ const key = (f) => `${f.rule}::${f.selector}`;
227
+ const bucket = new Map();
228
+ for (const engine of ['axe', 'pa11y', 'wave', 'achecker']) {
229
+ for (const f of findings[engine]) {
230
+ const k = key(f);
231
+ if (!bucket.has(k)) bucket.set(k, { ...f, engines: [] });
232
+ bucket.get(k).engines.push(engine);
233
+ }
234
+ }
235
+ // Confidence ladder:
236
+ // engines.length === 1 → single-source, keep but tag "unverified"
237
+ // engines.length >= 2 → triangulated, high confidence
238
+ // engines.length >= 3 → near-certain, promote severity +1 step
239
+ ```
240
+
241
+ **Why each engine is non-redundant:**
242
+ - **axe-core** — strongest on `4.1.2`, `1.4.3`, `1.1.1`, `1.3.1` subsets; experimental flag gates 2.2.
243
+ - **Pa11y** — htmlcs runner catches rule shapes axe doesn't ship; dual-runner mode is unique.
244
+ - **WAVE** — contrast slider tool + in-context overlay; different heuristics for ARIA landmarks; never marks a page "passed" (useful bias — never false-clean).
245
+ - **IBM Equal Access** — ACT-Rules aligned (different normative base than axe); baseline-file regression; severity categories (`violation / potentialviolation / recommendation / manual`) map onto Nielsen 0–4.
246
+
247
+ Record per-finding `source_engines: [...]` so sd-synthesis can promote
248
+ triangulated issues and down-weight single-engine noise.
249
+
190
250
  ### 2.4 Regulatory context
191
251
 
192
252
  **European Accessibility Act** (Directive (EU) 2019/882) enforceable **28 June 2025** for new products/services; 28 June 2030 deadline for pre-existing. References **EN 301 549** (currently WCAG 2.1 AA baseline; being updated to 2.2). Extraterritorial.
@@ -201,6 +261,64 @@ The 2026 WebAIM Million report (released ~30 March 2026, https://webaim.org/proj
201
261
 
202
262
  Baymard (Copenhagen, founded ~2009) is used by 71% of Fortune 500 e-commerce companies. Research: 54 rounds of benchmarking, 327 top-grossing US/EU sites, 275,000+ manually assigned UX performance scores, 200,000+ research hours. Methodology page: https://baymard.com/research/methodology. Headline finding: the average large e-commerce site can gain **+35.26% conversion** through better checkout design — **$260B** in recoverable US+EU orders.
203
263
 
264
+ ### 3.0 Baymard sub-rule enumeration (finding code prefixes)
265
+
266
+ The single blanket "Baymard" verdict is too coarse for a structured audit.
267
+ Baymard organises findings by **surface** (checkout, PDP, search, etc.) and
268
+ each surface has a bounded, enumerable set of sub-rules. Every Baymard
269
+ finding raised by sd-audit MUST use one of the prefixes below so
270
+ sd-synthesis can group them and so the fix-playbook can route to the right
271
+ U-template. Rules that do not yet have an official Baymard number are
272
+ prefixed by count only (e.g. `baymard-cc-01` … `baymard-cc-14`) with the
273
+ source section cited.
274
+
275
+ | Surface | Prefix | Count | Source (methodology §) |
276
+ |---|---|---|---|
277
+ | Credit card form | `baymard-cc-<NN>` | 14 rules | §3.3 + https://baymard.com/checkout-usability/credit-card-patterns |
278
+ | Address form | `baymard-addr-<NN>` | 8 rules | §3.4 + https://baymard.com/blog/address-line-2 + https://baymard.com/blog/automatic-address-lookup |
279
+ | Search | `baymard-search-<NN>` | 12 rules | §3.6 + https://baymard.com/ecommerce-design-examples/34-autocomplete-suggestions |
280
+ | Filter | `baymard-filter-<NN>` | 10 rules | §3.6 + https://baymard.com/blog/promoting-product-filters + https://baymard.com/blog/have-filters-for-list-item-info |
281
+ | Breadcrumbs | `baymard-bread-<NN>` | 6 rules | §3.7 + https://baymard.com/blog/ecommerce-breadcrumbs |
282
+ | PDP / Product Detail | `baymard-pdp-<NN>` | 18 rules | §3.8 + https://baymard.com/research/product-page |
283
+
284
+ **`baymard-cc-*` — Credit card form (14 rules, §3.3):**
285
+ 1. `baymard-cc-01` auto-format spaces per card brand (4-4-4-4 / 4-6-5 AMEX / 4-4-4-4-3 19-digit)
286
+ 2. `baymard-cc-02` auto-detect card type via IIN ranges (adapt length limit, CVV help, spacing)
287
+ 3. `baymard-cc-03` expiration as single MM/YY field with auto-inserted slash (NOT YYYY, NOT two dropdowns)
288
+ 4. `baymard-cc-04` CVV field with adaptive help image (3-digit back for Visa/MC, 4-digit front for AMEX)
289
+ 5. `baymard-cc-05` autocomplete tokens present: `cc-number`, `cc-name`, `cc-exp`, `cc-exp-month`, `cc-exp-year`, `cc-csc`
290
+ 6. `baymard-cc-06` `inputmode="numeric"` (never `type="number"`) on card number, expiration, CVV
291
+ 7. `baymard-cc-07` never clear CC number/CVV on validation error (preserve entered data)
292
+ 8. `baymard-cc-08` stored-card edit uses "fake editing" (delete + re-add) per PCI; clear messaging
293
+ 9. `baymard-cc-09` inline validation with 5%-abandonment guardrail (no vague "Card declined" without reason)
294
+ 10. `baymard-cc-10` fallback entry path when wallet (Apple Pay / Google Pay) fails
295
+ 11. `baymard-cc-11` accept pasted numbers; strip spaces/dashes server+client
296
+ 12. `baymard-cc-12` surface accepted card brands near the input (logo strip) before submit
297
+ 13. `baymard-cc-13` billing-address reuse: "Billing same as shipping" default-checked with editable prefill
298
+ 14. `baymard-cc-14` error messages identify which field failed + how to fix (not "Payment failed")
299
+
300
+ > Source: §3.3 bullets + https://baymard.com/checkout-usability/credit-card-patterns. Rules 10–14 derived from §3.3 narrative; number them in this table and supersede with Baymard's official IDs when available.
301
+
302
+ **`baymard-addr-*` — Address form (8 rules, §3.4):**
303
+ 1. `baymard-addr-01` country selector FIRST (drives all subsequent field formats/validation)
304
+ 2. `baymard-addr-02` single "Address Line 1" + optional "Address Line 2" labeled "Apt, Suite — optional" (never omit Line 2)
305
+ 3. `baymard-addr-03` automatic address autocomplete preferred (9% manual-entry typo rate without it)
306
+ 4. `baymard-addr-04` postal-code autodetect of city/state (28% mobile sites fail)
307
+ 5. `baymard-addr-05` US state as dropdown (not free text); UK optional county; hide for countries without subdivisions
308
+ 6. `baymard-addr-06` autocomplete tokens: `street-address`, `address-line1`, `address-line2`, `address-level1/2`, `postal-code`, `country`
309
+ 7. `baymard-addr-07` "Billing same as shipping" default-checked with editable prefilled fields (also WCAG 3.3.7 Redundant Entry)
310
+ 8. `baymard-addr-08` name inputs accept Unicode, hyphens, apostrophes, single-name users, >20 chars
311
+
312
+ > Source: §3.4.
313
+
314
+ **`baymard-search-*` — Search (12 rules, §3.6):** placeholders for 12 rules covering autocomplete presence, scope suggestions in autocomplete, search-within-current-category, autodirect on category match, query-term pluralization tolerance, typo tolerance, "no results" with recovery options, recent-searches memory, sort-vs-filter separation, faceted-search state in URL, submit-without-suggestion-selection, voice search on mobile. Rules `baymard-search-01` … `baymard-search-12`. Source: §3.6 bullets + https://baymard.com/ecommerce-design-examples/34-autocomplete-suggestions. Enumerate the exact wording when next Baymard PDF is purchased.
315
+
316
+ **`baymard-filter-*` — Filter (10 rules, §3.6):** placeholders `baymard-filter-01` … `baymard-filter-10` covering: promote top filters above the product grid; truncate long value lists >10 with styled "More" link; category-specific filters (megapixels, temperature rating); filters for every attribute displayed in list items; expand/collapse icons right-aligned; applied-filter pills visible and individually removable; "clear all" affordance; range sliders with keyboard + numeric input; multi-select affordance obvious; result count live-updated via `aria-live`. Source: §3.6 bullets + https://baymard.com/blog/promoting-product-filters + https://baymard.com/blog/have-filters-for-list-item-info.
317
+
318
+ **`baymard-bread-*` — Breadcrumbs (6 rules, §3.7):** placeholders `baymard-bread-01` … `baymard-bread-06` covering: present on all non-home pages; implement BOTH hierarchy-based AND history-based (68% of top 50 sub-par, 45% only hierarchy, 23% none); present on mobile (65% mobile fail); no "hidden in more" collapse on desktop without reveal; last crumb non-clickable; structured-data markup (`BreadcrumbList`). Source: §3.7 + https://baymard.com/blog/ecommerce-breadcrumbs.
319
+
320
+ **`baymard-pdp-*` — PDP / Product Detail (18 rules, §3.8):** placeholders `baymard-pdp-01` … `baymard-pdp-18` covering: single dominant Add-to-Cart (no 3–6 competing colorful buttons); shipping cost/ETA visible on PDP (64% of users look for it); total visible before checkout (24% abandon otherwise); accordion over horizontal tabs (67% of accordion users mis-implement; 28% use worst-performing tabs); UGC visuals present (67% lack them); minimum 3–5 images + zoom + variant-driven imagery; swatches not dropdowns for variants; out-of-stock variants visible-but-disabled (not removed); star ratings above the fold (up to +18% conversion with verified badges); stock urgency without dark patterns; delivery-date estimator; returns policy on PDP; size guide inline (not in a separate page); Q&A / reviews with filtering; cross-sell without shoving below CTA; "notify me" flow for OOS; price history for discount honesty; country/currency switcher persisted. Source: §3.8 + https://baymard.com/research/product-page.
321
+
204
322
  ### 3.1 Cart abandonment
205
323
  **70.19% average abandonment** across 49 studies 2006–2023, range 55–84.27% (https://baymard.com/lists/cart-abandonment-rate). By device: mobile 77.06%, tablet 66.39%, desktop 70.01%. **Reasons** (excluding 43% "just browsing"): extra costs 48%; forced account creation 24%; slow delivery 19%; distrust with CC 18–19%; too long/complicated 17–18%; couldn't see total up front 16%; errors/crashes 13%; returns policy 12%; declined CC 9%; limited payment methods 7%.
206
324
 
@@ -29,6 +29,8 @@ A score without evidence is invalid. Auditor records `n/a` instead of guessing.
29
29
 
30
30
  ## Category 1 — Visual hierarchy (weight 1.0)
31
31
 
32
+ **Rationale:** Reber et al. processing fluency + Tractinsky aesthetic-usability — clean dominance hierarchies literally reduce cognitive load; beauty buys friction tolerance only after hierarchy is solved (artifact Parts 1–3).
33
+
32
34
  **Question:** On this view, what is the single primary goal? Is it the most
33
35
  dominant element visually?
34
36
 
@@ -47,6 +49,8 @@ dominant element visually?
47
49
 
48
50
  ## Category 2 — Density calibration per viewport (weight 1.2)
49
51
 
52
+ **Rationale:** Fitts's Law + thumbzone ergonomics — density must respect the physical reach envelope of the device; cramming desktop density onto mobile violates motor cost.
53
+
50
54
  **Question:** Does information density match the device context?
51
55
 
52
56
  | Viewport | Expected primary entities visible above fold |
@@ -68,6 +72,8 @@ dominant element visually?
68
72
 
69
73
  ## Category 3 — Consistency: spacing scale (weight 0.8)
70
74
 
75
+ **Rationale:** Gestalt proximity + rhythm — shared spacing units fuse related elements and separate distinct ones; arbitrary magic numbers break grouping perception.
76
+
71
77
  **Question:** Do paddings, margins, gaps come from a scale (4/8px or 0.25rem) or are they arbitrary magic numbers?
72
78
 
73
79
  **Detect:**
@@ -86,12 +92,16 @@ dominant element visually?
86
92
 
87
93
  ## Category 4 — Consistency: typography scale (weight 0.8)
88
94
 
95
+ **Rationale:** Processing fluency (Reber) — a discrete type scale accelerates recognition; a shapeless set of sizes forces re-parsing of hierarchy on every screen.
96
+
89
97
  Same method for font-size, font-weight, line-height. Look for `text-\[\d+px\]` and arbitrary font-size. Expected: 6–10 sizes total in a designed system; 30+ sizes = vibecoded.
90
98
 
91
99
  ---
92
100
 
93
101
  ## Category 5 — Consistency: color palette (weight 0.8)
94
102
 
103
+ **Rationale:** Valdez & Mehrabian (1994) — saturation × value drive emotional response more than hue; a disciplined palette with controlled lightness/saturation ranges is the single highest-leverage decision for perceived quality (artifact line 43).
104
+
95
105
  **Detect:**
96
106
  - Collect computed `color`, `background-color`, `border-color` from ≥100 elements.
97
107
  - Unique colors count. <15 = disciplined. 30+ = vibecoded.
@@ -108,6 +118,8 @@ Same method for font-size, font-weight, line-height. Look for `text-\[\d+px\]` a
108
118
 
109
119
  ## Category 6 — Whitespace & breathing room (weight 0.7)
110
120
 
121
+ **Rationale:** *Ma* (間) — negative space is substance, not absence; whitespace signals confidence and lets figure/ground perception resolve without strain.
122
+
111
123
  **Question:** Does content have room to breathe, or is it crammed?
112
124
 
113
125
  **Detect:** Compute average `padding-inline + margin-inline` per content block. Compare to container width. Measure content-to-chrome ratio.
@@ -123,6 +135,8 @@ Same method for font-size, font-weight, line-height. Look for `text-\[\d+px\]` a
123
135
 
124
136
  ## Category 7 — Text legibility (weight 1.2)
125
137
 
138
+ **Rationale:** Miller 4±1 (Cowan 2001) + Postel's robustness — legible bodies keep working-memory cost low; dense microtext forces re-reading and exhausts the 4-chunk budget that forms and scannable text depend on.
139
+
126
140
  **Detect:** `browser_evaluate` → collect `fontSize` computed px. Find minimum across visible text.
127
141
 
128
142
  | Viewport | Min body | Min meta | Min input |
@@ -142,6 +156,8 @@ Same method for font-size, font-weight, line-height. Look for `text-\[\d+px\]` a
142
156
 
143
157
  ## Category 8 — CTA hierarchy (weight 1.0)
144
158
 
159
+ **Rationale:** Hick-Hyman Law — decision time grows with the log of equally-weighted options; multiple competing primaries flatten hierarchy into a choose-your-adventure and cost measurable conversion (Baymard PDP data).
160
+
145
161
  **Question:** Is there ONE primary CTA per view?
146
162
 
147
163
  **Detect:** Count buttons with `variant=default | primary | filled` OR bg-primary class. >1 above fold = competing.
@@ -159,6 +175,8 @@ Reference: Baymard PDP — 51% of e-commerce pages fail due to competing CTAs.
159
175
 
160
176
  ## Category 9 — State coverage (weight 1.1)
161
177
 
178
+ **Rationale:** Norman's "make system state visible" (Seven Stages of Action) + Nielsen H1 visibility-of-system-status — missing loading/empty/error states break the user's feedback loop and strand them in uncertainty.
179
+
162
180
  Per page, does the UI handle: default / loading / empty / error / success?
163
181
 
164
182
  **Detect per scenario:**
@@ -177,37 +195,89 @@ Per page, does the UI handle: default / loading / empty / error / success?
177
195
 
178
196
  ## Category 10 — Touch targets (mobile only, weight 1.0)
179
197
 
180
- **Detect:** `browser_evaluate` get `getBoundingClientRect` of every clickable. Count targets < 44×44 px.
198
+ **Rationale:** Fitts's Law (MT = a + b·log₂(2D/W)) — acquisition time scales inversely with target width; sub-44px targets on fingers multiply error rate and exhaust motor patience.
199
+
200
+ **Detect:** `browser_evaluate` → get `getBoundingClientRect` of every
201
+ clickable (buttons, links, `[role=button|link|tab|checkbox|radio|switch]`,
202
+ `<input>`, `<select>`, `<summary>`, anchors with click handlers). Record
203
+ the smaller of `width × height` per target.
204
+
205
+ ### Spec reconciliation
206
+
207
+ Three conflicting specs define "how big a touch target should be":
208
+
209
+ | Spec | Size | Nature | Citation |
210
+ |---|---|---|---|
211
+ | **WCAG 2.5.8 Target Size (Minimum) — AA** | **24 × 24 CSS px** | Baseline (legal/accessibility floor, with spacing exception) | https://www.w3.org/WAI/WCAG22/Understanding/target-size-minimum |
212
+ | **Apple Human Interface Guidelines** | **44 × 44 pt** | Platform-native target (iOS) | https://developer.apple.com/design/human-interface-guidelines/accessibility#Interactivity |
213
+ | **Material Design (Android)** | **48 × 48 dp** | Platform-native target (Android) | https://m3.material.io/foundations/accessible-design/accessibility-basics |
214
+ | **WCAG 2.5.5 Target Size (Enhanced) — AAA** | 44 × 44 CSS px | Advisory ceiling | https://www.w3.org/WAI/WCAG21/Understanding/target-size |
215
+
216
+ sd-audit reconciles as follows:
217
+ - **Baseline = 24 × 24 CSS px** — WCAG 2.5.8 AA pass; legally sufficient
218
+ (with 24px center-to-center spacing exception).
219
+ - **Target = 44 × 44 CSS px** — HIG / Material / WCAG AAA; the single
220
+ pragmatic "platform-native" size across iOS + Android + web (Android
221
+ 48 dp ≈ 44 CSS px at default DPI).
222
+ - Spacing exception keeps sub-44 icons compliant with WCAG AA but does NOT
223
+ earn full design-intelligence points — they still feel cramped on a
224
+ phone, which is what Fitts's Law above predicts.
225
+
226
+ ### Scoring ladder
227
+
228
+ Per-target classification:
229
+ - **Full points** (≥ 44 × 44 CSS px) — platform-native, HIG/Material-clean.
230
+ - **Half points** (24 – 43 CSS px, min dimension) — WCAG AA pass but
231
+ sub-optimal; counts as half a compliant target.
232
+ - **Zero points** (< 24 CSS px, min dimension) — WCAG AA FAIL; raises a
233
+ separate `a11y-wcag22-2.5.8` finding in addition to pulling this score.
234
+
235
+ Let `N` = total targets, `n44` = count ≥ 44×44, `n24` = count in
236
+ [24, 44), `n0` = count < 24. Compute
237
+ `compliance = (n44 + 0.5 × n24) / N`.
181
238
 
182
239
  | Score | Criteria |
183
240
  |---|---|
184
- | 10 | 100% targets 44×44 OR 24×24 with 8px+ gap |
185
- | 7 | 80–99% compliant |
186
- | 4 | 50–80% compliant (common: icon-only buttons) |
187
- | 0 | Widespread <24px targets |
241
+ | 10 | `compliance ≥ 0.95` AND `n0 == 0` — essentially all targets ≥ 44 |
242
+ | 7 | `0.80 ≤ compliance < 0.95` AND `n0 == 0` — some half-credit (24–43 px) targets, none under 24 |
243
+ | 4 | `0.50 ≤ compliance < 0.80` OR `0 < n0 ≤ 2` — common icon-only button fail; any WCAG breach |
244
+ | 0 | `compliance < 0.50` OR `n0 ≥ 3` — widespread <24 px targets, structural problem |
245
+
246
+ Any `n0 > 0` ALWAYS also raises a separate finding with prefix
247
+ `a11y-wcag22-2.5.8` (the rubric scores design intelligence; the finding
248
+ records the legal breach).
188
249
 
189
250
  ---
190
251
 
191
- ## Category 11 — Motion & feedback (weight 0.6)
252
+ ## Category 11 — Motion & feedback / perceived performance (weight 0.6)
192
253
 
193
- **Question:** Do interactions give feedback? Are animations tasteful and respect `prefers-reduced-motion`?
254
- **Rationale:** Doherty threshold (Doherty & Thadhani, IBM 1982) — system response <400 ms sustains flow; above that, perceived unresponsiveness begins. Paired with INP (Core Web Vitals) for the measurable proxy.
255
+
256
+ **Question:** Do interactions give feedback? Are animations tasteful and respect `prefers-reduced-motion`? Does every interaction land within the Doherty ceiling?
194
257
 
195
258
  **Detect:**
196
259
  - `browser_evaluate` with `matchMedia('(prefers-reduced-motion: reduce)')` + check for `transition` / `animation` on interactive elements.
197
260
  - Missing hover/focus feedback on buttons = major fail.
198
261
  - >3s animations = excessive.
199
262
 
263
+ **Perceived-performance sub-criterion (Doherty 400 ms ceiling, alongside INP).**
264
+ - Parse `session_dir/vitals/<page>.json` for INP (from `web-vitals@5` attribution build).
265
+ - For each primary interaction (Step 2.5 Phase A enumeration — clicks on CTA, form submit, nav link, combobox, modal trigger), compute **end-to-end response time** = click → visual feedback (spinner / state change / new pixels painted), not just INP.
266
+ - Fail rule: an interaction that **passes INP** (≤ 200 ms rating "good") but whose **user-perceivable response exceeds 400 ms** (e.g., INP fires at 150 ms but the resulting navigation/paint lands at 900 ms with no intermediate skeleton/optimistic UI) **penalizes C11**. Doherty is the ceiling; INP is the low-floor subset. Apply the Nielsen 0.1 s / 1 s / 10 s progress rule for anything over 400 ms (skeleton, optimistic UI, determinate bar + ETA + cancel for >10 s).
267
+
200
268
  | Score | Criteria |
201
269
  |---|---|
202
- | 10 | Hover/focus/active feedback everywhere; animations ≤300ms; reduced-motion respected |
203
- | 7 | Most interactions feedback; reduced-motion partial |
204
- | 4 | Some interactions static; reduced-motion ignored |
205
- | 0 | No hover/focus feedback at all OR autoplay video + parallax with no disable |
270
+ | 10 | Hover/focus/active feedback everywhere; animations ≤300 ms; reduced-motion respected; every interaction under Doherty 400 ms OR shows skeleton/optimistic state |
271
+ | 7 | Most interactions feedback; reduced-motion partial; occasional >400 ms interaction without feedback |
272
+ | 4 | Some interactions static; reduced-motion ignored; multiple interactions cross Doherty with no intermediate state |
273
+ | 0 | No hover/focus feedback at all OR autoplay video + parallax with no disable OR interactions routinely exceed 400 ms with blank waits |
206
274
 
207
275
  ---
208
276
 
209
277
  ## Category 12 — Nav pattern matches platform (weight 1.0)
210
278
 
279
+ **Rationale:** Fitts's Law + Hick-Hyman Law — nav patterns succeed when they minimize both motor cost (thumbzone/edge placement) and choice cost (limited top-level destinations, chunked per Miller 4±1).
280
+
211
281
  | Viewport | Expected nav |
212
282
  |---|---|
213
283
  | Mobile (≤768) | Bottom tab bar (3–5), full-screen menus, gesture back |
@@ -226,6 +296,9 @@ Per page, does the UI handle: default / loading / empty / error / success?
226
296
 
227
297
  ## Category 13 — Table-on-mobile detection (weight 1.2, mobile only)
228
298
 
299
+ **Rationale:** Platform affordance + thumbzone — desktop tables violate mobile reading models (microtext, horizontal overflow, no visible sort); transformation to card/list is the minimum cost to preserve parse-ability.
300
+
301
+
229
302
  **Detect:** At ≤768px, find `<table>` with >3 visible columns OR `display: table` containers with horizontal scroll AND text < 13px.
230
303
 
231
304
  | Score | Criteria |
@@ -240,6 +313,8 @@ Per page, does the UI handle: default / loading / empty / error / success?
240
313
 
241
314
  ## Category 14 — Modal/sheet appropriateness (weight 0.8)
242
315
 
316
+ **Rationale:** Fitts's Law + thumbzone — on mobile, close affordances belong where the thumb lives; centered dialogs with top-right dismiss violate reach on phones and strand users in forced-modal states.
317
+
243
318
  | Viewport | Expected modal pattern |
244
319
  |---|---|
245
320
  | Mobile | Bottom sheet (slide-up) or full-screen with close top-left |
@@ -258,6 +333,8 @@ Per page, does the UI handle: default / loading / empty / error / success?
258
333
 
259
334
  ## Category 15 — Color semantics (weight 0.6)
260
335
 
336
+ **Rationale:** Jakob's Law (users spend most time on other products) + learned convention — red/green/amber mappings are pre-installed in users' mental models; using them decoratively forces re-learning and breaks status recognition at a glance.
337
+
261
338
  **Detect:** Collect colors used on: error messages, success states, warnings, info. Red = error? Green = success? Or decorative-only?
262
339
 
263
340
  | Score | Criteria |
@@ -270,6 +347,8 @@ Per page, does the UI handle: default / loading / empty / error / success?
270
347
 
271
348
  ## Category 16 — Design-system coherence (weight 1.1)
272
349
 
350
+ **Rationale:** Tesler's Law (conservation of complexity) + von Neumann consistency — complexity does not disappear, it moves; a disciplined system absorbs variation once inside tokens/variants/primitives so every downstream surface stays predictable. Incoherent systems push the same complexity onto users (re-learning each screen) and onto engineers (ad-hoc classes per component). This is why C16 carries one of the highest weights: coherence is not polish, it is the mechanism that conserves attention.
351
+
273
352
  **The meta-category.** Does the app LOOK like it was designed by one team with one vision? Or does it look like a collection of shadcn defaults?
274
353
 
275
354
  **Detect (aesthetic signal):**
@@ -290,6 +369,8 @@ session. See `design-skills-catalog.md`.
290
369
 
291
370
  ## Category 17 — Vibecode detection (weight 1.0)
292
371
 
372
+ **Rationale:** Norman's reflective layer (Emotional Design, 2004 — artifact line 547) — vibecoded surfaces pass the visceral/behavioral layers but fail reflective judgment; code that reads as "hand-assembled divs" telegraphs lack of intentional system, which is exactly what distinguishes "designed" from "vibecoded" output.
373
+
293
374
  **Question:** Does the code follow patterns (components, variants, tokens)
294
375
  or is it hand-assembled divs with inline styles?
295
376
 
@@ -62,6 +62,37 @@ it follows the skill's specific tokens + patterns instead of defaults.
62
62
  | Editorial / blog | Readable long-form | `typeui-paper` |
63
63
  | Feature-grid homepage | Modular showcase | `typeui-bento` |
64
64
 
65
+ ### Vibe → typeui skill (primary → fallback)
66
+
67
+ Covers every vibe enumerated in the artifact Part 4 (12-vibe vocabulary). The
68
+ primary skill carries the aesthetic; the fallback handles adjacent contexts or
69
+ fills gaps when the primary would over-commit. When a project vibe has no
70
+ single-perfect skill (e.g. Premium/luxury, Warm/organic), the fallback plus
71
+ `/frontend-design` is the intended path.
72
+
73
+ | Part-4 vibe | Primary skill | Fallback | Notes |
74
+ |---|---|---|---|
75
+ | Minimal / clean | `typeui-clean` | `typeui-application` | Default pick for pre-launch marketing and "honest SaaS". |
76
+ | Bold / confident | `typeui-bold` | `typeui-dramatic` | Challenger brands, consumer launches. |
77
+ | Playful / friendly | `typeui-doodle` | `typeui-artistic` | Education, kids, creative tools. |
78
+ | Serious / professional (B2B) | `typeui-enterprise` | `typeui-ant` | Procurement-facing, compliance. |
79
+ | Technical / data-dense (SaaS admin) | `typeui-dashboard` | `typeui-application` | Dark-theme analytics, operator consoles. |
80
+ | Editorial / reading | `typeui-paper` | `typeui-clean` | Long-form content, publications. |
81
+ | Modular / showcase | `typeui-bento` | `typeui-application` | Feature grids, portfolios. |
82
+ | Expressive / artistic | `typeui-artistic` | `typeui-dramatic` | Design tools, non-enterprise vibe-forward. |
83
+ | Raw / statement (neobrutalism) | `typeui-neobrutalism` | `typeui-bold` | Gen-Z, indie, deliberate rule-breaking. |
84
+ | Premium / luxury | `typeui-dramatic` | `typeui-paper` | No dedicated luxury skill — combine dramatic hero with paper's typographic restraint, then commission custom tokens via `/frontend-design`. |
85
+ | Tech / cyberpunk | `typeui-dashboard` | `typeui-bold` | Dashboard dark base + bold accent/glow; extend via `/frontend-design` for neon/chromatic detail. |
86
+ | Warm / organic | `typeui-paper` | `typeui-doodle` | Paper carries the warmth via texture + typographic rhythm; doodle adds hand-made detail for craft brands. |
87
+ | Retro / nostalgic | `typeui-paper` | `typeui-doodle` | Paper's print-era cues fit mid-century/editorial retro; doodle for 90s/zine nostalgia. `/frontend-design` required for period-specific palettes. |
88
+ | Dark / cinematic | `typeui-dramatic` | `typeui-dashboard` | Dramatic for narrative hero surfaces; dashboard for operator/app surfaces that must stay dark through the product. |
89
+
90
+ Read this table as: "if the positioning brief (sd-research §4) lands on vibe X,
91
+ sd-audit/sd-fix should recommend the **primary** skill first; if the project
92
+ has constraints that rule it out (e.g. already on a light palette), fall back
93
+ to the secondary; if both are partial, log a non-blocking advisory that
94
+ `/frontend-design` is required to finish the aesthetic."
95
+
65
96
  ## Recommending a skill in a finding
66
97
 
67
98
  When `design-intelligence.categories.design_system_coherence.score ≤ 4`,
@@ -0,0 +1,208 @@
1
+ #!/usr/bin/env bash
2
+ # build-import-graph.sh — build a JS/TS import graph for the repo so
3
+ # super-design can propagate component-level changes to pages within N
4
+ # hops (artifact §8, line 666: default N=3).
5
+ #
6
+ # Usage:
7
+ # build-import-graph.sh [--state <path>] # full build
8
+ # build-import-graph.sh importers <file> [--hops N] # BFS query
9
+ #
10
+ # Output (full build): .super-design/import-graph.json with shape
11
+ # {
12
+ # "nodes": ["src/app/page.tsx", ...],
13
+ # "edges": [{"from":"src/app/page.tsx","to":"src/components/Nav.tsx"}, ...],
14
+ # "hash": "sha256:<hex>",
15
+ # "backend": "madge" | "regex-fallback",
16
+ # "built_at":"<ISO-8601>"
17
+ # }
18
+ #
19
+ # State: writes `import_graph_sha` back into .audit-state.json via
20
+ # scripts/write-state.sh. The sha lives in audit-state.schema.json so
21
+ # detect-changes.sh can short-circuit re-propagation if the graph is
22
+ # unchanged.
23
+ #
24
+ # Backend choice:
25
+ # 1. If `npx madge --version` works, use `npx madge --json <roots>`
26
+ # (artifact §8: madge reads .madgerc / package.json#madge).
27
+ # 2. Else fall back to a zero-dep regex scanner (JS/TS/JSX/TSX only —
28
#    no CSS/Vue/Svelte detection, no alias resolution). Logs a
29
+ # warning so the caller knows propagation is best-effort.
30
# Fail fast: -e aborts on any error, -u rejects unset variables,
# pipefail surfaces a failure from any stage of a pipeline.
set -euo pipefail

# Directory this script lives in — used to locate sibling scripts
# (write-state.sh) regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Default audit-state location; overridable via `build --state <path>`.
STATE_PATH="docs/super-design/.audit-state.json"
# Output location for the generated graph artifact.
OUT_DIR=".super-design"
OUT_FILE="$OUT_DIR/import-graph.json"

# log <msg...> — prefixed diagnostic on stderr (stdout is reserved
# for JSON results).
log() { printf '[build-import-graph] %s\n' "$*" >&2; }
38
+
39
# resolve_roots — print one source root per line.
# Prefers the `source_roots` array in the audit state; falls back to
# whichever of the conventional directories (src, app, pages) exist.
# Always returns 0: the previous `[[ -d "$d" ]] && echo "$d"` form
# left the function with a non-zero status whenever the LAST candidate
# directory was missing, which aborted callers' $( ... | ... )
# substitutions under `set -euo pipefail`.
resolve_roots() {
  local roots=""
  if [[ -f "$STATE_PATH" ]]; then
    # jq/state failures are non-fatal; we just fall back to defaults.
    roots="$(jq -r '.source_roots // empty | .[]?' "$STATE_PATH" 2>/dev/null || true)"
  fi
  if [[ -n "$roots" ]]; then
    printf '%s\n' "$roots"
    return 0
  fi
  local d
  for d in src app pages; do
    if [[ -d "$d" ]]; then
      echo "$d"
    fi
  done
  return 0
}
51
+
52
+ # --- Full build -----------------------------------------------------------
53
+
54
# build_madge — run madge over the source roots and print its raw JSON
# dependency map ({ "file": [deps], ... }) on stdout. Prints '{}' when
# no roots exist. madge's stderr noise is discarded; a hard madge
# failure propagates as a non-zero exit for the caller to handle.
build_madge() {
  local roots
  # resolve_roots already emits one path per line; join with spaces
  # directly instead of the previous lines → JSON array → lines
  # round-trip through two jq invocations.
  roots="$(resolve_roots | tr '\n' ' ')"
  if [[ -z "${roots// /}" ]]; then
    log "no source roots found"
    echo '{}'
    return
  fi
  # Word-splitting of $roots is intentional (space-separated list).
  # shellcheck disable=SC2086
  npx --yes madge --json $roots 2>/dev/null
}
64
+
65
# build_regex_fallback — zero-dependency substitute for madge: scan
# JS/TS-family files under the source roots for import/require
# specifiers and print a madge-shaped JSON map ({file: [deps]}) on
# stdout. Prints '{}' when no roots exist.
#
# Limitations (deliberate, see file header): only ./relative and
# /absolute specifiers become edges; bare package names are skipped;
# no tsconfig/webpack alias resolution; `../` segments and missing
# file extensions are NOT resolved, so edge targets are best-effort
# path strings rather than verified files.
build_regex_fallback() {
  log "madge unavailable; using regex fallback (JS/TS only, no alias resolution)"
  local roots
  roots="$(resolve_roots | tr '\n' ' ')"
  [[ -z "$roots" ]] && { echo '{}'; return; }

  # Emit NDJSON: {"from":"<path>","to":"<dep>"} for each import line in
  # each JS/TS file, then fold into {file: [deps]} via jq.
  # Word-splitting of $roots is intentional (space-separated list).
  # shellcheck disable=SC2086
  find $roots -type f \( \
    -name '*.ts' -o -name '*.tsx' \
    -o -name '*.js' -o -name '*.jsx' \
    -o -name '*.mjs' -o -name '*.cjs' \) 2>/dev/null \
    | while IFS= read -r f; do
        # Grep import/require specifiers. Handles:
        #   import ... from 'x'
        #   import 'x'
        #   require('x')
        #   import('x')   (dynamic)
        #   export ... from 'x'
        grep -hoE "(from|require|import)[[:space:]]*\(?[[:space:]]*['\"][^'\"]+['\"]" "$f" 2>/dev/null \
          | sed -E "s|.*['\"]([^'\"]+)['\"].*|\1|" \
          | while IFS= read -r spec; do
              [[ -z "$spec" ]] && continue
              # Skip bare package specifiers for graph purposes (we only
              # want local file edges to compute importers).
              case "$spec" in
                .*|/*) : ;;
                *) continue ;;
              esac
              # Normalize: resolve relative to $f's directory, best-effort.
              # NOTE(review): one jq process is spawned per edge here —
              # slow on large trees, though correctness is unaffected.
              local dir resolved
              dir="$(dirname "$f")"
              resolved="$dir/$spec"
              # Strip ./ and trailing /
              resolved="$(printf '%s' "$resolved" \
                | sed -E 's|/\./|/|g; s|/$||')"
              jq -cn --arg from "$f" --arg to "$resolved" '{from:$from,to:$to}'
            done
      done \
    | jq -sc 'group_by(.from) | map({key:.[0].from, value:(map(.to)|unique)}) | from_entries'
}
107
+
108
# build_graph_json — produce the raw {file: [deps]} dependency map on
# stdout, preferring madge and falling back to the regex scanner.
# Sets the global BACKEND ("madge" | "regex-fallback") as a side
# channel for emit_final.
#
# NOTE(review): BACKEND is assigned in whatever shell runs this
# function. main() invokes it via command substitution — a subshell —
# so the assignment never reaches the parent; callers using $(...)
# must set BACKEND themselves or emit_final will see it unset.
build_graph_json() {
  local raw
  if npx --yes madge --version >/dev/null 2>&1; then
    # `|| echo '{}'` keeps a builder crash from killing the script
    # under set -e; we degrade to an empty graph instead.
    raw="$(build_madge || echo '{}')"
    BACKEND="madge"
  else
    raw="$(build_regex_fallback || echo '{}')"
    BACKEND="regex-fallback"
  fi
  [[ -z "$raw" ]] && raw='{}'
  printf '%s' "$raw"
}
120
+
121
# emit_final <raw-json> — wrap the {file: [deps]} map into the final
# artifact written to $OUT_FILE:
#   {nodes, edges, backend, built_at, hash}
# then persist the hash into the audit state (import_graph_sha) and
# print a one-line JSON status summary on stdout.
emit_final() {
  local raw="$1"
  mkdir -p "$OUT_DIR"
  # Build nodes + edges from the {file: [deps]} shape. Keep original
  # paths verbatim (do not attempt alias resolution here).
  # BACKEND may be unset when the builder ran inside a command
  # substitution (subshell); default it instead of tripping set -u.
  local body
  body="$(printf '%s' "$raw" | jq --arg backend "${BACKEND:-unknown}" --arg now "$(date -u +%FT%TZ)" '
    . as $g
    | ([keys[], (.[] | .[])] | unique) as $nodes
    | [to_entries[] | .key as $from | .value[] | {from:$from, to:.}] as $edges
    | {nodes:$nodes, edges:$edges, backend:$backend, built_at:$now}
  ')"
  # Hash over nodes+edges only (stable serialization via jq -S -c), so
  # an unchanged graph keeps an unchanged hash even though built_at
  # differs. GNU coreutils has sha256sum; macOS ships `shasum` instead.
  local hash
  if command -v sha256sum >/dev/null 2>&1; then
    hash="$(printf '%s' "$body" | jq -S -c '{nodes, edges}' | sha256sum | awk '{print "sha256:"$1}')"
  else
    hash="$(printf '%s' "$body" | jq -S -c '{nodes, edges}' | shasum -a 256 | awk '{print "sha256:"$1}')"
  fi
  printf '%s' "$body" | jq --arg h "$hash" '. + {hash:$h}' > "$OUT_FILE"

  # Update state.import_graph_sha if state file exists. Persistence is
  # best-effort: a write-state failure is logged, not fatal.
  if [[ -f "$STATE_PATH" ]]; then
    jq --arg h "$hash" '. + {import_graph_sha:$h}' "$STATE_PATH" \
      | bash "$SCRIPT_DIR/write-state.sh" "$STATE_PATH" >/dev/null \
      || log "failed to persist import_graph_sha to state"
  fi

  jq -n --arg path "$OUT_FILE" --arg hash "$hash" --arg backend "${BACKEND:-unknown}" \
    --argjson nodes "$(jq '.nodes|length' "$OUT_FILE")" \
    --argjson edges "$(jq '.edges|length' "$OUT_FILE")" \
    '{status:"ok", path:$path, hash:$hash, backend:$backend, nodes:$nodes, edges:$edges}'
}
150
+
151
+ # --- Query: importers_of --------------------------------------------------
152
+
153
# importers_of <file> [--hops N]
# BFS over the edge list using jq. Emits one path per line.
# Walks the REVERSED graph (who imports <file>, transitively),
# stopping after N hops (default 3, matching the propagation depth in
# the file header). Output order is jq object-key order, not BFS order.
# Returns 1 if no graph artifact exists yet.
#
# NOTE(review): the start file itself is never added to `seen`, so in
# a cyclic graph it can appear in its own importer list — confirm
# callers tolerate that before treating the output as a strict
# "other files affected" set. A non-integer --hops value will make
# jq's --argjson fail.
importers_of() {
  local file="$1"; shift || true
  local hops=3
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --hops) hops="${2:-3}"; shift 2 ;;
      *) shift ;;
    esac
  done
  [[ -f "$OUT_FILE" ]] || { log "no import graph; run build first"; return 1; }
  jq -r --arg start "$file" --argjson hops "$hops" '
    . as $g
    # Reverse adjacency: to → [from...]
    | ([.edges[] | {key:.to, value:.from}] | group_by(.key)
       | map({key:.[0].key, value:(map(.value)|unique)}) | from_entries) as $rev
    | def bfs(frontier; seen; depth):
        if depth >= $hops or (frontier|length)==0 then seen
        else
          (frontier | map($rev[.] // []) | add // [] | unique) as $next
          | ($next - (seen|keys | map(.))) as $fresh
          | bfs($fresh; (seen + ($fresh | map({(.):true}) | add // {})); depth + 1)
        end;
      bfs([$start]; {}; 0) | keys[]
  ' "$OUT_FILE"
}
180
+
181
+ # --- Dispatch -------------------------------------------------------------
182
+
183
# main [build [--state <path>]] | importers <file> [--hops N]
# Dispatch. `build` (the default) regenerates the graph artifact;
# `importers` queries an existing one.
main() {
  case "${1:-build}" in
    build|"")
      shift || true
      while [[ $# -gt 0 ]]; do
        case "$1" in
          --state) STATE_PATH="${2:?}"; shift 2 ;;
          *) shift ;;
        esac
      done
      # Detect the backend HERE, in the parent shell. build_graph_json
      # also assigns BACKEND, but it runs inside $(...) below — a
      # subshell — so that assignment is lost; without this, emit_final
      # would trip `set -u` on the unset variable.
      if npx --yes madge --version >/dev/null 2>&1; then
        BACKEND="madge"
      else
        BACKEND="regex-fallback"
      fi
      raw="$(build_graph_json)"
      emit_final "$raw"
      ;;
    importers)
      shift
      [[ -z "${1:-}" ]] && { log "usage: importers <file> [--hops N]"; exit 2; }
      importers_of "$@"
      ;;
    *)
      log "unknown subcommand: $1"
      exit 2
      ;;
  esac
}

main "$@"