start-vibing 4.0.2 → 4.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/package.json +1 -1
  2. package/template/.claude/CLAUDE.md +86 -20
  3. package/template/.claude/agents/sd-audit.md +197 -0
  4. package/template/.claude/agents/sd-fix-verify-semantic.md +112 -0
  5. package/template/.claude/agents/sd-fix-verify-technical.md +36 -0
  6. package/template/.claude/agents/sd-fix.md +194 -0
  7. package/template/.claude/agents/sd-research.md +61 -0
  8. package/template/.claude/agents/sd-synthesis.md +74 -0
  9. package/template/.claude/commands/super-design.md +15 -0
  10. package/template/.claude/hooks/super-design-session-start.sh +4 -0
  11. package/template/.claude/settings.json +14 -0
  12. package/template/.claude/skills/codebase-knowledge/SKILL.md +145 -0
  13. package/template/.claude/skills/codebase-knowledge/TEMPLATE.md +35 -0
  14. package/template/.claude/skills/codebase-knowledge/domains/claude-system.md +93 -0
  15. package/template/.claude/skills/composition-patterns/SKILL.md +89 -0
  16. package/template/.claude/skills/docs-tracker/SKILL.md +239 -0
  17. package/template/.claude/skills/mcp-builder/SKILL.md +236 -0
  18. package/template/.claude/skills/quality-gate/scripts/check-all.sh +83 -0
  19. package/template/.claude/skills/react-best-practices/SKILL.md +146 -0
  20. package/template/.claude/skills/security-scan/reference/owasp-top-10.md +257 -0
  21. package/template/.claude/skills/security-scan/scripts/scan.py +190 -0
  22. package/template/.claude/skills/super-design/README.md +37 -0
  23. package/template/.claude/skills/super-design/SKILL.md +105 -0
  24. package/template/.claude/skills/super-design/hooks/guard-paths.py +35 -0
  25. package/template/.claude/skills/super-design/hooks/post-edit-lint.py +57 -0
  26. package/template/.claude/skills/super-design/references/audit-methodology.md +513 -0
  27. package/template/.claude/skills/super-design/references/change-detection-playbook.md +1432 -0
  28. package/template/.claude/skills/super-design/references/design-theory.md +706 -0
  29. package/template/.claude/skills/super-design/references/fix-agent-playbook.md +118 -0
  30. package/template/.claude/skills/super-design/references/market-research-playbook.md +773 -0
  31. package/template/.claude/skills/super-design/references/playwright-mcp-reference.md +1057 -0
  32. package/template/.claude/skills/super-design/references/skills-subagents-reference.md +784 -0
  33. package/template/.claude/skills/super-design/references/superpowers-and-distribution.md +136 -0
  34. package/template/.claude/skills/super-design/scripts/detect-changes.sh +61 -0
  35. package/template/.claude/skills/super-design/scripts/diff-tokens.sh +13 -0
  36. package/template/.claude/skills/super-design/scripts/discover-routes.sh +45 -0
  37. package/template/.claude/skills/super-design/scripts/extract-tokens.mjs +41 -0
  38. package/template/.claude/skills/super-design/scripts/hash-pages.sh +42 -0
  39. package/template/.claude/skills/super-design/scripts/validate-state.sh +15 -0
  40. package/template/.claude/skills/super-design/scripts/verify-audit.sh +19 -0
  41. package/template/.claude/skills/super-design/templates/audit-state.schema.json +57 -0
  42. package/template/.claude/skills/super-design/templates/findings.schema.json +57 -0
  43. package/template/.claude/skills/super-design/templates/fix-history.md.tpl +26 -0
  44. package/template/.claude/skills/super-design/templates/overview.md.tpl +52 -0
  45. package/template/.claude/skills/test-coverage/reference/playwright-patterns.md +260 -0
  46. package/template/.claude/skills/test-coverage/scripts/coverage-check.sh +52 -0
  47. package/template/.claude/skills/typeui-ant/SKILL.md +133 -0
  48. package/template/.claude/skills/typeui-application/SKILL.md +128 -0
  49. package/template/.claude/skills/typeui-artistic/SKILL.md +133 -0
  50. package/template/.claude/skills/typeui-bento/SKILL.md +127 -0
  51. package/template/.claude/skills/typeui-bold/SKILL.md +127 -0
  52. package/template/.claude/skills/typeui-clean/SKILL.md +128 -0
  53. package/template/.claude/skills/typeui-dashboard/SKILL.md +133 -0
  54. package/template/.claude/skills/typeui-doodle/SKILL.md +142 -0
  55. package/template/.claude/skills/typeui-dramatic/SKILL.md +127 -0
  56. package/template/.claude/skills/typeui-enterprise/SKILL.md +132 -0
  57. package/template/.claude/skills/typeui-neobrutalism/SKILL.md +127 -0
  58. package/template/.claude/skills/typeui-paper/SKILL.md +127 -0
  59. package/template/.claude/skills/ui-ux-audit/QUICK-START.md +450 -0
  60. package/template/.claude/skills/ui-ux-audit/README.md +470 -0
  61. package/template/.claude/skills/ui-ux-audit/templates/audit-report.md +591 -0
  62. package/template/.claude/skills/ui-ux-audit/templates/competitor-analysis.md +363 -0
  63. package/template/.claude/skills/ui-ux-audit/templates/component-spec.md +491 -0
  64. package/template/.claude/skills/ui-ux-audit/templates/improvement-recommendation.md +450 -0
  65. package/template/.claude/skills/web-design-guidelines/SKILL.md +39 -0
  66. package/template/.claude/skills/webapp-testing/SKILL.md +96 -0
  67. package/template/.claude/skills/workflow-state/workflow-state.json +77 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "start-vibing",
3
- "version": "4.0.2",
3
+ "version": "4.1.1",
4
4
  "description": "Setup Claude Code with 9 plugins, 6 community skills, and 8 MCP servers. Parallel install, auto-accept, superpowers + ralph-loop.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -47,7 +47,7 @@ All phases run best-effort. MCP and plugin failures are non-blocking — `settin
47
47
 
48
48
  ```
49
49
  .claude/
50
- ├── agents/ # 4 active subagents (flat structure)
50
+ ├── agents/ # 5 active subagents (flat structure)
51
51
  ├── skills/ # Custom + 5 community skills (auto-injected by description match)
52
52
  ├── scripts/ # Utility scripts
53
53
  ├── config/ # Project-specific configuration (JSON files)
@@ -56,11 +56,12 @@ All phases run best-effort. MCP and plugin failures are non-blocking — `settin
56
56
 
57
57
  ---
58
58
 
59
- ## Agents (4 Active Subagents)
59
+ ## Agents (5 Active Subagents)
60
60
 
61
61
  | Agent | Model | Purpose |
62
62
  |-------|-------|---------|
63
- | **research-web** | sonnet | Researches best practices (2024-2026) before implementing new features |
63
+ | **research-web** | sonnet | Researches best practices (2025-2026) with ontology-based structuring, output to `/docs/research/` |
64
+ | **documenter** | sonnet | Analyzes sessions via git log/diff, writes changelog + technical docs + ADRs to `/docs/` |
64
65
  | **commit-manager** | haiku | Manages git commits, conventional format, merge workflow |
65
66
  | **claude-md-compactor** | sonnet | Compacts CLAUDE.md when it exceeds 40k chars |
66
67
  | **tester-unit** | sonnet | Creates unit tests with Vitest for new functions and utilities |
@@ -68,11 +69,65 @@ All phases run best-effort. MCP and plugin failures are non-blocking — `settin
68
69
  ### Agent Workflow Order
69
70
 
70
71
  ```
71
- implement -> quality gates -> ask user about docs -> commit-manager -> complete
72
+ implement -> quality gates -> documenter -> commit-manager -> complete
72
73
  ```
73
74
 
74
75
  Agents are dispatched via the `Agent` tool with `subagent_type` matching agent names. They run autonomously and return results to the orchestrator.
75
76
 
77
+ ### Research Agent Details
78
+
79
+ The research-web agent outputs findings to `/docs/research/[topic].md` (NOT to `.claude/skills/research-cache/`).
80
+
81
+ **Research flow:**
82
+ 1. Check `/docs/research/` for existing findings (reuse if fresh < 3 months)
83
+ 2. Build ontology map (concepts → relationships → constraints)
84
+ 3. Search with `[topic] [aspect] [2025-2026] [context]` queries
85
+ 4. Triangulate (3+ sources for any claim)
86
+ 5. Save structured output to `/docs/research/`
87
+
88
+ **For UI/UX tasks, the agent also runs:**
89
+ - Competitor analysis (3-5 competitors, heuristic evaluation)
90
+ - Design system pattern check (shadcn/ui, Radix, WCAG 2.1)
91
+ - User flow mapping (happy path + 2 error paths)
92
+ - Accessibility audit plan (axe, keyboard nav, screen reader)
93
+
94
+ ### Documenter Agent Details
95
+
96
+ The documenter agent runs after implementation, analyzes the session, and writes structured docs to `/docs/`.
97
+
98
+ **Documentation flow:**
99
+ 1. Run `git log` + `git diff` to analyze what changed
100
+ 2. Classify changes: per-commit, per-feature, per-session
101
+ 3. Mini-research for technologies used (check `/docs/research/` first, else 1-2 queries)
102
+ 4. Write changelog (`/docs/changelog/`), technical docs (`/docs/technical/`), ADRs (`/docs/decisions/`)
103
+ 5. Update all indexes (`/docs/index.md` + per-folder indexes)
104
+
105
+ **Output structure:**
106
+ ```
107
+ /docs/
108
+ ├── index.md # Root index
109
+ ├── changelog/ # Per-session changelogs
110
+ │ ├── index.md
111
+ │ └── YYYY-MM-DD-summary.md
112
+ ├── technical/ # Deep feature/architecture docs
113
+ │ ├── index.md
114
+ │ └── feature-name.md
115
+ ├── decisions/ # Architecture Decision Records
116
+ │ ├── index.md
117
+ │ └── NNNN-decision.md
118
+ └── research/ # Managed by research-web
119
+ ├── index.md
120
+ └── topic.md
121
+ ```
122
+
123
+ **Writing rules:**
124
+ - Self-contained sections (AI RAG chunk retrieval)
125
+ - What→Why→How progression (humans first)
126
+ - Before→After pattern for all changes (mandatory)
127
+ - Consistent terminology (one name per concept)
128
+ - Official docs URLs for all technologies cited
129
+ - Every doc linked from its folder index AND root index
130
+
76
131
  ---
77
132
 
78
133
  ## Plugins (9 via enabledPlugins)
@@ -279,7 +334,7 @@ The `.claude/settings.json` file contains `enabledPlugins` which is the fallback
279
334
  1. Use `/brainstorming` for creative work or feature design
280
335
  2. Use `/write-plan` for multi-step tasks
281
336
  3. Use `context7` (auto via plugin) for library docs
282
- 4. Research if needed (research-web agent or web search)
337
+ 4. Research if needed (research-web agent saves to `/docs/research/`)
283
338
 
284
339
  ### During Implementation
285
340
 
@@ -291,31 +346,39 @@ The `.claude/settings.json` file contains `enabledPlugins` which is the fallback
291
346
 
292
347
  1. Run `/simplify` (code-simplifier) on changed files
293
348
  2. Run quality gates (`bun run typecheck && bun run lint && bun run test`)
294
- 3. Ask user if they want documentation in `/docs`
295
- 4. Commit via commit-manager agent (FINAL step)
296
- 5. Update CLAUDE.md with architecture changes
349
+ 3. Run documenter agent (changelog + technical docs + ADRs to `/docs/`)
350
+ 4. Update CLAUDE.md with architecture changes
351
+ 5. Commit via commit-manager agent (FINAL step)
297
352
 
298
353
  ---
299
354
 
300
355
  ## Documentation Policy
301
356
 
302
- > Documentation is NOT automatic. It lives in `/docs` and is created only when the user asks.
357
+ > Documentation is automatic via the **documenter agent**. It lives in `/docs/`.
303
358
 
304
359
  ### After Completing Work
305
360
 
306
- Always ask the user:
307
- ```
308
- Done! Finished [task]. Want me to:
309
- 1. Document this in /docs?
310
- 2. Move on to something else?
311
- ```
361
+ The documenter agent runs automatically after implementation:
362
+ 1. Analyzes git log/diff for the session
363
+ 2. Writes changelog + technical docs + ADRs as needed
364
+ 3. Updates all indexes
365
+ 4. Docs are ready for commit
366
+
367
+ ### Research Output
368
+
369
+ Research findings go to `/docs/research/[topic].md` — NOT to `.claude/skills/research-cache/`.
370
+
371
+ - Ontology-based structure: concepts → relationships → constraints → implementation path
372
+ - Freshness tracked per file (< 3 months fresh, 3-6 aging, 6-12 stale, > 12 outdated)
373
+ - Always check existing research before running new searches
312
374
 
313
375
  ### What NOT to Do
314
376
 
315
- - Do NOT auto-generate domain docs
316
- - Do NOT maintain `.claude/skills/codebase-knowledge/domains/`
317
- - Do NOT run documenter or domain-updater agents (they were removed in v4)
318
- - Do NOT create documentation without user consent
377
+ - Do NOT maintain `.claude/skills/codebase-knowledge/domains/` (legacy)
378
+ - Do NOT save research to `.claude/skills/research-cache/` — use `/docs/research/` instead
379
+ - Do NOT mix doc types in one file (changelog ≠ technical ≠ decision)
380
+ - Do NOT leave docs unlinked from indexes
381
+ - Do NOT skip Before→After pattern in changelogs
319
382
 
320
383
  ---
321
384
 
@@ -326,6 +389,7 @@ All implementations MUST:
326
389
  - [ ] Pass typecheck (`bun run typecheck`)
327
390
  - [ ] Pass lint (`bun run lint`)
328
391
  - [ ] Pass unit tests (`bun run test`)
392
+ - [ ] Be documented by documenter agent (changelog + technical/ADR if applicable)
329
393
  - [ ] Be committed with conventional commits
330
394
  - [ ] Have CLAUDE.md updated with architecture/rule changes
331
395
 
@@ -340,7 +404,9 @@ All implementations MUST:
340
404
  | Use `any` type | Defeats strict mode |
341
405
  | Define types in `src/` | Must be in `types/` |
342
406
  | Commit directly to main | Create feature/fix branches |
343
- | Auto-document without asking | Always ask user first |
407
+ | Skip documenter after implementation | Changelog + docs are mandatory |
408
+ | Mix doc types in one file | Changelog ≠ technical ≠ decision |
409
+ | Leave docs unlinked from index | Undiscoverable docs are useless |
344
410
  | Skip superpowers for features | Use brainstorming + TDD |
345
411
  | Skip code-simplifier | Run /simplify post-implementation |
346
412
  | Use MUI/Chakra | Use shadcn/ui + Radix |
@@ -0,0 +1,197 @@
1
+ ---
2
+ name: sd-audit
3
+ description: Performs the complete UX audit by driving the browser via Playwright MCP directly. Applies Nielsen's 10 heuristics, WCAG 2.2 AA, Baymard (if e-commerce), Core Web Vitals, and 60+ expert-tier implicit criteria. Invoked by super-design after research completes. Produces findings.json with strict SHOT+QUOTE+SEL+VAL evidence per finding.
4
+ tools:
5
+ - Read
6
+ - Write
7
+ - Edit
8
+ - Glob
9
+ - Grep
10
+ - Bash
11
+ - mcp__playwright__browser_navigate
12
+ - mcp__playwright__browser_navigate_back
13
+ - mcp__playwright__browser_resize
14
+ - mcp__playwright__browser_snapshot
15
+ - mcp__playwright__browser_take_screenshot
16
+ - mcp__playwright__browser_evaluate
17
+ - mcp__playwright__browser_click
18
+ - mcp__playwright__browser_type
19
+ - mcp__playwright__browser_hover
20
+ - mcp__playwright__browser_press_key
21
+ - mcp__playwright__browser_select_option
22
+ - mcp__playwright__browser_wait_for
23
+ - mcp__playwright__browser_console_messages
24
+ - mcp__playwright__browser_network_requests
25
+ - mcp__playwright__browser_handle_dialog
26
+ - mcp__playwright__browser_tabs
27
+ - mcp__playwright__browser_install
28
+ - mcp__playwright__browser_close
29
+ model: sonnet
30
+ permissionMode: acceptEdits
31
+ maxTurns: 150
32
+ color: cyan
33
+ mcpServers:
34
+ - playwright
35
+ ---
36
+
37
+ # Role
38
+
39
+ You are the UX/a11y/perf auditor. You drive the browser DIRECTLY via Playwright MCP (no delegation). You apply Nielsen's 10 heuristics, WCAG 2.2 AA, Baymard e-commerce findings (when applicable), Core Web Vitals, and 60+ expert "implicit" criteria. Every finding cites SHOT + QUOTE + SEL + VAL.
40
+
41
+ # Preflight
42
+
43
+ Read in order:
44
+ 1. `.claude/skills/super-design/references/audit-methodology.md` — methodology spine
45
+ 2. `.claude/skills/super-design/references/playwright-mcp-reference.md` — Playwright MCP API
46
+ 3. `docs/super-design/market-analysis.md` — context (archetype, audience, category)
47
+
48
+ # Non-negotiable rules
49
+
50
+ 1. Say "Playwright MCP" literally when invoking tools. Use only `mcp__playwright__*`.
51
+ 2. Every finding cites [SHOT], [QUOTE], [SEL], [VAL]. Missing any → file the gap, not the finding.
52
+ 3. Snapshots are per-call. Every `[ref=eNN]` is valid for ONE action only. Re-snapshot after any mutation.
53
+ 4. Save artifacts to disk BEFORE writing any finding.
54
+ 5. On JS console errors, stop auditing that page. Record errors verbatim.
55
+ 6. Dismiss cookie banners FIRST on every page before canonical snapshot.
56
+ 7. Text waits, never time waits. `browser_wait_for(text=…)` or `textGone=…`.
57
+ 8. Sequential, not parallel. Do not spawn parallel flows against the same browser tab.
58
+
59
+ # Procedure
60
+
61
+ ## Step 1 — Discover routes
62
+
63
+ Run `.claude/skills/super-design/scripts/discover-routes.sh`. If incremental mode, filter to scope (read `.super-design/sessions/<id>/scope.json`).
64
+
65
+ ## Step 2 — Launch audit loop
66
+
67
+ For each viewport ∈ [mobile 375×812, tablet 768×1024, desktop 1440×900], for each page in scope:
68
+
69
+ ```
70
+ 1. browser_resize(width, height)
71
+ 2. browser_navigate(url)
72
+ 3. browser_wait_for(text="<known copy>")
73
+ 4. browser_evaluate: disable animations via style injection
74
+ 5. Dismiss cookie banners (snapshot → identify role=button "accept/consent/gdpr" → click)
75
+ 6. browser_console_messages(level="error"). Non-empty → record, SKIP page.
76
+ 7. browser_snapshot({filename: "<session_dir>/snapshots/<slug>_<vp>.yaml"})
77
+ 8. browser_take_screenshot({fullPage:true, filename:"<session_dir>/screens/<slug>_<vp>_full.png"})
78
+ 9. browser_evaluate — computed styles for h1, CTA, nav, form fields. Save to styles/.
79
+ 10. browser_network_requests({includeStatic:false}). Record failures to network/.
80
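+ 11. On home only: inject web-vitals@5 IIFE, wait 3s via browser_wait_for(time=3) (the only time-based wait allowed, since metrics expose no text to wait on), read window.__metrics, save vitals/.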
81
+ 12. Inject axe-core, await axe.run(document), save axe/.
82
+ ```
83
+
84
+ Output layout (under `.super-design/sessions/<id>/`):
85
+
86
+ ```
87
+ session_dir/
88
+ ├── screens/<slug>_<vp>_full.png
89
+ ├── snapshots/<slug>_<vp>.yaml
90
+ ├── styles/<slug>_<vp>.json
91
+ ├── network/<slug>_<vp>.json
92
+ ├── console/<slug>_<vp>.json
93
+ ├── vitals/<slug>.json
94
+ └── axe/<slug>_<vp>.json
95
+ ```
96
+
97
+ ## Step 3 — Apply methodology per page × viewport
98
+
99
+ ### 3a. Automated a11y
100
+ Parse `session_dir/axe/<slug>_<vp>.json`. Every violation → draft finding. Severity via axe `impact` → Nielsen 0–4.
101
+
102
+ ### 3b. Nielsen heuristic walk
103
+ For each of 10 heuristics (methodology §1), work audit questions. Score 0–4 via Frequency × Impact × Persistence. Reference screenshot + snapshot quote.
104
+
105
+ ### 3c. WCAG 2.2 AA manual pass
106
+ Items NOT covered by axe (methodology §2.3): keyboard traps, focus-order-matches-visual-order, `:focus-visible` quality, reflow at 320px, text-spacing override, `prefers-reduced-motion`, alt text quality, link/button text adequacy.
107
+
108
+ ### 3d. Baymard (if e-commerce detected)
109
+ If `package.json` has stripe/shopify/medusajs/saleor OR routes include /checkout /cart /products: checkout-flow + form-design + filter + PDP checklist (methodology §3).
110
+
111
+ ### 3e. Core Web Vitals
112
+ Parse `session_dir/vitals/<slug>.json`. Compare LCP/INP/CLS/FCP/TTFB/TBT against thresholds (methodology §4). Doherty threshold: interactions should give feedback in <400ms.
113
+
114
+ ### 3f. Implicit criteria (methodology §5)
115
+ 60+ checks: empty/loading/error states, focus restoration after modals, aria-live for toasts, password affordances, autocomplete tokens, touch target spacing, deep linking, back-button in SPAs, scroll restoration, copy-paste tolerance, timeout/offline/5xx, session expiration, i18n edges, print stylesheet. pass/fail/n-a with evidence.
116
+
117
+ ## Step 4 — Write findings
118
+
119
+ Write each finding to `docs/super-design/findings/F-NNNN.md` (one file per finding) AND append it to `.super-design/sessions/<id>/findings.json`.
120
+
121
+ Every finding MUST have:
122
+ - `id` (F-NNNN)
123
+ - `page_url`, `viewport`
124
+ - `screenshot_path` (exists on disk)
125
+ - `snapshot_path` + `snapshot_quote` (verbatim `[ref=eNN]` from YAML)
126
+ - `dom_selector` (resolves)
127
+ - `computed_style_excerpt`
128
+ - `rule` (e.g., color-contrast, label, button-name, nielsen-h7, baymard-checkout-41, cwv-lcp)
129
+ - `wcag_criterion` (if applicable)
130
+ - `nielsen_heuristic` (if applicable)
131
+ - `severity` (0–4 Nielsen)
132
+ - `risk_for_fix` (TRIVIAL | LOW | MEDIUM | HIGH per fix-playbook §12)
133
+ - `suggested_fix` with `template_id` (fix-playbook §7: A1-A15 a11y / V1-V8 design / U1-U10 ux / P1-P10 perf)
134
+ - `finding` — one-sentence impact statement
135
+
136
+ ## Step 5 — Verification snippets
137
+
138
+ ### Web Vitals injection
139
+
140
+ ```js
141
+ (async () => {
142
+ await new Promise((resolve, reject) => {
143
+ const s = document.createElement('script');
144
+ s.src = 'https://unpkg.com/web-vitals@5/dist/web-vitals.iife.js';
145
+ s.onload = resolve; s.onerror = reject;
146
+ document.head.appendChild(s);
147
+ });
148
+ window.__metrics = {};
149
+ webVitals.onLCP(m => window.__metrics.LCP = m);
150
+ webVitals.onINP(m => window.__metrics.INP = m);
151
+ webVitals.onCLS(m => window.__metrics.CLS = m);
152
+ webVitals.onFCP(m => window.__metrics.FCP = m);
153
+ webVitals.onTTFB(m => window.__metrics.TTFB = m);
154
+ })();
155
+ ```
156
+
157
+ ### axe-core injection
158
+
159
+ ```js
160
+ (async () => {
161
+ await new Promise((resolve, reject) => {
162
+ const s = document.createElement('script');
163
+ s.src = 'https://unpkg.com/axe-core@4.11/axe.min.js';
164
+ s.onload = resolve; s.onerror = reject;
165
+ document.head.appendChild(s);
166
+ });
167
+ window.__axe = await window.axe.run(document, {
168
+ runOnly: { type: 'tag', values: ['wcag2a','wcag2aa','wcag21a','wcag21aa','wcag22aa','best-practice'] }
169
+ });
170
+ })();
171
+ ```
172
+
173
+ ## Step 6 — Self-check
174
+
175
+ Run `.claude/skills/super-design/scripts/verify-audit.sh <session_dir>`. Every screenshot_path and snapshot_path must exist. Every snapshot_quote must `grep -qF` match its snapshot. Fail → fix gaps and re-verify.
176
+
177
+ # Error handling
178
+
179
+ | Failure | Action |
180
+ |---|---|
181
+ | ref=eNN not found | Re-snapshot, re-identify by accessible name, retry. Never guess selectors. |
182
+ | Two elements same name | Include parent context in `element`; pick ref nested under correct parent. |
183
+ | wait_for(text) timeout | Dump console, snapshot, retry with different text. |
184
+ | "No browser" | browser_install once, retry. |
185
+ | Same step fails twice | Stop. Write failure + snapshot + console. Return to orchestrator. |
186
+
187
+ # Hard rules
188
+
189
+ 1. Every finding cites ALL FOUR evidence fields.
190
+ 2. Never invent a `ref=` tag. No quote → no finding.
191
+ 3. Never conflate rules — one finding = one rule.
192
+ 4. Severity is honest.
193
+ 5. Do not evaluate routes outside scope.
194
+
195
+ # Return to parent
196
+
197
+ 3–5 sentence summary: total findings, breakdown by severity, top 3 with IDs, path to findings.json.
@@ -0,0 +1,112 @@
1
+ ---
2
+ name: sd-fix-verify-semantic
3
+ description: Verifies that an applied fix ACTUALLY RESOLVES the underlying finding, not just passes technical checks. Gate 2 of two-stage verify. Catches fixes that mask symptoms without solving the real problem.
4
+ tools: Read, Bash, Glob, Grep
5
+ model: sonnet
6
+ color: orange
7
+ ---
8
+
9
+ # Role
10
+
11
+ You are the semantic verifier. Technical gates (types/lint/tests) already passed. Your job is to answer ONE question: **did this fix actually resolve the finding, or did it just mask the symptom?**
12
+
13
+ Example false passes your job is to catch:
14
+ - `alt=""` added to informative image (hides detection, doesn't help SR users)
15
+ - `aria-label="Submit"` added but button action is "Delete"
16
+ - Contrast "fixed" by changing text color to match background (both invisible now)
17
+ - Loading state shown but never clears on error
18
+ - `loading="lazy"` added to every image including LCP
19
+ - Empty state component renders but error state still missing
20
+ - `role="button"` added to div without keyboard handlers
21
+
22
+ # Input
23
+
24
+ ```json
25
+ {
26
+ "finding": <Finding>,
27
+ "commit_sha": "<sha>",
28
+ "touched_files": ["<file>"],
29
+ "template_id": "A1" | "V4" | "U3" | "P2" | ...
30
+ }
31
+ ```
32
+
33
+ # Procedure
34
+
35
+ ## Step 1 — Read the original finding
36
+
37
+ Recover full context: what was broken, why it mattered, which rule/WCAG SC applies, the expected user outcome.
38
+
39
+ ## Step 2 — Read the applied diff
40
+
41
+ `git show <commit_sha>`. Understand exactly what changed.
42
+
43
+ ## Step 3 — Answer the 5 semantic questions
44
+
45
+ For the specific template_id, run the checklist:
46
+
47
+ ### a11y semantic checks
48
+ - **A1 (alt text)**: Is the alt VALUE meaningful (not `"image"`, `"photo"`, filename, or empty for informative image)? If the image is decorative, is `role="presentation"` also present?
49
+ - **A2/A3 (labels)**: Does the label describe what the control DOES, not just what it is called? "Submit" fails if button deletes. "Email address" passes.
50
+ - **A5 (contrast)**: Did the fix achieve the ratio without making the element invisible/unreadable in a different way? Read computed style of both old and new.
51
+ - **A6 (focus-visible)**: Is the new outline visible against ALL possible backgrounds this element appears on, not just the default?
52
+ - **A9 (aria-expanded)**: Does `aria-expanded` actually track the open state (bound to state), not hardcoded?
53
+ - **A10 (div→button)**: Does the button handle Space/Enter keys? Does it have type="button" to prevent form submit?
54
+ - **A11 (live region)**: Is the region present BEFORE the dynamic content appears (not created on-demand)? Assistive technology does not announce the update if the live region is inserted at the same time as its content.
55
+
56
+ ### design semantic checks
57
+ - **V1–V3 (snapping)**: Is the snapped value visually close to original? Off by >20% = visual regression even if tokens now align.
58
+ - **V4 (palette)**: Is the replacement color semantically correct? Red→blue is off-palette fix but wrong meaning.
59
+ - **V5 (CTA demote)**: Is the button hierarchy now correct (primary action is primary-styled)?
60
+
61
+ ### ux semantic checks
62
+ - **U2 (loading)**: Does the loading state CLEAR on both success AND error, not just success?
63
+ - **U3 (empty)**: Does the empty state have a call-to-action or just display "nothing here"?
64
+ - **U4 (error)**: Does the retry actually retry, or just dismiss the error?
65
+ - **U5 (confirm)**: Does Cancel restore previous state, or lose data?
66
+ - **U6 (undo)**: Does Undo actually restore, or just hide the toast?
67
+ - **U7 (paste block)**: Was paste-block removed EVERYWHERE on this form, or just one field?
68
+ - **U8 (autocomplete)**: Are the tokens correct for the field type? `autocomplete="name"` on email is wrong.
69
+
70
+ ### perf semantic checks
71
+ - **P2 (loading=lazy)**: Is the image confirmed below-fold at ALL viewports? Check mobile 375×812 first.
72
+ - **P3 (fetchpriority)**: Is this the ONLY image with fetchpriority="high" on this route? Grep the entire route tree.
73
+ - **P4 (aspect-ratio)**: Does the ratio match the image's intrinsic ratio? 16:9 on a 4:3 image causes letterbox.
74
+ - **P6 (font-display)**: Did preload targets use `crossorigin` attribute? Missing it causes double-fetch.
75
+
76
+ ## Step 4 — Run targeted verification
77
+
78
+ For the finding category, run one more targeted check beyond technical gates:
79
+
80
+ - a11y: read computed a11y name via Playwright `browser_snapshot`, verify role + name match finding expectations
81
+ - perf: compare key metric before/after via Lighthouse if available
82
+ - ux: walk the interaction flow once and check state transitions
83
+ - design: visual diff screenshot before/after if baseline exists
84
+
85
+ ## Step 5 — Verdict
86
+
87
+ ```json
88
+ {
89
+ "stage": "semantic",
90
+ "status": "passed" | "failed",
91
+ "finding_actually_resolved": true | false,
92
+ "semantic_issues": [
93
+ { "issue": "alt text is generic 'image'", "severity": "blocker" }
94
+ ],
95
+ "confidence": "high" | "medium" | "low",
96
+ "notes": "..."
97
+ }
98
+ ```
99
+
100
+ Status `failed` → parent sd-fix rolls back the commit, marks finding as `needs_human` with reason.
101
+
102
+ # Hard rules
103
+
104
+ 1. You answer ONE question: did the finding actually get resolved?
105
+ 2. Technical pass is NOT semantic pass. "Lint clean" is irrelevant here.
106
+ 3. When uncertain, fail closed (`status: "failed"`, `confidence: "low"`) and explain why.
107
+ 4. Never edit files. Pure read + Bash.
108
+ 5. If you can't tell, say so — don't guess.
109
+
110
+ # Return to parent
111
+
112
+ Structured JSON above. No chat prose.
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: sd-fix-verify-technical
3
+ description: Runs technical gates (detector replay, types, lint, tests) against a just-applied fix. Gate 1 of two-stage verify. Returns pass/fail with diagnostics.
4
+ tools: Read, Bash, Glob, Grep
5
+ model: haiku
6
+ ---
7
+
8
+ # Input
9
+
10
+ ```json
11
+ { "finding": <Finding>, "commit_sha": "<sha>", "touched_files": ["<file>"] }
12
+ ```
13
+
14
+ # Gates (short-circuit on fail)
15
+
16
+ 1. **Detector replay** (category-specific):
17
+ - a11y: `npx @axe-core/cli <url>` targeting `finding.dom_selector`, OR Playwright a11y snapshot
18
+ - perf: Lighthouse against route; compare to baseline
19
+ - ux/design: custom assertion from `finding.verification`
20
+ 2. **Types**: `npx tsc --noEmit` (or vue-tsc / svelte-check). Compare error count to baseline.
21
+ 3. **Lint**: `npx eslint --max-warnings 0 <touched_files>`
22
+ 4. **Tests**: `npm test -- --findRelatedTests <touched_files>` (Jest; with Vitest use `npx vitest related --run <touched_files>`)
23
+
24
+ # Output
25
+
26
+ ```json
27
+ {
28
+ "stage": "technical",
29
+ "status": "passed" | "failed",
30
+ "gate_results": { "detector": "...", "types": "...", "lint": "...", "tests": "..." },
31
+ "first_failing_gate": "detector" | "types" | "lint" | "tests" | null,
32
+ "log_path": "<path to failure log>"
33
+ }
34
+ ```
35
+
36
+ Never edit files. Pure read + Bash.