@hegemonart/get-design-done 1.47.0 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +5 -2
- package/CHANGELOG.md +91 -0
- package/README.md +4 -0
- package/agents/brief-auditor.md +147 -0
- package/agents/copy-auditor.md +215 -0
- package/agents/design-auditor.md +30 -7
- package/agents/design-context-builder.md +2 -0
- package/agents/design-debt-crawler.md +292 -0
- package/agents/design-executor.md +2 -0
- package/agents/design-fixer.md +6 -1
- package/agents/design-planner.md +2 -0
- package/agents/design-reflector.md +2 -0
- package/agents/design-research-synthesizer.md +2 -0
- package/agents/design-verifier.md +7 -15
- package/agents/quality-gate-runner.md +11 -10
- package/dist/claude-code/.claude/skills/brief/SKILL.md +17 -0
- package/dist/claude-code/.claude/skills/quality-gate/SKILL.md +2 -2
- package/hooks/gdd-a11y-gate.js +119 -0
- package/hooks/gdd-design-quality-check.js +340 -0
- package/hooks/hooks.json +17 -0
- package/package.json +5 -2
- package/reference/brief-quality-rubric.md +98 -0
- package/reference/copy-quality.md +135 -0
- package/reference/debt-categories.md +148 -0
- package/reference/registry.json +35 -0
- package/reference/reviewer-confidence-gate.md +108 -0
- package/reference/visual-tells.md +237 -0
- package/scripts/lib/confidence-route.cjs +60 -0
- package/scripts/lib/worktree-resolve.cjs +221 -0
- package/sdk/mcp/gdd-state/server.js +37 -4
- package/sdk/mcp/gdd-state/tools/shared.ts +61 -0
- package/skills/brief/SKILL.md +17 -0
- package/skills/quality-gate/SKILL.md +2 -2
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: debt-categories
|
|
3
|
+
type: reference
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
phase: 48
|
|
6
|
+
tags: [debt, taxonomy, audit, crawler, priority-scoring, retroactive]
|
|
7
|
+
last_updated: 2026-06-03
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Debt Categories
|
|
11
|
+
|
|
12
|
+
The taxonomy `agents/design-debt-crawler.md` uses to classify and rank design debt
|
|
13
|
+
found across an entire codebase. Each class has a definition, the detection signal
|
|
14
|
+
that surfaces it, and the typical fix shape. The priority model at the end converts
|
|
15
|
+
every finding into one comparable score so the catalog reads top to bottom by impact.
|
|
16
|
+
|
|
17
|
+
This taxonomy is detection-oriented, not a style guide. The reason behind each rule
|
|
18
|
+
lives in `reference/anti-patterns.md` (the BAN-NN and SLOP-NN catalog) and the domain
|
|
19
|
+
references `reference/color.md`, `reference/typography.md`, and `reference/spatial.md`.
|
|
20
|
+
This file is the catalog of what to look for and how to weigh it.
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Debt Classes
|
|
25
|
+
|
|
26
|
+
### color-literal
|
|
27
|
+
|
|
28
|
+
**Definition:** A raw color value written directly in source instead of a token
|
|
29
|
+
reference. Includes `#rgb` / `#rgba` / `#rrggbb` / `#rrggbbaa` hex, `rgb()` / `rgba()`, and
|
|
30
|
+
`hsl()` / `hsla()` function literals that flow to a rendered surface.
|
|
31
|
+
**Detection signal:** Grep for `#[0-9a-fA-F]{3,8}`, `rgb\(`, `rgba\(`, `hsl\(`,
|
|
32
|
+
`hsla\(` in source files. Exclude token-definition files (the palette source itself)
|
|
33
|
+
and comments. A literal inside a `--token: #hex` custom-property definition is the token, not debt.
|
|
34
|
+
**Fix shape:** Replace the literal with the matching semantic token. If no token
|
|
35
|
+
exists for that role, the fix is to define one first.
|
|
36
|
+
|
|
37
|
+
### untokenized-component
|
|
38
|
+
|
|
39
|
+
**Definition:** A component file that renders visual surface (color, spacing, type
|
|
40
|
+
size) using inline values or arbitrary utility classes rather than referencing the
|
|
41
|
+
design system's tokens or scale.
|
|
42
|
+
**Detection signal:** A component file (`.tsx` / `.jsx` / `.vue` / `.svelte`) that
|
|
43
|
+
contains color-literal or arbitrary-bracket utility hits (`\[[0-9]+px\]`,
|
|
44
|
+
`\[#[0-9a-f]+\]`) and zero `var(--` references or scale-class references. The ratio
|
|
45
|
+
of literal uses to token uses inside one file is the strength signal.
|
|
46
|
+
**Fix shape:** Route the component's visual values through tokens and the spacing
|
|
47
|
+
or type scale; remove the arbitrary brackets.
|
|
48
|
+
|
|
49
|
+
### anti-pattern
|
|
50
|
+
|
|
51
|
+
**Definition:** A confirmed instance of a banned construct (BAN-NN) or an AI-slop
|
|
52
|
+
tell (SLOP-NN) from `reference/anti-patterns.md`.
|
|
53
|
+
**Detection signal:** Run `gdd-detect <path> --json`. Each finding carries its
|
|
54
|
+
`ruleId`, `file`, `line`, and a link back to the matching paragraph. The detector
|
|
55
|
+
covers the statically matchable BAN rules; the two subjective rules it cannot match
|
|
56
|
+
(BAN-04 keyboard-action animation, BAN-10 nested equal radius) are noted as
|
|
57
|
+
manual-review items, not auto-counted.
|
|
58
|
+
**Fix shape:** Apply the rule's documented rewrite. Hard bans take precedence over
|
|
59
|
+
SLOP tells when both touch the same element.
|
|
60
|
+
|
|
61
|
+
### contrast
|
|
62
|
+
|
|
63
|
+
**Definition:** A foreground and background pairing that falls below the WCAG 2.1 AA
|
|
64
|
+
contrast floor (4.5:1 for body text, 3:1 for large text and non-text indicators).
|
|
65
|
+
**Detection signal:** Resolve text-color and background-color pairs that share an
|
|
66
|
+
element or selector, compute the ratio, and flag pairs under the threshold. Pairs
|
|
67
|
+
built from unresolvable runtime values are a manual-review item.
|
|
68
|
+
**Fix shape:** Adjust the token or the role assignment so the pair clears AA. Never
|
|
69
|
+
rely on color alone to carry meaning; pair it with text or an icon.
|
|
70
|
+
|
|
71
|
+
### density-spacing
|
|
72
|
+
|
|
73
|
+
**Definition:** Spacing values that sit off the project's modular scale (for example
|
|
74
|
+
the 4 / 8 / 12 / 16 / 24 / 32 series), or inconsistent density between sibling
|
|
75
|
+
components that should share rhythm.
|
|
76
|
+
|
|
77
|
+
**Detection signal:** Collect every padding, margin, and gap value, then flag values
|
|
78
|
+
that are not on the declared scale and clusters where neighboring components use
|
|
79
|
+
different step counts for the same structural role.
|
|
80
|
+
|
|
81
|
+
**Fix shape:** Snap off-grid values to the nearest scale step; align sibling density.
|
|
82
|
+
|
|
83
|
+
### typography-drift
|
|
84
|
+
|
|
85
|
+
**Definition:** Font sizes, weights, or families that drift from a systematic type
|
|
86
|
+
scale: arbitrary pixel sizes, more than the agreed family count, or weight choices
|
|
87
|
+
that break the heading-to-body hierarchy.
|
|
88
|
+
|
|
89
|
+
**Detection signal:** Tally the distinct font-size and font-weight values and the
|
|
90
|
+
family count. A long tail of one-off sizes, more than two families, or `font-weight`
|
|
91
|
+
under 400 on small text are the drift markers.
|
|
92
|
+
|
|
93
|
+
**Fix shape:** Map each one-off value onto the nearest scale step; cap families at two.
|
|
94
|
+
|
|
95
|
+
### a11y-text
|
|
96
|
+
|
|
97
|
+
**Definition:** Text-content accessibility debt: missing `alt` on meaningful images,
|
|
98
|
+
icon-only controls without an accessible name, placeholder used as the only label,
|
|
99
|
+
and generic or developer-facing copy in empty and error states.
|
|
100
|
+
|
|
101
|
+
**Detection signal:** Grep for `<img` without `alt`, interactive elements without
|
|
102
|
+
`aria-label` or visible text, inputs with `placeholder` and no `<label>`, and
|
|
103
|
+
empty-state strings such as "No data" or raw error codes.
|
|
104
|
+
|
|
105
|
+
**Fix shape:** Add the accessible name or label; rewrite generic copy to be specific
|
|
106
|
+
and actionable. Copy-quality detail lives in `reference/copy-quality.md`.
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Priority Scoring Model
|
|
111
|
+
|
|
112
|
+
Every finding gets one priority score so the catalog ranks by impact, not by the
|
|
113
|
+
order files were walked. Three ordinal factors combine, each on a 1 to 3 scale.
|
|
114
|
+
|
|
115
|
+
**Visible-delta** (how much a user notices the fix):
|
|
116
|
+
|
|
117
|
+
| Value | Meaning |
|
|
118
|
+
|-------|---------|
|
|
119
|
+
| 3 | Changes a primary surface a user sees on first load |
|
|
120
|
+
| 2 | Changes a secondary or interior surface |
|
|
121
|
+
| 1 | Invisible at rest; shows only in an edge state or to assistive tech |
|
|
122
|
+
|
|
123
|
+
**Effort** (how cheap the fix is; cheaper scores higher so quick wins float up):
|
|
124
|
+
|
|
125
|
+
| Value | Meaning |
|
|
126
|
+
|-------|---------|
|
|
127
|
+
| 3 | Mechanical one-line swap (literal to token) |
|
|
128
|
+
| 2 | Localized edit across a single component |
|
|
129
|
+
| 1 | Needs a new token, scale decision, or cross-file refactor |
|
|
130
|
+
|
|
131
|
+
**Prevalence** (how many instances share this root cause):
|
|
132
|
+
|
|
133
|
+
| Value | Meaning |
|
|
134
|
+
|-------|---------|
|
|
135
|
+
| 3 | Ten or more instances of the same finding |
|
|
136
|
+
| 2 | Three to nine instances |
|
|
137
|
+
| 1 | One or two instances |
|
|
138
|
+
|
|
139
|
+
**Combine** by multiplying the three factors:
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
priority = visible-delta × effort × prevalence
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
The product ranges from 1 (low) to 27 (high). Sort the catalog by `priority`
|
|
146
|
+
descending so the highest-impact, lowest-cost, most-widespread debt sits at the top.
|
|
147
|
+
On ties, break by visible-delta first, then prevalence. Record the three factors next
|
|
148
|
+
to the score in each catalog row so the ranking is auditable, not opaque.
|
package/reference/registry.json
CHANGED
|
@@ -1100,6 +1100,41 @@
|
|
|
1100
1100
|
"type": "domain-index",
|
|
1101
1101
|
"phase": 45,
|
|
1102
1102
|
"description": "Phase 45 domain-index: UX-writing + voice - brand-voice, style-vocabulary, anti-patterns."
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
"name": "copy-quality",
|
|
1106
|
+
"path": "reference/copy-quality.md",
|
|
1107
|
+
"type": "heuristic",
|
|
1108
|
+
"phase": 48,
|
|
1109
|
+
"description": "Phase 48 copy-quality pillar rubric: microcopy (button/CTA labels, error messages, empty states, ARIA/alt text, loading copy), voice alignment, i18n overflow lens."
|
|
1110
|
+
},
|
|
1111
|
+
{
|
|
1112
|
+
"name": "debt-categories",
|
|
1113
|
+
"path": "reference/debt-categories.md",
|
|
1114
|
+
"type": "taxonomy",
|
|
1115
|
+
"phase": 48,
|
|
1116
|
+
"description": "Phase 48 design-debt taxonomy: debt classes (color-literal, untokenized-component, anti-pattern, contrast, density-spacing, typography-drift, a11y-text) + visible-delta x effort x prevalence priority scoring."
|
|
1117
|
+
},
|
|
1118
|
+
{
|
|
1119
|
+
"name": "brief-quality-rubric",
|
|
1120
|
+
"path": "reference/brief-quality-rubric.md",
|
|
1121
|
+
"type": "output-contract",
|
|
1122
|
+
"phase": 48,
|
|
1123
|
+
"description": "Phase 48 brief-quality rubric: 5 anti-patterns (vague verbs, missing audience, immeasurable success criteria, scope creep, missing anti-goals) the brief-auditor surfaces."
|
|
1124
|
+
},
|
|
1125
|
+
{
|
|
1126
|
+
"name": "visual-tells",
|
|
1127
|
+
"path": "reference/visual-tells.md",
|
|
1128
|
+
"type": "heuristic",
|
|
1129
|
+
"phase": 49,
|
|
1130
|
+
"description": "Phase 49 visual-tells catalog: 8 default-AI-aesthetic categories (default-AI-hero, gradient-spam, isometric-illustration-fallback, centered-everything-syndrome, inter-everything, purple-violet-default, glassmorphism-spam, decorative-motion-without-intent) with diagnostic regex + remediation; backs the gdd-design-quality-check hook."
|
|
1131
|
+
},
|
|
1132
|
+
{
|
|
1133
|
+
"name": "reviewer-confidence-gate",
|
|
1134
|
+
"path": "reference/reviewer-confidence-gate.md",
|
|
1135
|
+
"type": "meta-rules",
|
|
1136
|
+
"phase": 49,
|
|
1137
|
+
"description": "Phase 49 reviewer confidence gate: 4-question Pre-Report Gate + confidence 0.0-1.0 field; HIGH/CRITICAL require >=0.8 + cited proof, <0.5 stays Tentative and never reaches design-fixer."
|
|
1103
1138
|
}
|
|
1104
1139
|
]
|
|
1105
1140
|
}
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: reviewer-confidence-gate
|
|
3
|
+
type: meta-rules
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
phase: 49
|
|
6
|
+
tags: [review, confidence, audit, verify, gap, routing, anti-slop]
|
|
7
|
+
last_updated: 2026-06-03
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
# Reviewer Confidence Gate
|
|
11
|
+
|
|
12
|
+
Audit and verify findings can inflate severity without proof. A grep hit gets reported as a BLOCKER; a single line read out of context becomes a MAJOR. This contract adds a confidence discipline so review agents (`design-auditor`, `design-verifier`, `design-debt-crawler`) earn the severity they assign, and so `design-fixer` only auto-applies fixes that are backed by evidence.
|
|
13
|
+
|
|
14
|
+
Every emitting agent runs the Pre-Report Gate before writing a finding, stamps each finding with a `confidence` score, and parks weak findings in a `## Tentative` section that the fixer never reads. The routing helper `scripts/lib/confidence-route.cjs` encodes the same rule in code.
|
|
15
|
+
|
|
16
|
+
## Pre-Report Gate
|
|
17
|
+
|
|
18
|
+
Before you emit any finding or gap, answer these four questions. If you cannot answer all four with a clear yes, the finding is not ready to ship at its stated severity.
|
|
19
|
+
|
|
20
|
+
- **a. Can I cite `file:line`?** Point at the exact location. A finding with no concrete location is a hunch, not a defect.
|
|
21
|
+
- **b. Can I state the failure mode in one sentence?** Name what breaks for the user or the build. If the sentence needs an "and" plus a "maybe", the finding is two findings or none.
|
|
22
|
+
- **c. Did I read context beyond the modified file?** Confirm the call site, the token definition, or the parent component. A value that looks wrong in isolation is often correct once you read what feeds it.
|
|
23
|
+
- **d. Is the severity defensible?** A BLOCKER blocks shipping. A MAJOR is a real deviation from intent. If you would not defend the label to the author, lower it.
|
|
24
|
+
|
|
25
|
+
## The `confidence` field
|
|
26
|
+
|
|
27
|
+
Every finding carries a `confidence: 0.0-1.0` field. It records how sure you are that the finding is real and correctly classified, not how bad the issue is. Severity and confidence are independent axes: a cosmetic issue can be high confidence, and a suspected BLOCKER can be low confidence.
|
|
28
|
+
|
|
29
|
+
| Range | Meaning | Where it goes |
|
|
30
|
+
|-------|---------|---------------|
|
|
31
|
+
| `>= 0.8` | Cited `file:line`, one-sentence failure mode, context read. | Reported at full severity; eligible for auto-fix. |
|
|
32
|
+
| `0.5 - 0.8` | Real signal, but evidence is partial or context is incomplete. | Reported, routed to user review, never auto-fixed. |
|
|
33
|
+
| `< 0.5` | A hunch, a guess, or a pattern match you could not confirm. | Moved to `## Tentative`; never reaches `design-fixer`. |
|
|
34
|
+
|
|
35
|
+
## Routing rule
|
|
36
|
+
|
|
37
|
+
The gate controls what reaches the fixer. The rule is:
|
|
38
|
+
|
|
39
|
+
- A HIGH severity finding (BLOCKER or MAJOR) requires `confidence >= 0.8` **and** a `file:line` citation **and** a one-sentence failure mode. Below `0.8`, a HIGH finding is surfaced for user review instead of auto-fix.
|
|
40
|
+
- A finding with `confidence < 0.5` stays in the `## Tentative` section and never reaches `design-fixer`.
|
|
41
|
+
- A finding with `confidence` in the `0.5 - 0.8` band (`0.5` inclusive, `0.8` exclusive) is surfaced in the report but routed to user review, not auto-fix.
|
|
42
|
+
|
|
43
|
+
`scripts/lib/confidence-route.cjs` exports `route({ severity, confidence, tentative })` and returns `'fix'`, `'user-review'`, or `'drop'`. Agents and the fixer share this single decision so the matrix stays consistent.
|
|
44
|
+
|
|
45
|
+
### Routing matrix
|
|
46
|
+
|
|
47
|
+
The full decision table the helper encodes:
|
|
48
|
+
|
|
49
|
+
| Severity | `tentative` | confidence | Destination |
|
|
50
|
+
|----------|-------------|------------|-------------|
|
|
51
|
+
| any | `true` | any | `drop` (never reaches fixer) |
|
|
52
|
+
| any | `false` | `< 0.5` | `drop` (stays tentative) |
|
|
53
|
+
| BLOCKER or MAJOR | `false` | `0.5 - 0.8` | `user-review` |
|
|
54
|
+
| BLOCKER or MAJOR | `false` | `>= 0.8` | `fix` |
|
|
55
|
+
| MINOR or COSMETIC | `false` | `0.5 - 0.8` | `user-review` |
|
|
56
|
+
| MINOR or COSMETIC | `false` | `>= 0.8` | `fix` |
|
|
57
|
+
|
|
58
|
+
Read the table as: tentative wins first, then the `0.5` floor, then the severity-specific `0.8` auto-fix gate.
|
|
59
|
+
|
|
60
|
+
## How to emit a finding
|
|
61
|
+
|
|
62
|
+
After the Pre-Report Gate passes, write the finding with the `confidence` field on its own line inside the existing locked format. For `design-verifier` gaps this sits alongside the other gap fields:
|
|
63
|
+
|
|
64
|
+
```text
|
|
65
|
+
### BLOCKER G-01: raw error object rendered on payment failure
|
|
66
|
+
- Phase: 2
|
|
67
|
+
- Description: Checkout.tsx renders the error object directly
|
|
68
|
+
- Expected: a human-readable failure message
|
|
69
|
+
- Actual: users see "[object Object]"
|
|
70
|
+
- Location: src/Checkout.tsx:88
|
|
71
|
+
- Suggested fix: render error.message with a fallback string
|
|
72
|
+
- confidence: 0.85
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
A finding that scores `< 0.5` is not written in the gap list at all. It goes under a `## Tentative` heading in the same report, in plain prose, so a human can promote it later if context proves it real.
|
|
76
|
+
|
|
77
|
+
## Paired examples
|
|
78
|
+
|
|
79
|
+
Each pair shows a raw finding (before the gate) and the same finding after the gate corrects it.
|
|
80
|
+
|
|
81
|
+
### Example 1: severity inflated, no context read
|
|
82
|
+
|
|
83
|
+
**Before:** `BLOCKER: hardcoded color #1a73e8 in Button.tsx breaks theming.`
|
|
84
|
+
|
|
85
|
+
**After:** `MINOR G-04: raw #1a73e8 instead of a semantic token. confidence: 0.9`. Reading context (question c) showed that `Button.tsx:42` sits inside the token definition file, so theming is not broken; the issue is a style-coherence nit, not a shipping blocker. High confidence, low severity.
|
|
86
|
+
|
|
87
|
+
### Example 2: a grep guess that could not be confirmed
|
|
88
|
+
|
|
89
|
+
**Before:** `MAJOR: missing reduced-motion guard, animations will trigger vestibular issues.`
|
|
90
|
+
|
|
91
|
+
**After:** moved to `## Tentative` with `confidence: 0.4`. The grep matched `framer-motion` but question a failed: no single `file:line` proves the guard is absent app-wide, and a root `MotionConfig` may cover it. Parked as tentative; the fixer never sees it.
|
|
92
|
+
|
|
93
|
+
### Example 3: real defect, evidence complete
|
|
94
|
+
|
|
95
|
+
**Before:** `error states look weak somewhere in the checkout flow.`
|
|
96
|
+
|
|
97
|
+
**After:** `BLOCKER G-01: Checkout.tsx:88 renders the raw error object, so users see "[object Object]" on a failed payment. confidence: 0.85`. All four questions pass: cited location, one-sentence failure mode, call site read, severity defensible. Auto-fix eligible.
|
|
98
|
+
|
|
99
|
+
### Example 4: partial evidence, honest mid-band score
|
|
100
|
+
|
|
101
|
+
**Before:** `MAJOR: empty state copy is generic across the app.`
|
|
102
|
+
|
|
103
|
+
**After:** `MINOR G-06: Inbox.tsx:30 empty state reads "No data". confidence: 0.65`. One real instance is cited, but question c is only half done: the "across the app" claim was not verified. Scored mid-band, surfaced for user review rather than auto-fixed, and the severity was lowered to match the single confirmed instance.
|
|
104
|
+
|
|
105
|
+
## Agent integration
|
|
106
|
+
|
|
107
|
+
- `design-auditor`, `design-verifier`, and `design-debt-crawler` run the Pre-Report Gate, stamp each finding with `confidence`, and route sub-0.5 findings to `## Tentative`.
|
|
108
|
+
- `design-fixer` skips every gap in `## Tentative` and skips BLOCKER or MAJOR gaps whose `confidence < 0.8`, routing those to user review instead of auto-fix.
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# Visual Tells Catalog (v1)
|
|
2
|
+
|
|
3
|
+
The default-AI aesthetic has a fingerprint. When a model generates a front-end
|
|
4
|
+
without a brand brief, it falls back to the same handful of moves it saw most in
|
|
5
|
+
its training set. This catalog names those moves so the cheap regex floor in
|
|
6
|
+
`hooks/gdd-design-quality-check.js` can flag them on every `.tsx` / `.vue` /
|
|
7
|
+
`.svelte` / `.astro` write.
|
|
8
|
+
|
|
9
|
+
Severity here is WARN (advisory), in the vocabulary of `reference/audit-scoring.md`.
|
|
10
|
+
A WARN is not a FLAG: it does not block the write and it does not fail a gate. It
|
|
11
|
+
is a nudge that asks "did you choose this, or did the model default to it?" Each
|
|
12
|
+
category below maps 1:1 to a rule id in the hook. For the harder bans behind some
|
|
13
|
+
of these tells, see `reference/anti-patterns.md` (BAN-NN and SLOP-NN).
|
|
14
|
+
|
|
15
|
+
How to read each entry: a short description, three to five concrete instances of
|
|
16
|
+
the tell in the wild, the diagnostic regex the hook runs (where one applies), and
|
|
17
|
+
a remediation pattern you can paste in.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## default-AI-hero
|
|
22
|
+
|
|
23
|
+
Rule id: `generic-cta`
|
|
24
|
+
|
|
25
|
+
The stock landing-page hero: a centered headline, one line of filler subtext, and
|
|
26
|
+
a button that says "Get Started". The copy carries no subject and no verb specific
|
|
27
|
+
to the product. It reads like every template because it is every template.
|
|
28
|
+
|
|
29
|
+
Instances:
|
|
30
|
+
|
|
31
|
+
1. Button label "Get Started" with no object ("Get started with what?").
|
|
32
|
+
2. Headline opener "Welcome to [Product]" instead of a value statement.
|
|
33
|
+
3. "Learn More" as the secondary call to action, pointing nowhere specific.
|
|
34
|
+
4. "Lorem ipsum" body copy shipped past the mockup stage.
|
|
35
|
+
5. The triplet subhead with no verb (see SLOP-11 in `reference/anti-patterns.md`).
|
|
36
|
+
|
|
37
|
+
Diagnostic regex (hook): `\b(?:Get Started|Welcome to|Lorem ipsum|Learn More)\b`
|
|
38
|
+
(case-insensitive, word-boundaried).
|
|
39
|
+
|
|
40
|
+
Remediation: write the specific promise and the specific next step. "Start a free
|
|
41
|
+
trial" beats "Get Started". "Ship your first audit in ten minutes" beats "Welcome
|
|
42
|
+
to GDD". Name the noun and the verb. Delete placeholder copy before review.
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## gradient-spam
|
|
47
|
+
|
|
48
|
+
Rule id: `gradient-spam`
|
|
49
|
+
|
|
50
|
+
One tasteful gradient can anchor a page. Three or more on a single screen reads as
|
|
51
|
+
decoration standing in for hierarchy. The model reaches for `bg-gradient-to-r` on
|
|
52
|
+
the hero, again on the cards, again on the footer, because gradients look busy and
|
|
53
|
+
busy looks "designed".
|
|
54
|
+
|
|
55
|
+
Instances:
|
|
56
|
+
|
|
57
|
+
1. Hero background, card backgrounds, and a CTA all using a direction gradient.
|
|
58
|
+
2. `bg-gradient-to-br` on every feature tile in a grid.
|
|
59
|
+
3. A gradient on text plus a gradient on its container (double application).
|
|
60
|
+
4. Gradient borders faked with a gradient background and an inset child.
|
|
61
|
+
|
|
62
|
+
Diagnostic regex (hook): `\bbg-gradient-to-(?:r|br|tr|b|bl|l|tl|t)\b`, flagged at
|
|
63
|
+
a count of three or more occurrences in one file.
|
|
64
|
+
|
|
65
|
+
Remediation: pick one surface to carry a gradient and make the rest solid. Use
|
|
66
|
+
weight, size, and spacing for hierarchy instead of color washes. If you keep a
|
|
67
|
+
gradient, give it a documented role (one hero, one accent) rather than spraying it.
|
|
68
|
+
For the gradient-text ban specifically, see BAN-02.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## isometric-illustration-fallback
|
|
73
|
+
|
|
74
|
+
Rule id: `isometric-illustration-fallback`
|
|
75
|
+
|
|
76
|
+
Pastel isometric scenes with floating icons, usually pulled straight from a free
|
|
77
|
+
clip-art set. The undraw.co style is the strongest tell: flat shapes, two-tone
|
|
78
|
+
palette, no brand character. It signals that no real screenshot or photography was
|
|
79
|
+
available, so a stock scene filled the hole.
|
|
80
|
+
|
|
81
|
+
Instances:
|
|
82
|
+
|
|
83
|
+
1. `src="/illustrations/undraw_dashboard.svg"` in an empty state.
|
|
84
|
+
2. An `isometric-hero.png` asset behind the headline.
|
|
85
|
+
3. A row of undraw spot illustrations as feature icons.
|
|
86
|
+
4. The same illustration set reused across unrelated products.
|
|
87
|
+
|
|
88
|
+
Diagnostic regex (hook): `\b(?:undraw|isometric)[\w./-]*` (matches the marker in an
|
|
89
|
+
asset path or `src`).
|
|
90
|
+
|
|
91
|
+
Remediation: show the actual product. A real screenshot, a short screen capture,
|
|
92
|
+
or a purpose-drawn illustration with brand character all beat stock clip art. See
|
|
93
|
+
SLOP-12 in `reference/anti-patterns.md` for the longer argument.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
## centered-everything-syndrome
|
|
98
|
+
|
|
99
|
+
Rule id: `centered-everything-syndrome`
|
|
100
|
+
|
|
101
|
+
`mx-auto` plus `text-center` on block after block. Centered text is fine for a
|
|
102
|
+
single short hero line. Applied to body copy, feature lists, and multi-line cards
|
|
103
|
+
it destroys the reading edge: the eye loses the left margin it scans against, and
|
|
104
|
+
every block competes for the same axis.
|
|
105
|
+
|
|
106
|
+
Instances:
|
|
107
|
+
|
|
108
|
+
1. A hero, a feature grid, and a testimonial section all centered.
|
|
109
|
+
2. Centered multi-line paragraphs longer than two lines.
|
|
110
|
+
3. A centered card with centered heading, centered body, and a centered button.
|
|
111
|
+
4. Centered form labels above left-aligned inputs (axis mismatch).
|
|
112
|
+
|
|
113
|
+
Diagnostic regex (hook): a quoted class string containing both `mx-auto` and
|
|
114
|
+
`text-center`, in either order.
|
|
115
|
+
|
|
116
|
+
Remediation: center the hero line only. Left-align body copy and lists so they
|
|
117
|
+
share a reading edge. Reserve centering for short, single-line, high-emphasis text.
|
|
118
|
+
Center the container for width control, but left-align its text content.
|
|
119
|
+
|
|
120
|
+
---
|
|
121
|
+
|
|
122
|
+
## inter-everything
|
|
123
|
+
|
|
124
|
+
Rule id: `inter-everything`
|
|
125
|
+
|
|
126
|
+
Inter as the default with no documented reason. Inter is a fine typeface, which is
|
|
127
|
+
exactly why it is the safe pick a model makes when no brand font is specified. The
|
|
128
|
+
tell is not Inter itself: it is Inter used alone, with no second font and no token
|
|
129
|
+
that records a choice.
|
|
130
|
+
|
|
131
|
+
Instances:
|
|
132
|
+
|
|
133
|
+
1. `font-inter` on the root with no display or brand font anywhere.
|
|
134
|
+
2. `font-family: 'Inter'` in CSS with no second family in the stack or elsewhere in the file.
|
|
135
|
+
3. Inter paired with no `--font-display` or `--font-body` token.
|
|
136
|
+
4. DM Sans, Space Grotesk, or Plus Jakarta Sans in the same role (see SLOP-05).
|
|
137
|
+
|
|
138
|
+
Diagnostic regex (hook): `\bfont-inter\b` or `font-family:\s*['"]?Inter`, warned
|
|
139
|
+
only when no sibling custom-font token (a `font-<name>` utility, a `--font-*`
|
|
140
|
+
variable, or a second `font-family`) appears in the same file.
|
|
141
|
+
|
|
142
|
+
Remediation: choose a typeface you can defend in three sentences against the brand,
|
|
143
|
+
and record it as a token (`--font-display`, `--font-body`). If Inter is the right
|
|
144
|
+
call, pair it with a distinct display face or a deliberate weight system so the
|
|
145
|
+
choice is visible. Keeping the token is what turns a default into a decision.
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## purple-violet-default
|
|
150
|
+
|
|
151
|
+
Rule id: `purple-violet-default`
|
|
152
|
+
|
|
153
|
+
`bg-purple-600` and `bg-violet-600` are the colors a model picks when no palette is
|
|
154
|
+
given. Combined with indigo and cyan they form the exact accent set that ships on a
|
|
155
|
+
large share of generated UIs (SLOP-01). The tell is the raw Tailwind shade used as
|
|
156
|
+
the brand color with no theme token in sight.
|
|
157
|
+
|
|
158
|
+
Instances:
|
|
159
|
+
|
|
160
|
+
1. `bg-violet-600` on the primary button with no `bg-primary` token defined.
|
|
161
|
+
2. `bg-purple-500` headers across the app, hardcoded per component.
|
|
162
|
+
3. Purple-to-blue accent pairing on hero plus buttons (SLOP-02).
|
|
163
|
+
4. `text-violet-600` links with no semantic link color.
|
|
164
|
+
|
|
165
|
+
Diagnostic regex (hook): `\bbg-(?:purple|violet)-(?:500|600|700)\b`, warned only
|
|
166
|
+
when no theme-token class (`bg-primary`, `bg-brand`, `bg-accent`, or a
|
|
167
|
+
`bg-[var(--...)]` / `oklch` / `hsl` arbitrary value) is present in the file.
|
|
168
|
+
|
|
169
|
+
Remediation: route color through a token (`bg-primary`, `--color-accent`) so the
|
|
170
|
+
brand hue lives in one place. If purple is genuinely the brand, define it as the
|
|
171
|
+
token and reference the token, not the raw shade. Pick one primary accent and apply
|
|
172
|
+
it consistently. See the Color System rubric in `reference/audit-scoring.md`.
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## glassmorphism-spam
|
|
177
|
+
|
|
178
|
+
Rule id: `glassmorphism-spam`
|
|
179
|
+
|
|
180
|
+
Frosted-glass panels stacked everywhere: `backdrop-blur` on cards, on the nav, on
|
|
181
|
+
modals, plus `bg-white/10` fills. One blurred overlay over busy content is a valid
|
|
182
|
+
move. Three or more blur or low-alpha-white treatments in one file is glass used as
|
|
183
|
+
the default surface, which hides the fact that no real layout system exists.
|
|
184
|
+
|
|
185
|
+
Instances:
|
|
186
|
+
|
|
187
|
+
1. `backdrop-blur-lg` on every card in a grid.
|
|
188
|
+
2. `bg-white/10` panels layered three deep.
|
|
189
|
+
3. A blurred nav over a blurred hero over a blurred section.
|
|
190
|
+
4. Glass cards on a flat background where there is nothing to blur.
|
|
191
|
+
|
|
192
|
+
Diagnostic regex (hook): `\bbackdrop-blur(?:-\w+)?\b` or `\bbg-white\/(?:10|20|30)\b`,
|
|
193
|
+
flagged at a count of three or more occurrences in one file.
|
|
194
|
+
|
|
195
|
+
Remediation: keep blur for cases where it has a job, such as a modal dimming the
|
|
196
|
+
content behind it, a floating command palette, or a sticky header over scrolling
|
|
197
|
+
content. Give other surfaces solid fills and a real elevation system. See SLOP-04
|
|
198
|
+
for the valid-use list.
|
|
199
|
+
|
|
200
|
+
---
|
|
201
|
+
|
|
202
|
+
## decorative-motion-without-intent
|
|
203
|
+
|
|
204
|
+
Rule id: `decorative-motion-without-intent`
|
|
205
|
+
|
|
206
|
+
`animate-pulse`, `animate-bounce`, and `animate-spin` are loading affordances. The
|
|
207
|
+
tell is using them as ambient decoration: a pulsing hero badge, a bouncing arrow
|
|
208
|
+
that never stops, a spinning accent that encodes no progress. Motion that loops
|
|
209
|
+
forever with no state behind it reads as filler and fights `prefers-reduced-motion`.
|
|
210
|
+
|
|
211
|
+
Instances:
|
|
212
|
+
|
|
213
|
+
1. `animate-pulse` on a static "New" badge in the hero.
|
|
214
|
+
2. `animate-bounce` on a scroll-down chevron looping with no end.
|
|
215
|
+
3. `animate-spin` on a decorative ring that is not a spinner.
|
|
216
|
+
4. A pulsing gradient blob behind the headline.
|
|
217
|
+
|
|
218
|
+
Diagnostic regex (hook, conservative): a quoted class string containing
|
|
219
|
+
`animate-(pulse|bounce|spin)` that does not also contain a loading signal
|
|
220
|
+
(`loading`, `loader`, `spinner`, `skeleton`, `icon`, `i-`, or `sr-only`).
|
|
221
|
+
|
|
222
|
+
Remediation: tie motion to a state. Use `animate-pulse` on skeletons while data
|
|
223
|
+
loads, `animate-spin` on a real spinner during a request, and drop ambient loops.
|
|
224
|
+
Respect `prefers-reduced-motion`. Enter with `ease-out`, exit shorter than enter,
|
|
225
|
+
and never animate keyboard-driven actions. See the Motion Anti-Patterns section of
|
|
226
|
+
`reference/anti-patterns.md`.
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Notes
|
|
231
|
+
|
|
232
|
+
This is a v1 floor, not a ceiling. The regexes favor precision over recall: they
|
|
233
|
+
aim to fire only on the obvious cases so the WARN stays trustworthy. A clean pass
|
|
234
|
+
here does not mean the design is good. It means the eight loudest default-AI tells
|
|
235
|
+
are absent. Real review still applies the full rubric in
|
|
236
|
+
`reference/audit-scoring.md` and the BAN / SLOP catalog in
|
|
237
|
+
`reference/anti-patterns.md`.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* scripts/lib/confidence-route.cjs — pure routing helper for the reviewer
|
|
4
|
+
* confidence gate (Phase 49). Decides where a single finding/gap goes based on
|
|
5
|
+
* its severity, its `confidence` score (0.0-1.0), and whether the reviewer
|
|
6
|
+
* parked it in the `## Tentative` section.
|
|
7
|
+
*
|
|
8
|
+
* Canonical rule (mirrors reference/reviewer-confidence-gate.md):
|
|
9
|
+
* - A finding in `## Tentative` -> 'drop' (never reaches design-fixer)
|
|
10
|
+
* - confidence < 0.5 -> 'drop' (low-confidence floor; stays tentative)
|
|
11
|
+
* - HIGH/CRITICAL (BLOCKER|MAJOR) -> 'fix' only when confidence >= 0.8,
|
|
12
|
+
* otherwise 'user-review'
|
|
13
|
+
* - confidence in [0.5, 0.8) -> 'user-review' (surfaced, not auto-fixed)
|
|
14
|
+
* - confidence >= 0.8 -> 'fix'
|
|
15
|
+
*
|
|
16
|
+
* Returns one of: 'fix' | 'user-review' | 'drop'. Dependency-free and side
|
|
17
|
+
* effect free so the routing matrix is unit-testable in isolation.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
// Minimum confidence for an automatic fix. route() applies this floor to every
// severity, not only BLOCKER/MAJOR; below it a finding is at best 'user-review'.
const HIGH_FLOOR = 0.8;
|
|
21
|
+
// Findings whose confidence is below this are routed to 'drop' (they stay tentative).
const SURFACE_FLOOR = 0.5;
|
|
22
|
+
|
|
23
|
+
// Lower-cased severity labels treated as HIGH/CRITICAL by the auto-fix floor.
const HIGH_SEVERITIES = new Set(['blocker', 'major', 'high', 'critical']);

/**
 * Report whether a severity label belongs to the HIGH/CRITICAL class.
 *
 * The label is trimmed and lower-cased before the lookup, so ' Major ' and
 * 'BLOCKER' both match. Anything that is not a string yields `false`.
 *
 * @param {*} severity - Severity label to classify.
 * @returns {boolean} `true` when the normalized label is in HIGH_SEVERITIES.
 */
function isHighSeverity(severity) {
  const normalized = typeof severity === 'string' ? severity.trim().toLowerCase() : null;
  return normalized !== null && HIGH_SEVERITIES.has(normalized);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Decide where a single finding/gap is routed.
 *
 * Checks run in order and the first match wins:
 *   1. `tentative === true`             -> 'drop'
 *   2. confidence below SURFACE_FLOOR   -> 'drop' (a missing, non-numeric, NaN or
 *      infinite confidence is coerced to 0 and therefore lands here)
 *   3. confidence at/above HIGH_FLOOR   -> 'fix'
 *   4. anything in between              -> 'user-review'
 *
 * @param {object} [finding] - Defaults to `{}`, which routes to 'drop'.
 * @param {string} [finding.severity] - BLOCKER | MAJOR | MINOR | COSMETIC (case-insensitive).
 * @param {number} [finding.confidence] - 0.0-1.0 confidence score.
 * @param {boolean} [finding.tentative=false] - true when the finding sits in `## Tentative`.
 * @returns {'fix'|'user-review'|'drop'}
 */
function route({ severity, confidence, tentative = false } = {}) {
  // 1. Tentative findings never reach the fixer, regardless of score.
  //    Only the boolean `true` counts; other truthy values are ignored.
  if (tentative === true) return 'drop';

  // 2. Number.isFinite() never coerces: it is false for every non-number as
  //    well as for NaN and +/-Infinity, so a single check covers both "missing"
  //    and "malformed". Such a confidence becomes 0 and is dropped in step 3.
  //    NOTE(review): the previous comment here said these findings are surfaced
  //    for user review, but coercing to 0 routes them to 'drop'. Confirm the
  //    intended behaviour against reference/reviewer-confidence-gate.md.
  const c = Number.isFinite(confidence) ? confidence : 0;

  // 3. Low-confidence floor: anything under SURFACE_FLOOR is dropped.
  if (c < SURFACE_FLOOR) return 'drop';

  // 4. HIGH/CRITICAL findings must clear HIGH_FLOOR to auto-fix; otherwise
  //    they are routed to the user instead of the fixer.
  //    NOTE(review): this branch currently yields the same result as the
  //    fall-through below, so severity does not change the outcome today.
  if (isHighSeverity(severity)) {
    return c >= HIGH_FLOOR ? 'fix' : 'user-review';
  }

  // 5. Lower-severity findings: [SURFACE_FLOOR, HIGH_FLOOR) surfaces for
  //    review, >= HIGH_FLOOR auto-fixes.
  return c >= HIGH_FLOOR ? 'fix' : 'user-review';
}
|
|
59
|
+
|
|
60
|
+
// Exported surface: the router, the severity classifier, and both confidence thresholds.
module.exports = { route, isHighSeverity, HIGH_FLOOR, SURFACE_FLOOR };
|