opengstack 0.13.10 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/AGENTS.md +4 -4
  2. package/CLAUDE.md +127 -110
  3. package/README.md +10 -5
  4. package/SKILL.md +500 -70
  5. package/bin/opengstack.js +69 -69
  6. package/{skills/land-and-deploy/SKILL.md → commands/autoplan.md} +7 -25
  7. package/{skills/benchmark/SKILL.md → commands/benchmark.md} +84 -108
  8. package/{skills/browse/SKILL.md → commands/browse.md} +60 -81
  9. package/{skills/ship/SKILL.md → commands/canary.md} +7 -27
  10. package/{skills/careful/SKILL.md → commands/careful.md} +2 -22
  11. package/{skills/canary/SKILL.md → commands/codex.md} +7 -26
  12. package/{skills/connect-chrome/SKILL.md → commands/connect-chrome.md} +7 -24
  13. package/commands/cso.md +70 -0
  14. package/commands/design-consultation.md +70 -0
  15. package/commands/design-review.md +70 -0
  16. package/commands/design-shotgun.md +70 -0
  17. package/commands/document-release.md +70 -0
  18. package/{skills/freeze/SKILL.md → commands/freeze.md} +3 -29
  19. package/{skills/guard/SKILL.md → commands/guard.md} +4 -35
  20. package/commands/investigate.md +70 -0
  21. package/commands/land-and-deploy.md +70 -0
  22. package/commands/office-hours.md +70 -0
  23. package/{skills/gstack-upgrade/SKILL.md → commands/opengstack-upgrade.md} +64 -79
  24. package/commands/plan-ceo-review.md +70 -0
  25. package/commands/plan-design-review.md +70 -0
  26. package/commands/plan-eng-review.md +70 -0
  27. package/commands/qa-only.md +70 -0
  28. package/commands/qa.md +70 -0
  29. package/commands/retro.md +70 -0
  30. package/commands/review.md +70 -0
  31. package/{skills/setup-browser-cookies/SKILL.md → commands/setup-browser-cookies.md} +22 -40
  32. package/commands/setup-deploy.md +70 -0
  33. package/commands/ship.md +70 -0
  34. package/commands/unfreeze.md +25 -0
  35. package/docs/designs/CHROME_VS_CHROMIUM_EXPLORATION.md +9 -9
  36. package/docs/designs/CONDUCTOR_CHROME_SIDEBAR_INTEGRATION.md +2 -2
  37. package/docs/designs/CONDUCTOR_SESSION_API.md +16 -16
  38. package/docs/designs/DESIGN_SHOTGUN.md +74 -74
  39. package/docs/designs/DESIGN_TOOLS_V1.md +111 -111
  40. package/docs/skills.md +483 -202
  41. package/package.json +42 -43
  42. package/scripts/analytics.ts +188 -0
  43. package/scripts/dev-skill.ts +83 -0
  44. package/scripts/discover-skills.ts +39 -0
  45. package/scripts/eval-compare.ts +97 -0
  46. package/scripts/eval-list.ts +117 -0
  47. package/scripts/eval-select.ts +86 -0
  48. package/scripts/eval-summary.ts +188 -0
  49. package/scripts/eval-watch.ts +172 -0
  50. package/scripts/gen-skill-docs.ts +473 -0
  51. package/scripts/resolvers/browse.ts +129 -0
  52. package/scripts/resolvers/codex-helpers.ts +133 -0
  53. package/scripts/resolvers/composition.ts +48 -0
  54. package/scripts/resolvers/confidence.ts +37 -0
  55. package/scripts/resolvers/constants.ts +50 -0
  56. package/scripts/resolvers/design.ts +950 -0
  57. package/scripts/resolvers/index.ts +59 -0
  58. package/scripts/resolvers/learnings.ts +96 -0
  59. package/scripts/resolvers/preamble.ts +505 -0
  60. package/scripts/resolvers/review.ts +884 -0
  61. package/scripts/resolvers/testing.ts +573 -0
  62. package/scripts/resolvers/types.ts +45 -0
  63. package/scripts/resolvers/utility.ts +421 -0
  64. package/scripts/skill-check.ts +190 -0
  65. package/scripts/cleanup.py +0 -100
  66. package/scripts/filter-skills.sh +0 -114
  67. package/scripts/filter_skills.py +0 -164
  68. package/scripts/install-skills.js +0 -60
  69. package/skills/autoplan/SKILL.md +0 -96
  70. package/skills/autoplan/SKILL.md.tmpl +0 -694
  71. package/skills/benchmark/SKILL.md.tmpl +0 -222
  72. package/skills/browse/SKILL.md.tmpl +0 -131
  73. package/skills/browse/bin/find-browse +0 -21
  74. package/skills/browse/bin/remote-slug +0 -14
  75. package/skills/browse/scripts/build-node-server.sh +0 -48
  76. package/skills/browse/src/activity.ts +0 -208
  77. package/skills/browse/src/browser-manager.ts +0 -959
  78. package/skills/browse/src/buffers.ts +0 -137
  79. package/skills/browse/src/bun-polyfill.cjs +0 -109
  80. package/skills/browse/src/cli.ts +0 -678
  81. package/skills/browse/src/commands.ts +0 -128
  82. package/skills/browse/src/config.ts +0 -150
  83. package/skills/browse/src/cookie-import-browser.ts +0 -625
  84. package/skills/browse/src/cookie-picker-routes.ts +0 -230
  85. package/skills/browse/src/cookie-picker-ui.ts +0 -688
  86. package/skills/browse/src/find-browse.ts +0 -61
  87. package/skills/browse/src/meta-commands.ts +0 -550
  88. package/skills/browse/src/platform.ts +0 -17
  89. package/skills/browse/src/read-commands.ts +0 -358
  90. package/skills/browse/src/server.ts +0 -1192
  91. package/skills/browse/src/sidebar-agent.ts +0 -280
  92. package/skills/browse/src/sidebar-utils.ts +0 -21
  93. package/skills/browse/src/snapshot.ts +0 -407
  94. package/skills/browse/src/url-validation.ts +0 -95
  95. package/skills/browse/src/write-commands.ts +0 -364
  96. package/skills/browse/test/activity.test.ts +0 -120
  97. package/skills/browse/test/adversarial-security.test.ts +0 -32
  98. package/skills/browse/test/browser-manager-unit.test.ts +0 -17
  99. package/skills/browse/test/bun-polyfill.test.ts +0 -72
  100. package/skills/browse/test/commands.test.ts +0 -2075
  101. package/skills/browse/test/compare-board.test.ts +0 -342
  102. package/skills/browse/test/config.test.ts +0 -316
  103. package/skills/browse/test/cookie-import-browser.test.ts +0 -519
  104. package/skills/browse/test/cookie-picker-routes.test.ts +0 -260
  105. package/skills/browse/test/file-drop.test.ts +0 -271
  106. package/skills/browse/test/find-browse.test.ts +0 -50
  107. package/skills/browse/test/findport.test.ts +0 -191
  108. package/skills/browse/test/fixtures/basic.html +0 -33
  109. package/skills/browse/test/fixtures/cursor-interactive.html +0 -22
  110. package/skills/browse/test/fixtures/dialog.html +0 -15
  111. package/skills/browse/test/fixtures/empty.html +0 -2
  112. package/skills/browse/test/fixtures/forms.html +0 -55
  113. package/skills/browse/test/fixtures/iframe.html +0 -30
  114. package/skills/browse/test/fixtures/network-idle.html +0 -30
  115. package/skills/browse/test/fixtures/qa-eval-checkout.html +0 -108
  116. package/skills/browse/test/fixtures/qa-eval-spa.html +0 -98
  117. package/skills/browse/test/fixtures/qa-eval.html +0 -51
  118. package/skills/browse/test/fixtures/responsive.html +0 -49
  119. package/skills/browse/test/fixtures/snapshot.html +0 -55
  120. package/skills/browse/test/fixtures/spa.html +0 -24
  121. package/skills/browse/test/fixtures/states.html +0 -17
  122. package/skills/browse/test/fixtures/upload.html +0 -25
  123. package/skills/browse/test/gstack-config.test.ts +0 -138
  124. package/skills/browse/test/gstack-update-check.test.ts +0 -514
  125. package/skills/browse/test/handoff.test.ts +0 -235
  126. package/skills/browse/test/path-validation.test.ts +0 -91
  127. package/skills/browse/test/platform.test.ts +0 -37
  128. package/skills/browse/test/server-auth.test.ts +0 -65
  129. package/skills/browse/test/sidebar-agent-roundtrip.test.ts +0 -226
  130. package/skills/browse/test/sidebar-agent.test.ts +0 -199
  131. package/skills/browse/test/sidebar-integration.test.ts +0 -320
  132. package/skills/browse/test/sidebar-unit.test.ts +0 -96
  133. package/skills/browse/test/snapshot.test.ts +0 -467
  134. package/skills/browse/test/state-ttl.test.ts +0 -35
  135. package/skills/browse/test/test-server.ts +0 -57
  136. package/skills/browse/test/url-validation.test.ts +0 -72
  137. package/skills/browse/test/watch.test.ts +0 -129
  138. package/skills/canary/SKILL.md.tmpl +0 -212
  139. package/skills/careful/SKILL.md.tmpl +0 -56
  140. package/skills/careful/bin/check-careful.sh +0 -112
  141. package/skills/codex/SKILL.md +0 -90
  142. package/skills/codex/SKILL.md.tmpl +0 -417
  143. package/skills/connect-chrome/SKILL.md.tmpl +0 -195
  144. package/skills/cso/ACKNOWLEDGEMENTS.md +0 -14
  145. package/skills/cso/SKILL.md +0 -93
  146. package/skills/cso/SKILL.md.tmpl +0 -606
  147. package/skills/design-consultation/SKILL.md +0 -94
  148. package/skills/design-consultation/SKILL.md.tmpl +0 -415
  149. package/skills/design-review/SKILL.md +0 -94
  150. package/skills/design-review/SKILL.md.tmpl +0 -290
  151. package/skills/design-shotgun/SKILL.md +0 -91
  152. package/skills/design-shotgun/SKILL.md.tmpl +0 -285
  153. package/skills/document-release/SKILL.md +0 -91
  154. package/skills/document-release/SKILL.md.tmpl +0 -359
  155. package/skills/freeze/SKILL.md.tmpl +0 -77
  156. package/skills/freeze/bin/check-freeze.sh +0 -79
  157. package/skills/gstack-upgrade/SKILL.md.tmpl +0 -222
  158. package/skills/guard/SKILL.md.tmpl +0 -77
  159. package/skills/investigate/SKILL.md +0 -105
  160. package/skills/investigate/SKILL.md.tmpl +0 -194
  161. package/skills/land-and-deploy/SKILL.md.tmpl +0 -881
  162. package/skills/office-hours/SKILL.md +0 -96
  163. package/skills/office-hours/SKILL.md.tmpl +0 -645
  164. package/skills/plan-ceo-review/SKILL.md +0 -94
  165. package/skills/plan-ceo-review/SKILL.md.tmpl +0 -811
  166. package/skills/plan-design-review/SKILL.md +0 -92
  167. package/skills/plan-design-review/SKILL.md.tmpl +0 -446
  168. package/skills/plan-eng-review/SKILL.md +0 -93
  169. package/skills/plan-eng-review/SKILL.md.tmpl +0 -303
  170. package/skills/qa/SKILL.md +0 -95
  171. package/skills/qa/SKILL.md.tmpl +0 -316
  172. package/skills/qa/references/issue-taxonomy.md +0 -85
  173. package/skills/qa/templates/qa-report-template.md +0 -126
  174. package/skills/qa-only/SKILL.md +0 -89
  175. package/skills/qa-only/SKILL.md.tmpl +0 -101
  176. package/skills/retro/SKILL.md +0 -89
  177. package/skills/retro/SKILL.md.tmpl +0 -820
  178. package/skills/review/SKILL.md +0 -92
  179. package/skills/review/SKILL.md.tmpl +0 -281
  180. package/skills/review/TODOS-format.md +0 -62
  181. package/skills/review/checklist.md +0 -220
  182. package/skills/review/design-checklist.md +0 -132
  183. package/skills/review/greptile-triage.md +0 -220
  184. package/skills/setup-browser-cookies/SKILL.md.tmpl +0 -81
  185. package/skills/setup-deploy/SKILL.md +0 -92
  186. package/skills/setup-deploy/SKILL.md.tmpl +0 -215
  187. package/skills/ship/SKILL.md.tmpl +0 -636
  188. package/skills/unfreeze/SKILL.md +0 -37
  189. package/skills/unfreeze/SKILL.md.tmpl +0 -36
@@ -0,0 +1,950 @@
1
+ import type { TemplateContext } from './types';
2
+ import { AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS } from './constants';
3
+
4
+ /**
+  * Builds the "Design Review (conditional, diff-scoped)" markdown section.
+  *
+  * The generated text instructs the agent to source `opengstack-diff-scope`
+  * from `ctx.paths.binDir`, skip silently when `SCOPE_FRONTEND=false`, and
+  * otherwise walk numbered steps 1-6 (read DESIGN.md, read the design
+  * checklist, read changed frontend files, classify findings, report, and log
+  * via `opengstack-review-log`).
+  *
+  * For every host other than `'codex'`, a step 7 is appended that shells out to
+  * the `codex` CLI with a prompt assembled from OPENAI_LITMUS_CHECKS and
+  * OPENAI_HARD_REJECTIONS.
+  *
+  * @param ctx - Template context; only `ctx.host` and `ctx.paths.binDir` are read here.
+  * @returns The section as a single markdown string.
+  */
+ export function generateDesignReviewLite(ctx: TemplateContext): string {
5
+ // Flatten each list to "1. a 2. b ..." on one line so it can sit inside the quoted codex prompt below.
+ const litmusList = OPENAI_LITMUS_CHECKS.map((item, i) => `${i + 1}. ${item}`).join(' ');
6
+ const rejectionList = OPENAI_HARD_REJECTIONS.map((item, i) => `${i + 1}. ${item}`).join(' ');
7
+ // Emit the Codex step for every host except 'codex' itself (empty string in that case).
+ // The litmus-check count in the prompt is derived from the list so the two cannot drift apart.
8
+ const codexBlock = ctx.host === 'codex' ? '' : `
9
+
10
+ 7. **Codex design voice** (optional, automatic if available):
11
+
12
+ \`\`\`bash
13
+ which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
14
+ \`\`\`
15
+
16
+ If Codex is available, run a lightweight design check on the diff:
17
+
18
+ \`\`\`bash
19
+ TMPERR_DRL=$(mktemp /tmp/codex-drl-XXXXXXXX)
20
+ _REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
21
+ codex exec "Review the git diff on this branch. Run ${OPENAI_LITMUS_CHECKS.length} litmus checks (YES/NO each): ${litmusList} Flag any hard rejections: ${rejectionList} 5 most important design findings only. Reference file:line." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="high"' --enable web_search_cached 2>"$TMPERR_DRL"
22
+ \`\`\`
23
+
24
+ Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
25
+ \`\`\`bash
26
+ cat "$TMPERR_DRL" && rm -f "$TMPERR_DRL"
27
+ \`\`\`
28
+
29
+ **Error handling:** All errors are non-blocking. On auth failure, timeout, or empty response — skip with a brief note and continue.
30
+
31
+ Present Codex output under a \`CODEX (design):\` header, merged with the checklist findings above.`;
32
+
33
+ return `## Design Review (conditional, diff-scoped)
34
+
35
+ Check if the diff touches frontend files using \`opengstack-diff-scope\`:
36
+
37
+ \`\`\`bash
38
+ source <(${ctx.paths.binDir}/opengstack-diff-scope <base> 2>/dev/null)
39
+ \`\`\`
40
+
41
+ **If \`SCOPE_FRONTEND=false\`:** Skip design review silently. No output.
42
+
43
+ **If \`SCOPE_FRONTEND=true\`:**
44
+
45
+ 1. **Check for DESIGN.md.** If \`DESIGN.md\` or \`design-system.md\` exists in the repo root, read it. All design findings are calibrated against it — patterns blessed in DESIGN.md are not flagged. If not found, use universal design principles.
46
+
47
+ 2. **Read \`.claude/skills/review/design-checklist.md\`.** If the file cannot be read, skip design review with a note: "Design checklist not found — skipping design review."
48
+
49
+ 3. **Read each changed frontend file** (full file, not just diff hunks). Frontend files are identified by the patterns listed in the checklist.
50
+
51
+ 4. **Apply the design checklist** against the changed files. For each item:
52
+ - **[HIGH] mechanical CSS fix** (\`outline: none\`, \`!important\`, \`font-size < 16px\`): classify as AUTO-FIX
53
+ - **[HIGH/MEDIUM] design judgment needed**: classify as ASK
54
+ - **[LOW] intent-based detection**: present as "Possible — verify visually or run /design-review"
55
+
56
+ 5. **Include findings** in the review output under a "Design Review" header, following the output format in the checklist. Design findings merge with code review findings into the same Fix-First flow.
57
+
58
+ 6. **Log the result** for the Review Readiness Dashboard:
59
+
60
+ \`\`\`bash
61
+ ${ctx.paths.binDir}/opengstack-review-log '{"skill":"design-review-lite","timestamp":"TIMESTAMP","status":"STATUS","findings":N,"auto_fixed":M,"commit":"COMMIT"}'
62
+ \`\`\`
63
+
64
+ Substitute: TIMESTAMP = ISO 8601 datetime, STATUS = "clean" if 0 findings or "issues_found", N = total findings, M = auto-fixed count, COMMIT = output of \`git rev-parse --short HEAD\`.${codexBlock}`;
65
+ }
66
+
67
+ // NOTE: design-checklist.md is a subset of this methodology for code-level detection.
68
+ // When adding items here, also update review/design-checklist.md, and vice versa.
69
+ /**
+  * Returns the design-audit methodology as one markdown string: the review
+  * modes, Phases 1-6, the scoring system, the critique format and the
+  * "Important Rules" list.
+  *
+  * The text is static except for the "AI Slop Detection" category, whose
+  * bullet list and advertised item count are both derived from
+  * AI_SLOP_BLACKLIST so the heading cannot disagree with the list.
+  *
+  * @param _ctx - Template context; not read by this function.
+  * @returns The methodology section as markdown.
+  */
+ export function generateDesignMethodology(_ctx: TemplateContext): string {
70
+ return `## Modes
71
+
72
+ ### Full (default)
73
+ Systematic review of all pages reachable from homepage. Visit 5-8 pages. Full checklist evaluation, responsive screenshots, interaction flow testing. Produces complete design audit report with letter grades.
74
+
75
+ ### Quick (\`--quick\`)
76
+ Homepage + 2 key pages only. First Impression + Design System Extraction + abbreviated checklist. Fastest path to a design score.
77
+
78
+ ### Deep (\`--deep\`)
79
+ Comprehensive review: 10-15 pages, every interaction flow, exhaustive checklist. For pre-launch audits or major redesigns.
80
+
81
+ ### Diff-aware (automatic when on a feature branch with no URL)
82
+ When on a feature branch, scope to pages affected by the branch changes:
83
+ 1. Analyze the branch diff: \`git diff main...HEAD --name-only\`
84
+ 2. Map changed files to affected pages/routes
85
+ 3. Detect running app on common local ports (3000, 4000, 8080)
86
+ 4. Audit only affected pages, compare design quality before/after
87
+
88
+ ### Regression (\`--regression\` or previous \`design-baseline.json\` found)
89
+ Run full audit, then load previous \`design-baseline.json\`. Compare: per-category grade deltas, new findings, resolved findings. Output regression table in report.
90
+
91
+ ---
92
+
93
+ ## Phase 1: First Impression
94
+
95
+ The most uniquely designer-like output. Form a gut reaction before analyzing anything.
96
+
97
+ 1. Navigate to the target URL
98
+ 2. Take a full-page desktop screenshot: \`$B screenshot "$REPORT_DIR/screenshots/first-impression.png"\`
99
+ 3. Write the **First Impression** using this structured critique format:
100
+ - "The site communicates **[what]**." (what it says at a glance — competence? playfulness? confusion?)
101
+ - "I notice **[observation]**." (what stands out, positive or negative — be specific)
102
+ - "The first 3 things my eye goes to are: **[1]**, **[2]**, **[3]**." (hierarchy check — are these intentional?)
103
+ - "If I had to describe this in one word: **[word]**." (gut verdict)
104
+
105
+ This is the section users read first. Be opinionated. A designer doesn't hedge — they react.
106
+
107
+ ---
108
+
109
+ ## Phase 2: Design System Extraction
110
+
111
+ Extract the actual design system the site uses (not what a DESIGN.md says, but what's rendered):
112
+
113
+ \`\`\`bash
114
+ # Fonts in use (capped at 500 elements to avoid timeout)
115
+ $B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).map(e => getComputedStyle(e).fontFamily))])"
116
+
117
+ # Color palette in use
118
+ $B js "JSON.stringify([...new Set([...document.querySelectorAll('*')].slice(0,500).flatMap(e => [getComputedStyle(e).color, getComputedStyle(e).backgroundColor]).filter(c => c !== 'rgba(0, 0, 0, 0)'))])"
119
+
120
+ # Heading hierarchy
121
+ $B js "JSON.stringify([...document.querySelectorAll('h1,h2,h3,h4,h5,h6')].map(h => ({tag:h.tagName, text:h.textContent.trim().slice(0,50), size:getComputedStyle(h).fontSize, weight:getComputedStyle(h).fontWeight})))"
122
+
123
+ # Touch target audit (find undersized interactive elements)
124
+ $B js "JSON.stringify([...document.querySelectorAll('a,button,input,[role=button]')].filter(e => {const r=e.getBoundingClientRect(); return r.width>0 && (r.width<44||r.height<44)}).map(e => ({tag:e.tagName, text:(e.textContent||'').trim().slice(0,30), w:Math.round(e.getBoundingClientRect().width), h:Math.round(e.getBoundingClientRect().height)})).slice(0,20))"
125
+
126
+ # Performance baseline
127
+ $B perf
128
+ \`\`\`
129
+
130
+ Structure findings as an **Inferred Design System**:
131
+ - **Fonts:** list with usage counts. Flag if >3 distinct font families.
132
+ - **Colors:** palette extracted. Flag if >12 unique non-gray colors. Note warm/cool/mixed.
133
+ - **Heading Scale:** h1-h6 sizes. Flag skipped levels, non-systematic size jumps.
134
+ - **Spacing Patterns:** sample padding/margin values. Flag non-scale values.
135
+
136
+ After extraction, offer: *"Want me to save this as your DESIGN.md? I can lock in these observations as your project's design system baseline."*
137
+
138
+ ---
139
+
140
+ ## Phase 3: Page-by-Page Visual Audit
141
+
142
+ For each page in scope:
143
+
144
+ \`\`\`bash
145
+ $B goto <url>
146
+ $B snapshot -i -a -o "$REPORT_DIR/screenshots/{page}-annotated.png"
147
+ $B responsive "$REPORT_DIR/screenshots/{page}"
148
+ $B console --errors
149
+ $B perf
150
+ \`\`\`
151
+
152
+ ### Auth Detection
153
+
154
+ After the first navigation, check if the URL changed to a login-like path:
155
+ \`\`\`bash
156
+ $B url
157
+ \`\`\`
158
+ If URL contains \`/login\`, \`/signin\`, \`/auth\`, or \`/sso\`: the site requires authentication. AskUserQuestion: "This site requires authentication. Want to import cookies from your browser? Run \`/setup-browser-cookies\` first if needed."
159
+
160
+ ### Design Audit Checklist (10 categories, ~80 items)
161
+
162
+ Apply these at each page. Each finding gets an impact rating (high/medium/polish) and category.
163
+
164
+ **1. Visual Hierarchy & Composition** (8 items)
165
+ - Clear focal point? One primary CTA per view?
166
+ - Eye flows naturally top-left to bottom-right?
167
+ - Visual noise — competing elements fighting for attention?
168
+ - Information density appropriate for content type?
169
+ - Z-index clarity — nothing unexpectedly overlapping?
170
+ - Above-the-fold content communicates purpose in 3 seconds?
171
+ - Squint test: hierarchy still visible when blurred?
172
+ - White space is intentional, not leftover?
173
+
174
+ **2. Typography** (15 items)
175
+ - Font count <=3 (flag if more)
176
+ - Scale follows ratio (1.25 major third or 1.333 perfect fourth)
177
+ - Line-height: 1.5x body, 1.15-1.25x headings
178
+ - Measure: 45-75 chars per line (66 ideal)
179
+ - Heading hierarchy: no skipped levels (h1→h3 without h2)
180
+ - Weight contrast: >=2 weights used for hierarchy
181
+ - No blacklisted fonts (Papyrus, Comic Sans, Lobster, Impact, Jokerman)
182
+ - If primary font is Inter/Roboto/Open Sans/Poppins → flag as potentially generic
183
+ - \`text-wrap: balance\` or \`text-pretty\` on headings (check via \`$B css <heading> text-wrap\`)
184
+ - Curly quotes used, not straight quotes
185
+ - Ellipsis character (\`…\`) not three dots (\`...\`)
186
+ - \`font-variant-numeric: tabular-nums\` on number columns
187
+ - Body text >= 16px
188
+ - Caption/label >= 12px
189
+ - No letterspacing on lowercase text
190
+
191
+ **3. Color & Contrast** (10 items)
192
+ - Palette coherent (<=12 unique non-gray colors)
193
+ - WCAG AA: body text 4.5:1, large text (18px+) 3:1, UI components 3:1
194
+ - Semantic colors consistent (success=green, error=red, warning=yellow/amber)
195
+ - No color-only encoding (always add labels, icons, or patterns)
196
+ - Dark mode: surfaces use elevation, not just lightness inversion
197
+ - Dark mode: text off-white (~#E0E0E0), not pure white
198
+ - Primary accent desaturated 10-20% in dark mode
199
+ - \`color-scheme: dark\` on html element (if dark mode present)
200
+ - No red/green only combinations (8% of men have red-green deficiency)
201
+ - Neutral palette is warm or cool consistently — not mixed
202
+
203
+ **4. Spacing & Layout** (12 items)
204
+ - Grid consistent at all breakpoints
205
+ - Spacing uses a scale (4px or 8px base), not arbitrary values
206
+ - Alignment is consistent — nothing floats outside the grid
207
+ - Rhythm: related items closer together, distinct sections further apart
208
+ - Border-radius hierarchy (not uniform bubbly radius on everything)
209
+ - Inner radius = outer radius - gap (nested elements)
210
+ - No horizontal scroll on mobile
211
+ - Max content width set (no full-bleed body text)
212
+ - \`env(safe-area-inset-*)\` for notch devices
213
+ - URL reflects state (filters, tabs, pagination in query params)
214
+ - Flex/grid used for layout (not JS measurement)
215
+ - Breakpoints: mobile (375), tablet (768), desktop (1024), wide (1440)
216
+
217
+ **5. Interaction States** (10 items)
218
+ - Hover state on all interactive elements
219
+ - \`focus-visible\` ring present (never \`outline: none\` without replacement)
220
+ - Active/pressed state with depth effect or color shift
221
+ - Disabled state: reduced opacity + \`cursor: not-allowed\`
222
+ - Loading: skeleton shapes match real content layout
223
+ - Empty states: warm message + primary action + visual (not just "No items.")
224
+ - Error messages: specific + include fix/next step
225
+ - Success: confirmation animation or color, auto-dismiss
226
+ - Touch targets >= 44px on all interactive elements
227
+ - \`cursor: pointer\` on all clickable elements
228
+
229
+ **6. Responsive Design** (8 items)
230
+ - Mobile layout makes *design* sense (not just stacked desktop columns)
231
+ - Touch targets sufficient on mobile (>= 44px)
232
+ - No horizontal scroll on any viewport
233
+ - Images handle responsive (srcset, sizes, or CSS containment)
234
+ - Text readable without zooming on mobile (>= 16px body)
235
+ - Navigation collapses appropriately (hamburger, bottom nav, etc.)
236
+ - Forms usable on mobile (correct input types, no autoFocus on mobile)
237
+ - No \`user-scalable=no\` or \`maximum-scale=1\` in viewport meta
238
+
239
+ **7. Motion & Animation** (6 items)
240
+ - Easing: ease-out for entering, ease-in for exiting, ease-in-out for moving
241
+ - Duration: 50-700ms range (nothing slower unless page transition)
242
+ - Purpose: every animation communicates something (state change, attention, spatial relationship)
243
+ - \`prefers-reduced-motion\` respected (check: \`$B js "matchMedia('(prefers-reduced-motion: reduce)').matches"\`)
244
+ - No \`transition: all\` — properties listed explicitly
245
+ - Only \`transform\` and \`opacity\` animated (not layout properties like width, height, top, left)
246
+
247
+ **8. Content & Microcopy** (8 items)
248
+ - Empty states designed with warmth (message + action + illustration/icon)
249
+ - Error messages specific: what happened + why + what to do next
250
+ - Button labels specific ("Save API Key" not "Continue" or "Submit")
251
+ - No placeholder/lorem ipsum text visible in production
252
+ - Truncation handled (\`text-overflow: ellipsis\`, \`line-clamp\`, or \`break-words\`)
253
+ - Active voice ("Install the CLI" not "The CLI will be installed")
254
+ - Loading states end with \`…\` ("Saving…" not "Saving...")
255
+ - Destructive actions have confirmation modal or undo window
256
+
257
+ **9. AI Slop Detection** (${AI_SLOP_BLACKLIST.length} anti-patterns — the blacklist)
258
+
259
+ The test: would a human designer at a respected studio ever ship this?
260
+
261
+ ${AI_SLOP_BLACKLIST.map(item => `- ${item}`).join('\n')}
262
+
263
+ **10. Performance as Design** (6 items)
264
+ - LCP < 2.0s (web apps), < 1.5s (informational sites)
265
+ - CLS < 0.1 (no visible layout shifts during load)
266
+ - Skeleton quality: shapes match real content layout, shimmer animation
267
+ - Images: \`loading="lazy"\`, width/height dimensions set, WebP/AVIF format
268
+ - Fonts: \`font-display: swap\`, preconnect to CDN origins
269
+ - No visible font swap flash (FOUT) — critical fonts preloaded
270
+
271
+ ---
272
+
273
+ ## Phase 4: Interaction Flow Review
274
+
275
+ Walk 2-3 key user flows and evaluate the *feel*, not just the function:
276
+
277
+ \`\`\`bash
278
+ $B snapshot -i
279
+ $B click @e3 # perform action
280
+ $B snapshot -D # diff to see what changed
281
+ \`\`\`
282
+
283
+ Evaluate:
284
+ - **Response feel:** Does clicking feel responsive? Any delays or missing loading states?
285
+ - **Transition quality:** Are transitions intentional or generic/absent?
286
+ - **Feedback clarity:** Did the action clearly succeed or fail? Is the feedback immediate?
287
+ - **Form polish:** Focus states visible? Validation timing correct? Errors near the source?
288
+
289
+ ---
290
+
291
+ ## Phase 5: Cross-Page Consistency
292
+
293
+ Compare screenshots and observations across pages for:
294
+ - Navigation bar consistent across all pages?
295
+ - Footer consistent?
296
+ - Component reuse vs one-off designs (same button styled differently on different pages?)
297
+ - Tone consistency (one page playful while another is corporate?)
298
+ - Spacing rhythm carries across pages?
299
+
300
+ ---
301
+
302
+ ## Phase 6: Compile Report
303
+
304
+ ### Output Locations
305
+
306
+ **Local:** \`.OpenGStack/design-reports/design-audit-{domain}-{YYYY-MM-DD}.md\`
307
+
308
+ **Project-scoped:**
309
+ \`\`\`bash
310
+ eval "$(~/.claude/skills/opengstack/bin/opengstack-slug 2>/dev/null)" && mkdir -p ~/.opengstack/projects/$SLUG
311
+ \`\`\`
312
+ Write to: \`~/.opengstack/projects/{slug}/{user}-{branch}-design-audit-{datetime}.md\`
313
+
314
+ **Baseline:** Write \`design-baseline.json\` for regression mode:
315
+ \`\`\`json
316
+ {
317
+ "date": "YYYY-MM-DD",
318
+ "url": "<target>",
319
+ "designScore": "B",
320
+ "aiSlopScore": "C",
321
+ "categoryGrades": { "hierarchy": "A", "typography": "B", ... },
322
+ "findings": [{ "id": "FINDING-001", "title": "...", "impact": "high", "category": "typography" }]
323
+ }
324
+ \`\`\`
325
+
326
+ ### Scoring System
327
+
328
+ **Dual headline scores:**
329
+ - **Design Score: {A-F}** — weighted average of all 10 categories
330
+ - **AI Slop Score: {A-F}** — standalone grade with pithy verdict
331
+
332
+ **Per-category grades:**
333
+ - **A:** Intentional, polished, delightful. Shows design thinking.
334
+ - **B:** Solid fundamentals, minor inconsistencies. Looks professional.
335
+ - **C:** Functional but generic. No major problems, no design point of view.
336
+ - **D:** Noticeable problems. Feels unfinished or careless.
337
+ - **F:** Actively hurting user experience. Needs significant rework.
338
+
339
+ **Grade computation:** Each category starts at A. Each High-impact finding drops one letter grade. Each Medium-impact finding drops half a letter grade. Polish findings are noted but do not affect grade. Minimum is F.
340
+
341
+ **Category weights for Design Score:**
342
+ | Category | Weight |
343
+ |----------|--------|
344
+ | Visual Hierarchy | 15% |
345
+ | Typography | 15% |
346
+ | Spacing & Layout | 15% |
347
+ | Color & Contrast | 10% |
348
+ | Interaction States | 10% |
349
+ | Responsive | 10% |
350
+ | Content Quality | 10% |
351
+ | AI Slop | 5% |
352
+ | Motion | 5% |
353
+ | Performance Feel | 5% |
354
+
355
+ AI Slop is 5% of Design Score but also graded independently as a headline metric.
356
+
357
+ ### Regression Output
358
+
359
+ When previous \`design-baseline.json\` exists or \`--regression\` flag is used:
360
+ - Load baseline grades
361
+ - Compare: per-category deltas, new findings, resolved findings
362
+ - Append regression table to report
363
+
364
+ ---
365
+
366
+ ## Design Critique Format
367
+
368
+ Use structured feedback, not opinions:
369
+ - "I notice..." — observation (e.g., "I notice the primary CTA competes with the secondary action")
370
+ - "I wonder..." — question (e.g., "I wonder if users will understand what 'Process' means here")
371
+ - "What if..." — suggestion (e.g., "What if we moved search to a more prominent position?")
372
+ - "I think... because..." — reasoned opinion (e.g., "I think the spacing between sections is too uniform because it doesn't create hierarchy")
373
+
374
+ Tie everything to user goals and product objectives. Always suggest specific improvements alongside problems.
375
+
376
+ ---
377
+
378
+ ## Important Rules
379
+
380
+ 1. **Think like a designer, not a QA engineer.** You care whether things feel right, look intentional, and respect the user. You do NOT just care whether things "work."
381
+ 2. **Screenshots are evidence.** Every finding needs at least one screenshot. Use annotated screenshots (\`snapshot -a\`) to highlight elements.
382
+ 3. **Be specific and actionable.** "Change X to Y because Z" — not "the spacing feels off."
383
+ 4. **Never read source code.** Evaluate the rendered site, not the implementation. (Exception: offer to write DESIGN.md from extracted observations.)
384
+ 5. **AI Slop detection is your superpower.** Most developers can't evaluate whether their site looks AI-generated. You can. Be direct about it.
385
+ 6. **Quick wins matter.** Always include a "Quick Wins" section — the 3-5 highest-impact fixes that take <30 minutes each.
386
+ 7. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
387
+ 8. **Responsive is design, not just "not broken."** A stacked desktop layout on mobile is not responsive design — it's lazy. Evaluate whether the mobile layout makes *design* sense.
388
+ 9. **Document incrementally.** Write each finding to the report as you find it. Don't batch.
389
+ 10. **Depth over breadth.** 5-10 well-documented findings with screenshots and specific suggestions > 20 vague observations.
390
+ 11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.`;
391
+ }
392
+
393
/**
 * Builds the "Visual Sketch" skill section.
 *
 * The section walks the agent through six steps: gather design context,
 * generate a rough wireframe HTML file, render it with the browse binary,
 * iterate with the user, reference the screenshot in the design doc, and
 * optionally collect outside design voices.
 *
 * @param _ctx Template context; not read by this generator.
 * @returns The Markdown section. Steps are separated by one blank line.
 */
export function generateDesignSketch(_ctx: TemplateContext): string {
  const intro = `## Visual Sketch (UI ideas only)

If the chosen approach involves user-facing UI (screens, pages, forms, dashboards,
or interactive elements), generate a rough wireframe to help the user visualize it.
If the idea is backend-only, infrastructure, or has no UI component — skip this
section silently.`;

  const gatherContext = `**Step 1: Gather design context**

1. Check if \`DESIGN.md\` exists in the repo root. If it does, read it for design
system constraints (colors, typography, spacing, component patterns). Use these
constraints in the wireframe.
2. Apply core design principles:
- **Information hierarchy** — what does the user see first, second, third?
- **Interaction states** — loading, empty, error, success, partial
- **Edge case paranoia** — what if the name is 47 chars? Zero results? Network fails?
- **Subtraction default** — "as little design as possible" (Rams). Every element earns its pixels.
- **Design for trust** — every interface element builds or erodes user trust.`;

  const generateWireframe = `**Step 2: Generate wireframe HTML**

Generate a single-page HTML file with these constraints:
- **Intentionally rough aesthetic** — use system fonts, thin gray borders, no color,
hand-drawn-style elements. This is a sketch, not a polished mockup.
- Self-contained — no external dependencies, no CDN links, inline CSS only
- Show the core interaction flow (1-3 screens/states max)
- Include realistic placeholder content (not "Lorem ipsum" — use content that
matches the actual use case)
- Add HTML comments explaining design decisions

Write to a temp file:
\`\`\`bash
SKETCH_FILE="/tmp/opengstack-sketch-$(date +%s).html"
\`\`\``;

  const renderAndCapture = `**Step 3: Render and capture**

\`\`\`bash
$B goto "file://$SKETCH_FILE"
$B screenshot /tmp/opengstack-sketch.png
\`\`\`

If \`$B\` is not available (browse binary not set up), skip the render step. Tell the
user: "Visual sketch requires the browse binary. Run the setup script to enable it."`;

  const presentAndIterate = `**Step 4: Present and iterate**

Show the screenshot to the user. Ask: "Does this feel right? Want to iterate on the layout?"

If they want changes, regenerate the HTML with their feedback and re-render.
If they approve or say "good enough," proceed.`;

  const includeInDoc = `**Step 5: Include in design doc**

Reference the wireframe screenshot in the design doc's "Recommended Approach" section.
The screenshot file at \`/tmp/opengstack-sketch.png\` can be referenced by downstream skills
(\`/plan-design-review\`, \`/design-review\`) to see what was originally envisioned.`;

  const outsideVoices = `**Step 6: Outside design voices** (optional)

After the wireframe is approved, offer outside design perspectives:

\`\`\`bash
which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`

If Codex is available, use AskUserQuestion:
> "Want outside design perspectives on the chosen approach? Codex proposes a visual thesis, content plan, and interaction ideas. A Claude subagent proposes an alternative aesthetic direction."
>
> A) Yes — get outside design voices
> B) No — proceed without

If user chooses A, launch both voices simultaneously:

1. **Codex** (via Bash, \`model_reasoning_effort="medium"\`):
\`\`\`bash
TMPERR_SKETCH=$(mktemp /tmp/codex-sketch-XXXXXXXX)
_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
codex exec "For this product approach, provide: a visual thesis (one sentence — mood, material, energy), a content plan (hero → support → detail → CTA), and 2 interaction ideas that change page feel. Apply beautiful defaults: composition-first, brand-first, cardless, poster not document. Be opinionated." -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="medium"' --enable web_search_cached 2>"$TMPERR_SKETCH"
\`\`\`
Use a 5-minute timeout (\`timeout: 300000\`). After completion: \`cat "$TMPERR_SKETCH" && rm -f "$TMPERR_SKETCH"\`

2. **Claude subagent** (via Agent tool):
"For this product approach, what design direction would you recommend? What aesthetic, typography, and interaction patterns fit? What would make this approach feel inevitable to the user? Be specific — font names, hex colors, spacing values."

Present Codex output under \`CODEX SAYS (design sketch):\` and subagent output under \`CLAUDE SUBAGENT (design direction):\`.
Error handling: all non-blocking. On failure, skip and continue.`;

  // Every step is separated from the previous one by exactly one blank line.
  return [
    intro,
    gatherContext,
    generateWireframe,
    renderAndCapture,
    presentAndIterate,
    includeInDoc,
    outsideVoices,
  ].join('\n\n');
}
482
+
483
/**
 * Builds the "Design Outside Voices" skill section, which runs a Codex pass
 * and a Claude subagent pass in parallel and then merges their findings.
 *
 * Prompts, opt-in behavior, reasoning effort and the synthesis instructions
 * depend on `ctx.skillName` (`plan-design-review`, `design-review` or
 * `design-consultation`).
 *
 * @param ctx Template context; reads `host`, `skillName` and `paths.binDir`.
 * @returns The Markdown section, or `''` when the host is Codex or the skill
 *          is not one of the three supported skills.
 */
export function generateDesignOutsideVoices(ctx: TemplateContext): string {
  // Codex host: strip entirely — Codex should never invoke itself
  if (ctx.host === 'codex') return '';

  const rejectionList = OPENAI_HARD_REJECTIONS.map((item, i) => `${i + 1}. ${item}`).join('\n');
  const litmusList = OPENAI_LITMUS_CHECKS.map((item, i) => `${i + 1}. ${item}`).join('\n');

  // Skill-specific configuration
  const isPlanDesignReview = ctx.skillName === 'plan-design-review';
  const isDesignReview = ctx.skillName === 'design-review';
  const isDesignConsultation = ctx.skillName === 'design-consultation';

  // Determine opt-in behavior and reasoning effort
  const isAutomatic = isDesignReview; // design-review runs automatically
  const reasoningEffort = isDesignConsultation ? 'medium' : 'high'; // creative vs analytical

  // Build skill-specific Codex prompt
  let codexPrompt: string;
  let subagentPrompt: string;

  if (isPlanDesignReview) {
    codexPrompt = `Read the plan file at [plan-file-path]. Evaluate this plan's UI/UX design against these criteria.

HARD REJECTION — flag if ANY apply:
${rejectionList}

LITMUS CHECKS — answer YES or NO for each:
${litmusList}

HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then flag violations of the matching rule set:
- MARKETING: First viewport as one composition, brand-first hierarchy, full-bleed hero, 2-3 intentional motions, composition-first layout
- APP UI: Calm surface hierarchy, dense but readable, utility language, minimal chrome
- UNIVERSAL: CSS variables for colors, no default font stacks, one job per section, cards earn existence

For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging.`;

    subagentPrompt = `Read the plan file at [plan-file-path]. You are an independent senior product designer reviewing this plan. You have NOT seen any prior review. Evaluate:

1. Information hierarchy: what does the user see first, second, third? Is it right?
2. Missing states: loading, empty, error, success, partial — which are unspecified?
3. User journey: what's the emotional arc? Where does it break?
4. Specificity: does the plan describe SPECIFIC UI ("48px Söhne Bold header, #1a1a1a on white") or generic patterns ("clean modern card-based layout")?
5. What design decisions will haunt the implementer if left ambiguous?

For each finding: what's wrong, severity (critical/high/medium), and the fix.`;
  } else if (isDesignReview) {
    codexPrompt = `Review the frontend source code in this repo. Evaluate against these design hard rules:
- Spacing: systematic (design tokens / CSS variables) or magic numbers?
- Typography: expressive purposeful fonts or default stacks?
- Color: CSS variables with defined system, or hardcoded hex scattered?
- Responsive: breakpoints defined? calc(100svh - header) for heroes? Mobile tested?
- A11y: ARIA landmarks, alt text, contrast ratios, 44px touch targets?
- Motion: 2-3 intentional animations, or zero / ornamental only?
- Cards: used only when card IS the interaction? No decorative card grids?

First classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then apply matching rules.

LITMUS CHECKS — answer YES/NO:
${litmusList}

HARD REJECTION — flag if ANY apply:
${rejectionList}

Be specific. Reference file:line for every finding.`;

    subagentPrompt = `Review the frontend source code in this repo. You are an independent senior product designer doing a source-code design audit. Focus on CONSISTENCY PATTERNS across files rather than individual violations:
- Are spacing values systematic across the codebase?
- Is there ONE color system or scattered approaches?
- Do responsive breakpoints follow a consistent set?
- Is the accessibility approach consistent or spotty?

For each finding: what's wrong, severity (critical/high/medium), and the file:line.`;
  } else if (isDesignConsultation) {
    codexPrompt = `Given this product context, propose a complete design direction:
- Visual thesis: one sentence describing mood, material, and energy
- Typography: specific font names (not defaults — no Inter/Roboto/Arial/system) + hex colors
- Color system: CSS variables for background, surface, primary text, muted text, accent
- Layout: composition-first, not component-first. First viewport as poster, not document
- Differentiation: 2 deliberate departures from category norms
- Anti-slop: no purple gradients, no 3-column icon grids, no centered everything, no decorative blobs

Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it.`;

    subagentPrompt = `Given this product context, propose a design direction that would SURPRISE. What would the cool indie studio do that the enterprise UI team wouldn't?
- Propose an aesthetic direction, typography stack (specific font names), color palette (hex values)
- 2 deliberate departures from category norms
- What emotional reaction should the user have in the first 3 seconds?

Be bold. Be specific. No hedging.`;
  } else {
    // Unknown skill — return empty
    return '';
  }

  // Build the opt-in section
  const optInSection = isAutomatic ? `
**Automatic:** Outside voices run automatically when Codex is available. No opt-in needed.` : `
Use AskUserQuestion:
> "Want outside design voices${isPlanDesignReview ? ' before the detailed review' : ''}? Codex evaluates against OpenAI's design hard rules + litmus checks; Claude subagent does an independent ${isDesignConsultation ? 'design direction proposal' : 'completeness review'}."
>
> A) Yes — run outside design voices
> B) No — proceed without

If user chooses B, skip this step and continue.`;

  // Scorecard shared by plan-design-review and design-review. Each generated
  // skill file is read on its own, so the design-review output has to carry
  // the scorecard itself instead of pointing at another skill's copy.
  const litmusScorecard = `\`\`\`
DESIGN OUTSIDE VOICES — LITMUS SCORECARD:
═══════════════════════════════════════════════════════════════
Check Claude Codex Consensus
─────────────────────────────────────── ─────── ─────── ─────────
1. Brand unmistakable in first screen? — — —
2. One strong visual anchor? — — —
3. Scannable by headlines only? — — —
4. Each section has one job? — — —
5. Cards actually necessary? — — —
6. Motion improves hierarchy? — — —
7. Premium without decorative shadows? — — —
─────────────────────────────────────── ─────── ─────── ─────────
Hard rejections triggered: — — —
═══════════════════════════════════════════════════════════════
\`\`\``;
  const scorecardLegend = `Fill in each cell from the Codex and subagent outputs. CONFIRMED = both agree. DISAGREE = models differ. NOT SPEC'D = not enough info to evaluate.`;

  // Build the synthesis section
  const synthesisSection = isPlanDesignReview ? `
**Synthesis — Litmus scorecard:**

${litmusScorecard}

${scorecardLegend}

**Pass integration (respects existing 7-pass contract):**
- Hard rejections → raised as the FIRST items in Pass 1, tagged \`[HARD REJECTION]\`
- Litmus DISAGREE items → raised in the relevant pass with both perspectives
- Litmus CONFIRMED failures → pre-loaded as known issues in the relevant pass
- Passes can skip discovery and go straight to fixing for pre-identified issues` :
    isDesignConsultation ? `
**Synthesis:** Claude main references both Codex and subagent proposals in the Phase 3 proposal. Present:
- Areas of agreement between all three voices (Claude main + Codex + subagent)
- Genuine divergences as creative alternatives for the user to choose from
- "Codex and I agree on X. Codex suggested Y where I'm proposing Z — here's why..."` : `
**Synthesis — Litmus scorecard:**

${litmusScorecard}

${scorecardLegend}
Merge findings into the triage with \`[codex]\` / \`[subagent]\` / \`[cross-model]\` tags.`;

  // The prompt is embedded in a double-quoted Bash string. Inside double
  // quotes \ " ` and $ remain special, so all four are backslash-escaped.
  // A single pass avoids re-escaping backslashes added for other characters.
  const escapedCodexPrompt = codexPrompt.replace(/[\\"`$]/g, (ch) => '\\' + ch);

  return `## Design Outside Voices (parallel)
${optInSection}

**Check Codex availability:**
\`\`\`bash
which codex 2>/dev/null && echo "CODEX_AVAILABLE" || echo "CODEX_NOT_AVAILABLE"
\`\`\`

**If Codex is available**, launch both voices simultaneously:

1. **Codex design voice** (via Bash):
\`\`\`bash
TMPERR_DESIGN=$(mktemp /tmp/codex-design-XXXXXXXX)
_REPO_ROOT=$(git rev-parse --show-toplevel) || { echo "ERROR: not in a git repo" >&2; exit 1; }
codex exec "${escapedCodexPrompt}" -C "$_REPO_ROOT" -s read-only -c 'model_reasoning_effort="${reasoningEffort}"' --enable web_search_cached 2>"$TMPERR_DESIGN"
\`\`\`
Use a 5-minute timeout (\`timeout: 300000\`). After the command completes, read stderr:
\`\`\`bash
cat "$TMPERR_DESIGN" && rm -f "$TMPERR_DESIGN"
\`\`\`

2. **Claude design subagent** (via Agent tool):
Dispatch a subagent with this prompt:
"${subagentPrompt}"

**Error handling (all non-blocking):**
- **Auth failure:** If stderr contains "auth", "login", "unauthorized", or "API key": "Codex authentication failed. Run \`codex login\` to authenticate."
- **Timeout:** "Codex timed out after 5 minutes."
- **Empty response:** "Codex returned no response."
- On any Codex error: proceed with Claude subagent output only, tagged \`[single-model]\`.
- If Claude subagent also fails: "Outside voices unavailable — continuing with primary review."

Present Codex output under a \`CODEX SAYS (design ${isPlanDesignReview ? 'critique' : isDesignReview ? 'source audit' : 'direction'}):\` header.
Present subagent output under a \`CLAUDE SUBAGENT (design ${isPlanDesignReview ? 'completeness' : isDesignReview ? 'consistency' : 'direction'}):\` header.
${synthesisSection}

**Log the result:**
\`\`\`bash
${ctx.paths.binDir}/opengstack-review-log '{"skill":"design-outside-voices","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","status":"STATUS","source":"SOURCE","commit":"'"$(git rev-parse --short HEAD)"'"}'
\`\`\`
Replace STATUS with "clean" or "issues_found", SOURCE with "codex+subagent", "codex-only", "subagent-only", or "unavailable".`;
}
670
+
671
// ─── Design Hard Rules (OpenAI framework + opengstack slop blacklist) ───
/**
 * Builds the "Design Hard Rules" reference section: the page-type classifier,
 * hard rejection criteria, litmus checks, the three rule sets, and the
 * AI-slop blacklist.
 *
 * The rejection, litmus and blacklist entries are numbered from the
 * module-level arrays, so the lists and the blacklist count follow the data.
 *
 * @param _ctx Template context; not read by this generator.
 * @returns The Markdown section.
 */
export function generateDesignHardRules(_ctx: TemplateContext): string {
  const slopItems = AI_SLOP_BLACKLIST.map((item, i) => `${i + 1}. ${item}`).join('\n');
  const rejectionItems = OPENAI_HARD_REJECTIONS.map((item, i) => `${i + 1}. ${item}`).join('\n');
  const litmusItems = OPENAI_LITMUS_CHECKS.map((item, i) => `${i + 1}. ${item}`).join('\n');

  return `### Design Hard Rules

**Classifier — determine rule set before evaluating:**
- **MARKETING/LANDING PAGE** (hero-driven, brand-forward, conversion-focused) → apply Landing Page Rules
- **APP UI** (workspace-driven, data-dense, task-focused: dashboards, admin, settings) → apply App UI Rules
- **HYBRID** (marketing shell with app-like sections) → apply Landing Page Rules to hero/marketing sections, App UI Rules to functional sections

**Hard rejection criteria** (instant-fail patterns — flag if ANY apply):
${rejectionItems}

**Litmus checks** (answer YES/NO for each — used for cross-model consensus scoring):
${litmusItems}

**Landing page rules** (apply when classifier = MARKETING/LANDING):
- First viewport reads as one composition, not a dashboard
- Brand-first hierarchy: brand > headline > body > CTA
- Typography: expressive, purposeful — no default stacks (Inter, Roboto, Arial, system)
- No flat single-color backgrounds — use gradients, images, subtle patterns
- Hero: full-bleed, edge-to-edge, no inset/tiled/rounded variants
- Hero budget: brand, one headline, one supporting sentence, one CTA group, one image
- No cards in hero. Cards only when card IS the interaction
- One job per section: one purpose, one headline, one short supporting sentence
- Motion: 2-3 intentional motions minimum (entrance, scroll-linked, hover/reveal)
- Color: define CSS variables, avoid purple-on-white defaults, one accent color default
- Copy: product language not design commentary. "If deleting 30% improves it, keep deleting"
- Beautiful defaults: composition-first, brand as loudest text, two typefaces max, cardless by default, first viewport as poster not document

**App UI rules** (apply when classifier = APP UI):
- Calm surface hierarchy, strong typography, few colors
- Dense but readable, minimal chrome
- Organize: primary workspace, navigation, secondary context, one accent
- Avoid: dashboard-card mosaics, thick borders, decorative gradients, ornamental icons
- Copy: utility language — orientation, status, action. Not mood/brand/aspiration
- Cards only when card IS the interaction
- Section headings state what area is or what user can do ("Selected KPIs", "Plan status")

**Universal rules** (apply to ALL types):
- Define CSS variables for color system
- No default font stacks (Inter, Roboto, Arial, system)
- One job per section
- "If deleting 30% of the copy improves it, keep deleting"
- Cards earn their existence — no decorative card grids

**AI Slop blacklist** (the ${AI_SLOP_BLACKLIST.length} patterns that scream "AI-generated"):
${slopItems}

Source: [OpenAI "Designing Delightful Frontends with GPT-5.4"](https://developers.openai.com/blog/designing-delightful-frontends-with-gpt-5-4) (Mar 2026) + opengstack design methodology.`;
}
725
+
726
/**
 * Builds the "DESIGN SETUP" skill section: a Bash probe that locates the
 * design and browse binaries, followed by instructions for each outcome.
 *
 * For each binary the probe first checks the repo-local skill root and then
 * falls back to the directory given in the context paths.
 *
 * @param ctx Template context; reads `paths.localSkillRoot`,
 *            `paths.designDir` and `paths.browseDir`.
 * @returns The Markdown section.
 */
export function generateDesignSetup(ctx: TemplateContext): string {
  // Shell lines that resolve one binary into $<shellVar> and report the result.
  // The fallback assignment is left unquoted so a leading ~ still expands.
  const probe = (
    shellVar: string,
    tool: string,
    fallbackDir: string,
    readyTag: string,
    missingLine: string,
  ): string =>
    [
      `${shellVar}=""`,
      `[ -n "$_ROOT" ] && [ -x "$_ROOT/${ctx.paths.localSkillRoot}/${tool}/dist/${tool}" ] && ${shellVar}="$_ROOT/${ctx.paths.localSkillRoot}/${tool}/dist/${tool}"`,
      `[ -z "$${shellVar}" ] && ${shellVar}=${fallbackDir}/${tool}`,
      `if [ -x "$${shellVar}" ]; then`,
      `echo "${readyTag}: $${shellVar}"`,
      'else',
      `echo "${missingLine}"`,
      'fi',
    ].join('\n');

  const designProbe = probe('D', 'design', `${ctx.paths.designDir}`, 'DESIGN_READY', 'DESIGN_NOT_AVAILABLE');
  const browseProbe = probe(
    'B',
    'browse',
    `${ctx.paths.browseDir}`,
    'BROWSE_READY',
    "BROWSE_NOT_AVAILABLE (will use 'open' to view comparison boards)",
  );

  return `## DESIGN SETUP (run this check BEFORE any design mockup command)

\`\`\`bash
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
${designProbe}
${browseProbe}
\`\`\`

If \`DESIGN_NOT_AVAILABLE\`: skip visual mockup generation and fall back to the
existing HTML wireframe approach (\`DESIGN_SKETCH\`). Design mockups are a
progressive enhancement, not a hard requirement.

If \`BROWSE_NOT_AVAILABLE\`: use \`open file://...\` instead of \`$B goto\` to open
comparison boards. The user just needs to see the HTML file in any browser.

If \`DESIGN_READY\`: the design binary is available for visual mockup generation.
Commands:
- \`$D generate --brief "..." --output /path.png\` — generate a single mockup
- \`$D variants --brief "..." --count 3 --output-dir /path/\` — generate N style variants
- \`$D compare --images "a.png,b.png,c.png" --output /path/board.html --serve\` — comparison board + HTTP server
- \`$D serve --html /path/board.html\` — serve comparison board and collect feedback via HTTP
- \`$D check --image /path.png --brief "..."\` — vision quality gate
- \`$D iterate --session /path/session.json --feedback "..." --output /path.png\` — iterate

**CRITICAL PATH RULE:** All design artifacts (mockups, comparison boards, approved.json)
MUST be saved to \`~/.opengstack/projects/$SLUG/designs/\`, NEVER to \`.context/\`,
\`docs/designs/\`, \`/tmp/\`, or any project-local directory. Design artifacts are USER
data, not project files. They persist across branches, conversations, and workspaces.`;
}
770
+
771
/**
 * Builds the "Visual Design Exploration" skill section: probe for the design
 * binary, create the per-project design directory, generate three variants,
 * serve a comparison board, handle regenerate/remix feedback, and save the
 * approved choice.
 *
 * @param ctx Template context; reads `paths.localSkillRoot`,
 *            `paths.designDir` and `paths.binDir`.
 * @returns The Markdown section.
 */
export function generateDesignMockup(ctx: TemplateContext): string {
  return `## Visual Design Exploration

\`\`\`bash
_ROOT=$(git rev-parse --show-toplevel 2>/dev/null)
D=""
[ -n "$_ROOT" ] && [ -x "$_ROOT/${ctx.paths.localSkillRoot}/design/dist/design" ] && D="$_ROOT/${ctx.paths.localSkillRoot}/design/dist/design"
[ -z "$D" ] && D=${ctx.paths.designDir}/design
[ -x "$D" ] && echo "DESIGN_READY" || echo "DESIGN_NOT_AVAILABLE"
\`\`\`

**If \`DESIGN_NOT_AVAILABLE\`:** Fall back to the HTML wireframe approach below
(the existing DESIGN_SKETCH section). Visual mockups require the design binary.

**If \`DESIGN_READY\`:** Generate visual mockup explorations for the user.

Generating visual mockups of the proposed design... (say "skip" if you don't need visuals)

**Step 1: Set up the design directory**

\`\`\`bash
eval "$(${ctx.paths.binDir}/opengstack-slug 2>/dev/null)"
_DESIGN_DIR=~/.opengstack/projects/$SLUG/designs/mockup-$(date +%Y%m%d)
mkdir -p "$_DESIGN_DIR"
echo "DESIGN_DIR: $_DESIGN_DIR"
\`\`\`

**Step 2: Construct the design brief**

Read DESIGN.md if it exists — use it to constrain the visual style. If no DESIGN.md,
explore wide across diverse directions.

**Step 3: Generate 3 variants**

\`\`\`bash
$D variants --brief "<assembled brief>" --count 3 --output-dir "$_DESIGN_DIR/"
\`\`\`

This generates 3 style variations of the same brief (~40 seconds total).

**Step 4: Show variants inline, then open comparison board**

Show each variant to the user inline first (read the PNGs with Read tool), then
create and serve the comparison board:

\`\`\`bash
$D compare --images "$_DESIGN_DIR/variant-A.png,$_DESIGN_DIR/variant-B.png,$_DESIGN_DIR/variant-C.png" --output "$_DESIGN_DIR/design-board.html" --serve
\`\`\`

This opens the board in the user's default browser and blocks until feedback is
received. Read stdout for the structured JSON result. No polling needed.

If \`$D serve\` is not available or fails, fall back to AskUserQuestion:
"I've opened the design board. Which variant do you prefer? Any feedback?"

**Step 5: Handle feedback**

If the JSON contains \`"regenerated": true\`:
1. Read \`regenerateAction\` (or \`remixSpec\` for remix requests)
2. Generate new variants with \`$D iterate\` or \`$D variants\` using updated brief
3. Create new board with \`$D compare\`
4. POST the new HTML to the running server via \`curl -X POST http://127.0.0.1:PORT/api/reload -H 'Content-Type: application/json' -d '{"html":"'"$_DESIGN_DIR"'/design-board.html"}'\`
(parse the port from stderr: look for \`SERVE_STARTED: port=XXXXX\`)
5. Board auto-refreshes in the same tab

If \`"regenerated": false\`: proceed with the approved variant.

**Step 6: Save approved choice**

\`\`\`bash
echo '{"approved_variant":"<VARIANT>","feedback":"<FEEDBACK>","date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","screen":"mockup","branch":"'"$(git branch --show-current 2>/dev/null)"'"}' > "$_DESIGN_DIR/approved.json"
\`\`\`

Reference the saved mockup in the design doc or plan.`;
}
846
+
847
/**
 * Builds the "Comparison Board + Feedback Loop" skill section: serve the
 * comparison board, wait on AskUserQuestion, read the feedback files written
 * next to the board, regenerate on request, confirm the feedback with the
 * user, and save the approved choice.
 *
 * The emitted shell snippets rely on `$D` and `$_DESIGN_DIR` being set by
 * earlier sections of the same skill.
 *
 * @param _ctx Template context; not read by this generator.
 * @returns The Markdown section.
 */
export function generateDesignShotgunLoop(_ctx: TemplateContext): string {
  return `### Comparison Board + Feedback Loop

Create the comparison board and serve it over HTTP:

\`\`\`bash
$D compare --images "$_DESIGN_DIR/variant-A.png,$_DESIGN_DIR/variant-B.png,$_DESIGN_DIR/variant-C.png" --output "$_DESIGN_DIR/design-board.html" --serve
\`\`\`

This command generates the board HTML, starts an HTTP server on a random port,
and opens it in the user's default browser. **Run it in the background** with \`&\`
because the server needs to stay running while the user interacts with the board.

Parse the port from stderr output: \`SERVE_STARTED: port=XXXXX\`. You need this
for the board URL and for reloading during regeneration cycles.

**PRIMARY WAIT: AskUserQuestion with board URL**

After the board is serving, use AskUserQuestion to wait for the user. Include the
board URL so they can click it if they lost the browser tab:

"I've opened a comparison board with the design variants:
http://127.0.0.1:<PORT>/ — Rate them, leave comments, remix
elements you like, and click Submit when you're done. Let me know when you've
submitted your feedback (or paste your preferences here). If you clicked
Regenerate or Remix on the board, tell me and I'll generate new variants."

**Do NOT use AskUserQuestion to ask which variant the user prefers.** The comparison
board IS the chooser. AskUserQuestion is just the blocking wait mechanism.

**After the user responds to AskUserQuestion:**

Check for feedback files next to the board HTML:
- \`$_DESIGN_DIR/feedback.json\` — written when user clicks Submit (final choice)
- \`$_DESIGN_DIR/feedback-pending.json\` — written when user clicks Regenerate/Remix/More Like This

\`\`\`bash
if [ -f "$_DESIGN_DIR/feedback.json" ]; then
echo "SUBMIT_RECEIVED"
cat "$_DESIGN_DIR/feedback.json"
elif [ -f "$_DESIGN_DIR/feedback-pending.json" ]; then
echo "REGENERATE_RECEIVED"
cat "$_DESIGN_DIR/feedback-pending.json"
rm "$_DESIGN_DIR/feedback-pending.json"
else
echo "NO_FEEDBACK_FILE"
fi
\`\`\`

The feedback JSON has this shape:
\`\`\`json
{
"preferred": "A",
"ratings": { "A": 4, "B": 3, "C": 2 },
"comments": { "A": "Love the spacing" },
"overall": "Go with A, bigger CTA",
"regenerated": false
}
\`\`\`

**If \`feedback.json\` found:** The user clicked Submit on the board.
Read \`preferred\`, \`ratings\`, \`comments\`, \`overall\` from the JSON. Proceed with
the approved variant.

**If \`feedback-pending.json\` found:** The user clicked Regenerate/Remix on the board.
1. Read \`regenerateAction\` from the JSON (\`"different"\`, \`"match"\`, \`"more_like_B"\`,
\`"remix"\`, or custom text)
2. If \`regenerateAction\` is \`"remix"\`, read \`remixSpec\` (e.g. \`{"layout":"A","colors":"B"}\`)
3. Generate new variants with \`$D iterate\` or \`$D variants\` using updated brief
4. Create new board: \`$D compare --images "..." --output "$_DESIGN_DIR/design-board.html"\`
5. Reload the board in the user's browser (same tab):
\`curl -s -X POST http://127.0.0.1:PORT/api/reload -H 'Content-Type: application/json' -d '{"html":"'"$_DESIGN_DIR"'/design-board.html"}'\`
6. The board auto-refreshes. **AskUserQuestion again** with the same board URL to
wait for the next round of feedback. Repeat until \`feedback.json\` appears.

**If \`NO_FEEDBACK_FILE\`:** The user typed their preferences directly in the
AskUserQuestion response instead of using the board. Use their text response
as the feedback.

**POLLING FALLBACK:** Only use polling if \`$D serve\` fails (no port available).
In that case, show each variant inline using the Read tool (so the user can see them),
then use AskUserQuestion:
"The comparison board server failed to start. I've shown the variants above.
Which do you prefer? Any feedback?"

**After receiving feedback (any path):** Output a clear summary confirming
what was understood:

"Here's what I understood from your feedback:
PREFERRED: Variant [X]
RATINGS: [list]
YOUR NOTES: [comments]
DIRECTION: [overall]

Is this right?"

Use AskUserQuestion to verify before proceeding.

**Save the approved choice:**
\`\`\`bash
echo '{"approved_variant":"<V>","feedback":"<FB>","date":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'","screen":"<SCREEN>","branch":"'"$(git branch --show-current 2>/dev/null)"'"}' > "$_DESIGN_DIR/approved.json"
\`\`\``;
}
950
+