opengstack 0.13.10 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/AGENTS.md +4 -4
  2. package/CLAUDE.md +127 -110
  3. package/README.md +10 -5
  4. package/SKILL.md +500 -70
  5. package/bin/opengstack.js +69 -69
  6. package/{skills/land-and-deploy/SKILL.md → commands/autoplan.md} +7 -25
  7. package/{skills/benchmark/SKILL.md → commands/benchmark.md} +84 -108
  8. package/{skills/browse/SKILL.md → commands/browse.md} +60 -81
  9. package/{skills/ship/SKILL.md → commands/canary.md} +7 -27
  10. package/{skills/careful/SKILL.md → commands/careful.md} +2 -22
  11. package/{skills/canary/SKILL.md → commands/codex.md} +7 -26
  12. package/{skills/connect-chrome/SKILL.md → commands/connect-chrome.md} +7 -24
  13. package/commands/cso.md +70 -0
  14. package/commands/design-consultation.md +70 -0
  15. package/commands/design-review.md +70 -0
  16. package/commands/design-shotgun.md +70 -0
  17. package/commands/document-release.md +70 -0
  18. package/{skills/freeze/SKILL.md → commands/freeze.md} +3 -29
  19. package/{skills/guard/SKILL.md → commands/guard.md} +4 -35
  20. package/commands/investigate.md +70 -0
  21. package/commands/land-and-deploy.md +70 -0
  22. package/commands/office-hours.md +70 -0
  23. package/{skills/gstack-upgrade/SKILL.md → commands/opengstack-upgrade.md} +64 -79
  24. package/commands/plan-ceo-review.md +70 -0
  25. package/commands/plan-design-review.md +70 -0
  26. package/commands/plan-eng-review.md +70 -0
  27. package/commands/qa-only.md +70 -0
  28. package/commands/qa.md +70 -0
  29. package/commands/retro.md +70 -0
  30. package/commands/review.md +70 -0
  31. package/{skills/setup-browser-cookies/SKILL.md → commands/setup-browser-cookies.md} +22 -40
  32. package/commands/setup-deploy.md +70 -0
  33. package/commands/ship.md +70 -0
  34. package/commands/unfreeze.md +25 -0
  35. package/docs/designs/CHROME_VS_CHROMIUM_EXPLORATION.md +9 -9
  36. package/docs/designs/CONDUCTOR_CHROME_SIDEBAR_INTEGRATION.md +2 -2
  37. package/docs/designs/CONDUCTOR_SESSION_API.md +16 -16
  38. package/docs/designs/DESIGN_SHOTGUN.md +74 -74
  39. package/docs/designs/DESIGN_TOOLS_V1.md +111 -111
  40. package/docs/skills.md +483 -202
  41. package/package.json +42 -43
  42. package/scripts/analytics.ts +188 -0
  43. package/scripts/dev-skill.ts +83 -0
  44. package/scripts/discover-skills.ts +39 -0
  45. package/scripts/eval-compare.ts +97 -0
  46. package/scripts/eval-list.ts +117 -0
  47. package/scripts/eval-select.ts +86 -0
  48. package/scripts/eval-summary.ts +188 -0
  49. package/scripts/eval-watch.ts +172 -0
  50. package/scripts/gen-skill-docs.ts +473 -0
  51. package/scripts/resolvers/browse.ts +129 -0
  52. package/scripts/resolvers/codex-helpers.ts +133 -0
  53. package/scripts/resolvers/composition.ts +48 -0
  54. package/scripts/resolvers/confidence.ts +37 -0
  55. package/scripts/resolvers/constants.ts +50 -0
  56. package/scripts/resolvers/design.ts +950 -0
  57. package/scripts/resolvers/index.ts +59 -0
  58. package/scripts/resolvers/learnings.ts +96 -0
  59. package/scripts/resolvers/preamble.ts +505 -0
  60. package/scripts/resolvers/review.ts +884 -0
  61. package/scripts/resolvers/testing.ts +573 -0
  62. package/scripts/resolvers/types.ts +45 -0
  63. package/scripts/resolvers/utility.ts +421 -0
  64. package/scripts/skill-check.ts +190 -0
  65. package/scripts/cleanup.py +0 -100
  66. package/scripts/filter-skills.sh +0 -114
  67. package/scripts/filter_skills.py +0 -164
  68. package/scripts/install-skills.js +0 -60
  69. package/skills/autoplan/SKILL.md +0 -96
  70. package/skills/autoplan/SKILL.md.tmpl +0 -694
  71. package/skills/benchmark/SKILL.md.tmpl +0 -222
  72. package/skills/browse/SKILL.md.tmpl +0 -131
  73. package/skills/browse/bin/find-browse +0 -21
  74. package/skills/browse/bin/remote-slug +0 -14
  75. package/skills/browse/scripts/build-node-server.sh +0 -48
  76. package/skills/browse/src/activity.ts +0 -208
  77. package/skills/browse/src/browser-manager.ts +0 -959
  78. package/skills/browse/src/buffers.ts +0 -137
  79. package/skills/browse/src/bun-polyfill.cjs +0 -109
  80. package/skills/browse/src/cli.ts +0 -678
  81. package/skills/browse/src/commands.ts +0 -128
  82. package/skills/browse/src/config.ts +0 -150
  83. package/skills/browse/src/cookie-import-browser.ts +0 -625
  84. package/skills/browse/src/cookie-picker-routes.ts +0 -230
  85. package/skills/browse/src/cookie-picker-ui.ts +0 -688
  86. package/skills/browse/src/find-browse.ts +0 -61
  87. package/skills/browse/src/meta-commands.ts +0 -550
  88. package/skills/browse/src/platform.ts +0 -17
  89. package/skills/browse/src/read-commands.ts +0 -358
  90. package/skills/browse/src/server.ts +0 -1192
  91. package/skills/browse/src/sidebar-agent.ts +0 -280
  92. package/skills/browse/src/sidebar-utils.ts +0 -21
  93. package/skills/browse/src/snapshot.ts +0 -407
  94. package/skills/browse/src/url-validation.ts +0 -95
  95. package/skills/browse/src/write-commands.ts +0 -364
  96. package/skills/browse/test/activity.test.ts +0 -120
  97. package/skills/browse/test/adversarial-security.test.ts +0 -32
  98. package/skills/browse/test/browser-manager-unit.test.ts +0 -17
  99. package/skills/browse/test/bun-polyfill.test.ts +0 -72
  100. package/skills/browse/test/commands.test.ts +0 -2075
  101. package/skills/browse/test/compare-board.test.ts +0 -342
  102. package/skills/browse/test/config.test.ts +0 -316
  103. package/skills/browse/test/cookie-import-browser.test.ts +0 -519
  104. package/skills/browse/test/cookie-picker-routes.test.ts +0 -260
  105. package/skills/browse/test/file-drop.test.ts +0 -271
  106. package/skills/browse/test/find-browse.test.ts +0 -50
  107. package/skills/browse/test/findport.test.ts +0 -191
  108. package/skills/browse/test/fixtures/basic.html +0 -33
  109. package/skills/browse/test/fixtures/cursor-interactive.html +0 -22
  110. package/skills/browse/test/fixtures/dialog.html +0 -15
  111. package/skills/browse/test/fixtures/empty.html +0 -2
  112. package/skills/browse/test/fixtures/forms.html +0 -55
  113. package/skills/browse/test/fixtures/iframe.html +0 -30
  114. package/skills/browse/test/fixtures/network-idle.html +0 -30
  115. package/skills/browse/test/fixtures/qa-eval-checkout.html +0 -108
  116. package/skills/browse/test/fixtures/qa-eval-spa.html +0 -98
  117. package/skills/browse/test/fixtures/qa-eval.html +0 -51
  118. package/skills/browse/test/fixtures/responsive.html +0 -49
  119. package/skills/browse/test/fixtures/snapshot.html +0 -55
  120. package/skills/browse/test/fixtures/spa.html +0 -24
  121. package/skills/browse/test/fixtures/states.html +0 -17
  122. package/skills/browse/test/fixtures/upload.html +0 -25
  123. package/skills/browse/test/gstack-config.test.ts +0 -138
  124. package/skills/browse/test/gstack-update-check.test.ts +0 -514
  125. package/skills/browse/test/handoff.test.ts +0 -235
  126. package/skills/browse/test/path-validation.test.ts +0 -91
  127. package/skills/browse/test/platform.test.ts +0 -37
  128. package/skills/browse/test/server-auth.test.ts +0 -65
  129. package/skills/browse/test/sidebar-agent-roundtrip.test.ts +0 -226
  130. package/skills/browse/test/sidebar-agent.test.ts +0 -199
  131. package/skills/browse/test/sidebar-integration.test.ts +0 -320
  132. package/skills/browse/test/sidebar-unit.test.ts +0 -96
  133. package/skills/browse/test/snapshot.test.ts +0 -467
  134. package/skills/browse/test/state-ttl.test.ts +0 -35
  135. package/skills/browse/test/test-server.ts +0 -57
  136. package/skills/browse/test/url-validation.test.ts +0 -72
  137. package/skills/browse/test/watch.test.ts +0 -129
  138. package/skills/canary/SKILL.md.tmpl +0 -212
  139. package/skills/careful/SKILL.md.tmpl +0 -56
  140. package/skills/careful/bin/check-careful.sh +0 -112
  141. package/skills/codex/SKILL.md +0 -90
  142. package/skills/codex/SKILL.md.tmpl +0 -417
  143. package/skills/connect-chrome/SKILL.md.tmpl +0 -195
  144. package/skills/cso/ACKNOWLEDGEMENTS.md +0 -14
  145. package/skills/cso/SKILL.md +0 -93
  146. package/skills/cso/SKILL.md.tmpl +0 -606
  147. package/skills/design-consultation/SKILL.md +0 -94
  148. package/skills/design-consultation/SKILL.md.tmpl +0 -415
  149. package/skills/design-review/SKILL.md +0 -94
  150. package/skills/design-review/SKILL.md.tmpl +0 -290
  151. package/skills/design-shotgun/SKILL.md +0 -91
  152. package/skills/design-shotgun/SKILL.md.tmpl +0 -285
  153. package/skills/document-release/SKILL.md +0 -91
  154. package/skills/document-release/SKILL.md.tmpl +0 -359
  155. package/skills/freeze/SKILL.md.tmpl +0 -77
  156. package/skills/freeze/bin/check-freeze.sh +0 -79
  157. package/skills/gstack-upgrade/SKILL.md.tmpl +0 -222
  158. package/skills/guard/SKILL.md.tmpl +0 -77
  159. package/skills/investigate/SKILL.md +0 -105
  160. package/skills/investigate/SKILL.md.tmpl +0 -194
  161. package/skills/land-and-deploy/SKILL.md.tmpl +0 -881
  162. package/skills/office-hours/SKILL.md +0 -96
  163. package/skills/office-hours/SKILL.md.tmpl +0 -645
  164. package/skills/plan-ceo-review/SKILL.md +0 -94
  165. package/skills/plan-ceo-review/SKILL.md.tmpl +0 -811
  166. package/skills/plan-design-review/SKILL.md +0 -92
  167. package/skills/plan-design-review/SKILL.md.tmpl +0 -446
  168. package/skills/plan-eng-review/SKILL.md +0 -93
  169. package/skills/plan-eng-review/SKILL.md.tmpl +0 -303
  170. package/skills/qa/SKILL.md +0 -95
  171. package/skills/qa/SKILL.md.tmpl +0 -316
  172. package/skills/qa/references/issue-taxonomy.md +0 -85
  173. package/skills/qa/templates/qa-report-template.md +0 -126
  174. package/skills/qa-only/SKILL.md +0 -89
  175. package/skills/qa-only/SKILL.md.tmpl +0 -101
  176. package/skills/retro/SKILL.md +0 -89
  177. package/skills/retro/SKILL.md.tmpl +0 -820
  178. package/skills/review/SKILL.md +0 -92
  179. package/skills/review/SKILL.md.tmpl +0 -281
  180. package/skills/review/TODOS-format.md +0 -62
  181. package/skills/review/checklist.md +0 -220
  182. package/skills/review/design-checklist.md +0 -132
  183. package/skills/review/greptile-triage.md +0 -220
  184. package/skills/setup-browser-cookies/SKILL.md.tmpl +0 -81
  185. package/skills/setup-deploy/SKILL.md +0 -92
  186. package/skills/setup-deploy/SKILL.md.tmpl +0 -215
  187. package/skills/ship/SKILL.md.tmpl +0 -636
  188. package/skills/unfreeze/SKILL.md +0 -37
  189. package/skills/unfreeze/SKILL.md.tmpl +0 -36
@@ -0,0 +1,421 @@
1
+ import type { TemplateContext } from './types';
2
+
3
/**
 * Build the shell one-liner that evaluates the output of the `opengstack-slug` helper.
 *
 * @param ctx - Template context; only `ctx.paths.binDir` is read.
 * @returns An `eval "$(…)"` command string that runs the helper with its stderr discarded.
 */
export function generateSlugEval(ctx: TemplateContext): string {
  // Helper invocation, with diagnostics silenced, that gets wrapped in the command substitution.
  const slugCommand = `${ctx.paths.binDir}/opengstack-slug 2>/dev/null`;
  return `eval "$(${slugCommand})"`;
}
6
+
7
/**
 * Build the shell one-liner that evaluates the `opengstack-slug` helper output and then
 * creates the per-project directory under `~/.opengstack/projects/`.
 *
 * @param ctx - Template context; only `ctx.paths.binDir` is read.
 * @returns Two shell commands joined by `&&`: the slug `eval`, then `mkdir -p` for `$SLUG`.
 */
export function generateSlugSetup(ctx: TemplateContext): string {
  const evalSlug = `eval "$(${ctx.paths.binDir}/opengstack-slug 2>/dev/null)"`;
  // NOTE(review): $SLUG is presumably exported by the eval'd helper output — confirm against the helper.
  const ensureProjectDir = 'mkdir -p ~/.opengstack/projects/$SLUG';
  return [evalSlug, ensureProjectDir].join(' && ');
}
10
+
11
/**
 * Produce the static "Step 0" markdown section that tells the agent how to detect the
 * git hosting platform and the base branch of the current PR/MR.
 *
 * The text is fixed; nothing from the context is interpolated.
 *
 * @param _ctx - Template context (unused).
 * @returns Markdown instructions ending with a `---` horizontal rule.
 */
export function generateBaseBranchDetect(_ctx: TemplateContext): string {
  const baseBranchSection = `## Step 0: Detect platform and base branch

First, detect the git hosting platform from the remote URL:

\`\`\`bash
git remote get-url origin 2>/dev/null
\`\`\`

- If the URL contains "github.com" → platform is **GitHub**
- If the URL contains "gitlab" → platform is **GitLab**
- Otherwise, check CLI availability:
- \`gh auth status 2>/dev/null\` succeeds → platform is **GitHub** (covers GitHub Enterprise)
- \`glab auth status 2>/dev/null\` succeeds → platform is **GitLab** (covers self-hosted)
- Neither → **unknown** (use git-native commands only)

Determine which branch this PR/MR targets, or the repo's default branch if no
PR/MR exists. Use the result as "the base branch" in all subsequent steps.

**If GitHub:**
1. \`gh pr view --json baseRefName -q .baseRefName\` — if succeeds, use it
2. \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` — if succeeds, use it

**If GitLab:**
1. \`glab mr view -F json 2>/dev/null\` and extract the \`target_branch\` field — if succeeds, use it
2. \`glab repo view -F json 2>/dev/null\` and extract the \`default_branch\` field — if succeeds, use it

**Git-native fallback (if unknown platform, or CLI commands fail):**
1. \`git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'\`
2. If that fails: \`git rev-parse --verify origin/main 2>/dev/null\` → use \`main\`
3. If that fails: \`git rev-parse --verify origin/master 2>/dev/null\` → use \`master\`

If all fail, fall back to \`main\`.

Print the detected base branch name. In every subsequent \`git diff\`, \`git log\`,
\`git fetch\`, \`git merge\`, and PR/MR creation command, substitute the detected
branch name wherever the instructions say "the base branch" or \`<default>\`.

---`;
  return baseBranchSection;
}
51
+
52
/**
 * Produce the deploy-bootstrap section: a bash snippet that looks for persisted deploy
 * settings in CLAUDE.md, auto-detects the hosting platform from well-known config files,
 * and lists GitHub workflow files that mention deploy/staging keywords, followed by prose
 * telling the agent how to use the results.
 *
 * The text is fixed; nothing from the context is interpolated.
 *
 * Values are extracted with `sed 's/^[^:]*: *//'`, which strips only up to the FIRST
 * colon. A greedy `.*:` would strip through the last colon on the line and truncate a
 * value such as `https://example.com` to `//example.com`.
 *
 * @param _ctx - Template context (unused).
 * @returns Markdown containing a fenced bash block plus usage guidance.
 */
export function generateDeployBootstrap(_ctx: TemplateContext): string {
  return `\`\`\`bash
# Check for persisted deploy config in CLAUDE.md
DEPLOY_CONFIG=$(grep -A 20 "## Deploy Configuration" CLAUDE.md 2>/dev/null || echo "NO_CONFIG")
echo "$DEPLOY_CONFIG"

# If config exists, parse it
if [ "$DEPLOY_CONFIG" != "NO_CONFIG" ]; then
PROD_URL=$(echo "$DEPLOY_CONFIG" | grep -i "production.*url" | head -1 | sed 's/^[^:]*: *//')
PLATFORM=$(echo "$DEPLOY_CONFIG" | grep -i "platform" | head -1 | sed 's/^[^:]*: *//')
echo "PERSISTED_PLATFORM:$PLATFORM"
echo "PERSISTED_URL:$PROD_URL"
fi

# Auto-detect platform from config files
[ -f fly.toml ] && echo "PLATFORM:fly"
[ -f render.yaml ] && echo "PLATFORM:render"
([ -f vercel.json ] || [ -d .vercel ]) && echo "PLATFORM:vercel"
[ -f netlify.toml ] && echo "PLATFORM:netlify"
[ -f Procfile ] && echo "PLATFORM:heroku"
([ -f railway.json ] || [ -f railway.toml ]) && echo "PLATFORM:railway"

# Detect deploy workflows
for f in $(find .github/workflows -maxdepth 1 \\( -name '*.yml' -o -name '*.yaml' \\) 2>/dev/null); do
[ -f "$f" ] && grep -qiE "deploy|release|production|cd" "$f" 2>/dev/null && echo "DEPLOY_WORKFLOW:$f"
[ -f "$f" ] && grep -qiE "staging" "$f" 2>/dev/null && echo "STAGING_WORKFLOW:$f"
done
\`\`\`

If \`PERSISTED_PLATFORM\` and \`PERSISTED_URL\` were found in CLAUDE.md, use them directly
and skip manual detection. If no persisted config exists, use the auto-detected platform
to guide deploy verification. If nothing is detected, ask the user via AskUserQuestion
in the decision tree below.

If you want to persist deploy settings for future runs, suggest the user run \`/setup-deploy\`.`;
}
88
+
89
/**
 * Produce the QA methodology section: testing modes, the six-phase workflow, the health
 * score rubric, framework-specific guidance, and the rules the agent must follow.
 *
 * The text is fixed; nothing from the context is interpolated.
 *
 * Notes on the embedded shell/rubric content:
 * - The local-port probe is an explicit loop with `break`. A flat
 *   `a && echo … || b && echo … || c && echo …` chain is evaluated left to right with
 *   equal precedence, so after the first success every later `echo` would also run.
 * - The console rubric buckets are disjoint (0, 1-3, 4-10, 11+) so a page with exactly
 *   ten errors maps to a single score.
 * - NOTE(review): the text refers to `qa/templates/qa-report-template.md` and
 *   `qa/references/issue-taxonomy.md`; confirm those paths still resolve in the
 *   installed package layout.
 *
 * @param _ctx - Template context (unused).
 * @returns Markdown for the QA modes, workflow, rubric, and rules.
 */
export function generateQAMethodology(_ctx: TemplateContext): string {
  return `## Modes

### Diff-aware (automatic when on a feature branch with no URL)

This is the **primary mode** for developers verifying their work. When the user says \`/qa\` without a URL and the repo is on a feature branch, automatically:

1. **Analyze the branch diff** to understand what changed:
\`\`\`bash
git diff main...HEAD --name-only
git log main..HEAD --oneline
\`\`\`

2. **Identify affected pages/routes** from the changed files:
- Controller/route files → which URL paths they serve
- View/template/component files → which pages render them
- Model/service files → which pages use those models (check controllers that reference them)
- CSS/style files → which pages include those stylesheets
- API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
- Static pages (markdown, HTML) → navigate to them directly

**If no obvious pages/routes are identified from the diff:** Do not skip browser testing. The user invoked /qa because they want browser-based verification. Fall back to Quick mode — navigate to the homepage, follow the top 5 navigation targets, check console for errors, and test any interactive elements found. Backend, config, and infrastructure changes affect app behavior — always verify the app still works.

3. **Detect the running app** — check common local dev ports:
\`\`\`bash
for port in 3000 4000 8080; do
  if $B goto "http://localhost:$port" 2>/dev/null; then
    echo "Found app on :$port"
    break
  fi
done
\`\`\`
If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.

4. **Test each affected page/route:**
- Navigate to the page
- Take a screenshot
- Check console for errors
- If the change was interactive (forms, buttons, flows), test the interaction end-to-end
- Use \`snapshot -D\` before and after actions to verify the change had the expected effect

5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.

6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.

7. **Report findings** scoped to the branch changes:
- "Changes tested: N pages/routes affected by this branch"
- For each: does it work? Screenshot evidence.
- Any regressions on adjacent pages?

**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.

### Full (default when URL is provided)
Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.

### Quick (\`--quick\`)
30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.

### Regression (\`--regression <baseline>\`)
Run full mode, then load \`baseline.json\` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.

---

## Workflow

### Phase 1: Initialize

1. Find browse binary (see Setup above)
2. Create output directories
3. Copy report template from \`qa/templates/qa-report-template.md\` to output dir
4. Start timer for duration tracking

### Phase 2: Authenticate (if needed)

**If the user specified auth credentials:**

\`\`\`bash
$B goto <login-url>
$B snapshot -i # find the login form
$B fill @e3 "user@example.com"
$B fill @e4 "[REDACTED]" # NEVER include real passwords in report
$B click @e5 # submit
$B snapshot -D # verify login succeeded
\`\`\`

**If the user provided a cookie file:**

\`\`\`bash
$B cookie-import cookies.json
$B goto <target-url>
\`\`\`

**If 2FA/OTP is required:** Ask the user for the code and wait.

**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."

### Phase 3: Orient

Get a map of the application:

\`\`\`bash
$B goto <target-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
$B links # map navigation structure
$B console --errors # any errors on landing?
\`\`\`

**Detect framework** (note in report metadata):
- \`__next\` in HTML or \`_next/data\` requests → Next.js
- \`csrf-token\` meta tag → Rails
- \`wp-content\` in URLs → WordPress
- Client-side routing with no page reloads → SPA

**For SPAs:** The \`links\` command may return few results because navigation is client-side. Use \`snapshot -i\` to find nav elements (buttons, menu items) instead.

### Phase 4: Explore

Visit pages systematically. At each page:

\`\`\`bash
$B goto <page-url>
$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
$B console --errors
\`\`\`

Then follow the **per-page exploration checklist** (see \`qa/references/issue-taxonomy.md\`):

1. **Visual scan** — Look at the annotated screenshot for layout issues
2. **Interactive elements** — Click buttons, links, controls. Do they work?
3. **Forms** — Fill and submit. Test empty, invalid, edge cases
4. **Navigation** — Check all paths in and out
5. **States** — Empty state, loading, error, overflow
6. **Console** — Any new JS errors after interactions?
7. **Responsiveness** — Check mobile viewport if relevant:
\`\`\`bash
$B viewport 375x812
$B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
$B viewport 1280x720
\`\`\`

**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).

**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?

### Phase 5: Document

Document each issue **immediately when found** — don't batch them.

**Two evidence tiers:**

**Interactive bugs** (broken flows, dead buttons, form failures):
1. Take a screenshot before the action
2. Perform the action
3. Take a screenshot showing the result
4. Use \`snapshot -D\` to show what changed
5. Write repro steps referencing screenshots

\`\`\`bash
$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
$B click @e5
$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
$B snapshot -D
\`\`\`

**Static bugs** (typos, layout issues, missing images):
1. Take a single annotated screenshot showing the problem
2. Describe what's wrong

\`\`\`bash
$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
\`\`\`

**Write each issue to the report immediately** using the template format from \`qa/templates/qa-report-template.md\`.

### Phase 6: Wrap Up

1. **Compute health score** using the rubric below
2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
3. **Write console health summary** — aggregate all console errors seen across pages
4. **Update severity counts** in the summary table
5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
6. **Save baseline** — write \`baseline.json\` with:
\`\`\`json
{
"date": "YYYY-MM-DD",
"url": "<target>",
"healthScore": N,
"issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
"categoryScores": { "console": N, "links": N, ... }
}
\`\`\`

**Regression mode:** After writing the report, load the baseline file. Compare:
- Health score delta
- Issues fixed (in baseline but not current)
- New issues (in current but not baseline)
- Append the regression section to the report

---

## Health Score Rubric

Compute each category score (0-100), then take the weighted average.

### Console (weight: 15%)
- 0 errors → 100
- 1-3 errors → 70
- 4-10 errors → 40
- 11+ errors → 10

### Links (weight: 10%)
- 0 broken → 100
- Each broken link → -15 (minimum 0)

### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
Each category starts at 100. Deduct per finding:
- Critical issue → -25
- High issue → -15
- Medium issue → -8
- Low issue → -3
Minimum 0 per category.

### Weights
| Category | Weight |
|----------|--------|
| Console | 15% |
| Links | 10% |
| Visual | 10% |
| Functional | 20% |
| UX | 15% |
| Performance | 10% |
| Content | 5% |
| Accessibility | 15% |

### Final Score
\`score = Σ (category_score × weight)\`

---

## Framework-Specific Guidance

### Next.js
- Check console for hydration errors (\`Hydration failed\`, \`Text content did not match\`)
- Monitor \`_next/data\` requests in network — 404s indicate broken data fetching
- Test client-side navigation (click links, don't just \`goto\`) — catches routing issues
- Check for CLS (Cumulative Layout Shift) on pages with dynamic content

### Rails
- Check for N+1 query warnings in console (if development mode)
- Verify CSRF token presence in forms
- Test Turbo/Stimulus integration — do page transitions work smoothly?
- Check for flash messages appearing and dismissing correctly

### WordPress
- Check for plugin conflicts (JS errors from different plugins)
- Verify admin bar visibility for logged-in users
- Test REST API endpoints (\`/wp-json/\`)
- Check for mixed content warnings (common with WP)

### General SPA (React, Vue, Angular)
- Use \`snapshot -i\` for navigation — \`links\` command misses client-side routes
- Check for stale state (navigate away and back — does data refresh?)
- Test browser back/forward — does the app handle history correctly?
- Check for memory leaks (monitor console after extended use)

---

## Important Rules

1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
3. **Never include credentials.** Write \`[REDACTED]\` for passwords in repro steps.
4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
5. **Never read source code.** Test as a user, not a developer.
6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.
12. **Never refuse to use the browser.** When the user invokes /qa or /qa-only, they are requesting browser-based testing. Never suggest evals, unit tests, or other alternatives as a substitute. Even if the diff appears to have no UI changes, backend changes affect app behavior — always open the browser and test.`;
}
368
+
369
/**
 * Pick the `Co-Authored-By:` commit trailer for the host the template is rendered for.
 *
 * @param ctx - Template context; only `ctx.host` is read.
 * @returns The Codex trailer for `'codex'`, the Factory Droid trailer for `'factory'`,
 *   and the Claude trailer for every other host value.
 */
export function generateCoAuthorTrailer(ctx: TemplateContext): string {
  switch (ctx.host) {
    case 'codex':
      return 'Co-Authored-By: OpenAI Codex <noreply@openai.com>';
    case 'factory':
      return 'Co-Authored-By: Factory Droid <droid@users.noreply.github.com>';
    default:
      return 'Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>';
  }
}
378
+
379
/**
 * Produce the static "CHANGELOG (auto-generate)" markdown section: enumerate the branch
 * commits, read the diff, group by theme, write the entry, then cross-check that every
 * commit is represented.
 *
 * The text is fixed; nothing from the context is interpolated.
 *
 * @param _ctx - Template context (unused).
 * @returns Markdown instructions for generating a CHANGELOG entry.
 */
export function generateChangelogWorkflow(_ctx: TemplateContext): string {
  const changelogSection = `## CHANGELOG (auto-generate)

1. Read \`CHANGELOG.md\` header to know the format.

2. **First, enumerate every commit on the branch:**
\`\`\`bash
git log <base>..HEAD --oneline
\`\`\`
Copy the full list. Count the commits. You will use this as a checklist.

3. **Read the full diff** to understand what each commit actually changed:
\`\`\`bash
git diff <base>...HEAD
\`\`\`

4. **Group commits by theme** before writing anything. Common themes:
- New features / capabilities
- Performance improvements
- Bug fixes
- Dead code removal / cleanup
- Infrastructure / tooling / tests
- Refactoring

5. **Write the CHANGELOG entry** covering ALL groups:
- If existing CHANGELOG entries on the branch already cover some commits, replace them with one unified entry for the new version
- Categorize changes into applicable sections:
- \`### Added\` — new features
- \`### Changed\` — changes to existing functionality
- \`### Fixed\` — bug fixes
- \`### Removed\` — removed features
- Write concise, descriptive bullet points
- Insert after the file header (line 5), dated today
- Format: \`## [X.Y.Z.W] - YYYY-MM-DD\`
- **Voice:** Lead with what the user can now **do** that they couldn't before. Use plain language, not implementation details. Never mention TODOS.md, internal tracking, or contributor-facing details.

6. **Cross-check:** Compare your CHANGELOG entry against the commit list from step 2.
Every commit must map to at least one bullet point. If any commit is unrepresented,
add it now. If the branch has N commits spanning K themes, the CHANGELOG must
reflect all K themes.

**Do NOT ask the user to describe changes.** Infer from the diff and commit history.`;
  return changelogSection;
}
@@ -0,0 +1,190 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * skill:check — Health summary for all SKILL.md files.
4
+ *
5
+ * Reports:
6
+ * - Command validation (valid/invalid/snapshot errors)
7
+ * - Template coverage (which SKILL.md files have .tmpl sources)
8
+ * - Freshness check (generated files match committed files)
9
+ */
10
+
11
+ import { validateSkill } from '../test/helpers/skill-parser';
12
+ import { discoverTemplates, discoverSkillFiles } from './discover-skills';
13
+ import * as fs from 'fs';
14
+ import * as path from 'path';
15
+ import { execSync } from 'child_process';
16
+
17
// Parent of this script's directory; every relative path below is joined onto it.
const ROOT = path.resolve(import.meta.dir, '..');

// SKILL.md files are discovered at run time, so there is no list to maintain here.
const SKILL_FILES = discoverSkillFiles(ROOT);

// Set by any failing check below; read once at the end to pick the exit code.
let hasErrors = false;

// ─── Skills ─────────────────────────────────────────────────

console.log(' Skills:');
for (const file of SKILL_FILES) {
  const { warnings, valid, invalid, snapshotFlagErrors } = validateSkill(path.join(ROOT, file));
  const label = file.padEnd(30);

  // A file with warnings is reported by its warnings only; its command
  // counts are not printed and it does not count as an error.
  if (warnings.length > 0) {
    console.log(` \u26a0\ufe0f ${label} — ${warnings.join(', ')}`);
    continue;
  }

  // Clean file: nothing invalid and no snapshot flag problems.
  if (invalid.length === 0 && snapshotFlagErrors.length === 0) {
    console.log(` \u2705 ${label} — ${valid.length} commands, all valid`);
    continue;
  }

  // Otherwise print the summary line followed by one detail line per problem.
  hasErrors = true;
  console.log(` \u274c ${label} — ${valid.length} valid, ${invalid.length} invalid, ${snapshotFlagErrors.length} snapshot errors`);
  for (const { line, command } of invalid) {
    console.log(` line ${line}: unknown command '${command}'`);
  }
  for (const flagError of snapshotFlagErrors) {
    console.log(` line ${flagError.command.line}: ${flagError.error}`);
  }
}
53
+
54
// ─── Templates ──────────────────────────────────────────────

console.log('\n Templates:');
const TEMPLATES = discoverTemplates(ROOT);

// Each discovered pair is classified as: template missing (warning),
// generated output missing (error), or both present (OK).
for (const pair of TEMPLATES) {
  const paddedOutput = pair.output.padEnd(30);
  if (!fs.existsSync(path.join(ROOT, pair.tmpl))) {
    console.log(` \u26a0\ufe0f ${paddedOutput} — no template`);
  } else if (!fs.existsSync(path.join(ROOT, pair.output))) {
    hasErrors = true;
    console.log(` \u274c ${paddedOutput} — generated file missing! Run: bun run gen:skill-docs`);
  } else {
    console.log(` \u2705 ${pair.tmpl.padEnd(30)} \u2192 ${pair.output}`);
  }
}

// Skills without templates: no sibling "<file>.tmpl" on disk and not the
// output of any discovered template. Reported as a warning only.
const templatedOutputs = new Set(TEMPLATES.map(t => t.output));
for (const file of SKILL_FILES) {
  const hasSiblingTemplate = fs.existsSync(path.join(ROOT, `${file}.tmpl`));
  if (hasSiblingTemplate || templatedOutputs.has(file)) {
    continue;
  }
  console.log(` \u26a0\ufe0f ${file.padEnd(30)} — no template (OK if no $B commands)`);
}
81
+
82
// ─── Host Skills (Codex, Factory) ───────────────────────────

/**
 * True when `entryPath` is a directory (following symlinks).
 * Anything that cannot be stat'ed, such as a dangling symlink, counts as "not a directory".
 */
function isDirectory(entryPath: string): boolean {
  try {
    return fs.statSync(entryPath).isDirectory();
  } catch {
    return false;
  }
}

/**
 * Print the health report for one host's generated skills directory.
 *
 * For every sub-directory of `skillsDir`, the skill passes when it has a
 * SKILL.md that contains no `.claude/skills` reference. A missing SKILL.md or
 * a `.claude/skills` reference sets the script-wide `hasErrors` flag.
 * A missing `skillsDir` is only reported, with the command that generates it.
 *
 * @param label - Host name as shown in headings, e.g. "Codex".
 * @param skillsDir - Absolute path of the host's skills directory.
 * @param displayDir - The same directory as shown to the user, e.g. ".agents/skills/".
 * @param host - Value for the generator's `--host` flag in the hint message.
 */
function reportHostSkills(label: string, skillsDir: string, displayDir: string, host: string): void {
  if (!fs.existsSync(skillsDir)) {
    console.log(`\n ${label} Skills: ${displayDir} not found (run: bun run gen:skill-docs --host ${host})`);
    return;
  }

  console.log(`\n ${label} Skills (${displayDir}):`);

  // Only directories are skills; stray files such as .DS_Store must not be
  // reported as a skill with a missing SKILL.md.
  const skillDirs = fs
    .readdirSync(skillsDir)
    .filter(name => isDirectory(path.join(skillsDir, name)))
    .sort();

  let present = 0;
  let missing = 0;
  for (const dir of skillDirs) {
    const skillMd = path.join(skillsDir, dir, 'SKILL.md');
    const padded = dir.padEnd(30);

    if (!fs.existsSync(skillMd)) {
      missing++;
      hasErrors = true;
      console.log(` \u274c ${padded} — SKILL.md missing`);
      continue;
    }

    present++;
    // Quick validation: output generated for this host must not point at
    // the Claude skills directory.
    if (fs.readFileSync(skillMd, 'utf-8').includes('.claude/skills')) {
      hasErrors = true;
      console.log(` \u274c ${padded} — contains .claude/skills reference`);
    } else {
      console.log(` \u2705 ${padded} — OK`);
    }
  }
  console.log(` Total: ${present} skills, ${missing} missing`);
}

const AGENTS_DIR = path.join(ROOT, '.agents', 'skills');
reportHostSkills('Codex', AGENTS_DIR, '.agents/skills/', 'codex');

const FACTORY_DIR = path.join(ROOT, '.factory', 'skills');
reportHostSkills('Factory', FACTORY_DIR, '.factory/skills/', 'factory');
144
+
145
// ─── Freshness ──────────────────────────────────────────────

/**
 * Read the captured `stdout` or `stderr` from an error thrown by `execSync`.
 * Returns an empty string when the thrown value carries no such property.
 */
function capturedOutput(err: unknown, stream: 'stdout' | 'stderr'): string {
  if (typeof err !== 'object' || err === null) {
    return '';
  }
  const value = (err as Record<string, unknown>)[stream];
  return value == null ? '' : String(value);
}

/**
 * Run the skill-doc generator in dry-run mode for one host and report whether
 * the generated files are up to date.
 *
 * Any failure of the generator sets the script-wide `hasErrors` flag. If its
 * stdout contains lines starting with "STALE", those lines are listed with the
 * regeneration command; otherwise the failure is not about staleness, so the
 * captured stderr (or the error message) is shown instead.
 *
 * @param label - Host name as shown in messages, e.g. "Claude".
 * @param host - Value for `--host`; omit to run the generator without the flag.
 */
function reportFreshness(label: string, host?: string): void {
  const hostArg = host ? ` --host ${host}` : '';

  console.log(`\n Freshness (${label}):`);
  try {
    // stdio 'pipe' keeps the generator's output off the console and makes it
    // available on the thrown error when the command exits non-zero.
    execSync(`bun run scripts/gen-skill-docs.ts${hostArg} --dry-run`, { cwd: ROOT, stdio: 'pipe' });
    console.log(` \u2705 All ${label} generated files are fresh`);
  } catch (err: unknown) {
    hasErrors = true;
    const staleLines = capturedOutput(err, 'stdout')
      .split('\n')
      .filter(line => line.startsWith('STALE'));

    if (staleLines.length > 0) {
      console.log(` \u274c ${label} generated files are stale:`);
      for (const line of staleLines) {
        console.log(` ${line}`);
      }
      console.log(` Run: bun run gen:skill-docs${hostArg}`);
      return;
    }

    // The generator failed without reporting any stale file (for example it
    // crashed or could not be started): surface the reason instead of an
    // empty "stale" list.
    const detail =
      capturedOutput(err, 'stderr').trim() || (err instanceof Error ? err.message : String(err));
    console.log(` \u274c ${label} freshness check could not run:`);
    for (const line of detail.split('\n')) {
      console.log(` ${line}`);
    }
  }
}

reportFreshness('Claude');
reportFreshness('Codex', 'codex');
reportFreshness('Factory', 'factory');

console.log('');
process.exit(hasErrors ? 1 : 0);