npm - opengstack - Versions diffs - 0.13.10 → 0.14.2 - Mend

opengstack 0.13.10 → 0.14.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (189) hide show

package/AGENTS.md +4 -4
package/CLAUDE.md +127 -110
package/README.md +10 -5
package/SKILL.md +500 -70
package/bin/opengstack.js +69 -69
package/{skills/land-and-deploy/SKILL.md → commands/autoplan.md} +7 -25
package/{skills/benchmark/SKILL.md → commands/benchmark.md} +84 -108
package/{skills/browse/SKILL.md → commands/browse.md} +60 -81
package/{skills/ship/SKILL.md → commands/canary.md} +7 -27
package/{skills/careful/SKILL.md → commands/careful.md} +2 -22
package/{skills/canary/SKILL.md → commands/codex.md} +7 -26
package/{skills/connect-chrome/SKILL.md → commands/connect-chrome.md} +7 -24
package/commands/cso.md +70 -0
package/commands/design-consultation.md +70 -0
package/commands/design-review.md +70 -0
package/commands/design-shotgun.md +70 -0
package/commands/document-release.md +70 -0
package/{skills/freeze/SKILL.md → commands/freeze.md} +3 -29
package/{skills/guard/SKILL.md → commands/guard.md} +4 -35
package/commands/investigate.md +70 -0
package/commands/land-and-deploy.md +70 -0
package/commands/office-hours.md +70 -0
package/{skills/gstack-upgrade/SKILL.md → commands/opengstack-upgrade.md} +64 -79
package/commands/plan-ceo-review.md +70 -0
package/commands/plan-design-review.md +70 -0
package/commands/plan-eng-review.md +70 -0
package/commands/qa-only.md +70 -0
package/commands/qa.md +70 -0
package/commands/retro.md +70 -0
package/commands/review.md +70 -0
package/{skills/setup-browser-cookies/SKILL.md → commands/setup-browser-cookies.md} +22 -40
package/commands/setup-deploy.md +70 -0
package/commands/ship.md +70 -0
package/commands/unfreeze.md +25 -0
package/docs/designs/CHROME_VS_CHROMIUM_EXPLORATION.md +9 -9
package/docs/designs/CONDUCTOR_CHROME_SIDEBAR_INTEGRATION.md +2 -2
package/docs/designs/CONDUCTOR_SESSION_API.md +16 -16
package/docs/designs/DESIGN_SHOTGUN.md +74 -74
package/docs/designs/DESIGN_TOOLS_V1.md +111 -111
package/docs/skills.md +483 -202
package/package.json +42 -43
package/scripts/analytics.ts +188 -0
package/scripts/dev-skill.ts +83 -0
package/scripts/discover-skills.ts +39 -0
package/scripts/eval-compare.ts +97 -0
package/scripts/eval-list.ts +117 -0
package/scripts/eval-select.ts +86 -0
package/scripts/eval-summary.ts +188 -0
package/scripts/eval-watch.ts +172 -0
package/scripts/gen-skill-docs.ts +473 -0
package/scripts/resolvers/browse.ts +129 -0
package/scripts/resolvers/codex-helpers.ts +133 -0
package/scripts/resolvers/composition.ts +48 -0
package/scripts/resolvers/confidence.ts +37 -0
package/scripts/resolvers/constants.ts +50 -0
package/scripts/resolvers/design.ts +950 -0
package/scripts/resolvers/index.ts +59 -0
package/scripts/resolvers/learnings.ts +96 -0
package/scripts/resolvers/preamble.ts +505 -0
package/scripts/resolvers/review.ts +884 -0
package/scripts/resolvers/testing.ts +573 -0
package/scripts/resolvers/types.ts +45 -0
package/scripts/resolvers/utility.ts +421 -0
package/scripts/skill-check.ts +190 -0
package/scripts/cleanup.py +0 -100
package/scripts/filter-skills.sh +0 -114
package/scripts/filter_skills.py +0 -164
package/scripts/install-skills.js +0 -60
package/skills/autoplan/SKILL.md +0 -96
package/skills/autoplan/SKILL.md.tmpl +0 -694
package/skills/benchmark/SKILL.md.tmpl +0 -222
package/skills/browse/SKILL.md.tmpl +0 -131
package/skills/browse/bin/find-browse +0 -21
package/skills/browse/bin/remote-slug +0 -14
package/skills/browse/scripts/build-node-server.sh +0 -48
package/skills/browse/src/activity.ts +0 -208
package/skills/browse/src/browser-manager.ts +0 -959
package/skills/browse/src/buffers.ts +0 -137
package/skills/browse/src/bun-polyfill.cjs +0 -109
package/skills/browse/src/cli.ts +0 -678
package/skills/browse/src/commands.ts +0 -128
package/skills/browse/src/config.ts +0 -150
package/skills/browse/src/cookie-import-browser.ts +0 -625
package/skills/browse/src/cookie-picker-routes.ts +0 -230
package/skills/browse/src/cookie-picker-ui.ts +0 -688
package/skills/browse/src/find-browse.ts +0 -61
package/skills/browse/src/meta-commands.ts +0 -550
package/skills/browse/src/platform.ts +0 -17
package/skills/browse/src/read-commands.ts +0 -358
package/skills/browse/src/server.ts +0 -1192
package/skills/browse/src/sidebar-agent.ts +0 -280
package/skills/browse/src/sidebar-utils.ts +0 -21
package/skills/browse/src/snapshot.ts +0 -407
package/skills/browse/src/url-validation.ts +0 -95
package/skills/browse/src/write-commands.ts +0 -364
package/skills/browse/test/activity.test.ts +0 -120
package/skills/browse/test/adversarial-security.test.ts +0 -32
package/skills/browse/test/browser-manager-unit.test.ts +0 -17
package/skills/browse/test/bun-polyfill.test.ts +0 -72
package/skills/browse/test/commands.test.ts +0 -2075
package/skills/browse/test/compare-board.test.ts +0 -342
package/skills/browse/test/config.test.ts +0 -316
package/skills/browse/test/cookie-import-browser.test.ts +0 -519
package/skills/browse/test/cookie-picker-routes.test.ts +0 -260
package/skills/browse/test/file-drop.test.ts +0 -271
package/skills/browse/test/find-browse.test.ts +0 -50
package/skills/browse/test/findport.test.ts +0 -191
package/skills/browse/test/fixtures/basic.html +0 -33
package/skills/browse/test/fixtures/cursor-interactive.html +0 -22
package/skills/browse/test/fixtures/dialog.html +0 -15
package/skills/browse/test/fixtures/empty.html +0 -2
package/skills/browse/test/fixtures/forms.html +0 -55
package/skills/browse/test/fixtures/iframe.html +0 -30
package/skills/browse/test/fixtures/network-idle.html +0 -30
package/skills/browse/test/fixtures/qa-eval-checkout.html +0 -108
package/skills/browse/test/fixtures/qa-eval-spa.html +0 -98
package/skills/browse/test/fixtures/qa-eval.html +0 -51
package/skills/browse/test/fixtures/responsive.html +0 -49
package/skills/browse/test/fixtures/snapshot.html +0 -55
package/skills/browse/test/fixtures/spa.html +0 -24
package/skills/browse/test/fixtures/states.html +0 -17
package/skills/browse/test/fixtures/upload.html +0 -25
package/skills/browse/test/gstack-config.test.ts +0 -138
package/skills/browse/test/gstack-update-check.test.ts +0 -514
package/skills/browse/test/handoff.test.ts +0 -235
package/skills/browse/test/path-validation.test.ts +0 -91
package/skills/browse/test/platform.test.ts +0 -37
package/skills/browse/test/server-auth.test.ts +0 -65
package/skills/browse/test/sidebar-agent-roundtrip.test.ts +0 -226
package/skills/browse/test/sidebar-agent.test.ts +0 -199
package/skills/browse/test/sidebar-integration.test.ts +0 -320
package/skills/browse/test/sidebar-unit.test.ts +0 -96
package/skills/browse/test/snapshot.test.ts +0 -467
package/skills/browse/test/state-ttl.test.ts +0 -35
package/skills/browse/test/test-server.ts +0 -57
package/skills/browse/test/url-validation.test.ts +0 -72
package/skills/browse/test/watch.test.ts +0 -129
package/skills/canary/SKILL.md.tmpl +0 -212
package/skills/careful/SKILL.md.tmpl +0 -56
package/skills/careful/bin/check-careful.sh +0 -112
package/skills/codex/SKILL.md +0 -90
package/skills/codex/SKILL.md.tmpl +0 -417
package/skills/connect-chrome/SKILL.md.tmpl +0 -195
package/skills/cso/ACKNOWLEDGEMENTS.md +0 -14
package/skills/cso/SKILL.md +0 -93
package/skills/cso/SKILL.md.tmpl +0 -606
package/skills/design-consultation/SKILL.md +0 -94
package/skills/design-consultation/SKILL.md.tmpl +0 -415
package/skills/design-review/SKILL.md +0 -94
package/skills/design-review/SKILL.md.tmpl +0 -290
package/skills/design-shotgun/SKILL.md +0 -91
package/skills/design-shotgun/SKILL.md.tmpl +0 -285
package/skills/document-release/SKILL.md +0 -91
package/skills/document-release/SKILL.md.tmpl +0 -359
package/skills/freeze/SKILL.md.tmpl +0 -77
package/skills/freeze/bin/check-freeze.sh +0 -79
package/skills/gstack-upgrade/SKILL.md.tmpl +0 -222
package/skills/guard/SKILL.md.tmpl +0 -77
package/skills/investigate/SKILL.md +0 -105
package/skills/investigate/SKILL.md.tmpl +0 -194
package/skills/land-and-deploy/SKILL.md.tmpl +0 -881
package/skills/office-hours/SKILL.md +0 -96
package/skills/office-hours/SKILL.md.tmpl +0 -645
package/skills/plan-ceo-review/SKILL.md +0 -94
package/skills/plan-ceo-review/SKILL.md.tmpl +0 -811
package/skills/plan-design-review/SKILL.md +0 -92
package/skills/plan-design-review/SKILL.md.tmpl +0 -446
package/skills/plan-eng-review/SKILL.md +0 -93
package/skills/plan-eng-review/SKILL.md.tmpl +0 -303
package/skills/qa/SKILL.md +0 -95
package/skills/qa/SKILL.md.tmpl +0 -316
package/skills/qa/references/issue-taxonomy.md +0 -85
package/skills/qa/templates/qa-report-template.md +0 -126
package/skills/qa-only/SKILL.md +0 -89
package/skills/qa-only/SKILL.md.tmpl +0 -101
package/skills/retro/SKILL.md +0 -89
package/skills/retro/SKILL.md.tmpl +0 -820
package/skills/review/SKILL.md +0 -92
package/skills/review/SKILL.md.tmpl +0 -281
package/skills/review/TODOS-format.md +0 -62
package/skills/review/checklist.md +0 -220
package/skills/review/design-checklist.md +0 -132
package/skills/review/greptile-triage.md +0 -220
package/skills/setup-browser-cookies/SKILL.md.tmpl +0 -81
package/skills/setup-deploy/SKILL.md +0 -92
package/skills/setup-deploy/SKILL.md.tmpl +0 -215
package/skills/ship/SKILL.md.tmpl +0 -636
package/skills/unfreeze/SKILL.md +0 -37
package/skills/unfreeze/SKILL.md.tmpl +0 -36

package/scripts/resolvers/utility.ts ADDED Viewed

@@ -0,0 +1,421 @@
+import type { TemplateContext } from './types';
+/**
+ * Builds the shell snippet that evaluates the output of the `opengstack-slug`
+ * helper located in `ctx.paths.binDir`, with the helper's stderr discarded.
+ *
+ * @param ctx - Template context; only `ctx.paths.binDir` is read.
+ * @returns A single-line `eval "$(...)"` shell command.
+ */
+export function generateSlugEval(ctx: TemplateContext): string {
+  const { binDir } = ctx.paths;
+  return `eval "$(${binDir}/opengstack-slug 2>/dev/null)"`;
+}
+/**
+ * Builds the shell snippet that evaluates the `opengstack-slug` helper output
+ * and then creates the per-project directory `~/.opengstack/projects/$SLUG`.
+ * `$SLUG` is presumably set by the evaluated helper output — confirm against
+ * the `opengstack-slug` script.
+ *
+ * @param ctx - Template context; only `ctx.paths.binDir` is read.
+ * @returns A single-line shell command joining both steps with `&&`.
+ */
+export function generateSlugSetup(ctx: TemplateContext): string {
+  const evalSlug = `eval "$(${ctx.paths.binDir}/opengstack-slug 2>/dev/null)"`;
+  const ensureProjectDir = 'mkdir -p ~/.opengstack/projects/$SLUG';
+  return `${evalSlug} && ${ensureProjectDir}`;
+}
+/**
+ * Returns the static "Step 0" markdown section that instructs the agent how to
+ * detect the git hosting platform (GitHub, GitLab, or unknown) and the base
+ * branch of the current PR/MR, with a git-native fallback that ends at `main`.
+ *
+ * The text is a fixed template: the context argument is not used, and the
+ * backticks inside the template literal are escaped so they appear literally
+ * in the generated markdown.
+ *
+ * @param _ctx - Unused template context.
+ * @returns The markdown section, terminated by a `---` rule.
+ */
+export function generateBaseBranchDetect(_ctx: TemplateContext): string {
+ return `## Step 0: Detect platform and base branch
+First, detect the git hosting platform from the remote URL:
+\`\`\`bash
+git remote get-url origin 2>/dev/null
+\`\`\`
+- If the URL contains "github.com" → platform is **GitHub**
+- If the URL contains "gitlab" → platform is **GitLab**
+- Otherwise, check CLI availability:
+ - \`gh auth status 2>/dev/null\` succeeds → platform is **GitHub** (covers GitHub Enterprise)
+ - \`glab auth status 2>/dev/null\` succeeds → platform is **GitLab** (covers self-hosted)
+ - Neither → **unknown** (use git-native commands only)
+Determine which branch this PR/MR targets, or the repo's default branch if no
+PR/MR exists. Use the result as "the base branch" in all subsequent steps.
+**If GitHub:**
+1. \`gh pr view --json baseRefName -q .baseRefName\` — if succeeds, use it
+2. \`gh repo view --json defaultBranchRef -q .defaultBranchRef.name\` — if succeeds, use it
+**If GitLab:**
+1. \`glab mr view -F json 2>/dev/null\` and extract the \`target_branch\` field — if succeeds, use it
+2. \`glab repo view -F json 2>/dev/null\` and extract the \`default_branch\` field — if succeeds, use it
+**Git-native fallback (if unknown platform, or CLI commands fail):**
+1. \`git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's|refs/remotes/origin/||'\`
+2. If that fails: \`git rev-parse --verify origin/main 2>/dev/null\` → use \`main\`
+3. If that fails: \`git rev-parse --verify origin/master 2>/dev/null\` → use \`master\`
+If all fail, fall back to \`main\`.
+Print the detected base branch name. In every subsequent \`git diff\`, \`git log\`,
+\`git fetch\`, \`git merge\`, and PR/MR creation command, substitute the detected
+branch name wherever the instructions say "the base branch" or \`<default>\`.
+---`;
+}
+/**
+ * Returns a static markdown fragment containing a bash script plus follow-up
+ * instructions for discovering how the project is deployed.
+ *
+ * The embedded script:
+ * - greps CLAUDE.md for a "## Deploy Configuration" section and, if found,
+ *   echoes `PERSISTED_PLATFORM:` / `PERSISTED_URL:` values parsed from it;
+ * - echoes `PLATFORM:<name>` for each known platform config file present
+ *   (fly, render, vercel, netlify, heroku, railway);
+ * - echoes `DEPLOY_WORKFLOW:` / `STAGING_WORKFLOW:` for matching files under
+ *   `.github/workflows`.
+ *
+ * NOTE(review): the deploy-workflow grep pattern includes the alternative
+ * `cd`, which matches as a case-insensitive substring of any line — confirm
+ * this breadth is intended.
+ *
+ * @param _ctx - Unused template context.
+ * @returns The markdown fragment (fenced bash block followed by guidance).
+ */
+export function generateDeployBootstrap(_ctx: TemplateContext): string {
+ return `\`\`\`bash
+# Check for persisted deploy config in CLAUDE.md
+DEPLOY_CONFIG=$(grep -A 20 "## Deploy Configuration" CLAUDE.md 2>/dev/null || echo "NO_CONFIG")
+echo "$DEPLOY_CONFIG"
+# If config exists, parse it
+if [ "$DEPLOY_CONFIG" != "NO_CONFIG" ]; then
+ PROD_URL=$(echo "$DEPLOY_CONFIG" | grep -i "production.*url" | head -1 | sed 's/.*: *//')
+ PLATFORM=$(echo "$DEPLOY_CONFIG" | grep -i "platform" | head -1 | sed 's/.*: *//')
+ echo "PERSISTED_PLATFORM:$PLATFORM"
+ echo "PERSISTED_URL:$PROD_URL"
+fi
+# Auto-detect platform from config files
+[ -f fly.toml ] && echo "PLATFORM:fly"
+[ -f render.yaml ] && echo "PLATFORM:render"
+([ -f vercel.json ] || [ -d .vercel ]) && echo "PLATFORM:vercel"
+[ -f netlify.toml ] && echo "PLATFORM:netlify"
+[ -f Procfile ] && echo "PLATFORM:heroku"
+([ -f railway.json ] || [ -f railway.toml ]) && echo "PLATFORM:railway"
+# Detect deploy workflows
+for f in $(find .github/workflows -maxdepth 1 \\( -name '*.yml' -o -name '*.yaml' \\) 2>/dev/null); do
+ [ -f "$f" ] && grep -qiE "deploy|release|production|cd" "$f" 2>/dev/null && echo "DEPLOY_WORKFLOW:$f"
+ [ -f "$f" ] && grep -qiE "staging" "$f" 2>/dev/null && echo "STAGING_WORKFLOW:$f"
+done
+\`\`\`
+If \`PERSISTED_PLATFORM\` and \`PERSISTED_URL\` were found in CLAUDE.md, use them directly
+and skip manual detection. If no persisted config exists, use the auto-detected platform
+to guide deploy verification. If nothing is detected, ask the user via AskUserQuestion
+in the decision tree below.
+If you want to persist deploy settings for future runs, suggest the user run \`/setup-deploy\`.`;
+}
+/**
+ * Returns the static QA methodology markdown: the testing modes, the six-phase
+ * workflow, the health score rubric, framework-specific guidance, and the
+ * rules the agent must follow while testing in the browser.
+ *
+ * The text is a fixed template; the context argument is not used. Backticks
+ * and backslashes inside the template literal are escaped so they appear
+ * literally in the generated markdown.
+ *
+ * Two details of the generated text are deliberate:
+ * - Each local-port probe is wrapped in its own `{ ...; }` group. In a shell
+ *   list `&&` and `||` have equal precedence and associate left, so an
+ *   ungrouped `A && echo || B && echo || C && echo` chain prints every
+ *   "Found app" line once the first probe succeeds.
+ * - The console rubric buckets are disjoint (`4-10`, then `11+`), so exactly
+ *   ten errors maps to a single score.
+ *
+ * @param _ctx - Unused template context.
+ * @returns The full QA methodology section as markdown.
+ */
+export function generateQAMethodology(_ctx: TemplateContext): string {
+  return `## Modes
+### Diff-aware (automatic when on a feature branch with no URL)
+This is the **primary mode** for developers verifying their work. When the user says \`/qa\` without a URL and the repo is on a feature branch, automatically:
+1. **Analyze the branch diff** to understand what changed:
+ \`\`\`bash
+ git diff main...HEAD --name-only
+ git log main..HEAD --oneline
+ \`\`\`
+2. **Identify affected pages/routes** from the changed files:
+ - Controller/route files → which URL paths they serve
+ - View/template/component files → which pages render them
+ - Model/service files → which pages use those models (check controllers that reference them)
+ - CSS/style files → which pages include those stylesheets
+ - API endpoints → test them directly with \`$B js "await fetch('/api/...')"\`
+ - Static pages (markdown, HTML) → navigate to them directly
+ **If no obvious pages/routes are identified from the diff:** Do not skip browser testing. The user invoked /qa because they want browser-based verification. Fall back to Quick mode — navigate to the homepage, follow the top 5 navigation targets, check console for errors, and test any interactive elements found. Backend, config, and infrastructure changes affect app behavior — always verify the app still works.
+3. **Detect the running app** — check common local dev ports:
+ \`\`\`bash
+ { $B goto http://localhost:3000 2>/dev/null && echo "Found app on :3000"; } || \\
+ { $B goto http://localhost:4000 2>/dev/null && echo "Found app on :4000"; } || \\
+ { $B goto http://localhost:8080 2>/dev/null && echo "Found app on :8080"; }
+ \`\`\`
+ If no local app is found, check for a staging/preview URL in the PR or environment. If nothing works, ask the user for the URL.
+4. **Test each affected page/route:**
+ - Navigate to the page
+ - Take a screenshot
+ - Check console for errors
+ - If the change was interactive (forms, buttons, flows), test the interaction end-to-end
+ - Use \`snapshot -D\` before and after actions to verify the change had the expected effect
+5. **Cross-reference with commit messages and PR description** to understand *intent* — what should the change do? Verify it actually does that.
+6. **Check TODOS.md** (if it exists) for known bugs or issues related to the changed files. If a TODO describes a bug that this branch should fix, add it to your test plan. If you find a new bug during QA that isn't in TODOS.md, note it in the report.
+7. **Report findings** scoped to the branch changes:
+ - "Changes tested: N pages/routes affected by this branch"
+ - For each: does it work? Screenshot evidence.
+ - Any regressions on adjacent pages?
+**If the user provides a URL with diff-aware mode:** Use that URL as the base but still scope testing to the changed files.
+### Full (default when URL is provided)
+Systematic exploration. Visit every reachable page. Document 5-10 well-evidenced issues. Produce health score. Takes 5-15 minutes depending on app size.
+### Quick (\`--quick\`)
+30-second smoke test. Visit homepage + top 5 navigation targets. Check: page loads? Console errors? Broken links? Produce health score. No detailed issue documentation.
+### Regression (\`--regression <baseline>\`)
+Run full mode, then load \`baseline.json\` from a previous run. Diff: which issues are fixed? Which are new? What's the score delta? Append regression section to report.
+---
+## Workflow
+### Phase 1: Initialize
+1. Find browse binary (see Setup above)
+2. Create output directories
+3. Copy report template from \`qa/templates/qa-report-template.md\` to output dir
+4. Start timer for duration tracking
+### Phase 2: Authenticate (if needed)
+**If the user specified auth credentials:**
+\`\`\`bash
+$B goto <login-url>
+$B snapshot -i # find the login form
+$B fill @e3 "user@example.com"
+$B fill @e4 "[REDACTED]" # NEVER include real passwords in report
+$B click @e5 # submit
+$B snapshot -D # verify login succeeded
+\`\`\`
+**If the user provided a cookie file:**
+\`\`\`bash
+$B cookie-import cookies.json
+$B goto <target-url>
+\`\`\`
+**If 2FA/OTP is required:** Ask the user for the code and wait.
+**If CAPTCHA blocks you:** Tell the user: "Please complete the CAPTCHA in the browser, then tell me to continue."
+### Phase 3: Orient
+Get a map of the application:
+\`\`\`bash
+$B goto <target-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/initial.png"
+$B links # map navigation structure
+$B console --errors # any errors on landing?
+\`\`\`
+**Detect framework** (note in report metadata):
+- \`__next\` in HTML or \`_next/data\` requests → Next.js
+- \`csrf-token\` meta tag → Rails
+- \`wp-content\` in URLs → WordPress
+- Client-side routing with no page reloads → SPA
+**For SPAs:** The \`links\` command may return few results because navigation is client-side. Use \`snapshot -i\` to find nav elements (buttons, menu items) instead.
+### Phase 4: Explore
+Visit pages systematically. At each page:
+\`\`\`bash
+$B goto <page-url>
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/page-name.png"
+$B console --errors
+\`\`\`
+Then follow the **per-page exploration checklist** (see \`qa/references/issue-taxonomy.md\`):
+1. **Visual scan** — Look at the annotated screenshot for layout issues
+2. **Interactive elements** — Click buttons, links, controls. Do they work?
+3. **Forms** — Fill and submit. Test empty, invalid, edge cases
+4. **Navigation** — Check all paths in and out
+5. **States** — Empty state, loading, error, overflow
+6. **Console** — Any new JS errors after interactions?
+7. **Responsiveness** — Check mobile viewport if relevant:
+ \`\`\`bash
+ $B viewport 375x812
+ $B screenshot "$REPORT_DIR/screenshots/page-mobile.png"
+ $B viewport 1280x720
+ \`\`\`
+**Depth judgment:** Spend more time on core features (homepage, dashboard, checkout, search) and less on secondary pages (about, terms, privacy).
+**Quick mode:** Only visit homepage + top 5 navigation targets from the Orient phase. Skip the per-page checklist — just check: loads? Console errors? Broken links visible?
+### Phase 5: Document
+Document each issue **immediately when found** — don't batch them.
+**Two evidence tiers:**
+**Interactive bugs** (broken flows, dead buttons, form failures):
+1. Take a screenshot before the action
+2. Perform the action
+3. Take a screenshot showing the result
+4. Use \`snapshot -D\` to show what changed
+5. Write repro steps referencing screenshots
+\`\`\`bash
+$B screenshot "$REPORT_DIR/screenshots/issue-001-step-1.png"
+$B click @e5
+$B screenshot "$REPORT_DIR/screenshots/issue-001-result.png"
+$B snapshot -D
+\`\`\`
+**Static bugs** (typos, layout issues, missing images):
+1. Take a single annotated screenshot showing the problem
+2. Describe what's wrong
+\`\`\`bash
+$B snapshot -i -a -o "$REPORT_DIR/screenshots/issue-002.png"
+\`\`\`
+**Write each issue to the report immediately** using the template format from \`qa/templates/qa-report-template.md\`.
+### Phase 6: Wrap Up
+1. **Compute health score** using the rubric below
+2. **Write "Top 3 Things to Fix"** — the 3 highest-severity issues
+3. **Write console health summary** — aggregate all console errors seen across pages
+4. **Update severity counts** in the summary table
+5. **Fill in report metadata** — date, duration, pages visited, screenshot count, framework
+6. **Save baseline** — write \`baseline.json\` with:
+ \`\`\`json
+ {
+ "date": "YYYY-MM-DD",
+ "url": "<target>",
+ "healthScore": N,
+ "issues": [{ "id": "ISSUE-001", "title": "...", "severity": "...", "category": "..." }],
+ "categoryScores": { "console": N, "links": N, ... }
+ }
+ \`\`\`
+**Regression mode:** After writing the report, load the baseline file. Compare:
+- Health score delta
+- Issues fixed (in baseline but not current)
+- New issues (in current but not baseline)
+- Append the regression section to the report
+---
+## Health Score Rubric
+Compute each category score (0-100), then take the weighted average.
+### Console (weight: 15%)
+- 0 errors → 100
+- 1-3 errors → 70
+- 4-10 errors → 40
+- 11+ errors → 10
+### Links (weight: 10%)
+- 0 broken → 100
+- Each broken link → -15 (minimum 0)
+### Per-Category Scoring (Visual, Functional, UX, Content, Performance, Accessibility)
+Each category starts at 100. Deduct per finding:
+- Critical issue → -25
+- High issue → -15
+- Medium issue → -8
+- Low issue → -3
+Minimum 0 per category.
+### Weights
+| Category | Weight |
+|----------|--------|
+| Console | 15% |
+| Links | 10% |
+| Visual | 10% |
+| Functional | 20% |
+| UX | 15% |
+| Performance | 10% |
+| Content | 5% |
+| Accessibility | 15% |
+### Final Score
+\`score = Σ (category_score × weight)\`
+---
+## Framework-Specific Guidance
+### Next.js
+- Check console for hydration errors (\`Hydration failed\`, \`Text content did not match\`)
+- Monitor \`_next/data\` requests in network — 404s indicate broken data fetching
+- Test client-side navigation (click links, don't just \`goto\`) — catches routing issues
+- Check for CLS (Cumulative Layout Shift) on pages with dynamic content
+### Rails
+- Check for N+1 query warnings in console (if development mode)
+- Verify CSRF token presence in forms
+- Test Turbo/Stimulus integration — do page transitions work smoothly?
+- Check for flash messages appearing and dismissing correctly
+### WordPress
+- Check for plugin conflicts (JS errors from different plugins)
+- Verify admin bar visibility for logged-in users
+- Test REST API endpoints (\`/wp-json/\`)
+- Check for mixed content warnings (common with WP)
+### General SPA (React, Vue, Angular)
+- Use \`snapshot -i\` for navigation — \`links\` command misses client-side routes
+- Check for stale state (navigate away and back — does data refresh?)
+- Test browser back/forward — does the app handle history correctly?
+- Check for memory leaks (monitor console after extended use)
+---
+## Important Rules
+1. **Repro is everything.** Every issue needs at least one screenshot. No exceptions.
+2. **Verify before documenting.** Retry the issue once to confirm it's reproducible, not a fluke.
+3. **Never include credentials.** Write \`[REDACTED]\` for passwords in repro steps.
+4. **Write incrementally.** Append each issue to the report as you find it. Don't batch.
+5. **Never read source code.** Test as a user, not a developer.
+6. **Check console after every interaction.** JS errors that don't surface visually are still bugs.
+7. **Test like a user.** Use realistic data. Walk through complete workflows end-to-end.
+8. **Depth over breadth.** 5-10 well-documented issues with evidence > 20 vague descriptions.
+9. **Never delete output files.** Screenshots and reports accumulate — that's intentional.
+10. **Use \`snapshot -C\` for tricky UIs.** Finds clickable divs that the accessibility tree misses.
+11. **Show screenshots to the user.** After every \`$B screenshot\`, \`$B snapshot -a -o\`, or \`$B responsive\` command, use the Read tool on the output file(s) so the user can see them inline. For \`responsive\` (3 files), Read all three. This is critical — without it, screenshots are invisible to the user.
+12. **Never refuse to use the browser.** When the user invokes /qa or /qa-only, they are requesting browser-based testing. Never suggest evals, unit tests, or other alternatives as a substitute. Even if the diff appears to have no UI changes, backend changes affect app behavior — always open the browser and test.`;
+}
+/**
+ * Picks the `Co-Authored-By:` commit trailer for the host named in `ctx.host`.
+ *
+ * @param ctx - Template context; only `ctx.host` is read.
+ * @returns The Codex trailer for `'codex'`, the Factory Droid trailer for
+ *   `'factory'`, and the Claude trailer for every other host value.
+ */
+export function generateCoAuthorTrailer(ctx: TemplateContext): string {
+  switch (ctx.host) {
+    case 'codex':
+      return 'Co-Authored-By: OpenAI Codex <noreply@openai.com>';
+    case 'factory':
+      return 'Co-Authored-By: Factory Droid <droid@users.noreply.github.com>';
+    default:
+      return 'Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>';
+  }
+}
+/**
+ * Returns the static "CHANGELOG (auto-generate)" markdown section: a six-step
+ * procedure that has the agent enumerate the branch's commits, read the diff,
+ * group commits by theme, write a unified entry, and cross-check that every
+ * commit is represented.
+ *
+ * The text is a fixed template; the context argument is not used. `<base>` in
+ * the embedded git commands is a literal placeholder in the output, not a
+ * value substituted by this function.
+ *
+ * @param _ctx - Unused template context.
+ * @returns The changelog workflow section as markdown.
+ */
+export function generateChangelogWorkflow(_ctx: TemplateContext): string {
+ return `## CHANGELOG (auto-generate)
+1. Read \`CHANGELOG.md\` header to know the format.
+2. **First, enumerate every commit on the branch:**
+ \`\`\`bash
+ git log <base>..HEAD --oneline
+ \`\`\`
+ Copy the full list. Count the commits. You will use this as a checklist.
+3. **Read the full diff** to understand what each commit actually changed:
+ \`\`\`bash
+ git diff <base>...HEAD
+ \`\`\`
+4. **Group commits by theme** before writing anything. Common themes:
+ - New features / capabilities
+ - Performance improvements
+ - Bug fixes
+ - Dead code removal / cleanup
+ - Infrastructure / tooling / tests
+ - Refactoring
+5. **Write the CHANGELOG entry** covering ALL groups:
+ - If existing CHANGELOG entries on the branch already cover some commits, replace them with one unified entry for the new version
+ - Categorize changes into applicable sections:
+ - \`### Added\` — new features
+ - \`### Changed\` — changes to existing functionality
+ - \`### Fixed\` — bug fixes
+ - \`### Removed\` — removed features
+ - Write concise, descriptive bullet points
+ - Insert after the file header (line 5), dated today
+ - Format: \`## [X.Y.Z.W] - YYYY-MM-DD\`
+ - **Voice:** Lead with what the user can now **do** that they couldn't before. Use plain language, not implementation details. Never mention TODOS.md, internal tracking, or contributor-facing details.
+6. **Cross-check:** Compare your CHANGELOG entry against the commit list from step 2.
+ Every commit must map to at least one bullet point. If any commit is unrepresented,
+ add it now. If the branch has N commits spanning K themes, the CHANGELOG must
+ reflect all K themes.
+**Do NOT ask the user to describe changes.** Infer from the diff and commit history.`;
+}

package/scripts/skill-check.ts ADDED Viewed

@@ -0,0 +1,190 @@
+#!/usr/bin/env bun
+/**
+ * skill:check — Health summary for all SKILL.md files.
+ *
+ * Reports:
+ * - Command validation (valid/invalid/snapshot errors)
+ * - Template coverage (which SKILL.md files have .tmpl sources)
+ * - Host-specific skills (Codex under .agents/skills/, Factory under .factory/skills/)
+ * - Freshness check (generated files match committed files)
+ */
+import { validateSkill } from '../test/helpers/skill-parser';
+import { discoverTemplates, discoverSkillFiles } from './discover-skills';
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+// Repository root: the parent of the directory holding this script.
+const ROOT = path.resolve(import.meta.dir, '..');
+// Every SKILL.md is discovered at runtime; nothing is hardcoded here.
+const SKILL_FILES = discoverSkillFiles(ROOT);
+// Set by any failing check below; decides the process exit code.
+let hasErrors = false;
+// ─── Skills ─────────────────────────────────────────────────
+console.log(' Skills:');
+for (const file of SKILL_FILES) {
+  const label = file.padEnd(30);
+  const { warnings, valid, invalid, snapshotFlagErrors } = validateSkill(path.join(ROOT, file));
+  // A file with warnings is reported as a warning only; its command counts
+  // are not inspected and it does not fail the run.
+  if (warnings.length > 0) {
+    console.log(` \u26a0\ufe0f ${label} — ${warnings.join(', ')}`);
+    continue;
+  }
+  if (invalid.length === 0 && snapshotFlagErrors.length === 0) {
+    console.log(` \u2705 ${label} — ${valid.length} commands, all valid`);
+    continue;
+  }
+  hasErrors = true;
+  console.log(` \u274c ${label} — ${valid.length} valid, ${invalid.length} invalid, ${snapshotFlagErrors.length} snapshot errors`);
+  for (const { line, command } of invalid) {
+    console.log(` line ${line}: unknown command '${command}'`);
+  }
+  for (const flagError of snapshotFlagErrors) {
+    console.log(` line ${flagError.command.line}: ${flagError.error}`);
+  }
+}
+// ─── Templates ──────────────────────────────────────────────
+console.log('\n Templates:');
+const TEMPLATES = discoverTemplates(ROOT);
+for (const { tmpl, output } of TEMPLATES) {
+  const outputLabel = output.padEnd(30);
+  if (!fs.existsSync(path.join(ROOT, tmpl))) {
+    console.log(` \u26a0\ufe0f ${outputLabel} — no template`);
+  } else if (!fs.existsSync(path.join(ROOT, output))) {
+    // A template without its generated output fails the run.
+    hasErrors = true;
+    console.log(` \u274c ${outputLabel} — generated file missing! Run: bun run gen:skill-docs`);
+  } else {
+    console.log(` \u2705 ${tmpl.padEnd(30)} \u2192 ${output}`);
+  }
+}
+// Skills without templates: informational only, never fails the run.
+const templatedOutputs = new Set(TEMPLATES.map(t => t.output));
+for (const file of SKILL_FILES) {
+  const hasSiblingTemplate = fs.existsSync(path.join(ROOT, file + '.tmpl'));
+  if (hasSiblingTemplate || templatedOutputs.has(file)) {
+    continue;
+  }
+  console.log(` \u26a0\ufe0f ${file.padEnd(30)} — no template (OK if no $B commands)`);
+}
+// ─── Host-specific Skills (Codex, Factory) ──────────────────
+/**
+ * Prints a health report for one host's generated skills directory
+ * (`<ROOT>/<hostDirName>/skills`) and records failures in `hasErrors`.
+ *
+ * For every entry in that directory the check fails when `SKILL.md` is
+ * missing or when its content contains the substring `.claude/skills`.
+ * No other validation of the file is performed. Stray regular files sitting
+ * next to the skill directories (e.g. `.DS_Store`) are skipped instead of
+ * being reported as skills with a missing SKILL.md.
+ *
+ * A missing skills directory is reported with a generation hint and is not
+ * treated as an error.
+ *
+ * @param label - Display name of the host, e.g. `Codex`.
+ * @param hostDirName - Directory under ROOT that holds `skills/`, e.g. `.agents`.
+ * @param host - Value for the `--host` flag shown in the generation hint.
+ */
+function checkHostSkills(label: string, hostDirName: string, host: string): void {
+  const skillsDir = path.join(ROOT, hostDirName, 'skills');
+  if (!fs.existsSync(skillsDir)) {
+    console.log(`\n ${label} Skills: ${hostDirName}/skills/ not found (run: bun run gen:skill-docs --host ${host})`);
+    return;
+  }
+  console.log(`\n ${label} Skills (${hostDirName}/skills/):`);
+  let present = 0;
+  let missing = 0;
+  for (const entry of fs.readdirSync(skillsDir).sort()) {
+    // lstat (not stat) so symlinked or broken skill entries still go through
+    // the SKILL.md check; only plain files are ignored.
+    if (fs.lstatSync(path.join(skillsDir, entry)).isFile()) {
+      continue;
+    }
+    const skillMd = path.join(skillsDir, entry, 'SKILL.md');
+    if (!fs.existsSync(skillMd)) {
+      missing++;
+      hasErrors = true;
+      console.log(` \u274c ${entry.padEnd(30)} — SKILL.md missing`);
+      continue;
+    }
+    present++;
+    const content = fs.readFileSync(skillMd, 'utf-8');
+    if (content.includes('.claude/skills')) {
+      hasErrors = true;
+      console.log(` \u274c ${entry.padEnd(30)} — contains .claude/skills reference`);
+    } else {
+      console.log(` \u2705 ${entry.padEnd(30)} — OK`);
+    }
+  }
+  console.log(` Total: ${present} skills, ${missing} missing`);
+}
+checkHostSkills('Codex', '.agents', 'codex');
+checkHostSkills('Factory', '.factory', 'factory');
+// ─── Freshness ──────────────────────────────────────────────
+/**
+ * Reads a captured output stream (`stdout` or `stderr`) from a value thrown
+ * by `execSync`, returning an empty string when the field is absent.
+ */
+function capturedOutput(err: unknown, stream: 'stdout' | 'stderr'): string {
+  if (typeof err !== 'object' || err === null || !(stream in err)) {
+    return '';
+  }
+  const value: unknown = (err as Record<string, unknown>)[stream];
+  return value == null ? '' : String(value);
+}
+/**
+ * Runs the skill-doc generator in `--dry-run` mode for one host and reports
+ * whether its generated files are up to date. Any failure of the command
+ * sets `hasErrors`.
+ *
+ * On failure, every stdout line starting with `STALE` is echoed. When the
+ * command failed without emitting such a line, the first line of its stderr
+ * (or of the error message) is printed so that a failing generator is not
+ * shown as staleness with no detail.
+ *
+ * @param label - Display name of the host, e.g. `Claude`.
+ * @param hostArgs - Extra generator arguments such as `--host codex`; pass an
+ *   empty string for the default host.
+ */
+function checkFreshness(label: string, hostArgs: string): void {
+  const argSuffix = hostArgs ? ` ${hostArgs}` : '';
+  console.log(`\n Freshness (${label}):`);
+  try {
+    execSync(`bun run scripts/gen-skill-docs.ts${argSuffix} --dry-run`, { cwd: ROOT, stdio: 'pipe' });
+    console.log(` \u2705 All ${label} generated files are fresh`);
+  } catch (err: unknown) {
+    hasErrors = true;
+    const staleLines = capturedOutput(err, 'stdout')
+      .split('\n')
+      .filter(line => line.startsWith('STALE'));
+    console.log(` \u274c ${label} generated files are stale:`);
+    for (const line of staleLines) {
+      console.log(` ${line}`);
+    }
+    if (staleLines.length === 0) {
+      const detail =
+        capturedOutput(err, 'stderr').trim() ||
+        (err instanceof Error ? err.message : String(err));
+      console.log(` (no STALE entries reported; generator said: ${detail.split('\n')[0]})`);
+    }
+    console.log(` Run: bun run gen:skill-docs${argSuffix}`);
+  }
+}
+checkFreshness('Claude', '');
+checkFreshness('Codex', '--host codex');
+checkFreshness('Factory', '--host factory');
+console.log('');
+// Non-zero exit when any check above failed, so CI can gate on this script.
+process.exit(hasErrors ? 1 : 0);