npm - gsd-opencode - Versions diffs - 1.22.1 → 1.33.0 - Mend

gsd-opencode 1.22.1 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

package/agents/gsd-advisor-researcher.md +112 -0
package/agents/gsd-assumptions-analyzer.md +110 -0
package/agents/gsd-codebase-mapper.md +0 -2
package/agents/gsd-debugger.md +117 -2
package/agents/gsd-doc-verifier.md +207 -0
package/agents/gsd-doc-writer.md +608 -0
package/agents/gsd-executor.md +45 -4
package/agents/gsd-integration-checker.md +0 -2
package/agents/gsd-nyquist-auditor.md +0 -2
package/agents/gsd-phase-researcher.md +191 -5
package/agents/gsd-plan-checker.md +152 -5
package/agents/gsd-planner.md +131 -157
package/agents/gsd-project-researcher.md +28 -3
package/agents/gsd-research-synthesizer.md +0 -2
package/agents/gsd-roadmapper.md +29 -2
package/agents/gsd-security-auditor.md +129 -0
package/agents/gsd-ui-auditor.md +485 -0
package/agents/gsd-ui-checker.md +305 -0
package/agents/gsd-ui-researcher.md +368 -0
package/agents/gsd-user-profiler.md +173 -0
package/agents/gsd-verifier.md +207 -22
package/commands/gsd/gsd-add-backlog.md +76 -0
package/commands/gsd/gsd-analyze-dependencies.md +34 -0
package/commands/gsd/gsd-audit-uat.md +24 -0
package/commands/gsd/gsd-autonomous.md +45 -0
package/commands/gsd/gsd-cleanup.md +5 -0
package/commands/gsd/gsd-debug.md +29 -21
package/commands/gsd/gsd-discuss-phase.md +15 -36
package/commands/gsd/gsd-do.md +30 -0
package/commands/gsd/gsd-docs-update.md +48 -0
package/commands/gsd/gsd-execute-phase.md +24 -2
package/commands/gsd/gsd-fast.md +30 -0
package/commands/gsd/gsd-forensics.md +56 -0
package/commands/gsd/gsd-help.md +2 -0
package/commands/gsd/gsd-join-discord.md +2 -1
package/commands/gsd/gsd-list-workspaces.md +19 -0
package/commands/gsd/gsd-manager.md +40 -0
package/commands/gsd/gsd-milestone-summary.md +51 -0
package/commands/gsd/gsd-new-project.md +4 -0
package/commands/gsd/gsd-new-workspace.md +44 -0
package/commands/gsd/gsd-next.md +24 -0
package/commands/gsd/gsd-note.md +34 -0
package/commands/gsd/gsd-plan-phase.md +8 -1
package/commands/gsd/gsd-plant-seed.md +28 -0
package/commands/gsd/gsd-pr-branch.md +25 -0
package/commands/gsd/gsd-profile-user.md +46 -0
package/commands/gsd/gsd-quick.md +7 -3
package/commands/gsd/gsd-reapply-patches.md +178 -45
package/commands/gsd/gsd-remove-workspace.md +26 -0
package/commands/gsd/gsd-research-phase.md +7 -12
package/commands/gsd/gsd-review-backlog.md +62 -0
package/commands/gsd/gsd-review.md +38 -0
package/commands/gsd/gsd-secure-phase.md +35 -0
package/commands/gsd/gsd-session-report.md +19 -0
package/commands/gsd/gsd-set-profile.md +24 -23
package/commands/gsd/gsd-ship.md +23 -0
package/commands/gsd/gsd-stats.md +18 -0
package/commands/gsd/gsd-thread.md +127 -0
package/commands/gsd/gsd-ui-phase.md +34 -0
package/commands/gsd/gsd-ui-review.md +32 -0
package/commands/gsd/gsd-workstreams.md +71 -0
package/get-shit-done/bin/gsd-tools.cjs +450 -90
package/get-shit-done/bin/lib/commands.cjs +489 -24
package/get-shit-done/bin/lib/config.cjs +329 -48
package/get-shit-done/bin/lib/core.cjs +1143 -102
package/get-shit-done/bin/lib/docs.cjs +267 -0
package/get-shit-done/bin/lib/frontmatter.cjs +125 -43
package/get-shit-done/bin/lib/init.cjs +918 -106
package/get-shit-done/bin/lib/milestone.cjs +65 -33
package/get-shit-done/bin/lib/model-profiles.cjs +70 -0
package/get-shit-done/bin/lib/phase.cjs +434 -404
package/get-shit-done/bin/lib/profile-output.cjs +1048 -0
package/get-shit-done/bin/lib/profile-pipeline.cjs +539 -0
package/get-shit-done/bin/lib/roadmap.cjs +156 -101
package/get-shit-done/bin/lib/schema-detect.cjs +238 -0
package/get-shit-done/bin/lib/security.cjs +384 -0
package/get-shit-done/bin/lib/state.cjs +711 -79
package/get-shit-done/bin/lib/template.cjs +2 -2
package/get-shit-done/bin/lib/uat.cjs +282 -0
package/get-shit-done/bin/lib/verify.cjs +254 -42
package/get-shit-done/bin/lib/workstream.cjs +495 -0
package/get-shit-done/references/agent-contracts.md +79 -0
package/get-shit-done/references/artifact-types.md +113 -0
package/get-shit-done/references/checkpoints.md +12 -10
package/get-shit-done/references/context-budget.md +49 -0
package/get-shit-done/references/continuation-format.md +15 -15
package/get-shit-done/references/decimal-phase-calculation.md +2 -3
package/get-shit-done/references/domain-probes.md +125 -0
package/get-shit-done/references/gate-prompts.md +100 -0
package/get-shit-done/references/git-integration.md +47 -0
package/get-shit-done/references/model-profile-resolution.md +2 -0
package/get-shit-done/references/model-profiles.md +62 -16
package/get-shit-done/references/phase-argument-parsing.md +2 -2
package/get-shit-done/references/planner-gap-closure.md +62 -0
package/get-shit-done/references/planner-reviews.md +39 -0
package/get-shit-done/references/planner-revision.md +87 -0
package/get-shit-done/references/planning-config.md +18 -1
package/get-shit-done/references/revision-loop.md +97 -0
package/get-shit-done/references/ui-brand.md +2 -2
package/get-shit-done/references/universal-anti-patterns.md +58 -0
package/get-shit-done/references/user-profiling.md +681 -0
package/get-shit-done/references/workstream-flag.md +111 -0
package/get-shit-done/templates/SECURITY.md +61 -0
package/get-shit-done/templates/UAT.md +21 -3
package/get-shit-done/templates/UI-SPEC.md +100 -0
package/get-shit-done/templates/VALIDATION.md +3 -3
package/get-shit-done/templates/claude-md.md +145 -0
package/get-shit-done/templates/config.json +14 -3
package/get-shit-done/templates/context.md +61 -6
package/get-shit-done/templates/debug-subagent-prompt.md +2 -6
package/get-shit-done/templates/dev-preferences.md +21 -0
package/get-shit-done/templates/discussion-log.md +63 -0
package/get-shit-done/templates/phase-prompt.md +46 -5
package/get-shit-done/templates/planner-subagent-prompt.md +2 -10
package/get-shit-done/templates/project.md +2 -0
package/get-shit-done/templates/state.md +2 -2
package/get-shit-done/templates/user-profile.md +146 -0
package/get-shit-done/workflows/add-phase.md +4 -4
package/get-shit-done/workflows/add-tests.md +4 -4
package/get-shit-done/workflows/add-todo.md +4 -4
package/get-shit-done/workflows/analyze-dependencies.md +96 -0
package/get-shit-done/workflows/audit-milestone.md +20 -16
package/get-shit-done/workflows/audit-uat.md +109 -0
package/get-shit-done/workflows/autonomous.md +1036 -0
package/get-shit-done/workflows/check-todos.md +4 -4
package/get-shit-done/workflows/cleanup.md +4 -4
package/get-shit-done/workflows/complete-milestone.md +22 -10
package/get-shit-done/workflows/diagnose-issues.md +21 -7
package/get-shit-done/workflows/discovery-phase.md +2 -2
package/get-shit-done/workflows/discuss-phase-assumptions.md +671 -0
package/get-shit-done/workflows/discuss-phase-power.md +291 -0
package/get-shit-done/workflows/discuss-phase.md +558 -47
package/get-shit-done/workflows/do.md +104 -0
package/get-shit-done/workflows/docs-update.md +1093 -0
package/get-shit-done/workflows/execute-phase.md +741 -58
package/get-shit-done/workflows/execute-plan.md +77 -12
package/get-shit-done/workflows/fast.md +105 -0
package/get-shit-done/workflows/forensics.md +265 -0
package/get-shit-done/workflows/health.md +28 -6
package/get-shit-done/workflows/help.md +127 -7
package/get-shit-done/workflows/insert-phase.md +4 -4
package/get-shit-done/workflows/list-phase-assumptions.md +2 -2
package/get-shit-done/workflows/list-workspaces.md +56 -0
package/get-shit-done/workflows/manager.md +363 -0
package/get-shit-done/workflows/map-codebase.md +83 -44
package/get-shit-done/workflows/milestone-summary.md +223 -0
package/get-shit-done/workflows/new-milestone.md +133 -25
package/get-shit-done/workflows/new-project.md +216 -54
package/get-shit-done/workflows/new-workspace.md +237 -0
package/get-shit-done/workflows/next.md +97 -0
package/get-shit-done/workflows/node-repair.md +92 -0
package/get-shit-done/workflows/note.md +156 -0
package/get-shit-done/workflows/pause-work.md +132 -15
package/get-shit-done/workflows/plan-milestone-gaps.md +6 -7
package/get-shit-done/workflows/plan-phase.md +513 -62
package/get-shit-done/workflows/plant-seed.md +169 -0
package/get-shit-done/workflows/pr-branch.md +129 -0
package/get-shit-done/workflows/profile-user.md +450 -0
package/get-shit-done/workflows/progress.md +154 -29
package/get-shit-done/workflows/quick.md +285 -111
package/get-shit-done/workflows/remove-phase.md +2 -2
package/get-shit-done/workflows/remove-workspace.md +90 -0
package/get-shit-done/workflows/research-phase.md +13 -9
package/get-shit-done/workflows/resume-project.md +37 -18
package/get-shit-done/workflows/review.md +281 -0
package/get-shit-done/workflows/secure-phase.md +154 -0
package/get-shit-done/workflows/session-report.md +146 -0
package/get-shit-done/workflows/set-profile.md +2 -2
package/get-shit-done/workflows/settings.md +91 -11
package/get-shit-done/workflows/ship.md +237 -0
package/get-shit-done/workflows/stats.md +60 -0
package/get-shit-done/workflows/transition.md +150 -23
package/get-shit-done/workflows/ui-phase.md +292 -0
package/get-shit-done/workflows/ui-review.md +183 -0
package/get-shit-done/workflows/update.md +262 -30
package/get-shit-done/workflows/validate-phase.md +14 -17
package/get-shit-done/workflows/verify-phase.md +143 -11
package/get-shit-done/workflows/verify-work.md +141 -39
package/package.json +1 -1
package/skills/gsd-audit-milestone/SKILL.md +29 -0
package/skills/gsd-cleanup/SKILL.md +19 -0
package/skills/gsd-complete-milestone/SKILL.md +131 -0
package/skills/gsd-discuss-phase/SKILL.md +54 -0
package/skills/gsd-execute-phase/SKILL.md +49 -0
package/skills/gsd-plan-phase/SKILL.md +37 -0
package/skills/gsd-ui-phase/SKILL.md +24 -0
package/skills/gsd-ui-review/SKILL.md +24 -0
package/skills/gsd-verify-work/SKILL.md +30 -0

package/get-shit-done/workflows/verify-phase.md CHANGED Viewed

@@ -1,8 +1,8 @@
-<purpose>
+<objective>
 Verify phase goal achievement through goal-backward analysis. Check that the codebase delivers what the phase promised, not just that tasks completed.
 Executed by a verification subagent spawned from execute-phase.md.
-</purpose>
+</objective>
 <core_principle>
 **Task completion ≠ Goal achievement**
@@ -13,6 +13,7 @@ Goal-backward verification:
 1. What must be TRUE for the goal to be achieved?
 2. What must EXIST for those truths to hold?
 3. What must be WIRED for those artifacts to function?
+4. What must TESTS PROVE for those truths to be evidenced?
 Then verify each level against the actual codebase.
 </core_principle>
@@ -37,11 +38,16 @@ Extract from init JSON: `phase_dir`, `phase_number`, `phase_name`, `has_plans`,
 Then load phase details and list plans/summaries:
 ```bash
 node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap get-phase "${phase_number}"
-grep -E "^| ${phase_number}" .planning/REQUIREMENTS.md 2>/dev/null
-ls "$phase_dir"/*-SUMMARY.md "$phase_dir"/*-PLAN.md 2>/dev/null
+grep -E "^| ${phase_number}" .planning/REQUIREMENTS.md 2>/dev/null || true
+ls "$phase_dir"/*-SUMMARY.md "$phase_dir"/*-PLAN.md 2>/dev/null || true
 ```
-Extract **phase goal** from ROADMAP.md (the outcome to verify, not tasks) and **requirements** from REQUIREMENTS.md if it exists.
+Load full milestone phases for deferred-item filtering (Step 9b):
+```bash
+node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap analyze
+```
+Extract **phase goal** from ROADMAP.md (the outcome to verify, not tasks), **requirements** from REQUIREMENTS.md if it exists, and **all milestone phases** from roadmap analyze (for cross-referencing gaps against later phases).
 </step>
 <step name="establish_must_haves">
@@ -126,6 +132,17 @@ WIRED = imported AND used. ORPHANED = exists but not imported/used.
 | ✓ | ✓ | ✗ | ⚠️ ORPHANED |
 | ✓ | ✗ | - | ✗ STUB |
 | ✗ | - | - | ✗ MISSING |
+**Export-level spot check (WARNING severity):**
+For artifacts that pass Level 3, spot-check individual exports:
+- Extract key exported symbols (functions, constants, classes — skip types/interfaces)
+- For each, grep for usage outside the defining file
+- Flag exports with zero external call sites as "exported but unused"
+This catches dead stores like `setPlan()` that exist in a wired file but are
+never actually called. Report as WARNING — may indicate incomplete cross-plan
+wiring or leftover code from plan revisions.
 </step>
 <step name="verify_wiring">
@@ -160,7 +177,7 @@ Record status and evidence for each key link.
 <step name="verify_requirements">
 If REQUIREMENTS.md exists:
 ```bash
-grep -E "Phase ${PHASE_NUM}" .planning/REQUIREMENTS.md 2>/dev/null
+grep -E "Phase ${PHASE_NUM}" .planning/REQUIREMENTS.md 2>/dev/null || true
 ```
 For each requirement: parse description → identify supporting truths/artifacts → status: ✓ SATISFIED / ✗ BLOCKED / ? NEEDS HUMAN.
@@ -179,6 +196,93 @@ Extract files modified in this phase from SUMMARY.md, scan each:
 Categorize: 🛑 Blocker (prevents goal) | ⚠️ Warning (incomplete) | ℹ️ Info (notable).
 </step>
+<step name="audit_test_quality">
+**Verify that tests PROVE what they claim to prove.**
+This step catches test-level deceptions that pass all prior checks: files exist, are substantive, are wired, and tests pass — but the tests don't actually validate the requirement.
+**1. Identify requirement-linked test files**
+From PLAN and SUMMARY files, map each requirement to the test files that are supposed to prove it.
+**2. Disabled test scan**
+For ALL test files linked to requirements, search for disabled/skipped patterns:
+```bash
+grep -rn -E "it\.skip|describe\.skip|test\.skip|xit\(|xdescribe\(|xtest\(|@pytest\.mark\.skip|@unittest\.skip|#\[ignore\]|\.pending|it\.todo|test\.todo" "$TEST_FILE"
+```
+**Rule:** A disabled test linked to a requirement = requirement NOT tested.
+- 🛑 BLOCKER if the disabled test is the only test proving that requirement
+- ⚠️ WARNING if other active tests also cover the requirement
+**3. Circular test detection**
+Search for scripts/utilities that generate expected values by running the system under test:
+```bash
+grep -rn -E "writeFileSync|writeFile|fs\.write|open\(.*w\)" "$TEST_DIRS"
+```
+For each match, check if it also imports the system/service/module being tested. If a script both imports the system-under-test AND writes expected output values → CIRCULAR.
+**Circular test indicators:**
+- Script imports a service AND writes to fixture files
+- Expected values have comments like "computed from engine", "captured from baseline"
+- Script filename contains "capture", "baseline", "generate", "snapshot" in test context
+- Expected values were added in the same commit as the test assertions
+**Rule:** A test comparing system output against values generated by the same system is circular. It proves consistency, not correctness.
+**4. Expected value provenance** (for comparison/parity/migration requirements)
+When a requirement demands comparison with an external source ("identical to X", "matches Y", "same output as Z"):
+- Is the external source actually invoked or referenced in the test pipeline?
+- Do fixture files contain data sourced from the external system?
+- Or do all expected values come from the new system itself or from mathematical formulas?
+**Provenance classification:**
+- VALID: Expected value from external/legacy system output, manual capture, or independent oracle
+- PARTIAL: Expected value from mathematical derivation (proves formula, not system match)
+- CIRCULAR: Expected value from the system being tested
+- UNKNOWN: No provenance information — treat as SUSPECT
+**5. Assertion strength**
+For each test linked to a requirement, classify the strongest assertion:
+| Level | Examples | Proves |
+|-------|---------|--------|
+| Existence | `toBeDefined()`, `!= null` | Something returned |
+| Type | `typeof x === 'number'` | Correct shape |
+| Status | `code === 200` | No error |
+| Value | `toEqual(expected)`, `toBeCloseTo(x)` | Specific value |
+| Behavioral | Multi-step workflow assertions | End-to-end correctness |
+If a requirement demands value-level or behavioral-level proof and the test only has existence/type/status assertions → INSUFFICIENT.
+**6. Coverage quantity**
+If a requirement specifies a quantity of test cases (e.g., "30 calculations"), check if the actual number of active (non-skipped) test cases meets the requirement.
+**Reporting — add to VERIFICATION.md:**
+```markdown
+### Test Quality Audit
+| Test File | Linked Req | Active | Skipped | Circular | Assertion Level | Verdict |
+|-----------|-----------|--------|---------|----------|----------------|---------|
+**Disabled tests on requirements:** {N} → {BLOCKER if any req has ONLY disabled tests}
+**Circular patterns detected:** {N} → {BLOCKER if any}
+**Insufficient assertions:** {N} → {WARNING}
+```
+**Impact on status:** Any BLOCKER from test quality audit → overall status = `gaps_found`, regardless of other checks passing.
+</step>
 <step name="identify_human_verification">
 **Always needs human:** Visual appearance, user flow completion, real-time behavior (WebSocket/SSE), external service integration, performance feel, error message clarity.
@@ -188,15 +292,41 @@ Format each as: Test Name → What to do → Expected result → Why can't verif
 </step>
 <step name="determine_status">
-**passed:** All truths VERIFIED, all artifacts pass levels 1-3, all key links WIRED, no blocker anti-patterns.
+Classify status using this decision tree IN ORDER (most restrictive first):
+1. IF any truth FAILED, artifact MISSING/STUB, key link NOT_WIRED, blocker found, **or test quality audit found blockers (disabled requirement tests, circular tests)**:
+   → **gaps_found**
+2. IF the previous step produced ANY human verification items:
+   → **human_needed** (even if all truths VERIFIED and score is N/N)
-**gaps_found:** Any truth FAILED, artifact MISSING/STUB, key link NOT_WIRED, or blocker found.
+3. IF all checks pass AND no human verification items:
+   → **passed**
-**human_needed:** All automated checks pass but human verification items remain.
+**passed is ONLY valid when no human verification items exist.**
 **Score:** `verified_truths / total_truths`
 </step>
+<step name="filter_deferred_items">
+Before reporting gaps, cross-reference each gap against later phases in the milestone using the full roadmap data loaded in load_context (from `roadmap analyze`).
+For each potential gap identified in determine_status:
+1. Check if the gap's failed truth or missing item is covered by a later phase's goal or success criteria
+2. **Match criteria:** The gap's concern appears in a later phase's goal text, success criteria text, or the later phase's name clearly suggests it covers this area
+3. If a clear match is found → move the gap to a `deferred` list with the matching phase reference and evidence text
+4. If no match in any later phase → keep as a real `gap`
+**Important:** Be conservative. Only defer a gap when there is clear, specific evidence in a later phase. Vague or tangential matches should NOT cause deferral — when in doubt, keep it as a real gap.
+**Deferred items do NOT affect the status determination.** Recalculate after filtering:
+- If gaps list is now empty and no human items exist → `passed`
+- If gaps list is now empty but human items exist → `human_needed`
+- If gaps list still has items → `gaps_found`
+Include deferred items in VERIFICATION.md frontmatter (`deferred:` section) and body (Deferred Items table) for transparency. If no deferred items exist, omit these sections.
+</step>
 <step name="generate_fix_plans">
 If gaps_found:
@@ -204,7 +334,7 @@ If gaps_found:
 2. **Generate plan per cluster:** Objective, 2-3 tasks (files/action/verify each), re-verify step. Keep focused: single concern per plan.
-3. **Order by dependency:** Fix missing → fix stubs → fix wiring → verify.
+3. **Order by dependency:** Fix missing → fix stubs → fix wiring → **fix test evidence** → verify.
 </step>
 <step name="create_report">
@@ -235,9 +365,11 @@ Orchestrator routes: `passed` → update_roadmap | `gaps_found` → create/execu
 - [ ] All key links verified
 - [ ] Requirements coverage assessed (if applicable)
 - [ ] Anti-patterns scanned and categorized
+- [ ] Test quality audited (disabled tests, circular patterns, assertion strength, provenance)
 - [ ] Human verification items identified
 - [ ] Overall status determined
-- [ ] Fix plans generated (if gaps_found)
+- [ ] Deferred items filtered against later milestone phases (if gaps found)
+- [ ] Fix plans generated (if gaps_found after filtering)
 - [ ] VERIFICATION.md created with complete report
 - [ ] Results returned to orchestrator
 </success_criteria>

package/get-shit-done/workflows/verify-work.md CHANGED Viewed

@@ -1,8 +1,14 @@
-<purpose>
+<objective>
 Validate built features through conversational testing with persistent state. Creates UAT.md that tracks test progress, survives /new, and feeds gaps into /gsd-plan-phase --gaps.
 User tests, OpenCode records. One test at a time. Plain text responses.
-</purpose>
+</objective>
+<available_agent_types>
+Valid GSD subagent types (use exact names — do not fall back to 'general'):
+- gsd-planner — Creates detailed plans from phase scope
+- gsd-plan-checker — Reviews plan quality before execution
+</available_agent_types>
 <philosophy>
 **Show expected, ask if reality matches.**
@@ -26,16 +32,18 @@ If $ARGUMENTS contains a phase number, load context:
 ```bash
 INIT=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" init verify-work "${PHASE_ARG}")
 if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
+AGENT_SKILLS_PLANNER=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" agent-skills gsd-planner 2>/dev/null)
+AGENT_SKILLS_CHECKER=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" agent-skills gsd-checker 2>/dev/null)
 ```
-Parse JSON for: `planner_model`, `checker_model`, `commit_docs`, `phase_found`, `phase_dir`, `phase_number`, `phase_name`, `has_verification`.
+Parse JSON for: `planner_model`, `checker_model`, `commit_docs`, `phase_found`, `phase_dir`, `phase_number`, `phase_name`, `has_verification`, `uat_path`.
 </step>
 <step name="check_active_session">
 **First: Check for active UAT sessions**
 ```bash
-find .planning/phases -name "*-UAT.md" -type f 2>/dev/null | head -5
+(find .planning/phases -name "*-UAT.md" -type f 2>/dev/null || true) | head -5
 ```
 **If active sessions exist AND no $ARGUMENTS provided:**
@@ -78,13 +86,49 @@ Provide a phase number to start testing (e.g., /gsd-verify-work 4)
 Continue to `create_uat_file`.
 </step>
+<step name="automated_ui_verification">
+**Automated UI Verification (when Playwright-MCP is available)**
+Before running manual UAT, check whether this phase has a UI component and whether
+`mcp__playwright__*` or `mcp__puppeteer__*` tools are available in the current session.
+```
+UI_PHASE_FLAG=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" config-get workflow.ui_phase --raw 2>/dev/null || echo "true")
+UI_SPEC_FILE=$(ls "${PHASE_DIR}"/*-UI-SPEC.md 2>/dev/null | head -1)
+```
+**If Playwright-MCP tools are available in this session (`mcp__playwright__*` tools
+respond to tool calls) AND (`UI_PHASE_FLAG` is `true` OR `UI_SPEC_FILE` is non-empty):**
+For each UI checkpoint listed in the phase's UI-SPEC.md (or inferred from SUMMARY.md):
+1. Use `mcp__playwright__navigate` (or equivalent) to open the component's URL.
+2. Use `mcp__playwright__screenshot` to capture a screenshot.
+3. Compare the screenshot visually against the spec's stated requirements
+   (dimensions, color, layout, spacing).
+4. Automatically mark checkpoints as **passed** or **needs review** based on the
+   visual comparison — no manual question required for items that clearly match.
+5. Flag items that require human judgment (subjective aesthetics, content accuracy)
+   and present only those as manual UAT questions.
+If automated verification is not available, fall back to the standard manual
+checkpoint questions defined in this workflow unchanged. This step is entirely
+conditional: if Playwright-MCP is not configured, behavior is unchanged from today.
+**Display summary line before proceeding:**
+```
+UI checkpoints: {N} auto-verified, {M} queued for manual review
+```
+</step>
 <step name="find_summaries">
 **Find what to test:**
 Use `phase_dir` from init (or run init if not already done).
 ```bash
-ls "$phase_dir"/*-SUMMARY.md 2>/dev/null
+ls "$phase_dir"/*-SUMMARY.md 2>/dev/null || true
 ```
 Read each SUMMARY.md to extract testable deliverables.
@@ -186,23 +230,23 @@ Proceed to `present_test`.
 <step name="present_test">
 **Present current test to user:**
-read Current Test section from UAT file.
-Display using checkpoint box format:
+Render the checkpoint from the structured UAT file instead of composing it freehand:
+```bash
+CHECKPOINT=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" uat render-checkpoint --file "$uat_path" --raw)
+if [[ "$CHECKPOINT" == @file:* ]]; then CHECKPOINT=$(cat "${CHECKPOINT#@file:}"); fi
 ```
-╔══════════════════════════════════════════════════════════════╗
-║  CHECKPOINT: Verification Required                           ║
-╚══════════════════════════════════════════════════════════════╝
-**Test {number}: {name}**
+Display the returned checkpoint EXACTLY as-is:
-{expected}
-──────────────────────────────────────────────────────────────
-→ Type "pass" or describe what's wrong
-──────────────────────────────────────────────────────────────
 ```
+{CHECKPOINT}
+```
+**Critical response hygiene:**
+- Your entire response MUST equal `{CHECKPOINT}` byte-for-byte.
+- Do NOT add commentary before or after the block.
+- If you notice protocol/meta markers such as `to=all:`, role-routing text, XML system tags, hidden instruction markers, ad copy, or any unrelated suffix, discard the draft and output `{CHECKPOINT}` only.
 Wait for user response (plain text, no question).
 </step>
@@ -231,6 +275,29 @@ result: skipped
 reason: [user's reason if provided]
 ```
+**If response indicates blocked:**
+- "blocked", "can't test - server not running", "need physical device", "need release build"
+- Or any response containing: "server", "blocked", "not running", "physical device", "release build"
+Infer blocked_by tag from response:
+- Contains: server, not running, gateway, API → `server`
+- Contains: physical, device, hardware, real phone → `physical-device`
+- Contains: release, preview, build, EAS → `release-build`
+- Contains: stripe, twilio, third-party, configure → `third-party`
+- Contains: depends on, prior phase, prerequisite → `prior-phase`
+- Default: `other`
+Update Tests section:
+```
+### {N}. {name}
+expected: {expected}
+result: blocked
+blocked_by: {inferred tag}
+reason: "{verbatim user response}"
+```
+Note: Blocked tests do NOT go into the Gaps section (they aren't code issues — they're prerequisite gates).
 **If response is anything else:**
 - Treat as issue description
@@ -293,8 +360,24 @@ Proceed to `present_test`.
 <step name="complete_session">
 **Complete testing and commit:**
+**Determine final status:**
+Count results:
+- `pending_count`: tests with `result: [pending]`
+- `blocked_count`: tests with `result: blocked`
+- `skipped_no_reason`: tests with `result: skipped` and no `reason` field
+```
+if pending_count > 0 OR blocked_count > 0 OR skipped_no_reason > 0:
+  status: partial
+  # Session ended but not all tests resolved
+else:
+  status: complete
+  # All tests have a definitive result (pass, issue, or skipped-with-reason)
+```
 Update frontmatter:
-- status: complete
+- status: {computed status}
 - updated: [now]
 Clear Current Test section:
@@ -328,11 +411,39 @@ Present summary:
 **If issues > 0:** Proceed to `diagnose_issues`
 **If issues == 0:**
+```bash
+SECURITY_CFG=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" config-get workflow.security_enforcement --raw 2>/dev/null || echo "true")
+SECURITY_FILE=$(ls "${PHASE_DIR}"/*-SECURITY.md 2>/dev/null | head -1)
+```
+If `SECURITY_CFG` is `true` AND `SECURITY_FILE` is empty:
+```
+⚠ Security enforcement enabled — /gsd-secure-phase {phase} has not run.
+Run before advancing to the next phase.
+All tests passed. Ready to continue.
+- `/gsd-secure-phase {phase}` — security review (required before advancing)
+- `/gsd-plan-phase {next}` — Plan next phase
+- `/gsd-execute-phase {next}` — Execute next phase
+- `/gsd-ui-review {phase}` — visual quality audit (if frontend files were modified)
+```
+If `SECURITY_CFG` is `true` AND `SECURITY_FILE` exists: check frontmatter `threats_open`. If > 0:
+```
+⚠ Security gate: {threats_open} threats open
+  /gsd-secure-phase {phase} — resolve before advancing
+```
+If `SECURITY_CFG` is `false` OR (`SECURITY_FILE` exists AND `threats_open` is `0`):
 ```
 All tests passed. Ready to continue.
 - `/gsd-plan-phase {next}` — Plan next phase
 - `/gsd-execute-phase {next}` — Execute next phase
+- `/gsd-secure-phase {phase}` — security review
+- `/gsd-ui-review {phase}` — visual quality audit (if frontend files were modified)
 ```
 </step>
@@ -372,8 +483,7 @@ Display:
 Spawn gsd-planner in --gaps mode:
 ```
-task(
-  prompt="""
+@gsd-planner """
 <planning_context>
 **Phase:** {phase_number}
@@ -385,17 +495,15 @@ task(
 - .planning/ROADMAP.md (Roadmap)
 </files_to_read>
+${AGENT_SKILLS_PLANNER}
 </planning_context>
 <downstream_consumer>
 Output consumed by /gsd-execute-phase
 Plans must be executable prompts.
 </downstream_consumer>
-""",
-  subagent_type="gsd-planner",
-  model="{planner_model}",
-  description="Plan gap fixes for Phase {phase}"
-)
+"""
 ```
 On return:
@@ -420,8 +528,7 @@ Initialize: `iteration_count = 1`
 Spawn gsd-plan-checker:
 ```
-task(
-  prompt="""
+@gsd-plan-checker """
 <verification_context>
 **Phase:** {phase_number}
@@ -431,6 +538,8 @@ task(
 - {phase_dir}/*-PLAN.md (Plans to verify)
 </files_to_read>
+${AGENT_SKILLS_CHECKER}
 </verification_context>
 <expected_output>
@@ -438,11 +547,7 @@ Return one of:
 - ## VERIFICATION PASSED — all checks pass
 - ## ISSUES FOUND — structured issue list
 </expected_output>
-""",
-  subagent_type="gsd-plan-checker",
-  model="{checker_model}",
-  description="Verify Phase {phase} fix plans"
-)
+"""
 ```
 On return:
@@ -460,8 +565,7 @@ Display: `Sending back to planner for revision... (iteration {N}/3)`
 Spawn gsd-planner with revision context:
 ```
-task(
-  prompt="""
+@gsd-planner """
 <revision_context>
 **Phase:** {phase_number}
@@ -471,6 +575,8 @@ task(
 - {phase_dir}/*-PLAN.md (Existing plans)
 </files_to_read>
+${AGENT_SKILLS_PLANNER}
 **Checker issues:**
 {structured_issues_from_checker}
@@ -480,11 +586,7 @@ task(
 Read existing PLAN.md files. Make targeted updates to address checker issues.
 Do NOT replan from scratch unless issues are fundamental.
 </instructions>
-""",
-  subagent_type="gsd-planner",
-  model="{planner_model}",
-  description="Revise Phase {phase} plans"
-)
+"""
 ```
 After planner returns → spawn checker again (verify_gap_plans logic)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "gsd-opencode",
-  "version": "1.22.1",
+  "version": "1.33.0",
   "description": "GSD-OpenCode distribution manager - install, verify, and maintain your GSD-OpenCode installation",
   "type": "module",
   "main": "bin/gsd.js",

package/skills/gsd-audit-milestone/SKILL.md ADDED Viewed

@@ -0,0 +1,29 @@
+---
+name: gsd-audit-milestone
+description: Implementation of gsd-audit-milestone command
+---
+<objective>
+Verify milestone achieved its definition of done. Check requirements coverage, cross-phase integration, and end-to-end flows.
+**This command IS the orchestrator.** Reads existing VERIFICATION.md files (phases already verified during execute-phase), aggregates tech debt and deferred gaps, then spawns integration checker for cross-phase wiring.
+</objective>
+<execution_context>
+@$HOME/.config/opencode/get-shit-done/workflows/audit-milestone.md
+</execution_context>
+<context>
+Version: $ARGUMENTS (optional — defaults to current milestone)
+Core planning files are resolved in-workflow (`init milestone-op`) and loaded only as needed.
+**Completed Work:**
+glob: .planning/phases/*/*-SUMMARY.md
+glob: .planning/phases/*/*-VERIFICATION.md
+</context>
+<process>
+Execute the audit-milestone workflow from @$HOME/.config/opencode/get-shit-done/workflows/audit-milestone.md end-to-end.
+Preserve all workflow gates (scope determination, verification reading, integration check, requirements coverage, routing).
+</process>

package/skills/gsd-cleanup/SKILL.md ADDED Viewed

@@ -0,0 +1,19 @@
+---
+name: gsd-cleanup
+description: Implementation of gsd-cleanup command
+---
+<objective>
+Archive phase directories from completed milestones into `.planning/milestones/v{X.Y}-phases/`.
+Use when `.planning/phases/` has accumulated directories from past milestones.
+</objective>
+<execution_context>
+@$HOME/.config/opencode/get-shit-done/workflows/cleanup.md
+</execution_context>
+<process>
+Follow the cleanup workflow at @$HOME/.config/opencode/get-shit-done/workflows/cleanup.md.
+Identify completed milestones, show a dry-run summary, and archive on confirmation.
+</process>