gsd-opencode 1.22.1 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. package/agents/gsd-advisor-researcher.md +112 -0
  2. package/agents/gsd-assumptions-analyzer.md +110 -0
  3. package/agents/gsd-codebase-mapper.md +0 -2
  4. package/agents/gsd-debugger.md +117 -2
  5. package/agents/gsd-doc-verifier.md +207 -0
  6. package/agents/gsd-doc-writer.md +608 -0
  7. package/agents/gsd-executor.md +45 -4
  8. package/agents/gsd-integration-checker.md +0 -2
  9. package/agents/gsd-nyquist-auditor.md +0 -2
  10. package/agents/gsd-phase-researcher.md +191 -5
  11. package/agents/gsd-plan-checker.md +152 -5
  12. package/agents/gsd-planner.md +131 -157
  13. package/agents/gsd-project-researcher.md +28 -3
  14. package/agents/gsd-research-synthesizer.md +0 -2
  15. package/agents/gsd-roadmapper.md +29 -2
  16. package/agents/gsd-security-auditor.md +129 -0
  17. package/agents/gsd-ui-auditor.md +485 -0
  18. package/agents/gsd-ui-checker.md +305 -0
  19. package/agents/gsd-ui-researcher.md +368 -0
  20. package/agents/gsd-user-profiler.md +173 -0
  21. package/agents/gsd-verifier.md +207 -22
  22. package/commands/gsd/gsd-add-backlog.md +76 -0
  23. package/commands/gsd/gsd-analyze-dependencies.md +34 -0
  24. package/commands/gsd/gsd-audit-uat.md +24 -0
  25. package/commands/gsd/gsd-autonomous.md +45 -0
  26. package/commands/gsd/gsd-cleanup.md +5 -0
  27. package/commands/gsd/gsd-debug.md +29 -21
  28. package/commands/gsd/gsd-discuss-phase.md +15 -36
  29. package/commands/gsd/gsd-do.md +30 -0
  30. package/commands/gsd/gsd-docs-update.md +48 -0
  31. package/commands/gsd/gsd-execute-phase.md +24 -2
  32. package/commands/gsd/gsd-fast.md +30 -0
  33. package/commands/gsd/gsd-forensics.md +56 -0
  34. package/commands/gsd/gsd-help.md +2 -0
  35. package/commands/gsd/gsd-join-discord.md +2 -1
  36. package/commands/gsd/gsd-list-workspaces.md +19 -0
  37. package/commands/gsd/gsd-manager.md +40 -0
  38. package/commands/gsd/gsd-milestone-summary.md +51 -0
  39. package/commands/gsd/gsd-new-project.md +4 -0
  40. package/commands/gsd/gsd-new-workspace.md +44 -0
  41. package/commands/gsd/gsd-next.md +24 -0
  42. package/commands/gsd/gsd-note.md +34 -0
  43. package/commands/gsd/gsd-plan-phase.md +8 -1
  44. package/commands/gsd/gsd-plant-seed.md +28 -0
  45. package/commands/gsd/gsd-pr-branch.md +25 -0
  46. package/commands/gsd/gsd-profile-user.md +46 -0
  47. package/commands/gsd/gsd-quick.md +7 -3
  48. package/commands/gsd/gsd-reapply-patches.md +178 -45
  49. package/commands/gsd/gsd-remove-workspace.md +26 -0
  50. package/commands/gsd/gsd-research-phase.md +7 -12
  51. package/commands/gsd/gsd-review-backlog.md +62 -0
  52. package/commands/gsd/gsd-review.md +38 -0
  53. package/commands/gsd/gsd-secure-phase.md +35 -0
  54. package/commands/gsd/gsd-session-report.md +19 -0
  55. package/commands/gsd/gsd-set-profile.md +24 -23
  56. package/commands/gsd/gsd-ship.md +23 -0
  57. package/commands/gsd/gsd-stats.md +18 -0
  58. package/commands/gsd/gsd-thread.md +127 -0
  59. package/commands/gsd/gsd-ui-phase.md +34 -0
  60. package/commands/gsd/gsd-ui-review.md +32 -0
  61. package/commands/gsd/gsd-workstreams.md +71 -0
  62. package/get-shit-done/bin/gsd-tools.cjs +450 -90
  63. package/get-shit-done/bin/lib/commands.cjs +489 -24
  64. package/get-shit-done/bin/lib/config.cjs +329 -48
  65. package/get-shit-done/bin/lib/core.cjs +1143 -102
  66. package/get-shit-done/bin/lib/docs.cjs +267 -0
  67. package/get-shit-done/bin/lib/frontmatter.cjs +125 -43
  68. package/get-shit-done/bin/lib/init.cjs +918 -106
  69. package/get-shit-done/bin/lib/milestone.cjs +65 -33
  70. package/get-shit-done/bin/lib/model-profiles.cjs +70 -0
  71. package/get-shit-done/bin/lib/phase.cjs +434 -404
  72. package/get-shit-done/bin/lib/profile-output.cjs +1048 -0
  73. package/get-shit-done/bin/lib/profile-pipeline.cjs +539 -0
  74. package/get-shit-done/bin/lib/roadmap.cjs +156 -101
  75. package/get-shit-done/bin/lib/schema-detect.cjs +238 -0
  76. package/get-shit-done/bin/lib/security.cjs +384 -0
  77. package/get-shit-done/bin/lib/state.cjs +711 -79
  78. package/get-shit-done/bin/lib/template.cjs +2 -2
  79. package/get-shit-done/bin/lib/uat.cjs +282 -0
  80. package/get-shit-done/bin/lib/verify.cjs +254 -42
  81. package/get-shit-done/bin/lib/workstream.cjs +495 -0
  82. package/get-shit-done/references/agent-contracts.md +79 -0
  83. package/get-shit-done/references/artifact-types.md +113 -0
  84. package/get-shit-done/references/checkpoints.md +12 -10
  85. package/get-shit-done/references/context-budget.md +49 -0
  86. package/get-shit-done/references/continuation-format.md +15 -15
  87. package/get-shit-done/references/decimal-phase-calculation.md +2 -3
  88. package/get-shit-done/references/domain-probes.md +125 -0
  89. package/get-shit-done/references/gate-prompts.md +100 -0
  90. package/get-shit-done/references/git-integration.md +47 -0
  91. package/get-shit-done/references/model-profile-resolution.md +2 -0
  92. package/get-shit-done/references/model-profiles.md +62 -16
  93. package/get-shit-done/references/phase-argument-parsing.md +2 -2
  94. package/get-shit-done/references/planner-gap-closure.md +62 -0
  95. package/get-shit-done/references/planner-reviews.md +39 -0
  96. package/get-shit-done/references/planner-revision.md +87 -0
  97. package/get-shit-done/references/planning-config.md +18 -1
  98. package/get-shit-done/references/revision-loop.md +97 -0
  99. package/get-shit-done/references/ui-brand.md +2 -2
  100. package/get-shit-done/references/universal-anti-patterns.md +58 -0
  101. package/get-shit-done/references/user-profiling.md +681 -0
  102. package/get-shit-done/references/workstream-flag.md +111 -0
  103. package/get-shit-done/templates/SECURITY.md +61 -0
  104. package/get-shit-done/templates/UAT.md +21 -3
  105. package/get-shit-done/templates/UI-SPEC.md +100 -0
  106. package/get-shit-done/templates/VALIDATION.md +3 -3
  107. package/get-shit-done/templates/claude-md.md +145 -0
  108. package/get-shit-done/templates/config.json +14 -3
  109. package/get-shit-done/templates/context.md +61 -6
  110. package/get-shit-done/templates/debug-subagent-prompt.md +2 -6
  111. package/get-shit-done/templates/dev-preferences.md +21 -0
  112. package/get-shit-done/templates/discussion-log.md +63 -0
  113. package/get-shit-done/templates/phase-prompt.md +46 -5
  114. package/get-shit-done/templates/planner-subagent-prompt.md +2 -10
  115. package/get-shit-done/templates/project.md +2 -0
  116. package/get-shit-done/templates/state.md +2 -2
  117. package/get-shit-done/templates/user-profile.md +146 -0
  118. package/get-shit-done/workflows/add-phase.md +4 -4
  119. package/get-shit-done/workflows/add-tests.md +4 -4
  120. package/get-shit-done/workflows/add-todo.md +4 -4
  121. package/get-shit-done/workflows/analyze-dependencies.md +96 -0
  122. package/get-shit-done/workflows/audit-milestone.md +20 -16
  123. package/get-shit-done/workflows/audit-uat.md +109 -0
  124. package/get-shit-done/workflows/autonomous.md +1036 -0
  125. package/get-shit-done/workflows/check-todos.md +4 -4
  126. package/get-shit-done/workflows/cleanup.md +4 -4
  127. package/get-shit-done/workflows/complete-milestone.md +22 -10
  128. package/get-shit-done/workflows/diagnose-issues.md +21 -7
  129. package/get-shit-done/workflows/discovery-phase.md +2 -2
  130. package/get-shit-done/workflows/discuss-phase-assumptions.md +671 -0
  131. package/get-shit-done/workflows/discuss-phase-power.md +291 -0
  132. package/get-shit-done/workflows/discuss-phase.md +558 -47
  133. package/get-shit-done/workflows/do.md +104 -0
  134. package/get-shit-done/workflows/docs-update.md +1093 -0
  135. package/get-shit-done/workflows/execute-phase.md +741 -58
  136. package/get-shit-done/workflows/execute-plan.md +77 -12
  137. package/get-shit-done/workflows/fast.md +105 -0
  138. package/get-shit-done/workflows/forensics.md +265 -0
  139. package/get-shit-done/workflows/health.md +28 -6
  140. package/get-shit-done/workflows/help.md +127 -7
  141. package/get-shit-done/workflows/insert-phase.md +4 -4
  142. package/get-shit-done/workflows/list-phase-assumptions.md +2 -2
  143. package/get-shit-done/workflows/list-workspaces.md +56 -0
  144. package/get-shit-done/workflows/manager.md +363 -0
  145. package/get-shit-done/workflows/map-codebase.md +83 -44
  146. package/get-shit-done/workflows/milestone-summary.md +223 -0
  147. package/get-shit-done/workflows/new-milestone.md +133 -25
  148. package/get-shit-done/workflows/new-project.md +216 -54
  149. package/get-shit-done/workflows/new-workspace.md +237 -0
  150. package/get-shit-done/workflows/next.md +97 -0
  151. package/get-shit-done/workflows/node-repair.md +92 -0
  152. package/get-shit-done/workflows/note.md +156 -0
  153. package/get-shit-done/workflows/pause-work.md +132 -15
  154. package/get-shit-done/workflows/plan-milestone-gaps.md +6 -7
  155. package/get-shit-done/workflows/plan-phase.md +513 -62
  156. package/get-shit-done/workflows/plant-seed.md +169 -0
  157. package/get-shit-done/workflows/pr-branch.md +129 -0
  158. package/get-shit-done/workflows/profile-user.md +450 -0
  159. package/get-shit-done/workflows/progress.md +154 -29
  160. package/get-shit-done/workflows/quick.md +285 -111
  161. package/get-shit-done/workflows/remove-phase.md +2 -2
  162. package/get-shit-done/workflows/remove-workspace.md +90 -0
  163. package/get-shit-done/workflows/research-phase.md +13 -9
  164. package/get-shit-done/workflows/resume-project.md +37 -18
  165. package/get-shit-done/workflows/review.md +281 -0
  166. package/get-shit-done/workflows/secure-phase.md +154 -0
  167. package/get-shit-done/workflows/session-report.md +146 -0
  168. package/get-shit-done/workflows/set-profile.md +2 -2
  169. package/get-shit-done/workflows/settings.md +91 -11
  170. package/get-shit-done/workflows/ship.md +237 -0
  171. package/get-shit-done/workflows/stats.md +60 -0
  172. package/get-shit-done/workflows/transition.md +150 -23
  173. package/get-shit-done/workflows/ui-phase.md +292 -0
  174. package/get-shit-done/workflows/ui-review.md +183 -0
  175. package/get-shit-done/workflows/update.md +262 -30
  176. package/get-shit-done/workflows/validate-phase.md +14 -17
  177. package/get-shit-done/workflows/verify-phase.md +143 -11
  178. package/get-shit-done/workflows/verify-work.md +141 -39
  179. package/package.json +1 -1
  180. package/skills/gsd-audit-milestone/SKILL.md +29 -0
  181. package/skills/gsd-cleanup/SKILL.md +19 -0
  182. package/skills/gsd-complete-milestone/SKILL.md +131 -0
  183. package/skills/gsd-discuss-phase/SKILL.md +54 -0
  184. package/skills/gsd-execute-phase/SKILL.md +49 -0
  185. package/skills/gsd-plan-phase/SKILL.md +37 -0
  186. package/skills/gsd-ui-phase/SKILL.md +24 -0
  187. package/skills/gsd-ui-review/SKILL.md +24 -0
  188. package/skills/gsd-verify-work/SKILL.md +30 -0
@@ -1,8 +1,8 @@
1
- <purpose>
1
+ <objective>
2
2
  Verify phase goal achievement through goal-backward analysis. Check that the codebase delivers what the phase promised, not just that tasks completed.
3
3
 
4
4
  Executed by a verification subagent spawned from execute-phase.md.
5
- </purpose>
5
+ </objective>
6
6
 
7
7
  <core_principle>
8
8
  **Task completion ≠ Goal achievement**
@@ -13,6 +13,7 @@ Goal-backward verification:
13
13
  1. What must be TRUE for the goal to be achieved?
14
14
  2. What must EXIST for those truths to hold?
15
15
  3. What must be WIRED for those artifacts to function?
16
+ 4. What must TESTS PROVE for those truths to be evidenced?
16
17
 
17
18
  Then verify each level against the actual codebase.
18
19
  </core_principle>
@@ -37,11 +38,16 @@ Extract from init JSON: `phase_dir`, `phase_number`, `phase_name`, `has_plans`,
37
38
  Then load phase details and list plans/summaries:
38
39
  ```bash
39
40
  node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap get-phase "${phase_number}"
40
- grep -E "^| ${phase_number}" .planning/REQUIREMENTS.md 2>/dev/null
41
- ls "$phase_dir"/*-SUMMARY.md "$phase_dir"/*-PLAN.md 2>/dev/null
41
+ grep -E "^| ${phase_number}" .planning/REQUIREMENTS.md 2>/dev/null || true
42
+ ls "$phase_dir"/*-SUMMARY.md "$phase_dir"/*-PLAN.md 2>/dev/null || true
42
43
  ```
43
44
 
44
- Extract **phase goal** from ROADMAP.md (the outcome to verify, not tasks) and **requirements** from REQUIREMENTS.md if it exists.
45
+ Load full milestone phases for deferred-item filtering (Step 9b):
46
+ ```bash
47
+ node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap analyze
48
+ ```
49
+
50
+ Extract **phase goal** from ROADMAP.md (the outcome to verify, not tasks), **requirements** from REQUIREMENTS.md if it exists, and **all milestone phases** from roadmap analyze (for cross-referencing gaps against later phases).
45
51
  </step>
46
52
 
47
53
  <step name="establish_must_haves">
@@ -126,6 +132,17 @@ WIRED = imported AND used. ORPHANED = exists but not imported/used.
126
132
  | ✓ | ✓ | ✗ | ⚠️ ORPHANED |
127
133
  | ✓ | ✗ | - | ✗ STUB |
128
134
  | ✗ | - | - | ✗ MISSING |
135
+
136
+ **Export-level spot check (WARNING severity):**
137
+
138
+ For artifacts that pass Level 3, spot-check individual exports:
139
+ - Extract key exported symbols (functions, constants, classes — skip types/interfaces)
140
+ - For each, grep for usage outside the defining file
141
+ - Flag exports with zero external call sites as "exported but unused"
142
+
143
+ This catches dead stores like `setPlan()` that exist in a wired file but are
144
+ never actually called. Report as WARNING — may indicate incomplete cross-plan
145
+ wiring or leftover code from plan revisions.
129
146
  </step>
130
147
 
131
148
  <step name="verify_wiring">
@@ -160,7 +177,7 @@ Record status and evidence for each key link.
160
177
  <step name="verify_requirements">
161
178
  If REQUIREMENTS.md exists:
162
179
  ```bash
163
- grep -E "Phase ${PHASE_NUM}" .planning/REQUIREMENTS.md 2>/dev/null
180
+ grep -E "Phase ${PHASE_NUM}" .planning/REQUIREMENTS.md 2>/dev/null || true
164
181
  ```
165
182
 
166
183
  For each requirement: parse description → identify supporting truths/artifacts → status: ✓ SATISFIED / ✗ BLOCKED / ? NEEDS HUMAN.
@@ -179,6 +196,93 @@ Extract files modified in this phase from SUMMARY.md, scan each:
179
196
  Categorize: 🛑 Blocker (prevents goal) | ⚠️ Warning (incomplete) | ℹ️ Info (notable).
180
197
  </step>
181
198
 
199
+ <step name="audit_test_quality">
200
+ **Verify that tests PROVE what they claim to prove.**
201
+
202
+ This step catches test-level deceptions that pass all prior checks: files exist, are substantive, are wired, and tests pass — but the tests don't actually validate the requirement.
203
+
204
+ **1. Identify requirement-linked test files**
205
+
206
+ From PLAN and SUMMARY files, map each requirement to the test files that are supposed to prove it.
207
+
208
+ **2. Disabled test scan**
209
+
210
+ For ALL test files linked to requirements, search for disabled/skipped patterns:
211
+
212
+ ```bash
213
+ grep -rn -E "it\.skip|describe\.skip|test\.skip|xit\(|xdescribe\(|xtest\(|@pytest\.mark\.skip|@unittest\.skip|#\[ignore\]|\.pending|it\.todo|test\.todo" "$TEST_FILE"
214
+ ```
215
+
216
+ **Rule:** A disabled test linked to a requirement = requirement NOT tested.
217
+ - 🛑 BLOCKER if the disabled test is the only test proving that requirement
218
+ - ⚠️ WARNING if other active tests also cover the requirement
219
+
220
+ **3. Circular test detection**
221
+
222
+ Search for scripts/utilities that generate expected values by running the system under test:
223
+
224
+ ```bash
225
+ grep -rn -E "writeFileSync|writeFile|fs\.write|open\(.*w\)" "$TEST_DIRS"
226
+ ```
227
+
228
+ For each match, check if it also imports the system/service/module being tested. If a script both imports the system-under-test AND writes expected output values → CIRCULAR.
229
+
230
+ **Circular test indicators:**
231
+ - Script imports a service AND writes to fixture files
232
+ - Expected values have comments like "computed from engine", "captured from baseline"
233
+ - Script filename contains "capture", "baseline", "generate", "snapshot" in test context
234
+ - Expected values were added in the same commit as the test assertions
235
+
236
+ **Rule:** A test comparing system output against values generated by the same system is circular. It proves consistency, not correctness.
237
+
238
+ **4. Expected value provenance** (for comparison/parity/migration requirements)
239
+
240
+ When a requirement demands comparison with an external source ("identical to X", "matches Y", "same output as Z"):
241
+
242
+ - Is the external source actually invoked or referenced in the test pipeline?
243
+ - Do fixture files contain data sourced from the external system?
244
+ - Or do all expected values come from the new system itself or from mathematical formulas?
245
+
246
+ **Provenance classification:**
247
+ - VALID: Expected value from external/legacy system output, manual capture, or independent oracle
248
+ - PARTIAL: Expected value from mathematical derivation (proves formula, not system match)
249
+ - CIRCULAR: Expected value from the system being tested
250
+ - UNKNOWN: No provenance information — treat as SUSPECT
251
+
252
+ **5. Assertion strength**
253
+
254
+ For each test linked to a requirement, classify the strongest assertion:
255
+
256
+ | Level | Examples | Proves |
257
+ |-------|---------|--------|
258
+ | Existence | `toBeDefined()`, `!= null` | Something returned |
259
+ | Type | `typeof x === 'number'` | Correct shape |
260
+ | Status | `code === 200` | No error |
261
+ | Value | `toEqual(expected)`, `toBeCloseTo(x)` | Specific value |
262
+ | Behavioral | Multi-step workflow assertions | End-to-end correctness |
263
+
264
+ If a requirement demands value-level or behavioral-level proof and the test only has existence/type/status assertions → INSUFFICIENT.
265
+
266
+ **6. Coverage quantity**
267
+
268
+ If a requirement specifies a quantity of test cases (e.g., "30 calculations"), check if the actual number of active (non-skipped) test cases meets the requirement.
269
+
270
+ **Reporting — add to VERIFICATION.md:**
271
+
272
+ ```markdown
273
+ ### Test Quality Audit
274
+
275
+ | Test File | Linked Req | Active | Skipped | Circular | Assertion Level | Verdict |
276
+ |-----------|-----------|--------|---------|----------|----------------|---------|
277
+
278
+ **Disabled tests on requirements:** {N} → {BLOCKER if any req has ONLY disabled tests}
279
+ **Circular patterns detected:** {N} → {BLOCKER if any}
280
+ **Insufficient assertions:** {N} → {WARNING}
281
+ ```
282
+
283
+ **Impact on status:** Any BLOCKER from test quality audit → overall status = `gaps_found`, regardless of other checks passing.
284
+ </step>
285
+
182
286
  <step name="identify_human_verification">
183
287
  **Always needs human:** Visual appearance, user flow completion, real-time behavior (WebSocket/SSE), external service integration, performance feel, error message clarity.
184
288
 
@@ -188,15 +292,41 @@ Format each as: Test Name → What to do → Expected result → Why can't verif
188
292
  </step>
189
293
 
190
294
  <step name="determine_status">
191
- **passed:** All truths VERIFIED, all artifacts pass levels 1-3, all key links WIRED, no blocker anti-patterns.
295
+ Classify status using this decision tree IN ORDER (most restrictive first):
296
+
297
+ 1. IF any truth FAILED, artifact MISSING/STUB, key link NOT_WIRED, blocker found, **or test quality audit found blockers (disabled requirement tests, circular tests)**:
298
+ → **gaps_found**
299
+
300
+ 2. IF the previous step produced ANY human verification items:
301
+ → **human_needed** (even if all truths VERIFIED and score is N/N)
192
302
 
193
- **gaps_found:** Any truth FAILED, artifact MISSING/STUB, key link NOT_WIRED, or blocker found.
303
+ 3. IF all checks pass AND no human verification items:
304
+ → **passed**
194
305
 
195
- **human_needed:** All automated checks pass but human verification items remain.
306
+ **passed is ONLY valid when no human verification items exist.**
196
307
 
197
308
  **Score:** `verified_truths / total_truths`
198
309
  </step>
199
310
 
311
+ <step name="filter_deferred_items">
312
+ Before reporting gaps, cross-reference each gap against later phases in the milestone using the full roadmap data loaded in load_context (from `roadmap analyze`).
313
+
314
+ For each potential gap identified in determine_status:
315
+ 1. Check if the gap's failed truth or missing item is covered by a later phase's goal or success criteria
316
+ 2. **Match criteria:** The gap's concern appears in a later phase's goal text, success criteria text, or the later phase's name clearly suggests it covers this area
317
+ 3. If a clear match is found → move the gap to a `deferred` list with the matching phase reference and evidence text
318
+ 4. If no match in any later phase → keep as a real `gap`
319
+
320
+ **Important:** Be conservative. Only defer a gap when there is clear, specific evidence in a later phase. Vague or tangential matches should NOT cause deferral — when in doubt, keep it as a real gap.
321
+
322
+ **Deferred items do NOT affect the status determination.** Recalculate after filtering:
323
+ - If gaps list is now empty and no human items exist → `passed`
324
+ - If gaps list is now empty but human items exist → `human_needed`
325
+ - If gaps list still has items → `gaps_found`
326
+
327
+ Include deferred items in VERIFICATION.md frontmatter (`deferred:` section) and body (Deferred Items table) for transparency. If no deferred items exist, omit these sections.
328
+ </step>
329
+
200
330
  <step name="generate_fix_plans">
201
331
  If gaps_found:
202
332
 
@@ -204,7 +334,7 @@ If gaps_found:
204
334
 
205
335
  2. **Generate plan per cluster:** Objective, 2-3 tasks (files/action/verify each), re-verify step. Keep focused: single concern per plan.
206
336
 
207
- 3. **Order by dependency:** Fix missing → fix stubs → fix wiring → verify.
337
+ 3. **Order by dependency:** Fix missing → fix stubs → fix wiring → **fix test evidence** → verify.
208
338
  </step>
209
339
 
210
340
  <step name="create_report">
@@ -235,9 +365,11 @@ Orchestrator routes: `passed` → update_roadmap | `gaps_found` → create/execu
235
365
  - [ ] All key links verified
236
366
  - [ ] Requirements coverage assessed (if applicable)
237
367
  - [ ] Anti-patterns scanned and categorized
368
+ - [ ] Test quality audited (disabled tests, circular patterns, assertion strength, provenance)
238
369
  - [ ] Human verification items identified
239
370
  - [ ] Overall status determined
240
- - [ ] Fix plans generated (if gaps_found)
371
+ - [ ] Deferred items filtered against later milestone phases (if gaps found)
372
+ - [ ] Fix plans generated (if gaps_found after filtering)
241
373
  - [ ] VERIFICATION.md created with complete report
242
374
  - [ ] Results returned to orchestrator
243
375
  </success_criteria>
@@ -1,8 +1,14 @@
1
- <purpose>
1
+ <objective>
2
2
  Validate built features through conversational testing with persistent state. Creates UAT.md that tracks test progress, survives /new, and feeds gaps into /gsd-plan-phase --gaps.
3
3
 
4
4
  User tests, OpenCode records. One test at a time. Plain text responses.
5
- </purpose>
5
+ </objective>
6
+
7
+ <available_agent_types>
8
+ Valid GSD subagent types (use exact names — do not fall back to 'general'):
9
+ - gsd-planner — Creates detailed plans from phase scope
10
+ - gsd-plan-checker — Reviews plan quality before execution
11
+ </available_agent_types>
6
12
 
7
13
  <philosophy>
8
14
  **Show expected, ask if reality matches.**
@@ -26,16 +32,18 @@ If $ARGUMENTS contains a phase number, load context:
26
32
  ```bash
27
33
  INIT=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" init verify-work "${PHASE_ARG}")
28
34
  if [[ "$INIT" == @file:* ]]; then INIT=$(cat "${INIT#@file:}"); fi
35
+ AGENT_SKILLS_PLANNER=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" agent-skills gsd-planner 2>/dev/null)
36
+ AGENT_SKILLS_CHECKER=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" agent-skills gsd-checker 2>/dev/null)
29
37
  ```
30
38
 
31
- Parse JSON for: `planner_model`, `checker_model`, `commit_docs`, `phase_found`, `phase_dir`, `phase_number`, `phase_name`, `has_verification`.
39
+ Parse JSON for: `planner_model`, `checker_model`, `commit_docs`, `phase_found`, `phase_dir`, `phase_number`, `phase_name`, `has_verification`, `uat_path`.
32
40
  </step>
33
41
 
34
42
  <step name="check_active_session">
35
43
  **First: Check for active UAT sessions**
36
44
 
37
45
  ```bash
38
- find .planning/phases -name "*-UAT.md" -type f 2>/dev/null | head -5
46
+ (find .planning/phases -name "*-UAT.md" -type f 2>/dev/null || true) | head -5
39
47
  ```
40
48
 
41
49
  **If active sessions exist AND no $ARGUMENTS provided:**
@@ -78,13 +86,49 @@ Provide a phase number to start testing (e.g., /gsd-verify-work 4)
78
86
  Continue to `create_uat_file`.
79
87
  </step>
80
88
 
89
+ <step name="automated_ui_verification">
90
+ **Automated UI Verification (when Playwright-MCP is available)**
91
+
92
+ Before running manual UAT, check whether this phase has a UI component and whether
93
+ `mcp__playwright__*` or `mcp__puppeteer__*` tools are available in the current session.
94
+
95
+ ```bash
96
+ UI_PHASE_FLAG=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" config-get workflow.ui_phase --raw 2>/dev/null || echo "true")
97
+ UI_SPEC_FILE=$(ls "${PHASE_DIR}"/*-UI-SPEC.md 2>/dev/null | head -1)
98
+ ```
99
+
100
+ **If Playwright-MCP tools are available in this session (`mcp__playwright__*` tools
101
+ respond to tool calls) AND (`UI_PHASE_FLAG` is `true` OR `UI_SPEC_FILE` is non-empty):**
102
+
103
+ For each UI checkpoint listed in the phase's UI-SPEC.md (or inferred from SUMMARY.md):
104
+
105
+ 1. Use `mcp__playwright__navigate` (or equivalent) to open the component's URL.
106
+ 2. Use `mcp__playwright__screenshot` to capture a screenshot.
107
+ 3. Compare the screenshot visually against the spec's stated requirements
108
+ (dimensions, color, layout, spacing).
109
+ 4. Automatically mark checkpoints as **passed** or **needs review** based on the
110
+ visual comparison — no manual question required for items that clearly match.
111
+ 5. Flag items that require human judgment (subjective aesthetics, content accuracy)
112
+ and present only those as manual UAT questions.
113
+
114
+ If automated verification is not available, fall back to the standard manual
115
+ checkpoint questions defined in this workflow unchanged. This step is entirely
116
+ conditional: if Playwright-MCP is not configured, the manual UAT flow runs exactly as it would without this step.
117
+
118
+ **Display summary line before proceeding:**
119
+ ```
120
+ UI checkpoints: {N} auto-verified, {M} queued for manual review
121
+ ```
122
+
123
+ </step>
124
+
81
125
  <step name="find_summaries">
82
126
  **Find what to test:**
83
127
 
84
128
  Use `phase_dir` from init (or run init if not already done).
85
129
 
86
130
  ```bash
87
- ls "$phase_dir"/*-SUMMARY.md 2>/dev/null
131
+ ls "$phase_dir"/*-SUMMARY.md 2>/dev/null || true
88
132
  ```
89
133
 
90
134
  Read each SUMMARY.md to extract testable deliverables.
@@ -186,23 +230,23 @@ Proceed to `present_test`.
186
230
  <step name="present_test">
187
231
  **Present current test to user:**
188
232
 
189
- read Current Test section from UAT file.
190
-
191
- Display using checkpoint box format:
233
+ Render the checkpoint from the structured UAT file instead of composing it freehand:
192
234
 
235
+ ```bash
236
+ CHECKPOINT=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" uat render-checkpoint --file "$uat_path" --raw)
237
+ if [[ "$CHECKPOINT" == @file:* ]]; then CHECKPOINT=$(cat "${CHECKPOINT#@file:}"); fi
193
238
  ```
194
- ╔══════════════════════════════════════════════════════════════╗
195
- ║ CHECKPOINT: Verification Required ║
196
- ╚══════════════════════════════════════════════════════════════╝
197
239
 
198
- **Test {number}: {name}**
240
+ Display the returned checkpoint EXACTLY as-is:
199
241
 
200
- {expected}
201
-
202
- ──────────────────────────────────────────────────────────────
203
- → Type "pass" or describe what's wrong
204
- ──────────────────────────────────────────────────────────────
205
242
  ```
243
+ {CHECKPOINT}
244
+ ```
245
+
246
+ **Critical response hygiene:**
247
+ - Your entire response MUST equal `{CHECKPOINT}` byte-for-byte.
248
+ - Do NOT add commentary before or after the block.
249
+ - If you notice protocol/meta markers such as `to=all:`, role-routing text, XML system tags, hidden instruction markers, ad copy, or any unrelated suffix, discard the draft and output `{CHECKPOINT}` only.
206
250
 
207
251
  Wait for user response (plain text, no question).
208
252
  </step>
@@ -231,6 +275,29 @@ result: skipped
231
275
  reason: [user's reason if provided]
232
276
  ```
233
277
 
278
+ **If response indicates blocked:**
279
+ - "blocked", "can't test - server not running", "need physical device", "need release build"
280
+ - Or any response containing: "server", "blocked", "not running", "physical device", "release build"
281
+
282
+ Infer blocked_by tag from response:
283
+ - Contains: server, not running, gateway, API → `server`
284
+ - Contains: physical, device, hardware, real phone → `physical-device`
285
+ - Contains: release, preview, build, EAS → `release-build`
286
+ - Contains: stripe, twilio, third-party, configure → `third-party`
287
+ - Contains: depends on, prior phase, prerequisite → `prior-phase`
288
+ - Default: `other`
289
+
290
+ Update Tests section:
291
+ ```
292
+ ### {N}. {name}
293
+ expected: {expected}
294
+ result: blocked
295
+ blocked_by: {inferred tag}
296
+ reason: "{verbatim user response}"
297
+ ```
298
+
299
+ Note: Blocked tests do NOT go into the Gaps section (they aren't code issues — they're prerequisite gates).
300
+
234
301
  **If response is anything else:**
235
302
  - Treat as issue description
236
303
 
@@ -293,8 +360,24 @@ Proceed to `present_test`.
293
360
  <step name="complete_session">
294
361
  **Complete testing and commit:**
295
362
 
363
+ **Determine final status:**
364
+
365
+ Count results:
366
+ - `pending_count`: tests with `result: [pending]`
367
+ - `blocked_count`: tests with `result: blocked`
368
+ - `skipped_no_reason`: tests with `result: skipped` and no `reason` field
369
+
370
+ ```
371
+ if pending_count > 0 OR blocked_count > 0 OR skipped_no_reason > 0:
372
+ status: partial
373
+ # Session ended but not all tests resolved
374
+ else:
375
+ status: complete
376
+ # All tests have a definitive result (pass, issue, or skipped-with-reason)
377
+ ```
378
+
296
379
  Update frontmatter:
297
- - status: complete
380
+ - status: {computed status}
298
381
  - updated: [now]
299
382
 
300
383
  Clear Current Test section:
@@ -328,11 +411,39 @@ Present summary:
328
411
  **If issues > 0:** Proceed to `diagnose_issues`
329
412
 
330
413
  **If issues == 0:**
414
+
415
+ ```bash
416
+ SECURITY_CFG=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" config-get workflow.security_enforcement --raw 2>/dev/null || echo "true")
417
+ SECURITY_FILE=$(ls "${PHASE_DIR}"/*-SECURITY.md 2>/dev/null | head -1)
418
+ ```
419
+
420
+ If `SECURITY_CFG` is `true` AND `SECURITY_FILE` is empty:
421
+ ```
422
+ ⚠ Security enforcement enabled — /gsd-secure-phase {phase} has not run.
423
+ Run before advancing to the next phase.
424
+
425
+ All tests passed. Ready to continue.
426
+
427
+ - `/gsd-secure-phase {phase}` — security review (required before advancing)
428
+ - `/gsd-plan-phase {next}` — Plan next phase
429
+ - `/gsd-execute-phase {next}` — Execute next phase
430
+ - `/gsd-ui-review {phase}` — visual quality audit (if frontend files were modified)
431
+ ```
432
+
433
+ If `SECURITY_CFG` is `true` AND `SECURITY_FILE` exists: read `threats_open` from the frontmatter of `SECURITY_FILE`. If `threats_open` > 0:
434
+ ```
435
+ ⚠ Security gate: {threats_open} threats open
436
+ /gsd-secure-phase {phase} — resolve before advancing
437
+ ```
438
+
439
+ If `SECURITY_CFG` is `false` OR (`SECURITY_FILE` exists AND `threats_open` is `0`):
331
440
  ```
332
441
  All tests passed. Ready to continue.
333
442
 
334
443
  - `/gsd-plan-phase {next}` — Plan next phase
335
444
  - `/gsd-execute-phase {next}` — Execute next phase
445
+ - `/gsd-secure-phase {phase}` — security review
446
+ - `/gsd-ui-review {phase}` — visual quality audit (if frontend files were modified)
336
447
  ```
337
448
  </step>
338
449
 
@@ -372,8 +483,7 @@ Display:
372
483
  Spawn gsd-planner in --gaps mode:
373
484
 
374
485
  ```
375
- task(
376
- prompt="""
486
+ @gsd-planner """
377
487
  <planning_context>
378
488
 
379
489
  **Phase:** {phase_number}
@@ -385,17 +495,15 @@ task(
385
495
  - .planning/ROADMAP.md (Roadmap)
386
496
  </files_to_read>
387
497
 
498
+ ${AGENT_SKILLS_PLANNER}
499
+
388
500
  </planning_context>
389
501
 
390
502
  <downstream_consumer>
391
503
  Output consumed by /gsd-execute-phase
392
504
  Plans must be executable prompts.
393
505
  </downstream_consumer>
394
- """,
395
- subagent_type="gsd-planner",
396
- model="{planner_model}",
397
- description="Plan gap fixes for Phase {phase}"
398
- )
506
+ """
399
507
  ```
400
508
 
401
509
  On return:
@@ -420,8 +528,7 @@ Initialize: `iteration_count = 1`
420
528
  Spawn gsd-plan-checker:
421
529
 
422
530
  ```
423
- task(
424
- prompt="""
531
+ @gsd-plan-checker """
425
532
  <verification_context>
426
533
 
427
534
  **Phase:** {phase_number}
@@ -431,6 +538,8 @@ task(
431
538
  - {phase_dir}/*-PLAN.md (Plans to verify)
432
539
  </files_to_read>
433
540
 
541
+ ${AGENT_SKILLS_CHECKER}
542
+
434
543
  </verification_context>
435
544
 
436
545
  <expected_output>
@@ -438,11 +547,7 @@ Return one of:
438
547
  - ## VERIFICATION PASSED — all checks pass
439
548
  - ## ISSUES FOUND — structured issue list
440
549
  </expected_output>
441
- """,
442
- subagent_type="gsd-plan-checker",
443
- model="{checker_model}",
444
- description="Verify Phase {phase} fix plans"
445
- )
550
+ """
446
551
  ```
447
552
 
448
553
  On return:
@@ -460,8 +565,7 @@ Display: `Sending back to planner for revision... (iteration {N}/3)`
460
565
  Spawn gsd-planner with revision context:
461
566
 
462
567
  ```
463
- task(
464
- prompt="""
568
+ @gsd-planner """
465
569
  <revision_context>
466
570
 
467
571
  **Phase:** {phase_number}
@@ -471,6 +575,8 @@ task(
471
575
  - {phase_dir}/*-PLAN.md (Existing plans)
472
576
  </files_to_read>
473
577
 
578
+ ${AGENT_SKILLS_PLANNER}
579
+
474
580
  **Checker issues:**
475
581
  {structured_issues_from_checker}
476
582
 
@@ -480,11 +586,7 @@ task(
480
586
  Read existing PLAN.md files. Make targeted updates to address checker issues.
481
587
  Do NOT replan from scratch unless issues are fundamental.
482
588
  </instructions>
483
- """,
484
- subagent_type="gsd-planner",
485
- model="{planner_model}",
486
- description="Revise Phase {phase} plans"
487
- )
589
+ """
488
590
  ```
489
591
 
490
592
  After planner returns → spawn checker again (verify_gap_plans logic)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gsd-opencode",
3
- "version": "1.22.1",
3
+ "version": "1.33.0",
4
4
  "description": "GSD-OpenCode distribution manager - install, verify, and maintain your GSD-OpenCode installation",
5
5
  "type": "module",
6
6
  "main": "bin/gsd.js",
@@ -0,0 +1,29 @@
1
+ ---
2
+ name: gsd-audit-milestone
3
+ description: Audit a milestone against its definition of done (requirements coverage, cross-phase integration, end-to-end flows)
4
+ ---
5
+
6
+ <objective>
7
+ Verify milestone achieved its definition of done. Check requirements coverage, cross-phase integration, and end-to-end flows.
8
+
9
+ **This command IS the orchestrator.** It reads the existing VERIFICATION.md files (phases were already verified during execute-phase), aggregates tech debt and deferred gaps, then spawns the integration checker to verify cross-phase wiring.
10
+ </objective>
11
+
12
+ <execution_context>
13
+ @$HOME/.config/opencode/get-shit-done/workflows/audit-milestone.md
14
+ </execution_context>
15
+
16
+ <context>
17
+ Version: $ARGUMENTS (optional — defaults to current milestone)
18
+
19
+ Core planning files are resolved in-workflow (`init milestone-op`) and loaded only as needed.
20
+
21
+ **Completed Work:**
22
+ glob: .planning/phases/*/*-SUMMARY.md
23
+ glob: .planning/phases/*/*-VERIFICATION.md
24
+ </context>
25
+
26
+ <process>
27
+ Execute the audit-milestone workflow from @$HOME/.config/opencode/get-shit-done/workflows/audit-milestone.md end-to-end.
28
+ Preserve all workflow gates (scope determination, verification reading, integration check, requirements coverage, routing).
29
+ </process>
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: gsd-cleanup
3
+ description: Archive phase directories from completed milestones
4
+ ---
5
+
6
+ <objective>
7
+ Archive phase directories from completed milestones into `.planning/milestones/v{X.Y}-phases/`.
8
+
9
+ Use when `.planning/phases/` has accumulated directories from past milestones.
10
+ </objective>
11
+
12
+ <execution_context>
13
+ @$HOME/.config/opencode/get-shit-done/workflows/cleanup.md
14
+ </execution_context>
15
+
16
+ <process>
17
+ Follow the cleanup workflow at @$HOME/.config/opencode/get-shit-done/workflows/cleanup.md.
18
+ Identify completed milestones, show a dry-run summary, and archive on confirmation.
19
+ </process>