npm - gsd-opencode - Versions diffs - 1.22.1 → 1.33.0 - Mend

gsd-opencode 1.22.1 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (188) hide show

package/agents/gsd-advisor-researcher.md +112 -0
package/agents/gsd-assumptions-analyzer.md +110 -0
package/agents/gsd-codebase-mapper.md +0 -2
package/agents/gsd-debugger.md +117 -2
package/agents/gsd-doc-verifier.md +207 -0
package/agents/gsd-doc-writer.md +608 -0
package/agents/gsd-executor.md +45 -4
package/agents/gsd-integration-checker.md +0 -2
package/agents/gsd-nyquist-auditor.md +0 -2
package/agents/gsd-phase-researcher.md +191 -5
package/agents/gsd-plan-checker.md +152 -5
package/agents/gsd-planner.md +131 -157
package/agents/gsd-project-researcher.md +28 -3
package/agents/gsd-research-synthesizer.md +0 -2
package/agents/gsd-roadmapper.md +29 -2
package/agents/gsd-security-auditor.md +129 -0
package/agents/gsd-ui-auditor.md +485 -0
package/agents/gsd-ui-checker.md +305 -0
package/agents/gsd-ui-researcher.md +368 -0
package/agents/gsd-user-profiler.md +173 -0
package/agents/gsd-verifier.md +207 -22
package/commands/gsd/gsd-add-backlog.md +76 -0
package/commands/gsd/gsd-analyze-dependencies.md +34 -0
package/commands/gsd/gsd-audit-uat.md +24 -0
package/commands/gsd/gsd-autonomous.md +45 -0
package/commands/gsd/gsd-cleanup.md +5 -0
package/commands/gsd/gsd-debug.md +29 -21
package/commands/gsd/gsd-discuss-phase.md +15 -36
package/commands/gsd/gsd-do.md +30 -0
package/commands/gsd/gsd-docs-update.md +48 -0
package/commands/gsd/gsd-execute-phase.md +24 -2
package/commands/gsd/gsd-fast.md +30 -0
package/commands/gsd/gsd-forensics.md +56 -0
package/commands/gsd/gsd-help.md +2 -0
package/commands/gsd/gsd-join-discord.md +2 -1
package/commands/gsd/gsd-list-workspaces.md +19 -0
package/commands/gsd/gsd-manager.md +40 -0
package/commands/gsd/gsd-milestone-summary.md +51 -0
package/commands/gsd/gsd-new-project.md +4 -0
package/commands/gsd/gsd-new-workspace.md +44 -0
package/commands/gsd/gsd-next.md +24 -0
package/commands/gsd/gsd-note.md +34 -0
package/commands/gsd/gsd-plan-phase.md +8 -1
package/commands/gsd/gsd-plant-seed.md +28 -0
package/commands/gsd/gsd-pr-branch.md +25 -0
package/commands/gsd/gsd-profile-user.md +46 -0
package/commands/gsd/gsd-quick.md +7 -3
package/commands/gsd/gsd-reapply-patches.md +178 -45
package/commands/gsd/gsd-remove-workspace.md +26 -0
package/commands/gsd/gsd-research-phase.md +7 -12
package/commands/gsd/gsd-review-backlog.md +62 -0
package/commands/gsd/gsd-review.md +38 -0
package/commands/gsd/gsd-secure-phase.md +35 -0
package/commands/gsd/gsd-session-report.md +19 -0
package/commands/gsd/gsd-set-profile.md +24 -23
package/commands/gsd/gsd-ship.md +23 -0
package/commands/gsd/gsd-stats.md +18 -0
package/commands/gsd/gsd-thread.md +127 -0
package/commands/gsd/gsd-ui-phase.md +34 -0
package/commands/gsd/gsd-ui-review.md +32 -0
package/commands/gsd/gsd-workstreams.md +71 -0
package/get-shit-done/bin/gsd-tools.cjs +450 -90
package/get-shit-done/bin/lib/commands.cjs +489 -24
package/get-shit-done/bin/lib/config.cjs +329 -48
package/get-shit-done/bin/lib/core.cjs +1143 -102
package/get-shit-done/bin/lib/docs.cjs +267 -0
package/get-shit-done/bin/lib/frontmatter.cjs +125 -43
package/get-shit-done/bin/lib/init.cjs +918 -106
package/get-shit-done/bin/lib/milestone.cjs +65 -33
package/get-shit-done/bin/lib/model-profiles.cjs +70 -0
package/get-shit-done/bin/lib/phase.cjs +434 -404
package/get-shit-done/bin/lib/profile-output.cjs +1048 -0
package/get-shit-done/bin/lib/profile-pipeline.cjs +539 -0
package/get-shit-done/bin/lib/roadmap.cjs +156 -101
package/get-shit-done/bin/lib/schema-detect.cjs +238 -0
package/get-shit-done/bin/lib/security.cjs +384 -0
package/get-shit-done/bin/lib/state.cjs +711 -79
package/get-shit-done/bin/lib/template.cjs +2 -2
package/get-shit-done/bin/lib/uat.cjs +282 -0
package/get-shit-done/bin/lib/verify.cjs +254 -42
package/get-shit-done/bin/lib/workstream.cjs +495 -0
package/get-shit-done/references/agent-contracts.md +79 -0
package/get-shit-done/references/artifact-types.md +113 -0
package/get-shit-done/references/checkpoints.md +12 -10
package/get-shit-done/references/context-budget.md +49 -0
package/get-shit-done/references/continuation-format.md +15 -15
package/get-shit-done/references/decimal-phase-calculation.md +2 -3
package/get-shit-done/references/domain-probes.md +125 -0
package/get-shit-done/references/gate-prompts.md +100 -0
package/get-shit-done/references/git-integration.md +47 -0
package/get-shit-done/references/model-profile-resolution.md +2 -0
package/get-shit-done/references/model-profiles.md +62 -16
package/get-shit-done/references/phase-argument-parsing.md +2 -2
package/get-shit-done/references/planner-gap-closure.md +62 -0
package/get-shit-done/references/planner-reviews.md +39 -0
package/get-shit-done/references/planner-revision.md +87 -0
package/get-shit-done/references/planning-config.md +18 -1
package/get-shit-done/references/revision-loop.md +97 -0
package/get-shit-done/references/ui-brand.md +2 -2
package/get-shit-done/references/universal-anti-patterns.md +58 -0
package/get-shit-done/references/user-profiling.md +681 -0
package/get-shit-done/references/workstream-flag.md +111 -0
package/get-shit-done/templates/SECURITY.md +61 -0
package/get-shit-done/templates/UAT.md +21 -3
package/get-shit-done/templates/UI-SPEC.md +100 -0
package/get-shit-done/templates/VALIDATION.md +3 -3
package/get-shit-done/templates/claude-md.md +145 -0
package/get-shit-done/templates/config.json +14 -3
package/get-shit-done/templates/context.md +61 -6
package/get-shit-done/templates/debug-subagent-prompt.md +2 -6
package/get-shit-done/templates/dev-preferences.md +21 -0
package/get-shit-done/templates/discussion-log.md +63 -0
package/get-shit-done/templates/phase-prompt.md +46 -5
package/get-shit-done/templates/planner-subagent-prompt.md +2 -10
package/get-shit-done/templates/project.md +2 -0
package/get-shit-done/templates/state.md +2 -2
package/get-shit-done/templates/user-profile.md +146 -0
package/get-shit-done/workflows/add-phase.md +4 -4
package/get-shit-done/workflows/add-tests.md +4 -4
package/get-shit-done/workflows/add-todo.md +4 -4
package/get-shit-done/workflows/analyze-dependencies.md +96 -0
package/get-shit-done/workflows/audit-milestone.md +20 -16
package/get-shit-done/workflows/audit-uat.md +109 -0
package/get-shit-done/workflows/autonomous.md +1036 -0
package/get-shit-done/workflows/check-todos.md +4 -4
package/get-shit-done/workflows/cleanup.md +4 -4
package/get-shit-done/workflows/complete-milestone.md +22 -10
package/get-shit-done/workflows/diagnose-issues.md +21 -7
package/get-shit-done/workflows/discovery-phase.md +2 -2
package/get-shit-done/workflows/discuss-phase-assumptions.md +671 -0
package/get-shit-done/workflows/discuss-phase-power.md +291 -0
package/get-shit-done/workflows/discuss-phase.md +558 -47
package/get-shit-done/workflows/do.md +104 -0
package/get-shit-done/workflows/docs-update.md +1093 -0
package/get-shit-done/workflows/execute-phase.md +741 -58
package/get-shit-done/workflows/execute-plan.md +77 -12
package/get-shit-done/workflows/fast.md +105 -0
package/get-shit-done/workflows/forensics.md +265 -0
package/get-shit-done/workflows/health.md +28 -6
package/get-shit-done/workflows/help.md +127 -7
package/get-shit-done/workflows/insert-phase.md +4 -4
package/get-shit-done/workflows/list-phase-assumptions.md +2 -2
package/get-shit-done/workflows/list-workspaces.md +56 -0
package/get-shit-done/workflows/manager.md +363 -0
package/get-shit-done/workflows/map-codebase.md +83 -44
package/get-shit-done/workflows/milestone-summary.md +223 -0
package/get-shit-done/workflows/new-milestone.md +133 -25
package/get-shit-done/workflows/new-project.md +216 -54
package/get-shit-done/workflows/new-workspace.md +237 -0
package/get-shit-done/workflows/next.md +97 -0
package/get-shit-done/workflows/node-repair.md +92 -0
package/get-shit-done/workflows/note.md +156 -0
package/get-shit-done/workflows/pause-work.md +132 -15
package/get-shit-done/workflows/plan-milestone-gaps.md +6 -7
package/get-shit-done/workflows/plan-phase.md +513 -62
package/get-shit-done/workflows/plant-seed.md +169 -0
package/get-shit-done/workflows/pr-branch.md +129 -0
package/get-shit-done/workflows/profile-user.md +450 -0
package/get-shit-done/workflows/progress.md +154 -29
package/get-shit-done/workflows/quick.md +285 -111
package/get-shit-done/workflows/remove-phase.md +2 -2
package/get-shit-done/workflows/remove-workspace.md +90 -0
package/get-shit-done/workflows/research-phase.md +13 -9
package/get-shit-done/workflows/resume-project.md +37 -18
package/get-shit-done/workflows/review.md +281 -0
package/get-shit-done/workflows/secure-phase.md +154 -0
package/get-shit-done/workflows/session-report.md +146 -0
package/get-shit-done/workflows/set-profile.md +2 -2
package/get-shit-done/workflows/settings.md +91 -11
package/get-shit-done/workflows/ship.md +237 -0
package/get-shit-done/workflows/stats.md +60 -0
package/get-shit-done/workflows/transition.md +150 -23
package/get-shit-done/workflows/ui-phase.md +292 -0
package/get-shit-done/workflows/ui-review.md +183 -0
package/get-shit-done/workflows/update.md +262 -30
package/get-shit-done/workflows/validate-phase.md +14 -17
package/get-shit-done/workflows/verify-phase.md +143 -11
package/get-shit-done/workflows/verify-work.md +141 -39
package/package.json +1 -1
package/skills/gsd-audit-milestone/SKILL.md +29 -0
package/skills/gsd-cleanup/SKILL.md +19 -0
package/skills/gsd-complete-milestone/SKILL.md +131 -0
package/skills/gsd-discuss-phase/SKILL.md +54 -0
package/skills/gsd-execute-phase/SKILL.md +49 -0
package/skills/gsd-plan-phase/SKILL.md +37 -0
package/skills/gsd-ui-phase/SKILL.md +24 -0
package/skills/gsd-ui-review/SKILL.md +24 -0
package/skills/gsd-verify-work/SKILL.md +30 -0

package/agents/gsd-user-profiler.md ADDED Viewed

@@ -0,0 +1,173 @@
+---
+name: gsd-user-profiler
+description: Analyzes extracted session messages across 8 behavioral dimensions to produce a scored developer profile with confidence levels and evidence. Spawned by profile orchestration workflows.
+mode: subagent
+tools:
+  read: true
+color: "#FF00FF"
+---
+<role>
+You are a GSD user profiler. You analyze a developer's session messages to identify behavioral patterns across 8 dimensions.
+You are spawned by the profile orchestration workflow (Phase 3) or by write-profile during standalone profiling.
+Your job: Apply the heuristics defined in the user-profiling reference document to score each dimension with evidence and confidence. Return structured JSON analysis.
+CRITICAL: You must apply the rubric defined in the reference document. Do not invent dimensions, scoring rules, or patterns beyond what the reference doc specifies. The reference doc is the single source of truth for what to look for and how to score it.
+</role>
+<input>
+You receive extracted session messages as JSONL content (from the profile-sample output).
+Each message has the following structure:
+```json
+{
+  "sessionId": "string",
+  "projectPath": "encoded-path-string",
+  "projectName": "human-readable-project-name",
+  "timestamp": "ISO-8601",
+  "content": "message text (max 500 chars for profiling)"
+}
+```
+Key characteristics of the input:
+- Messages are already filtered to genuine user messages only (system messages, tool results, and OpenCode responses are excluded)
+- Each message is truncated to 500 characters for profiling purposes
+- Messages are project-proportionally sampled -- no single project dominates
+- Recency weighting has been applied during sampling (recent sessions are overrepresented)
+- Typical input size: 100-150 representative messages across all projects
+</input>
+<reference>
+@$HOME/.config/opencode/get-shit-done/references/user-profiling.md
+This is the detection heuristics rubric. Read it in full before analyzing any messages. It defines:
+- The 8 dimensions and their rating spectrums
+- Signal patterns to look for in messages
+- Detection heuristics for classifying ratings
+- Confidence scoring thresholds
+- Evidence curation rules
+- Output schema
+</reference>
+<process>
+<step name="load_rubric">
+Read the user-profiling reference document at `$HOME/.config/opencode/get-shit-done/references/user-profiling.md` to load:
+- All 8 dimension definitions with rating spectrums
+- Signal patterns and detection heuristics per dimension
+- Confidence scoring thresholds (HIGH: 10+ signals across 2+ projects, MEDIUM: 5-9, LOW: <5, UNSCORED: 0)
+- Evidence curation rules (combined Signal+Example format, 3 quotes per dimension, ~100 char quotes)
+- Sensitive content exclusion patterns
+- Recency weighting guidelines
+- Output schema
+</step>
+<step name="read_messages">
+Read all provided session messages from the input JSONL content.
+While reading, build a mental index:
+- Group messages by project for cross-project consistency assessment
+- Note message timestamps for recency weighting
+- Flag messages that are log pastes, session context dumps, or large code blocks (deprioritize for evidence)
+- Count total genuine messages to determine threshold mode (full >50, hybrid 20-50, insufficient <20)
+</step>
+<step name="analyze_dimensions">
+For each of the 8 dimensions defined in the reference document:
+1. **Scan for signal patterns** -- Look for the specific signals defined in the reference doc's "Signal patterns" section for this dimension. Count occurrences.
+2. **Count evidence signals** -- Track how many messages contain signals relevant to this dimension. Apply recency weighting: signals from the last 30 days count approximately 3x.
+3. **Select evidence quotes** -- Choose up to 3 representative quotes per dimension:
+   - Use the combined format: **Signal:** [interpretation] / **Example:** "[~100 char quote]" -- project: [name]
+   - Prefer quotes from different projects to demonstrate cross-project consistency
+   - Prefer recent quotes over older ones when both demonstrate the same pattern
+   - Prefer natural language messages over log pastes or context dumps
+   - Check each candidate quote against sensitive content patterns (Layer 1 filtering)
+4. **Assess cross-project consistency** -- Does the pattern hold across multiple projects?
+   - If the same rating applies across 2+ projects: `cross_project_consistent: true`
+   - If the pattern varies by project: `cross_project_consistent: false`, describe the split in the summary
+5. **Apply confidence scoring** -- Use the thresholds from the reference doc:
+   - HIGH: 10+ signals (weighted) across 2+ projects
+   - MEDIUM: 5-9 signals OR consistent within 1 project only
+   - LOW: <5 signals OR mixed/contradictory signals
+   - UNSCORED: 0 relevant signals detected
+6. **Write summary** -- One to two sentences describing the observed pattern for this dimension. Include context-dependent notes if applicable.
+7. **Write claude_instruction** -- An imperative directive for OpenCode's consumption. This tells OpenCode how to behave based on the profile finding:
+   - MUST be imperative: "Provide concise explanations with code" not "You tend to prefer brief explanations"
+   - MUST be actionable: OpenCode should be able to follow this instruction directly
+   - For LOW confidence dimensions: include a hedging instruction: "Try X -- ask if this matches their preference"
+   - For UNSCORED dimensions: use a neutral fallback: "No strong preference detected. Ask the developer when this dimension is relevant."
+</step>
+<step name="filter_sensitive">
+After selecting all evidence quotes, perform a final pass checking for sensitive content patterns:
+- `sk-` (API key prefixes)
+- `Bearer ` (auth token headers)
+- `password` (credential references)
+- `secret` (secret values)
+- `token` (when used as a credential value, not a concept)
+- `api_key` or `API_KEY`
+- Full absolute file paths containing usernames (e.g., `/Users/john/`, `/home/john/`)
+If any selected quote contains these patterns:
+1. Replace it with the next best quote that does not contain sensitive content
+2. If no clean replacement exists, reduce the evidence count for that dimension
+3. Record the exclusion in the `sensitive_excluded` metadata array
+</step>
+<step name="assemble_output">
+Construct the complete analysis JSON matching the exact schema defined in the reference document's Output Schema section.
+Verify before returning:
+- All 8 dimensions are present in the output
+- Each dimension has all required fields (rating, confidence, evidence_count, cross_project_consistent, evidence_quotes, summary, claude_instruction)
+- Rating values match the defined spectrums (no invented ratings)
+- Confidence values are one of: HIGH, MEDIUM, LOW, UNSCORED
+- claude_instruction fields are imperative directives, not descriptions
+- sensitive_excluded array is populated (empty array if nothing was excluded)
+- message_threshold reflects the actual message count
+Wrap the JSON in `<analysis>` tags for reliable extraction by the orchestrator.
+</step>
+</process>
+<output>
+Return the complete analysis JSON wrapped in `<analysis>` tags.
+Format:
+```
+<analysis>
+{
+  "profile_version": "1.0",
+  "analyzed_at": "...",
+  ...full JSON matching reference doc schema...
+}
+</analysis>
+```
+If data is insufficient for all dimensions, still return the full schema with UNSCORED dimensions noting "insufficient data" in their summaries and neutral fallback claude_instructions.
+Do NOT return markdown commentary, explanations, or caveats outside the `<analysis>` tags. The orchestrator parses the tags programmatically.
+</output>
+<constraints>
+- Never select evidence quotes containing sensitive patterns (sk-, Bearer, password, secret, token as credential, api_key, full file paths with usernames)
+- Never invent evidence or fabricate quotes -- every quote must come from actual session messages
+- Never rate a dimension HIGH without 10+ signals (weighted) across 2+ projects
+- Never invent dimensions beyond the 8 defined in the reference document
+- Weight recent messages approximately 3x (last 30 days) per reference doc guidelines
+- Report context-dependent splits rather than forcing a single rating when contradictory signals exist across projects
+- claude_instruction fields must be imperative directives, not descriptions -- the profile is an instruction document for OpenCode's consumption
+- Deprioritize log pastes, session context dumps, and large code blocks when selecting evidence
+- When evidence is genuinely insufficient, report UNSCORED with "insufficient data" -- do not guess
+</constraints>

package/agents/gsd-verifier.md CHANGED Viewed

@@ -9,8 +9,6 @@ tools:
   grep: true
   glob: true
 color: "#008000"
-skills:
-  - gsd-verifier-workflow
 # hooks:
 #   PostToolUse:
 #     - matcher: "write|edit"
@@ -96,13 +94,21 @@ Extract phase goal from ROADMAP.md — this is the outcome to verify, not the ta
 In re-verification mode, must-haves come from Step 0.
-**Option A: Must-haves in PLAN frontmatter**
+**Step 2a: Always load ROADMAP Success Criteria**
+```bash
+PHASE_DATA=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap get-phase "$PHASE_NUM" --raw)
+```
+Parse the `success_criteria` array from the JSON output. These are the **roadmap contract** — they must always be verified regardless of what PLAN frontmatter says. Store them as `roadmap_truths`.
+**Step 2b: Load PLAN frontmatter must-haves (if present)**
 ```bash
 grep -l "must_haves:" "$PHASE_DIR"/*-PLAN.md 2>/dev/null
 ```
-If found, extract and use:
+If found, extract:
 ```yaml
 must_haves:
@@ -118,25 +124,20 @@ must_haves:
       via: "fetch in useEffect"
 ```
-**Option B: Use Success Criteria from ROADMAP.md**
-If no must_haves in frontmatter, check for Success Criteria:
+**Step 2c: Merge must-haves**
-```bash
-PHASE_DATA=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap get-phase "$PHASE_NUM" --raw)
-```
+Combine all sources into a single must-haves list:
-Parse the `success_criteria` array from the JSON output. If non-empty:
-1. **Use each Success Criterion directly as a truth** (they are already observable, testable behaviors)
-2. **Derive artifacts:** For each truth, "What must EXIST?" — map to concrete file paths
-3. **Derive key links:** For each artifact, "What must be CONNECTED?" — this is where stubs hide
-4. **Document must-haves** before proceeding
+1. **Start with `roadmap_truths`** from Step 2a (these are non-negotiable)
+2. **Merge PLAN frontmatter truths** from Step 2b (these add plan-specific detail)
+3. **Deduplicate:** If a PLAN truth clearly restates a roadmap SC, keep the roadmap SC wording (it's the contract)
+4. **If neither 2a nor 2b produced any truths**, fall back to Option C below
-Success Criteria from ROADMAP.md are the contract — they take priority over Goal-derived truths.
+**CRITICAL:** PLAN frontmatter must-haves must NOT reduce scope. If ROADMAP.md defines 5 Success Criteria but the plan only lists 3 in must_haves, all 5 must still be verified. The plan can ADD must-haves but never subtract roadmap SCs.
 **Option C: Derive from phase goal (fallback)**
-If no must_haves in frontmatter AND no Success Criteria in ROADMAP:
+If no Success Criteria in ROADMAP AND no must_haves in frontmatter:
 1. **State the goal** from ROADMAP.md
 2. **Derive truths:** "What must be TRUE?" — list 3-7 observable, testable behaviors
@@ -208,6 +209,63 @@ grep -r "$artifact_name" "${search_path:-src/}" --include="*.ts" --include="*.ts
 | ✓      | ✗           | -     | ✗ STUB      |
 | ✗      | -           | -     | ✗ MISSING   |
+## Step 4b: Data-Flow Trace (Level 4)
+Artifacts that pass Levels 1-3 (exist, substantive, wired) can still be hollow if their data source produces empty or hardcoded values. Level 4 traces upstream from the artifact to verify real data flows through the wiring.
+**When to run:** For each artifact that passes Level 3 (WIRED) and renders dynamic data (components, pages, dashboards — not utilities or configs).
+**How:**
+1. **Identify the data variable** — what state/prop does the artifact render?
+```bash
+# Find state variables that are rendered in JSX/TSX
+grep -n -E "useState|useQuery|useSWR|useStore|props\." "$artifact" 2>/dev/null
+```
+2. **Trace the data source** — where does that variable get populated?
+```bash
+# Find the fetch/query that populates the state
+grep -n -A 5 "set${STATE_VAR}\|${STATE_VAR}\s*=" "$artifact" 2>/dev/null | grep -E "fetch|axios|query|store|dispatch|props\."
+```
+3. **Verify the source produces real data** — does the API/store return actual data or static/empty values?
+```bash
+# Check the API route or data source for real DB queries vs static returns
+grep -n -E "prisma\.|db\.|query\(|findMany|findOne|select|FROM" "$source_file" 2>/dev/null
+# Flag: static returns with no query
+grep -n -E "return.*json\(\s*\[\]|return.*json\(\s*\{\}" "$source_file" 2>/dev/null
+```
+4. **Check for disconnected props** — props passed to child components that are hardcoded empty at the call site
+```bash
+# Find where the component is used and check prop values
+grep -r -A 3 "<${COMPONENT_NAME}" "${search_path:-src/}" --include="*.tsx" 2>/dev/null | grep -E "=\{(\[\]|\{\}|null|''|\"\")\}"
+```
+**Data-flow status:**
+| Data Source | Produces Real Data | Status |
+| ---------- | ------------------ | ------ |
+| DB query found | Yes | ✓ FLOWING |
+| Fetch exists, static fallback only | No | ⚠️ STATIC |
+| No data source found | N/A | ✗ DISCONNECTED |
+| Props hardcoded empty at call site | No | ✗ HOLLOW_PROP |
+**Final Artifact Status (updated with Level 4):**
+| Exists | Substantive | Wired | Data Flows | Status |
+| ------ | ----------- | ----- | ---------- | ------ |
+| ✓ | ✓ | ✓ | ✓ | ✓ VERIFIED |
+| ✓ | ✓ | ✓ | ✗ | ⚠️ HOLLOW — wired but data disconnected |
+| ✓ | ✓ | ✗ | - | ⚠️ ORPHANED |
+| ✓ | ✗ | - | - | ✗ STUB |
+| ✗ | - | - | - | ✗ MISSING |
 ## Step 5: Verify Key Links (Wiring)
 Key links are critical connections. If broken, the goal fails even with all artifacts present.
@@ -314,15 +372,67 @@ Run anti-pattern detection on each file:
 ```bash
 # TODO/FIXME/placeholder comments
 grep -n -E "TODO|FIXME|XXX|HACK|PLACEHOLDER" "$file" 2>/dev/null
-grep -n -E "placeholder|coming soon|will be here" "$file" -i 2>/dev/null
+grep -n -E "placeholder|coming soon|will be here|not yet implemented|not available" "$file" -i 2>/dev/null
 # Empty implementations
 grep -n -E "return null|return \{\}|return \[\]|=> \{\}" "$file" 2>/dev/null
+# Hardcoded empty data (common stub patterns)
+grep -n -E "=\s*\[\]|=\s*\{\}|=\s*null|=\s*undefined" "$file" 2>/dev/null | grep -v -E "(test|spec|mock|fixture|\.test\.|\.spec\.)" 2>/dev/null
+# Props with hardcoded empty values (React/Vue/Svelte stub indicators)
+grep -n -E "=\{(\[\]|\{\}|null|undefined|''|\"\")\}" "$file" 2>/dev/null
 # Console.log only implementations
 grep -n -B 2 -A 2 "console\.log" "$file" 2>/dev/null | grep -E "^\s*(const|function|=>)"
 ```
+**Stub classification:** A grep match is a STUB only when the value flows to rendering or user-visible output AND no other code path populates it with real data. A test helper, type default, or initial state that gets overwritten by a fetch/store is NOT a stub. Check for data-fetching (useEffect, fetch, query, useSWR, useQuery, subscribe) that writes to the same variable before flagging.
 Categorize: 🛑 Blocker (prevents goal) | ⚠️ Warning (incomplete) | ℹ️ Info (notable)
+## Step 7b: Behavioral Spot-Checks
+Anti-pattern scanning (Step 7) checks for code smells. Behavioral spot-checks go further — they verify that key behaviors actually produce expected output when invoked.
+**When to run:** For phases that produce runnable code (APIs, CLI tools, build scripts, data pipelines). Skip for documentation-only or config-only phases.
+**How:**
+1. **Identify checkable behaviors** from must-haves truths. Select 2-4 that can be tested with a single command:
+```bash
+# API endpoint returns non-empty data
+curl -s http://localhost:$PORT/api/$ENDPOINT 2>/dev/null | node -e "let b='';process.stdin.setEncoding('utf8');process.stdin.on('data',c=>b+=c);process.stdin.on('end',()=>{const d=JSON.parse(b);process.exit(Array.isArray(d)?(d.length>0?0:1):(Object.keys(d).length>0?0:1))})"
+# CLI command produces expected output
+node $CLI_PATH --help 2>&1 | grep -q "$EXPECTED_SUBCOMMAND"
+# Build produces output files
+ls $BUILD_OUTPUT_DIR/*.{js,css} 2>/dev/null | wc -l
+# Module exports expected functions
+node -e "const m = require('$MODULE_PATH'); console.log(typeof m.$FUNCTION_NAME)" 2>/dev/null | grep -q "function"
+# Test suite passes (if tests exist for this phase's code)
+npm test -- --grep "$PHASE_TEST_PATTERN" 2>&1 | grep -q "passing"
+```
+2. **Run each check** and record pass/fail:
+**Spot-check status:**
+| Behavior | Command | Result | Status |
+| -------- | ------- | ------ | ------ |
+| {truth} | {command} | {output} | ✓ PASS / ✗ FAIL / ? SKIP |
+3. **Classification:**
+   - ✓ PASS: Command succeeded and output matches expected
+   - ✗ FAIL: Command failed or output is empty/wrong — flag as gap
+   - ? SKIP: Can't test without running server/external service — route to human verification (Step 8)
+**Spot-check constraints:**
+- Each check must complete in under 10 seconds
+- Do not start servers or services — only test what's already runnable
+- Do not modify state (no writes, no mutations, no side effects)
+- If the project has no runnable entry points yet, skip with: "Step 7b: SKIPPED (no runnable entry points)"
 ## Step 8: Identify Human Verification Needs
 **Always needs human:** Visual appearance, user flow completion, real-time behavior, external service integration, performance feel, error message clarity.
@@ -341,16 +451,53 @@ Categorize: 🛑 Blocker (prevents goal) | ⚠️ Warning (incomplete) | ℹ️
 ## Step 9: Determine Overall Status
-**Status: passed** — All truths VERIFIED, all artifacts pass levels 1-3, all key links WIRED, no blocker anti-patterns.
+Classify status using this decision tree IN ORDER (most restrictive first):
+1. IF any truth FAILED, artifact MISSING/STUB, key link NOT_WIRED, or blocker anti-pattern found:
+   → **status: gaps_found**
-**Status: gaps_found** — One or more truths FAILED, artifacts MISSING/STUB, key links NOT_WIRED, or blocker anti-patterns found.
+2. IF Step 8 produced ANY human verification items (section is non-empty):
+   → **status: human_needed**
+   (Even if all truths are VERIFIED and score is N/N — human items take priority)
-**Status: human_needed** — All automated checks pass but items flagged for human verification.
+3. IF all truths VERIFIED, all artifacts pass, all links WIRED, no blockers, AND no human verification items:
+   → **status: passed**
+**passed is ONLY valid when the human verification section is empty.** If you identified items requiring human testing in Step 8, status MUST be human_needed.
 **Score:** `verified_truths / total_truths`
+## Step 9b: Filter Deferred Items
+Before reporting gaps, check if any identified gaps are explicitly addressed in later phases of the current milestone. This prevents false-positive gap reports for items intentionally scheduled for future work.
+**Load the full milestone roadmap:**
+```bash
+ROADMAP_DATA=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" roadmap analyze --raw)
+```
+Parse the JSON to extract all phases. Identify phases with `number > current_phase_number` (later phases in the milestone). For each later phase, extract its `goal` and `success_criteria`.
+**For each potential gap identified in Step 9:**
+1. Check if the gap's failed truth or missing item is covered by a later phase's goal or success criteria
+2. **Match criteria:** The gap's concern appears in a later phase's goal text, success criteria text, or the later phase's name clearly suggests it covers this area of work
+3. If a match is found → move the gap to the `deferred` list, recording which phase addresses it and the matching evidence (goal text or success criterion)
+4. If the gap does not match any later phase → keep it as a real `gap`
+**Important:** Be conservative when matching. Only defer a gap when there is clear, specific evidence in a later phase's roadmap section. Vague or tangential matches should NOT cause a gap to be deferred — when in doubt, keep it as a real gap.
+**Deferred items do NOT affect the status determination.** After filtering, recalculate:
+- If the gaps list is now empty and no human verification items exist → `passed`
+- If the gaps list is now empty but human verification items exist → `human_needed`
+- If the gaps list still has items → `gaps_found`
 ## Step 10: Structure Gap Output (If Gaps Found)
+Before writing VERIFICATION.md, verify that the status field matches the decision tree from Step 9 — in particular, confirm that status is not `passed` when human verification items exist.
 Structure gaps in YAML frontmatter for `/gsd-plan-phase --gaps`:
 ```yaml
@@ -371,6 +518,17 @@ gaps:
 - `artifacts`: Files with issues
 - `missing`: Specific things to add/fix
+If Step 9b identified deferred items, add a `deferred` section after `gaps`:
+```yaml
+deferred:  # Items addressed in later phases — not actionable gaps
+  - truth: "Observable truth not yet met"
+    addressed_in: "Phase 5"
+    evidence: "Phase 5 success criteria: 'Implement RuntimeConfigC FFI bindings'"
+```
+Deferred items are informational only — they do not require closure plans.
 **Group related gaps by concern** — if multiple truths fail from the same root cause, note this to help the planner create focused plans.
 </verification_process>
@@ -405,6 +563,10 @@ gaps: # Only if status: gaps_found
         issue: "What's wrong"
     missing:
       - "Specific thing to add/fix"
+deferred: # Only if deferred items exist (Step 9b)
+  - truth: "Observable truth addressed in a later phase"
+    addressed_in: "Phase N"
+    evidence: "Matching goal or success criteria text"
 human_verification: # Only if status: human_needed
   - test: "What to do"
     expected: "What should happen"
@@ -429,6 +591,15 @@ human_verification: # Only if status: human_needed
 **Score:** {N}/{M} truths verified
+### Deferred Items
+Items not yet met but explicitly addressed in later milestone phases.
+Only include this section if deferred items exist (from Step 9b).
+| # | Item | Addressed In | Evidence |
+|---|------|-------------|----------|
+| 1 | {truth} | Phase {N} | {matching goal or success criteria} |
 ### Required Artifacts
 | Artifact | Expected    | Status | Details |
@@ -440,6 +611,16 @@ human_verification: # Only if status: human_needed
 | From | To  | Via | Status | Details |
 | ---- | --- | --- | ------ | ------- |
+### Data-Flow Trace (Level 4)
+| Artifact | Data Variable | Source | Produces Real Data | Status |
+| -------- | ------------- | ------ | ------------------ | ------ |
+### Behavioral Spot-Checks
+| Behavior | Command | Result | Status |
+| -------- | ------- | ------ | ------ |
 ### Requirements Coverage
 | Requirement | Source Plan | Description | Status | Evidence |
@@ -503,7 +684,7 @@ Automated checks passed. Awaiting human verification.
 **DO NOT trust SUMMARY claims.** Verify the component actually renders messages, not a placeholder.
-**DO NOT assume existence = implementation.** Need level 2 (substantive) and level 3 (wired).
+**DO NOT assume existence = implementation.** Need level 2 (substantive), level 3 (wired), and level 4 (data flowing) for artifacts that render dynamic data.
 **DO NOT skip key link verification.** 80% of stubs hide here — pieces exist but aren't connected.
@@ -575,12 +756,16 @@ return <div>No messages</div>  // Always shows "no messages"
 - [ ] If initial: must-haves established (from frontmatter or derived)
 - [ ] All truths verified with status and evidence
 - [ ] All artifacts checked at all three levels (exists, substantive, wired)
+- [ ] Data-flow trace (Level 4) run on wired artifacts that render dynamic data
 - [ ] All key links verified
 - [ ] Requirements coverage assessed (if applicable)
 - [ ] Anti-patterns scanned and categorized
+- [ ] Behavioral spot-checks run on runnable code (or skipped with reason)
 - [ ] Human verification items identified
 - [ ] Overall status determined
+- [ ] Deferred items filtered against later milestone phases (Step 9b)
 - [ ] Gaps structured in YAML frontmatter (if gaps_found)
+- [ ] Deferred items structured in YAML frontmatter (if deferred items exist)
 - [ ] Re-verification metadata included (if previous existed)
 - [ ] VERIFICATION.md created with complete report
 - [ ] Results returned to orchestrator (NOT committed)

package/commands/gsd/gsd-add-backlog.md ADDED Viewed

@@ -0,0 +1,76 @@
+---
+name: gsd-add-backlog
+description: Add an idea to the backlog parking lot (999.x numbering)
+argument-hint: <description>
+permissions:
+   read: true
+   write: true
+   bash: true
+---
+<objective>
+Add a backlog item to the roadmap using 999.x numbering. Backlog items are
+unsequenced ideas that aren't ready for active planning — they live outside
+the normal phase sequence and accumulate context over time.
+</objective>
+<process>
+1. **Read ROADMAP.md** to find existing backlog entries:
+   ```bash
+   cat .planning/ROADMAP.md
+   ```
+2. **Find next backlog number:**
+   ```bash
+   NEXT=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" phase next-decimal 999 --raw)
+   ```
+   If no 999.x phases exist, start at 999.1.
+3. **Create the phase directory:**
+   ```bash
+   SLUG=$(node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" generate-slug "$ARGUMENTS" --raw)
+   mkdir -p ".planning/phases/${NEXT}-${SLUG}"
+   touch ".planning/phases/${NEXT}-${SLUG}/.gitkeep"
+   ```
+4. **Add to ROADMAP.md** under a `## Backlog` section. If the section doesn't exist, create it at the end:
+   ```markdown
+   ## Backlog
+   ### Phase {NEXT}: {description} (BACKLOG)
+   **Goal:** [Captured for future planning]
+   **Requirements:** TBD
+   **Plans:** 0 plans
+   Plans:
+   - [ ] TBD (promote with /gsd-review-backlog when ready)
+   ```
+5. **Commit:**
+   ```bash
+   node "$HOME/.config/opencode/get-shit-done/bin/gsd-tools.cjs" commit "docs: add backlog item ${NEXT} — ${ARGUMENTS}" --files .planning/ROADMAP.md ".planning/phases/${NEXT}-${SLUG}/.gitkeep"
+   ```
+6. **Report:**
+   ```
+   ## 📋 Backlog Item Added
+   Phase {NEXT}: {description}
+   Directory: .planning/phases/{NEXT}-{slug}/
+   This item lives in the backlog parking lot.
+   Use /gsd-discuss-phase {NEXT} to explore it further.
+   Use /gsd-review-backlog to promote items to active milestone.
+   ```
+</process>
+<notes>
+- 999.x numbering keeps backlog items out of the active phase sequence
+- Phase directories are created immediately, so /gsd-discuss-phase and /gsd-plan-phase work on them
+- No `Depends on:` field — backlog items are unsequenced by definition
+- Sparse numbering is fine (999.1, 999.3) — the next number always comes from `next-decimal`
+</notes>

package/commands/gsd/gsd-analyze-dependencies.md ADDED Viewed

@@ -0,0 +1,34 @@
+---
+name: gsd-analyze-dependencies
+description: Analyze phase dependencies and suggest Depends on entries for ROADMAP.md
+permissions:
+   read: true
+   write: true
+   bash: true
+   glob: true
+   grep: true
+   question: true
+---
+<objective>
+Analyze the phase dependency graph for the current milestone. For each phase pair, determine if there is a dependency relationship based on:
+- File overlap (phases that modify the same files must be ordered)
+- Semantic dependencies (a phase that uses an API built by another phase)
+- Data flow (a phase that consumes output from another phase)
+Then suggest `Depends on` updates to ROADMAP.md.
+</objective>
+<execution_context>
+@$HOME/.config/opencode/get-shit-done/workflows/analyze-dependencies.md
+</execution_context>
+<context>
+No arguments required. Requires an active milestone with ROADMAP.md.
+Run this command BEFORE `/gsd-manager` to fill in missing `Depends on` fields and prevent merge conflicts from unordered parallel execution.
+</context>
+<process>
+Execute the analyze-dependencies workflow from @$HOME/.config/opencode/get-shit-done/workflows/analyze-dependencies.md end-to-end.
+Present dependency suggestions clearly and apply confirmed updates to ROADMAP.md.
+</process>

package/commands/gsd/gsd-audit-uat.md ADDED Viewed

@@ -0,0 +1,24 @@
+---
+name: gsd-audit-uat
+description: Cross-phase audit of all outstanding UAT and verification items
+permissions:
+   read: true
+   glob: true
+   grep: true
+   bash: true
+---
+<objective>
+Scan all phases for pending, skipped, blocked, and human_needed UAT items. Cross-reference against the codebase to detect stale documentation. Produce a prioritized human test plan.
+</objective>
+<execution_context>
+@$HOME/.config/opencode/get-shit-done/workflows/audit-uat.md
+</execution_context>
+<context>
+Core planning files are loaded in-workflow via CLI.
+**Scope:**
+glob: .planning/phases/*/*-UAT.md
+glob: .planning/phases/*/*-VERIFICATION.md
+</context>