gsd-pi 2.38.0-dev.add4f78 → 2.38.0-dev.bc2e21e
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/resources/extensions/gsd/prompts/discuss.md +11 -14
- package/dist/resources/extensions/gsd/prompts/execute-task.md +2 -2
- package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
- package/dist/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
- package/dist/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
- package/dist/resources/extensions/gsd/prompts/queue.md +4 -8
- package/dist/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
- package/dist/resources/extensions/gsd/prompts/run-uat.md +25 -10
- package/dist/resources/extensions/gsd/prompts/workflow-start.md +2 -2
- package/package.json +1 -1
- package/src/resources/extensions/gsd/prompts/discuss.md +11 -14
- package/src/resources/extensions/gsd/prompts/execute-task.md +2 -2
- package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
- package/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
- package/src/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
- package/src/resources/extensions/gsd/prompts/queue.md +4 -8
- package/src/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
- package/src/resources/extensions/gsd/prompts/run-uat.md +25 -10
- package/src/resources/extensions/gsd/prompts/workflow-start.md +2 -2
- package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +59 -0
- package/src/resources/extensions/gsd/tests/run-uat.test.ts +11 -3
|
@@ -11,7 +11,7 @@ After the user describes their idea, **do not ask questions yet**. First, prove
|
|
|
11
11
|
1. Summarize what you understood in your own words — concretely, not abstractly.
|
|
12
12
|
2. Give an honest size read: roughly how many milestones, roughly how many slices in the first one. Base this on the actual work involved, not a classification label. A config change might be 1 milestone with 1 slice. A social network might be 5 milestones with 8+ slices each. Use your judgment.
|
|
13
13
|
3. Include scope honesty — a bullet list of the major capabilities you're hearing: "Here's what I'm hearing: [bullet list of major capabilities]."
|
|
14
|
-
4.
|
|
14
|
+
4. Invite correction in one plain sentence: "Here's my read. Correct anything important I missed." — plain text, not `ask_user_questions`.
|
|
15
15
|
|
|
16
16
|
This prevents runaway questioning by forcing comprehension proof before anything else. Do not skip this step. Do not combine it with the first question round.
|
|
17
17
|
|
|
@@ -21,7 +21,7 @@ After reflection is confirmed, decide the approach based on the actual scope —
|
|
|
21
21
|
|
|
22
22
|
**If the work spans multiple milestones:** Before drilling into details, map the full landscape:
|
|
23
23
|
1. Propose a milestone sequence — names, one-line intents, rough dependencies
|
|
24
|
-
2. Present this
|
|
24
|
+
2. Present this as the working milestone sequence. Adjust it if the user objects, sharpens it, or adds constraints; otherwise keep moving.
|
|
25
25
|
3. Only then begin the deep Q&A — and scope the Q&A to the full vision, not just M001
|
|
26
26
|
|
|
27
27
|
**If the work fits in a single milestone:** Proceed directly to questioning.
|
|
@@ -48,7 +48,7 @@ You are a thinking partner, not an interviewer.
|
|
|
48
48
|
|
|
49
49
|
**Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "it needs to handle edge cases" / "good UX"), push for specifics. What does "smart" mean in practice? Which edge cases? What does good UX look like for this specific interaction?
|
|
50
50
|
|
|
51
|
-
**
|
|
51
|
+
**Lead with experience, but ask implementation when it materially matters.** Default questions should target the experience and outcome. But when implementation choices materially change scope, proof, compliance, integration, deployment, or irreversible architecture, ask them directly instead of forcing a fake UX phrasing.
|
|
52
52
|
|
|
53
53
|
**Freeform rule:** When the user selects "Other" or clearly wants to explain something freely, stop using `ask_user_questions` and switch to plain text follow-ups. Let them talk. Resume structured questions when appropriate.
|
|
54
54
|
|
|
@@ -105,16 +105,13 @@ Example flow:
|
|
|
105
105
|
|
|
106
106
|
If they clarify, absorb the correction and re-verify.
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
Only after the depth checklist is fully satisfied and you genuinely understand the work, offer to proceed.
|
|
108
|
+
The depth verification is the required write-gate. Do **not** add another meta "ready to proceed?" checkpoint immediately after it unless there is still material ambiguity.
|
|
111
109
|
|
|
112
|
-
|
|
113
|
-
"Here's what I'm planning to build: [list of capabilities with rough complexity]. Does this match your vision, or did I miss something?"
|
|
110
|
+
## Wrap-up Gate
|
|
114
111
|
|
|
115
|
-
|
|
112
|
+
Once the depth checklist is fully satisfied, move directly into requirements and roadmap preview. Do not insert a separate "are you ready to continue?" gate unless the user explicitly wants to keep brainstorming or you still see material ambiguity.
|
|
116
113
|
|
|
117
|
-
If
|
|
114
|
+
If you need a final scope reflection, fold it into the depth summary or roadmap preview rather than asking for permission twice.
|
|
118
115
|
|
|
119
116
|
## Focused Research
|
|
120
117
|
|
|
@@ -165,9 +162,9 @@ Rules:
|
|
|
165
162
|
|
|
166
163
|
For multi-milestone projects, requirements should span the full vision. Requirements owned by later milestones get provisional ownership. The full requirement set captures the user's complete vision — milestones are the sequencing strategy, not the scope boundary.
|
|
167
164
|
|
|
168
|
-
If the project is new or has no `REQUIREMENTS.md`,
|
|
165
|
+
If the project is new or has no `REQUIREMENTS.md`, surface candidate requirements in chat before writing the roadmap. Ask for correction only on material omissions, wrong ownership, or wrong scope. If the user has already been specific and raises no substantive objection, treat the requirement set as confirmed and continue.
|
|
169
166
|
|
|
170
|
-
**Print the requirements in chat before
|
|
167
|
+
**Print the requirements in chat before writing the roadmap.** Do not say "here are the requirements" and then only write them to a file. The user must see them in the terminal. Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope). After the table, ask: "Confirm, adjust, or add?"
|
|
171
168
|
|
|
172
169
|
## Scope Assessment
|
|
173
170
|
|
|
@@ -179,7 +176,7 @@ Before moving to output, confirm the size estimate from your reflection still ho
|
|
|
179
176
|
|
|
180
177
|
Before writing any files, **print the planned roadmap in chat** so the user can see and approve it. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list.
|
|
181
178
|
|
|
182
|
-
|
|
179
|
+
After printing the roadmap, ask once: "Ready to write, or want to adjust?" — one gate, not two. If the user raises a substantive objection, adjust the roadmap before writing; otherwise proceed.
|
|
183
180
|
|
|
184
181
|
### Naming Convention
|
|
185
182
|
|
|
@@ -236,7 +233,7 @@ If a milestone has no dependencies, omit the frontmatter. The dependency chain f
|
|
|
236
233
|
|
|
237
234
|
#### Phase 3: Sequential readiness gate for remaining milestones
|
|
238
235
|
|
|
239
|
-
For each remaining milestone **one at a time, in sequence**, use `ask_user_questions` to
|
|
236
|
+
For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. Present three options:
|
|
240
237
|
|
|
241
238
|
- **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (reflection → investigation → questioning → depth verification). When the discussion concludes, write a full `CONTEXT.md`. Then move to the gate for the next milestone.
|
|
242
239
|
- **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted.
|
|
@@ -6,7 +6,7 @@ You are executing GSD auto-mode.
|
|
|
6
6
|
|
|
7
7
|
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
|
|
8
8
|
|
|
9
|
-
A researcher explored the codebase and a planner decomposed the work — you are the executor. The task plan below is your authoritative contract.
|
|
9
|
+
A researcher explored the codebase and a planner decomposed the work — you are the executor. The task plan below is your authoritative contract for the slice goal and verification bar, but it is not a substitute for local reality. Verify the referenced files and surrounding code before changing them. Do not do broad re-research or spontaneous re-planning. Small factual corrections, file-path fixes, and local implementation adaptations are part of execution. Escalate to `blocker_discovered: true` only when the slice contract or downstream task graph no longer holds.
|
|
10
10
|
|
|
11
11
|
{{overridesSection}}
|
|
12
12
|
|
|
@@ -27,7 +27,7 @@ A researcher explored the codebase and a planner decomposed the work — you are
|
|
|
27
27
|
Then:
|
|
28
28
|
0. Narrate step transitions, key implementation decisions, and verification outcomes as you work. Keep it terse — one line between tool-call clusters, not between every call — but write complete sentences in user-facing prose, not shorthand notes or scratchpad fragments.
|
|
29
29
|
1. **Load relevant skills before writing code.** Check the `GSD Skill Preferences` block in system context and the `<available_skills>` catalog in your system prompt. For each skill that matches this task's technology stack (e.g., React, Next.js, accessibility, component design), `read` its SKILL.md file now. Skills contain implementation rules and patterns that should guide your code. If no skills match this task, skip this step.
|
|
30
|
-
2. Execute the steps in the inlined task plan
|
|
30
|
+
2. Execute the steps in the inlined task plan, adapting minor local mismatches when the surrounding code differs from the planner's snapshot
|
|
31
31
|
3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature.
|
|
32
32
|
4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail).
|
|
33
33
|
5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply.
|
|
@@ -33,19 +33,16 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
|
|
|
33
33
|
|
|
34
34
|
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
|
35
35
|
|
|
36
|
-
###
|
|
36
|
+
### Round cadence
|
|
37
37
|
|
|
38
|
-
After each round of answers,
|
|
38
|
+
After each round of answers, decide whether you already have enough depth to write a strong context file.
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
**If `{{structuredQuestionsAvailable}}` is `true
|
|
43
|
-
- "
|
|
44
|
-
- "
|
|
45
|
-
|
|
46
|
-
**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text.
|
|
47
|
-
|
|
48
|
-
If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
40
|
+
- If not, investigate any newly-opened unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
|
|
41
|
+
- Use a single wrap-up prompt only when you genuinely believe the depth checklist is satisfied or the user signals they want to stop.
|
|
42
|
+
- **If `{{structuredQuestionsAvailable}}` is `true` and you need that wrap-up prompt:** use `ask_user_questions` with options:
|
|
43
|
+
  - "Write the context file" *(recommended when depth is satisfied)*
|
|
44
|
+
  - "One more pass"
|
|
45
|
+
- **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text only once you believe you are ready to write.
|
|
49
46
|
|
|
50
47
|
---
|
|
51
48
|
|
|
@@ -55,7 +52,7 @@ If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
|
55
52
|
|
|
56
53
|
**Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "good UX"), push for specifics.
|
|
57
54
|
|
|
58
|
-
**
|
|
55
|
+
**Lead with experience, but ask implementation when it materially matters.** Default questions should target the experience and outcome. But when implementation choices materially change scope, proof, compliance, integration, deployment, or irreversible architecture, ask them directly instead of forcing a fake UX phrasing.
|
|
59
56
|
|
|
60
57
|
**Position-first framing.** Have opinions. "I'd lean toward X because Y — does that match your thinking?" is better than "what do you think about X vs Y?"
|
|
61
58
|
|
|
@@ -95,6 +92,8 @@ Before moving to the wrap-up gate, verify you have covered:
|
|
|
95
92
|
|
|
96
93
|
If they clarify, absorb the correction and re-verify.
|
|
97
94
|
|
|
95
|
+
The depth verification is the only required confirmation gate. Do not add a second "ready to proceed?" gate after it.
|
|
96
|
+
|
|
98
97
|
---
|
|
99
98
|
|
|
100
99
|
## Output
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
You are interviewing the user to surface behavioural, UX, and usage grey areas for slice **{{sliceId}}: {{sliceTitle}}** of milestone **{{milestoneId}}**.
|
|
2
2
|
|
|
3
|
-
Your goal is **not** to
|
|
3
|
+
Your goal is **not** to center the discussion on tech stack trivia, naming conventions, or speculative architecture. Your goal is to produce a context file that captures the human decisions: what this slice should feel like, how it should behave, what edge cases matter, where scope begins and ends, and what the user cares about that won't be obvious from the roadmap entry alone. If a technical choice materially changes scope, proof, or integration behavior, ask it directly and capture it.
|
|
4
4
|
|
|
5
5
|
{{inlinedContext}}
|
|
6
6
|
|
|
@@ -27,17 +27,15 @@ Ask **1–3 questions per round** using `ask_user_questions`. Keep each question
|
|
|
27
27
|
|
|
28
28
|
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
|
29
29
|
|
|
30
|
-
###
|
|
30
|
+
### Round cadence
|
|
31
31
|
|
|
32
|
-
After each round of answers,
|
|
32
|
+
After each round of answers, decide whether you already have enough signal to write the slice context cleanly.
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
- "
|
|
38
|
-
- "
|
|
39
|
-
|
|
40
|
-
If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
34
|
+
- If not, investigate any new unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
|
|
35
|
+
- Ask a single wrap-up question only when you genuinely believe the slice is well understood or the user signals they want to stop.
|
|
36
|
+
- When you do ask it, use `ask_user_questions` with these options:
|
|
37
|
+
  - "Write the context file" *(recommended when the slice is well understood)*
|
|
38
|
+
  - "One more pass"
|
|
41
39
|
|
|
42
40
|
---
|
|
43
41
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}},
|
|
1
|
+
Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during execution, without relaxing required verification or artifact rules.
|
|
@@ -36,15 +36,11 @@ Don't go deep — just enough that your next question reflects what's actually t
|
|
|
36
36
|
- How the new work relates to existing milestones — overlap, dependencies, prerequisites
|
|
37
37
|
- If `.gsd/REQUIREMENTS.md` exists: which unmet Active or Deferred requirements this queued work advances
|
|
38
38
|
|
|
39
|
-
**Then use ask_user_questions** to dig into gray areas —
|
|
39
|
+
**Then use `ask_user_questions`** to dig into gray areas — scope boundaries, proof expectations, integration choices, tech preferences when they materially matter, and what's in vs out. Ask 1–3 questions per round.
|
|
40
40
|
|
|
41
41
|
If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during discuss/planning work, but do not let it override the required discuss flow or artifact requirements.
|
|
42
42
|
|
|
43
|
-
**Self-regulate:**
|
|
44
|
-
"I think I have a good picture. Ready to queue this, or are there more things to discuss?"
|
|
45
|
-
with options: "Ready to queue (Recommended)", "I have more to discuss"
|
|
46
|
-
|
|
47
|
-
If the user wants to keep going, keep asking. If they're ready, proceed.
|
|
43
|
+
**Self-regulate:** Do **not** ask a meta "ready to queue?" question after every round. Keep going until you have enough depth to write the context well, then use a single wrap-up prompt if needed. If the user clearly keeps adding detail instead of objecting, treat that as permission to continue.
|
|
48
44
|
|
|
49
45
|
## Existing Milestone Awareness
|
|
50
46
|
|
|
@@ -88,7 +84,7 @@ For EACH milestone you are about to write context for, investigate the codebase
|
|
|
88
84
|
1. **Read the actual code** — for every file or module you reference in "Existing Codebase / Prior Art", read enough to confirm your assumptions about what exists, what it does, and what it doesn't do. Do not guess from memory or training data.
|
|
89
85
|
2. **Check for stale assumptions** — the codebase may have changed since the user's spec was written. Verify: do the APIs you reference still exist? Have modules been refactored? Has upstream merged features that change the landscape?
|
|
90
86
|
3. **Identify phantom capabilities** — for every capability you list as "existing," confirm it actually works as described. Look for: functions that exist but are never called, fields that are set but never read, features that are piped but never connected.
|
|
91
|
-
4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section with "verified against
|
|
87
|
+
4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section, annotated with "verified against current codebase state" — or with a concrete version/commit, but only if you truly have one.
|
|
92
88
|
|
|
93
89
|
### Step 2: Per-Milestone Depth Verification
|
|
94
90
|
|
|
@@ -103,7 +99,7 @@ This triggers the per-milestone write-gate. The question should present:
|
|
|
103
99
|
- Key technical assumptions you verified (or couldn't verify)
|
|
104
100
|
- Any risks or unknowns the investigation surfaced
|
|
105
101
|
|
|
106
|
-
The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined.
|
|
102
|
+
The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined. This is the required write-gate; do not add extra "ready to proceed?" prompts around it once you have enough signal.
|
|
107
103
|
|
|
108
104
|
**If you skip this step, the system will block the CONTEXT.md write and return an error telling you to complete verification first.**
|
|
109
105
|
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
You are executing **multiple tasks in parallel** for this slice. The task graph below shows which tasks are ready for simultaneous execution based on their input/output dependencies.
|
|
10
10
|
|
|
11
|
-
**Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a
|
|
11
|
+
**Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a full `execute-task` prompt and is responsible for its own implementation, verification, task summary, and checkbox updates. The parent batch agent orchestrates and verifies; it records a failure itself only when a dispatched task failed before it could leave its own summary behind.
|
|
12
12
|
|
|
13
13
|
## Task Dependency Graph
|
|
14
14
|
|
|
@@ -24,15 +24,18 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
|
|
|
24
24
|
|
|
25
25
|
1. **Dispatch all ready tasks** using `subagent` in parallel mode. Each subagent prompt is provided below.
|
|
26
26
|
2. **Wait for all subagents** to complete.
|
|
27
|
-
3. **Verify each task's outputs** — check that expected files were created/modified
|
|
28
|
-
4. **
|
|
29
|
-
5. **
|
|
30
|
-
6. **
|
|
27
|
+
3. **Verify each dispatched task's outputs** — check that expected files were created/modified, that verification commands pass where applicable, and that each task wrote its own `T##-SUMMARY.md`.
|
|
28
|
+
4. **Do not rewrite successful task summaries or duplicate checkbox edits.** Treat a subagent-written summary as authoritative for that task.
|
|
29
|
+
5. **If a failed task produced no summary, write a recovery summary for that task** with `blocker_discovered: true`, clear failure details, and leave the task unchecked so replan/retry has an authoritative record.
|
|
30
|
+
6. **Preserve successful sibling tasks exactly as they landed.** Do not roll back good work because another parallel task failed.
|
|
31
|
+
7. **Do NOT create a batch commit.** The surrounding unit lifecycle owns commits; this parent batch agent should not invent a second commit layer.
|
|
32
|
+
8. **Report the batch outcome** — which tasks succeeded, which failed, and any output collisions or dependency surprises.
|
|
31
33
|
|
|
32
34
|
If any subagent fails:
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
35
|
+
- Keep successful task summaries and checkbox updates as-is
|
|
36
|
+
- Write a failure summary only when the failed task did not leave one behind
|
|
37
|
+
- Do not silently discard or overwrite another task's outputs
|
|
38
|
+
- The orchestrator will handle re-dispatch or replanning on the next iteration
|
|
36
39
|
|
|
37
40
|
## Subagent Prompts
|
|
38
41
|
|
|
@@ -18,32 +18,47 @@ If a `GSD Skill Preferences` block is present in system context, use it to decid
|
|
|
18
18
|
|
|
19
19
|
**UAT file:** `{{uatPath}}`
|
|
20
20
|
**Result file to write:** `{{uatResultPath}}`
|
|
21
|
+
**Detected UAT mode:** `{{uatType}}`
|
|
21
22
|
|
|
22
|
-
You are the
|
|
23
|
+
You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply as this mode truthfully allows. Do not collapse live or subjective checks into cheap artifact checks just to get a PASS.
|
|
24
|
+
|
|
25
|
+
### Automation rules by mode
|
|
26
|
+
|
|
27
|
+
- `artifact-driven` — verify with shell commands, scripts, file reads, and artifact structure checks.
|
|
28
|
+
- `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
|
|
29
|
+
- `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
|
|
30
|
+
- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN` and use an overall verdict of `PARTIAL` unless every required check was objective and passed.
|
|
31
|
+
|
|
32
|
+
### Evidence tools
|
|
33
|
+
|
|
34
|
+
Choose the lightest tool that proves the check honestly:
|
|
23
35
|
|
|
24
36
|
- Run shell commands with `bash`
|
|
25
37
|
- Run `grep` / `rg` checks against files
|
|
26
|
-
- Run `node` / script invocations
|
|
38
|
+
- Run `node` / other script invocations
|
|
27
39
|
- Read files and verify their contents
|
|
28
40
|
- Check that expected artifacts exist and have correct structure
|
|
41
|
+
- For live/runtime/UI checks, exercise the real flow in the browser when applicable and inspect runtime/network/console state
|
|
42
|
+
- When a check cannot be honestly automated, gather the best objective evidence you can and mark it `NEEDS-HUMAN`
|
|
29
43
|
|
|
30
44
|
For each check, record:
|
|
31
45
|
- The check description (from the UAT file)
|
|
46
|
+
- The evidence mode used: `artifact`, `runtime`, or `human-follow-up`
|
|
32
47
|
- The command or action taken
|
|
33
48
|
- The actual result observed
|
|
34
|
-
- PASS or
|
|
49
|
+
- `PASS`, `FAIL`, or `NEEDS-HUMAN`
|
|
35
50
|
|
|
36
51
|
After running all checks, compute the **overall verdict**:
|
|
37
|
-
- `PASS` — all checks passed
|
|
52
|
+
- `PASS` — all required checks passed and no human-only checks remain
|
|
38
53
|
- `FAIL` — one or more checks failed
|
|
39
|
-
- `PARTIAL` — some checks passed,
|
|
54
|
+
- `PARTIAL` — some checks passed and none failed, but one or more checks were skipped, inconclusive, or still require human judgment
|
|
40
55
|
|
|
41
56
|
Write `{{uatResultPath}}` with:
|
|
42
57
|
|
|
43
58
|
```markdown
|
|
44
59
|
---
|
|
45
60
|
sliceId: {{sliceId}}
|
|
46
|
-
uatType:
|
|
61
|
+
uatType: {{uatType}}
|
|
47
62
|
verdict: PASS | FAIL | PARTIAL
|
|
48
63
|
date: <ISO 8601 timestamp>
|
|
49
64
|
---
|
|
@@ -52,9 +67,9 @@ date: <ISO 8601 timestamp>
|
|
|
52
67
|
|
|
53
68
|
## Checks
|
|
54
69
|
|
|
55
|
-
| Check | Result | Notes |
|
|
56
|
-
|
|
57
|
-
| <check description> | PASS / FAIL | <observed output or reason> |
|
|
70
|
+
| Check | Mode | Result | Notes |
|
|
71
|
+
|-------|------|--------|-------|
|
|
72
|
+
| <check description> | artifact / runtime / human-follow-up | PASS / FAIL / NEEDS-HUMAN | <observed output, evidence, or reason> |
|
|
58
73
|
|
|
59
74
|
## Overall Verdict
|
|
60
75
|
|
|
@@ -62,7 +77,7 @@ date: <ISO 8601 timestamp>
|
|
|
62
77
|
|
|
63
78
|
## Notes
|
|
64
79
|
|
|
65
|
-
<any additional context, errors encountered, or follow-up
|
|
80
|
+
<any additional context, errors encountered, screenshots/logs gathered, or manual follow-up still required>
|
|
66
81
|
```
|
|
67
82
|
|
|
68
83
|
---
|
|
@@ -14,7 +14,7 @@ You are executing a **{{templateName}}** workflow (template: `{{templateId}}`).
|
|
|
14
14
|
|
|
15
15
|
## Workflow Definition
|
|
16
16
|
|
|
17
|
-
Follow the workflow defined below. Execute each phase in order, completing one before moving to the next.
|
|
17
|
+
Follow the workflow defined below. Execute each phase in order, completing one before moving to the next. For low and medium complexity workflows, keep moving by default — pause only at true decision gates (user must choose between materially different directions, outward-facing actions need approval, or the workflow explicitly requires a human checkpoint). For high complexity workflows, confirm at phase transitions unless the workflow explicitly marks a gate as skip-safe.
|
|
18
18
|
|
|
19
19
|
{{workflowContent}}
|
|
20
20
|
|
|
@@ -24,5 +24,5 @@ Follow the workflow defined below. Execute each phase in order, completing one b
|
|
|
24
24
|
2. **Artifact discipline.** If an artifact directory is specified, write all planning/summary documents there.
|
|
25
25
|
3. **Atomic commits.** Commit working code after each meaningful change. Use conventional commit format: `<type>(<scope>): <description>`.
|
|
26
26
|
4. **Verify before shipping.** Run the project's test suite and build before marking the workflow complete.
|
|
27
|
-
5. **
|
|
27
|
+
5. **Decision gates, not ceremony.** After each phase, summarize what changed. For low/medium complexity, ask for confirmation only when the next phase depends on a real user choice or external approval. For high complexity, confirm before proceeding to each new phase unless the workflow explicitly marks that gate as skip-safe.
|
|
28
28
|
6. **Stay focused.** This is a {{complexity}}-complexity workflow. Match your ceremony level to the task — don't over-engineer or under-deliver.
|
package/package.json
CHANGED
|
@@ -11,7 +11,7 @@ After the user describes their idea, **do not ask questions yet**. First, prove
|
|
|
11
11
|
1. Summarize what you understood in your own words — concretely, not abstractly.
|
|
12
12
|
2. Give an honest size read: roughly how many milestones, roughly how many slices in the first one. Base this on the actual work involved, not a classification label. A config change might be 1 milestone with 1 slice. A social network might be 5 milestones with 8+ slices each. Use your judgment.
|
|
13
13
|
3. Include scope honesty — a bullet list of the major capabilities you're hearing: "Here's what I'm hearing: [bullet list of major capabilities]."
|
|
14
|
-
4.
|
|
14
|
+
4. Invite correction in one plain sentence: "Here's my read. Correct anything important I missed." — plain text, not `ask_user_questions`.
|
|
15
15
|
|
|
16
16
|
This prevents runaway questioning by forcing comprehension proof before anything else. Do not skip this step. Do not combine it with the first question round.
|
|
17
17
|
|
|
@@ -21,7 +21,7 @@ After reflection is confirmed, decide the approach based on the actual scope —
|
|
|
21
21
|
|
|
22
22
|
**If the work spans multiple milestones:** Before drilling into details, map the full landscape:
|
|
23
23
|
1. Propose a milestone sequence — names, one-line intents, rough dependencies
|
|
24
|
-
2. Present this
|
|
24
|
+
2. Present this as the working milestone sequence. Adjust it if the user objects, sharpens it, or adds constraints; otherwise keep moving.
|
|
25
25
|
3. Only then begin the deep Q&A — and scope the Q&A to the full vision, not just M001
|
|
26
26
|
|
|
27
27
|
**If the work fits in a single milestone:** Proceed directly to questioning.
|
|
@@ -48,7 +48,7 @@ You are a thinking partner, not an interviewer.
|
|
|
48
48
|
|
|
49
49
|
**Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "it needs to handle edge cases" / "good UX"), push for specifics. What does "smart" mean in practice? Which edge cases? What does good UX look like for this specific interaction?
|
|
50
50
|
|
|
51
|
-
**
|
|
51
|
+
**Lead with experience, but ask implementation when it materially matters.** Default questions should target the experience and outcome. But when implementation choices materially change scope, proof, compliance, integration, deployment, or irreversible architecture, ask them directly instead of forcing a fake UX phrasing.
|
|
52
52
|
|
|
53
53
|
**Freeform rule:** When the user selects "Other" or clearly wants to explain something freely, stop using `ask_user_questions` and switch to plain text follow-ups. Let them talk. Resume structured questions when appropriate.
|
|
54
54
|
|
|
@@ -105,16 +105,13 @@ Example flow:
|
|
|
105
105
|
|
|
106
106
|
If they clarify, absorb the correction and re-verify.
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
Only after the depth checklist is fully satisfied and you genuinely understand the work, offer to proceed.
|
|
108
|
+
The depth verification is the required write-gate. Do **not** add another meta "ready to proceed?" checkpoint immediately after it unless there is still material ambiguity.
|
|
111
109
|
|
|
112
|
-
|
|
113
|
-
"Here's what I'm planning to build: [list of capabilities with rough complexity]. Does this match your vision, or did I miss something?"
|
|
110
|
+
## Wrap-up Gate
|
|
114
111
|
|
|
115
|
-
|
|
112
|
+
Once the depth checklist is fully satisfied, move directly into requirements and roadmap preview. Do not insert a separate "are you ready to continue?" gate unless the user explicitly wants to keep brainstorming or you still see material ambiguity.
|
|
116
113
|
|
|
117
|
-
If
|
|
114
|
+
If you need a final scope reflection, fold it into the depth summary or roadmap preview rather than asking for permission twice.
|
|
118
115
|
|
|
119
116
|
## Focused Research
|
|
120
117
|
|
|
@@ -165,9 +162,9 @@ Rules:
|
|
|
165
162
|
|
|
166
163
|
For multi-milestone projects, requirements should span the full vision. Requirements owned by later milestones get provisional ownership. The full requirement set captures the user's complete vision — milestones are the sequencing strategy, not the scope boundary.
|
|
167
164
|
|
|
168
|
-
If the project is new or has no `REQUIREMENTS.md`,
|
|
165
|
+
If the project is new or has no `REQUIREMENTS.md`, surface candidate requirements in chat before writing the roadmap. Ask for correction only on material omissions, wrong ownership, or wrong scope. If the user has already been specific and raises no substantive objection, treat the requirement set as confirmed and continue.
|
|
169
166
|
|
|
170
|
-
**Print the requirements in chat before
|
|
167
|
+
**Print the requirements in chat before writing the roadmap.** Do not say "here are the requirements" and then only write them to a file. The user must see them in the terminal. Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope). After the table, ask: "Confirm, adjust, or add?"
|
|
171
168
|
|
|
172
169
|
## Scope Assessment
|
|
173
170
|
|
|
@@ -179,7 +176,7 @@ Before moving to output, confirm the size estimate from your reflection still ho
|
|
|
179
176
|
|
|
180
177
|
Before writing any files, **print the planned roadmap in chat** so the user can see and approve it. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list.
|
|
181
178
|
|
|
182
|
-
|
|
179
|
+
Present the roadmap and ask: "Ready to write, or want to adjust?" — one gate, not two. If the user raises a substantive objection, adjust the roadmap before writing; otherwise proceed to writing.
|
|
183
180
|
|
|
184
181
|
### Naming Convention
|
|
185
182
|
|
|
@@ -236,7 +233,7 @@ If a milestone has no dependencies, omit the frontmatter. The dependency chain f
|
|
|
236
233
|
|
|
237
234
|
#### Phase 3: Sequential readiness gate for remaining milestones
|
|
238
235
|
|
|
239
|
-
For each remaining milestone **one at a time, in sequence**, use `ask_user_questions` to
|
|
236
|
+
For each remaining milestone **one at a time, in sequence**, decide the most likely readiness mode from the evidence you already have, then use `ask_user_questions` to let the user correct that recommendation. Present three options:
|
|
240
237
|
|
|
241
238
|
- **"Discuss now"** — The user wants to conduct a focused discussion for this milestone in the current session, while the context from the broader discussion is still fresh. Proceed with a focused discussion for this milestone (reflection → investigation → questioning → depth verification). When the discussion concludes, write a full `CONTEXT.md`. Then move to the gate for the next milestone.
|
|
242
239
|
- **"Write draft for later"** — This milestone has seed material from the current conversation but needs its own dedicated discussion in a future session. Write a `CONTEXT-DRAFT.md` capturing the seed material (what was discussed, key ideas, provisional scope, open questions). Mark it clearly as a draft, not a finalized context. **What happens downstream:** When auto-mode reaches this milestone, it pauses and notifies the user: "M00x has draft context — needs discussion. Run /gsd." The `/gsd` wizard shows a "Discuss from draft" option that seeds the new discussion with this draft, so nothing from the current conversation is lost. After the dedicated discussion produces a full CONTEXT.md, the draft file is automatically deleted.
|
|
@@ -6,7 +6,7 @@ You are executing GSD auto-mode.
|
|
|
6
6
|
|
|
7
7
|
Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
|
|
8
8
|
|
|
9
|
-
A researcher explored the codebase and a planner decomposed the work — you are the executor. The task plan below is your authoritative contract.
|
|
9
|
+
A researcher explored the codebase and a planner decomposed the work — you are the executor. The task plan below is your authoritative contract for the slice goal and verification bar, but it is not a substitute for local reality. Verify the referenced files and surrounding code before changing them. Do not do broad re-research or spontaneous re-planning. Small factual corrections, file-path fixes, and local implementation adaptations are part of execution. Escalate to `blocker_discovered: true` only when the slice contract or downstream task graph no longer holds.
|
|
10
10
|
|
|
11
11
|
{{overridesSection}}
|
|
12
12
|
|
|
@@ -27,7 +27,7 @@ A researcher explored the codebase and a planner decomposed the work — you are
|
|
|
27
27
|
Then:
|
|
28
28
|
0. Narrate step transitions, key implementation decisions, and verification outcomes as you work. Keep it terse — one line between tool-call clusters, not between every call — but write complete sentences in user-facing prose, not shorthand notes or scratchpad fragments.
|
|
29
29
|
1. **Load relevant skills before writing code.** Check the `GSD Skill Preferences` block in system context and the `<available_skills>` catalog in your system prompt. For each skill that matches this task's technology stack (e.g., React, Next.js, accessibility, component design), `read` its SKILL.md file now. Skills contain implementation rules and patterns that should guide your code. If no skills match this task, skip this step.
|
|
30
|
-
2. Execute the steps in the inlined task plan
|
|
30
|
+
2. Execute the steps in the inlined task plan, adapting minor local mismatches when the surrounding code differs from the planner's snapshot
|
|
31
31
|
3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature.
|
|
32
32
|
4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail).
|
|
33
33
|
5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply.
|
|
@@ -33,19 +33,16 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
|
|
|
33
33
|
|
|
34
34
|
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
|
35
35
|
|
|
36
|
-
###
|
|
36
|
+
### Round cadence
|
|
37
37
|
|
|
38
|
-
After each round of answers,
|
|
38
|
+
After each round of answers, decide whether you already have enough depth to write a strong context file.
|
|
39
39
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
**If `{{structuredQuestionsAvailable}}` is `true
|
|
43
|
-
- "
|
|
44
|
-
- "
|
|
45
|
-
|
|
46
|
-
**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text.
|
|
47
|
-
|
|
48
|
-
If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
40
|
+
- If not, investigate any newly-opened unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
|
|
41
|
+
- Use a single wrap-up prompt only when you genuinely believe the depth checklist is satisfied or the user signals they want to stop.
|
|
42
|
+
- **If `{{structuredQuestionsAvailable}}` is `true` and you need that wrap-up prompt:** use `ask_user_questions` with options:
|
|
43
|
+
- "Write the context file" *(recommended when depth is satisfied)*
|
|
44
|
+
- "One more pass"
|
|
45
|
+
- **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text only once you believe you are ready to write.
|
|
49
46
|
|
|
50
47
|
---
|
|
51
48
|
|
|
@@ -55,7 +52,7 @@ If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
|
55
52
|
|
|
56
53
|
**Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "good UX"), push for specifics.
|
|
57
54
|
|
|
58
|
-
**
|
|
55
|
+
**Lead with experience, but ask implementation when it materially matters.** Default questions should target the experience and outcome. But when implementation choices materially change scope, proof, compliance, integration, deployment, or irreversible architecture, ask them directly instead of forcing a fake UX phrasing.
|
|
59
56
|
|
|
60
57
|
**Position-first framing.** Have opinions. "I'd lean toward X because Y — does that match your thinking?" is better than "what do you think about X vs Y?"
|
|
61
58
|
|
|
@@ -95,6 +92,8 @@ Before moving to the wrap-up gate, verify you have covered:
|
|
|
95
92
|
|
|
96
93
|
If they clarify, absorb the correction and re-verify.
|
|
97
94
|
|
|
95
|
+
The depth verification is the only required confirmation gate. Do not add a second "ready to proceed?" gate after it.
|
|
96
|
+
|
|
98
97
|
---
|
|
99
98
|
|
|
100
99
|
## Output
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
You are interviewing the user to surface behavioural, UX, and usage grey areas for slice **{{sliceId}}: {{sliceTitle}}** of milestone **{{milestoneId}}**.
|
|
2
2
|
|
|
3
|
-
Your goal is **not** to
|
|
3
|
+
Your goal is **not** to center the discussion on tech stack trivia, naming conventions, or speculative architecture. Your goal is to produce a context file that captures the human decisions: what this slice should feel like, how it should behave, what edge cases matter, where scope begins and ends, and what the user cares about that won't be obvious from the roadmap entry alone. If a technical choice materially changes scope, proof, or integration behavior, ask about it directly and capture the answer.
|
|
4
4
|
|
|
5
5
|
{{inlinedContext}}
|
|
6
6
|
|
|
@@ -27,17 +27,15 @@ Ask **1–3 questions per round** using `ask_user_questions`. Keep each question
|
|
|
27
27
|
|
|
28
28
|
After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
|
|
29
29
|
|
|
30
|
-
###
|
|
30
|
+
### Round cadence
|
|
31
31
|
|
|
32
|
-
After each round of answers,
|
|
32
|
+
After each round of answers, decide whether you already have enough signal to write the slice context cleanly.
|
|
33
33
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
- "
|
|
38
|
-
- "
|
|
39
|
-
|
|
40
|
-
If the user wants to keep going, keep asking. Stop when they say wrap up.
|
|
34
|
+
- If not, investigate any new unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
|
|
35
|
+
- Ask a single wrap-up question only when you genuinely believe the slice is well understood or the user signals they want to stop.
|
|
36
|
+
- When you do ask it, use `ask_user_questions` with:
|
|
37
|
+
- "Write the context file" *(recommended when the slice is well understood)*
|
|
38
|
+
- "One more pass"
|
|
41
39
|
|
|
42
40
|
---
|
|
43
41
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}},
|
|
1
|
+
Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during execution, without relaxing required verification or artifact rules.
|
|
@@ -36,15 +36,11 @@ Don't go deep — just enough that your next question reflects what's actually t
|
|
|
36
36
|
- How the new work relates to existing milestones — overlap, dependencies, prerequisites
|
|
37
37
|
- If `.gsd/REQUIREMENTS.md` exists: which unmet Active or Deferred requirements this queued work advances
|
|
38
38
|
|
|
39
|
-
**Then use ask_user_questions** to dig into gray areas —
|
|
39
|
+
**Then use ask_user_questions** to dig into gray areas — scope boundaries, proof expectations, integration choices, tech preferences when they materially matter, and what's in vs out. 1-3 questions per round.
|
|
40
40
|
|
|
41
41
|
If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during discuss/planning work, but do not let it override the required discuss flow or artifact requirements.
|
|
42
42
|
|
|
43
|
-
**Self-regulate:**
|
|
44
|
-
"I think I have a good picture. Ready to queue this, or are there more things to discuss?"
|
|
45
|
-
with options: "Ready to queue (Recommended)", "I have more to discuss"
|
|
46
|
-
|
|
47
|
-
If the user wants to keep going, keep asking. If they're ready, proceed.
|
|
43
|
+
**Self-regulate:** Do **not** ask a meta "ready to queue?" question after every round. Keep going until you have enough depth to write the context well, then use a single wrap-up prompt if needed. If the user clearly keeps adding detail instead of objecting, treat that as permission to continue.
|
|
48
44
|
|
|
49
45
|
## Existing Milestone Awareness
|
|
50
46
|
|
|
@@ -88,7 +84,7 @@ For EACH milestone you are about to write context for, investigate the codebase
|
|
|
88
84
|
1. **Read the actual code** — for every file or module you reference in "Existing Codebase / Prior Art", read enough to confirm your assumptions about what exists, what it does, and what it doesn't do. Do not guess from memory or training data.
|
|
89
85
|
2. **Check for stale assumptions** — the codebase may have changed since the user's spec was written. Verify: do the APIs you reference still exist? Have modules been refactored? Has upstream merged features that change the landscape?
|
|
90
86
|
3. **Identify phantom capabilities** — for every capability you list as "existing," confirm it actually works as described. Look for: functions that exist but are never called, fields that are set but never read, features that are piped but never connected.
|
|
91
|
-
4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section with "verified against
|
|
87
|
+
4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section with annotations like "verified against current codebase state". Cite a concrete version or commit only if you truly have one.
|
|
92
88
|
|
|
93
89
|
### Step 2: Per-Milestone Depth Verification
|
|
94
90
|
|
|
@@ -103,7 +99,7 @@ This triggers the per-milestone write-gate. The question should present:
|
|
|
103
99
|
- Key technical assumptions you verified (or couldn't verify)
|
|
104
100
|
- Any risks or unknowns the investigation surfaced
|
|
105
101
|
|
|
106
|
-
The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined.
|
|
102
|
+
The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined. This is the required write-gate; do not add extra "ready to proceed?" prompts around it once you have enough signal.
|
|
107
103
|
|
|
108
104
|
**If you skip this step, the system will block the CONTEXT.md write and return an error telling you to complete verification first.**
|
|
109
105
|
|
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
You are executing **multiple tasks in parallel** for this slice. The task graph below shows which tasks are ready for simultaneous execution based on their input/output dependencies.
|
|
10
10
|
|
|
11
|
-
**Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a
|
|
11
|
+
**Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a full `execute-task` prompt and is responsible for its own implementation, verification, task summary, and checkbox updates. The parent batch agent orchestrates and verifies; it writes a failure record itself only when a dispatched task failed before it could leave its own summary behind.
|
|
12
12
|
|
|
13
13
|
## Task Dependency Graph
|
|
14
14
|
|
|
@@ -24,15 +24,18 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
|
|
|
24
24
|
|
|
25
25
|
1. **Dispatch all ready tasks** using `subagent` in parallel mode. Each subagent prompt is provided below.
|
|
26
26
|
2. **Wait for all subagents** to complete.
|
|
27
|
-
3. **Verify each task's outputs** — check that expected files were created/modified
|
|
28
|
-
4. **
|
|
29
|
-
5. **
|
|
30
|
-
6. **
|
|
27
|
+
3. **Verify each dispatched task's outputs** — check that expected files were created/modified, that verification commands pass where applicable, and that each task wrote its own `T##-SUMMARY.md`.
|
|
28
|
+
4. **Do not rewrite successful task summaries or duplicate checkbox edits.** Treat a subagent-written summary as authoritative for that task.
|
|
29
|
+
5. **If a failed task produced no summary, write a recovery summary for that task** with `blocker_discovered: true` and clear failure details, and leave the task unchecked so replan/retry has an authoritative record.
|
|
30
|
+
6. **Preserve successful sibling tasks exactly as they landed.** Do not roll back good work because another parallel task failed.
|
|
31
|
+
7. **Do NOT create a batch commit.** The surrounding unit lifecycle owns commits; this parent batch agent should not invent a second commit layer.
|
|
32
|
+
8. **Report the batch outcome** — which tasks succeeded, which failed, and any output collisions or dependency surprises.
|
|
31
33
|
|
|
32
34
|
If any subagent fails:
|
|
33
|
-
-
|
|
34
|
-
-
|
|
35
|
-
-
|
|
35
|
+
- Keep successful task summaries and checkbox updates as-is
|
|
36
|
+
- Write a failure summary only when the failed task did not leave one behind
|
|
37
|
+
- Do not silently discard or overwrite another task's outputs
|
|
38
|
+
- The orchestrator will handle re-dispatch or replanning on the next iteration
|
|
36
39
|
|
|
37
40
|
## Subagent Prompts
|
|
38
41
|
|
|
@@ -18,32 +18,47 @@ If a `GSD Skill Preferences` block is present in system context, use it to decid
|
|
|
18
18
|
|
|
19
19
|
**UAT file:** `{{uatPath}}`
|
|
20
20
|
**Result file to write:** `{{uatResultPath}}`
|
|
21
|
+
**Detected UAT mode:** `{{uatType}}`
|
|
21
22
|
|
|
22
|
-
You are the
|
|
23
|
+
You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply as this mode truthfully allows. Do not collapse live or subjective checks into cheap artifact checks just to get a PASS.
|
|
24
|
+
|
|
25
|
+
### Automation rules by mode
|
|
26
|
+
|
|
27
|
+
- `artifact-driven` — verify with shell commands, scripts, file reads, and artifact structure checks.
|
|
28
|
+
- `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
|
|
29
|
+
- `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
|
|
30
|
+
- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN` and use an overall verdict of `PARTIAL` unless every required check was objective and passed.
|
|
31
|
+
|
|
32
|
+
### Evidence tools
|
|
33
|
+
|
|
34
|
+
Choose the lightest tool that proves the check honestly:
|
|
23
35
|
|
|
24
36
|
- Run shell commands with `bash`
|
|
25
37
|
- Run `grep` / `rg` checks against files
|
|
26
|
-
- Run `node` / script invocations
|
|
38
|
+
- Run `node` / other script invocations
|
|
27
39
|
- Read files and verify their contents
|
|
28
40
|
- Check that expected artifacts exist and have correct structure
|
|
41
|
+
- For live/runtime/UI checks, exercise the real flow in the browser when applicable and inspect runtime/network/console state
|
|
42
|
+
- When a check cannot be honestly automated, gather the best objective evidence you can and mark it `NEEDS-HUMAN`
|
|
29
43
|
|
|
30
44
|
For each check, record:
|
|
31
45
|
- The check description (from the UAT file)
|
|
46
|
+
- The evidence mode used: `artifact`, `runtime`, or `human-follow-up`
|
|
32
47
|
- The command or action taken
|
|
33
48
|
- The actual result observed
|
|
34
|
-
- PASS or
|
|
49
|
+
- `PASS`, `FAIL`, or `NEEDS-HUMAN`
|
|
35
50
|
|
|
36
51
|
After running all checks, compute the **overall verdict**:
|
|
37
|
-
- `PASS` — all checks passed
|
|
52
|
+
- `PASS` — all required checks passed and no human-only checks remain
|
|
38
53
|
- `FAIL` — one or more checks failed
|
|
39
|
-
- `PARTIAL` — some checks passed,
|
|
54
|
+
- `PARTIAL` — some checks passed, but one or more checks were skipped, inconclusive, or still require human judgment
|
|
40
55
|
|
|
41
56
|
Write `{{uatResultPath}}` with:
|
|
42
57
|
|
|
43
58
|
```markdown
|
|
44
59
|
---
|
|
45
60
|
sliceId: {{sliceId}}
|
|
46
|
-
uatType:
|
|
61
|
+
uatType: {{uatType}}
|
|
47
62
|
verdict: PASS | FAIL | PARTIAL
|
|
48
63
|
date: <ISO 8601 timestamp>
|
|
49
64
|
---
|
|
@@ -52,9 +67,9 @@ date: <ISO 8601 timestamp>
|
|
|
52
67
|
|
|
53
68
|
## Checks
|
|
54
69
|
|
|
55
|
-
| Check | Result | Notes |
|
|
56
|
-
|
|
57
|
-
| <check description> | PASS / FAIL | <observed output or reason> |
|
|
70
|
+
| Check | Mode | Result | Notes |
|
|
71
|
+
|-------|------|--------|-------|
|
|
72
|
+
| <check description> | artifact / runtime / human-follow-up | PASS / FAIL / NEEDS-HUMAN | <observed output, evidence, or reason> |
|
|
58
73
|
|
|
59
74
|
## Overall Verdict
|
|
60
75
|
|
|
@@ -62,7 +77,7 @@ date: <ISO 8601 timestamp>
|
|
|
62
77
|
|
|
63
78
|
## Notes
|
|
64
79
|
|
|
65
|
-
<any additional context, errors encountered, or follow-up
|
|
80
|
+
<any additional context, errors encountered, screenshots/logs gathered, or manual follow-up still required>
|
|
66
81
|
```
|
|
67
82
|
|
|
68
83
|
---
|
|
@@ -14,7 +14,7 @@ You are executing a **{{templateName}}** workflow (template: `{{templateId}}`).
|
|
|
14
14
|
|
|
15
15
|
## Workflow Definition
|
|
16
16
|
|
|
17
|
-
Follow the workflow defined below. Execute each phase in order, completing one before moving to the next.
|
|
17
|
+
Follow the workflow defined below. Execute each phase in order, completing one before moving to the next. For low and medium complexity workflows, keep moving by default — pause only at true decision gates (user must choose between materially different directions, outward-facing actions need approval, or the workflow explicitly requires a human checkpoint). For high complexity workflows, confirm at phase transitions unless the workflow explicitly marks a gate as skip-safe.
|
|
18
18
|
|
|
19
19
|
{{workflowContent}}
|
|
20
20
|
|
|
@@ -24,5 +24,5 @@ Follow the workflow defined below. Execute each phase in order, completing one b
|
|
|
24
24
|
2. **Artifact discipline.** If an artifact directory is specified, write all planning/summary documents there.
|
|
25
25
|
3. **Atomic commits.** Commit working code after each meaningful change. Use conventional commit format: `<type>(<scope>): <description>`.
|
|
26
26
|
4. **Verify before shipping.** Run the project's test suite and build before marking the workflow complete.
|
|
27
|
-
5. **
|
|
27
|
+
5. **Decision gates, not ceremony.** After each phase, summarize what changed. For low/medium complexity, ask for confirmation only when the next phase depends on a real user choice or external approval. For high complexity, confirm before proceeding to each new phase.
|
|
28
28
|
6. **Stay focused.** This is a {{complexity}}-complexity workflow. Match your ceremony level to the task — don't over-engineer or under-deliver.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import test from "node:test";
|
|
2
|
+
import assert from "node:assert/strict";
|
|
3
|
+
import { readFileSync } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
|
|
6
|
+
const promptsDir = join(process.cwd(), "src/resources/extensions/gsd/prompts");
|
|
7
|
+
|
|
8
|
+
function readPrompt(name: string): string {
|
|
9
|
+
return readFileSync(join(promptsDir, `${name}.md`), "utf-8");
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
test("reactive-execute prompt keeps task summaries with subagents and avoids batch commits", () => {
|
|
13
|
+
const prompt = readPrompt("reactive-execute");
|
|
14
|
+
assert.match(prompt, /subagent-written summary as authoritative/i);
|
|
15
|
+
assert.match(prompt, /Do NOT create a batch commit/i);
|
|
16
|
+
assert.doesNotMatch(prompt, /\*\*Write task summaries\*\*/i);
|
|
17
|
+
assert.doesNotMatch(prompt, /\*\*Commit\*\* all changes/i);
|
|
18
|
+
});
|
|
19
|
+
|
|
20
|
+
test("run-uat prompt branches on dynamic UAT mode and supports runtime evidence", () => {
|
|
21
|
+
const prompt = readPrompt("run-uat");
|
|
22
|
+
assert.match(prompt, /\*\*Detected UAT mode:\*\*\s*`\{\{uatType\}\}`/);
|
|
23
|
+
assert.match(prompt, /uatType:\s*\{\{uatType\}\}/);
|
|
24
|
+
assert.match(prompt, /live-runtime/);
|
|
25
|
+
assert.match(prompt, /browser\/runtime\/network/i);
|
|
26
|
+
assert.match(prompt, /NEEDS-HUMAN/);
|
|
27
|
+
assert.doesNotMatch(prompt, /uatType:\s*artifact-driven/);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
test("workflow-start prompt defaults to autonomy instead of per-phase confirmation", () => {
|
|
31
|
+
const prompt = readPrompt("workflow-start");
|
|
32
|
+
assert.match(prompt, /Keep moving by default/i);
|
|
33
|
+
assert.match(prompt, /Decision gates, not ceremony/i);
|
|
34
|
+
assert.doesNotMatch(prompt, /confirm with the user before proceeding/i);
|
|
35
|
+
assert.doesNotMatch(prompt, /Gate between phases/i);
|
|
36
|
+
});
|
|
37
|
+
|
|
38
|
+
test("discuss prompt allows implementation questions when they materially matter", () => {
|
|
39
|
+
const prompt = readPrompt("discuss");
|
|
40
|
+
assert.match(prompt, /Lead with experience, but ask implementation when it materially matters/i);
|
|
41
|
+
assert.match(prompt, /one gate, not two/i);
|
|
42
|
+
assert.doesNotMatch(prompt, /Questions must be about the experience, not the implementation/i);
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
test("guided discussion prompts avoid wrap-up prompts after every round", () => {
|
|
46
|
+
const milestonePrompt = readPrompt("guided-discuss-milestone");
|
|
47
|
+
const slicePrompt = readPrompt("guided-discuss-slice");
|
|
48
|
+
assert.match(milestonePrompt, /Do \*\*not\*\* ask a meta "ready to wrap up\?" question after every round/i);
|
|
49
|
+
assert.match(slicePrompt, /Do \*\*not\*\* ask a meta "ready to wrap up\?" question after every round/i);
|
|
50
|
+
assert.doesNotMatch(milestonePrompt, /I think I have a solid picture of this milestone\. Ready to wrap up/i);
|
|
51
|
+
assert.doesNotMatch(slicePrompt, /I think I have a solid picture of this slice\. Ready to wrap up/i);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
test("guided-resume-task prompt preserves recovery state until work is superseded", () => {
|
|
55
|
+
const prompt = readPrompt("guided-resume-task");
|
|
56
|
+
assert.match(prompt, /Do \*\*not\*\* delete the continue file immediately/i);
|
|
57
|
+
assert.match(prompt, /successfully completed or you have written a newer summary\/continue artifact/i);
|
|
58
|
+
assert.doesNotMatch(prompt, /Delete the continue file after reading it/i);
|
|
59
|
+
});
|
|
@@ -210,7 +210,7 @@ async function main(): Promise<void> {
|
|
|
210
210
|
const sliceId = 'S01';
|
|
211
211
|
const uatPath = '.gsd/milestones/M001/slices/S01/S01-UAT.md';
|
|
212
212
|
const uatResultPath = '.gsd/milestones/M001/slices/S01/S01-UAT-RESULT.md';
|
|
213
|
-
const uatType = '
|
|
213
|
+
const uatType = 'live-runtime';
|
|
214
214
|
const inlinedContext = '<!-- no context -->';
|
|
215
215
|
|
|
216
216
|
let promptResult: string | undefined;
|
|
@@ -246,13 +246,21 @@ async function main(): Promise<void> {
|
|
|
246
246
|
promptResult?.includes(uatResultPath) ?? false,
|
|
247
247
|
`prompt contains uatResultPath value after substitution`,
|
|
248
248
|
);
|
|
249
|
+
assertTrue(
|
|
250
|
+
promptResult?.includes(`Detected UAT mode:** \`${uatType}\``) ?? false,
|
|
251
|
+
`prompt contains detected dynamic uatType value "${uatType}" after substitution`,
|
|
252
|
+
);
|
|
253
|
+
assertTrue(
|
|
254
|
+
promptResult?.includes(`uatType: ${uatType}`) ?? false,
|
|
255
|
+
`prompt contains dynamic uatType frontmatter value "${uatType}" after substitution`,
|
|
256
|
+
);
|
|
249
257
|
assertTrue(
|
|
250
258
|
!/\{\{[^}]+\}\}/.test(promptResult ?? ''),
|
|
251
259
|
'no unreplaced {{...}} tokens remain after variable substitution',
|
|
252
260
|
);
|
|
253
261
|
assertTrue(
|
|
254
|
-
/
|
|
255
|
-
'prompt contains
|
|
262
|
+
/browser|runtime|execute|run/i.test(promptResult ?? ''),
|
|
263
|
+
'prompt contains runtime execution language (browser/runtime/execute/run)',
|
|
256
264
|
);
|
|
257
265
|
assertTrue(
|
|
258
266
|
!/surfaced for human review/i.test(promptResult ?? ''),
|