gsd-pi 2.37.1-dev.d3ace49 → 2.38.0-dev.63ad7e5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/dist/app-paths.js +1 -1
  2. package/dist/cli.js +9 -0
  3. package/dist/extension-discovery.d.ts +5 -3
  4. package/dist/extension-discovery.js +14 -9
  5. package/dist/extension-registry.js +2 -2
  6. package/dist/remote-questions-config.js +2 -2
  7. package/dist/resources/extensions/browser-tools/package.json +3 -1
  8. package/dist/resources/extensions/cmux/index.js +55 -1
  9. package/dist/resources/extensions/context7/package.json +1 -1
  10. package/dist/resources/extensions/env-utils.js +29 -0
  11. package/dist/resources/extensions/get-secrets-from-user.js +5 -24
  12. package/dist/resources/extensions/google-search/package.json +3 -1
  13. package/dist/resources/extensions/gsd/auto/session.js +6 -23
  14. package/dist/resources/extensions/gsd/auto-dispatch.js +7 -8
  15. package/dist/resources/extensions/gsd/auto-loop.js +68 -97
  16. package/dist/resources/extensions/gsd/auto-post-unit.js +75 -71
  17. package/dist/resources/extensions/gsd/auto-prompts.js +7 -31
  18. package/dist/resources/extensions/gsd/auto-start.js +13 -2
  19. package/dist/resources/extensions/gsd/auto-worktree-sync.js +13 -5
  20. package/dist/resources/extensions/gsd/auto.js +143 -96
  21. package/dist/resources/extensions/gsd/captures.js +9 -1
  22. package/dist/resources/extensions/gsd/commands-extensions.js +3 -2
  23. package/dist/resources/extensions/gsd/commands-handlers.js +16 -3
  24. package/dist/resources/extensions/gsd/commands-prefs-wizard.js +1 -1
  25. package/dist/resources/extensions/gsd/commands.js +22 -2
  26. package/dist/resources/extensions/gsd/context-budget.js +2 -10
  27. package/dist/resources/extensions/gsd/detection.js +1 -2
  28. package/dist/resources/extensions/gsd/docs/preferences-reference.md +0 -2
  29. package/dist/resources/extensions/gsd/doctor-checks.js +82 -0
  30. package/dist/resources/extensions/gsd/doctor-environment.js +78 -0
  31. package/dist/resources/extensions/gsd/doctor-format.js +15 -0
  32. package/dist/resources/extensions/gsd/doctor-providers.js +27 -11
  33. package/dist/resources/extensions/gsd/doctor.js +184 -11
  34. package/dist/resources/extensions/gsd/export.js +1 -1
  35. package/dist/resources/extensions/gsd/files.js +2 -2
  36. package/dist/resources/extensions/gsd/forensics.js +1 -1
  37. package/dist/resources/extensions/gsd/index.js +2 -1
  38. package/dist/resources/extensions/gsd/migrate/parsers.js +1 -1
  39. package/dist/resources/extensions/gsd/package.json +1 -1
  40. package/dist/resources/extensions/gsd/preferences-models.js +0 -12
  41. package/dist/resources/extensions/gsd/preferences-types.js +0 -1
  42. package/dist/resources/extensions/gsd/preferences-validation.js +1 -11
  43. package/dist/resources/extensions/gsd/preferences.js +5 -5
  44. package/dist/resources/extensions/gsd/prompts/discuss.md +11 -14
  45. package/dist/resources/extensions/gsd/prompts/execute-task.md +2 -2
  46. package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
  47. package/dist/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
  48. package/dist/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
  49. package/dist/resources/extensions/gsd/prompts/queue.md +4 -8
  50. package/dist/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
  51. package/dist/resources/extensions/gsd/prompts/run-uat.md +25 -10
  52. package/dist/resources/extensions/gsd/prompts/workflow-start.md +2 -2
  53. package/dist/resources/extensions/gsd/repo-identity.js +21 -4
  54. package/dist/resources/extensions/gsd/resource-version.js +2 -1
  55. package/dist/resources/extensions/gsd/state.js +1 -1
  56. package/dist/resources/extensions/gsd/visualizer-data.js +1 -1
  57. package/dist/resources/extensions/gsd/worktree.js +35 -16
  58. package/dist/resources/extensions/remote-questions/status.js +2 -1
  59. package/dist/resources/extensions/remote-questions/store.js +2 -1
  60. package/dist/resources/extensions/search-the-web/provider.js +2 -1
  61. package/dist/resources/extensions/subagent/index.js +12 -3
  62. package/dist/resources/extensions/subagent/isolation.js +2 -1
  63. package/dist/resources/extensions/ttsr/rule-loader.js +2 -1
  64. package/dist/resources/extensions/universal-config/package.json +1 -1
  65. package/dist/welcome-screen.d.ts +12 -0
  66. package/dist/welcome-screen.js +53 -0
  67. package/package.json +1 -1
  68. package/packages/pi-coding-agent/dist/core/package-manager.d.ts.map +1 -1
  69. package/packages/pi-coding-agent/dist/core/package-manager.js +8 -4
  70. package/packages/pi-coding-agent/dist/core/package-manager.js.map +1 -1
  71. package/packages/pi-coding-agent/package.json +1 -1
  72. package/packages/pi-coding-agent/src/core/package-manager.ts +8 -4
  73. package/pkg/package.json +1 -1
  74. package/src/resources/extensions/cmux/index.ts +57 -1
  75. package/src/resources/extensions/env-utils.ts +31 -0
  76. package/src/resources/extensions/get-secrets-from-user.ts +5 -24
  77. package/src/resources/extensions/gsd/auto/session.ts +7 -25
  78. package/src/resources/extensions/gsd/auto-dispatch.ts +6 -8
  79. package/src/resources/extensions/gsd/auto-loop.ts +88 -133
  80. package/src/resources/extensions/gsd/auto-post-unit.ts +52 -42
  81. package/src/resources/extensions/gsd/auto-prompts.ts +7 -33
  82. package/src/resources/extensions/gsd/auto-start.ts +18 -2
  83. package/src/resources/extensions/gsd/auto-worktree-sync.ts +15 -4
  84. package/src/resources/extensions/gsd/auto.ts +139 -101
  85. package/src/resources/extensions/gsd/captures.ts +10 -1
  86. package/src/resources/extensions/gsd/commands-extensions.ts +4 -2
  87. package/src/resources/extensions/gsd/commands-handlers.ts +17 -2
  88. package/src/resources/extensions/gsd/commands-prefs-wizard.ts +1 -1
  89. package/src/resources/extensions/gsd/commands.ts +24 -2
  90. package/src/resources/extensions/gsd/context-budget.ts +2 -12
  91. package/src/resources/extensions/gsd/detection.ts +2 -2
  92. package/src/resources/extensions/gsd/docs/preferences-reference.md +0 -2
  93. package/src/resources/extensions/gsd/doctor-checks.ts +75 -0
  94. package/src/resources/extensions/gsd/doctor-environment.ts +82 -1
  95. package/src/resources/extensions/gsd/doctor-format.ts +20 -0
  96. package/src/resources/extensions/gsd/doctor-providers.ts +26 -9
  97. package/src/resources/extensions/gsd/doctor-types.ts +16 -1
  98. package/src/resources/extensions/gsd/doctor.ts +177 -13
  99. package/src/resources/extensions/gsd/export.ts +1 -1
  100. package/src/resources/extensions/gsd/files.ts +2 -2
  101. package/src/resources/extensions/gsd/forensics.ts +1 -1
  102. package/src/resources/extensions/gsd/index.ts +3 -1
  103. package/src/resources/extensions/gsd/migrate/parsers.ts +1 -1
  104. package/src/resources/extensions/gsd/preferences-models.ts +0 -12
  105. package/src/resources/extensions/gsd/preferences-types.ts +0 -4
  106. package/src/resources/extensions/gsd/preferences-validation.ts +1 -11
  107. package/src/resources/extensions/gsd/preferences.ts +5 -5
  108. package/src/resources/extensions/gsd/prompts/discuss.md +11 -14
  109. package/src/resources/extensions/gsd/prompts/execute-task.md +2 -2
  110. package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +11 -12
  111. package/src/resources/extensions/gsd/prompts/guided-discuss-slice.md +8 -10
  112. package/src/resources/extensions/gsd/prompts/guided-resume-task.md +1 -1
  113. package/src/resources/extensions/gsd/prompts/queue.md +4 -8
  114. package/src/resources/extensions/gsd/prompts/reactive-execute.md +11 -8
  115. package/src/resources/extensions/gsd/prompts/run-uat.md +25 -10
  116. package/src/resources/extensions/gsd/prompts/workflow-start.md +2 -2
  117. package/src/resources/extensions/gsd/repo-identity.ts +23 -4
  118. package/src/resources/extensions/gsd/resource-version.ts +3 -1
  119. package/src/resources/extensions/gsd/state.ts +1 -1
  120. package/src/resources/extensions/gsd/tests/agent-end-retry.test.ts +21 -18
  121. package/src/resources/extensions/gsd/tests/auto-loop.test.ts +11 -31
  122. package/src/resources/extensions/gsd/tests/cmux.test.ts +93 -0
  123. package/src/resources/extensions/gsd/tests/doctor-enhancements.test.ts +266 -0
  124. package/src/resources/extensions/gsd/tests/doctor-providers.test.ts +86 -3
  125. package/src/resources/extensions/gsd/tests/preferences.test.ts +2 -7
  126. package/src/resources/extensions/gsd/tests/prompt-contracts.test.ts +59 -0
  127. package/src/resources/extensions/gsd/tests/repo-identity-worktree.test.ts +21 -1
  128. package/src/resources/extensions/gsd/tests/run-uat.test.ts +11 -3
  129. package/src/resources/extensions/gsd/tests/worktree.test.ts +47 -0
  130. package/src/resources/extensions/gsd/types.ts +0 -1
  131. package/src/resources/extensions/gsd/visualizer-data.ts +1 -1
  132. package/src/resources/extensions/gsd/worktree.ts +35 -15
  133. package/src/resources/extensions/remote-questions/status.ts +3 -1
  134. package/src/resources/extensions/remote-questions/store.ts +3 -1
  135. package/src/resources/extensions/search-the-web/provider.ts +2 -1
  136. package/src/resources/extensions/subagent/index.ts +12 -3
  137. package/src/resources/extensions/subagent/isolation.ts +3 -1
  138. package/src/resources/extensions/ttsr/rule-loader.ts +3 -1
  139. package/dist/resources/extensions/gsd/prompt-compressor.js +0 -393
  140. package/dist/resources/extensions/gsd/semantic-chunker.js +0 -254
  141. package/dist/resources/extensions/gsd/summary-distiller.js +0 -212
  142. package/src/resources/extensions/gsd/prompt-compressor.ts +0 -508
  143. package/src/resources/extensions/gsd/semantic-chunker.ts +0 -336
  144. package/src/resources/extensions/gsd/summary-distiller.ts +0 -258
  145. package/src/resources/extensions/gsd/tests/context-compression.test.ts +0 -193
  146. package/src/resources/extensions/gsd/tests/prompt-compressor.test.ts +0 -529
  147. package/src/resources/extensions/gsd/tests/semantic-chunker.test.ts +0 -426
  148. package/src/resources/extensions/gsd/tests/summary-distiller.test.ts +0 -323
  149. package/src/resources/extensions/gsd/tests/token-optimization-benchmark.test.ts +0 -1272
  150. package/src/resources/extensions/gsd/tests/token-optimization-prefs.test.ts +0 -164
@@ -33,19 +33,16 @@ Ask **1–3 questions per round**. Keep each question focused on one of:
33
33
 
34
34
  After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
35
35
 
36
- ### Check-in after each round
36
+ ### Round cadence
37
37
 
38
- After each round of answers, ask:
38
+ After each round of answers, decide whether you already have enough depth to write a strong context file.
39
39
 
40
- > "I think I have a solid picture of this milestone. Ready to wrap up and write the context file, or is there more to cover?"
41
-
42
- **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with options:
43
- - "Wrap up — write the context file" *(recommended after ~2–3 rounds)*
44
- - "Keep going — more to discuss"
45
-
46
- **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text.
47
-
48
- If the user wants to keep going, keep asking. Stop when they say wrap up.
40
+ - If not, investigate any newly-opened unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
41
+ - Use a single wrap-up prompt only when you genuinely believe the depth checklist is satisfied or the user signals they want to stop.
42
+ - **If `{{structuredQuestionsAvailable}}` is `true` and you need that wrap-up prompt:** use `ask_user_questions` with options:
43
+ - "Write the context file" *(recommended when depth is satisfied)*
44
+ - "One more pass"
45
+ - **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text only once you believe you are ready to write.
49
46
 
50
47
  ---
51
48
 
@@ -55,7 +52,7 @@ If the user wants to keep going, keep asking. Stop when they say wrap up.
55
52
 
56
53
  **Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "good UX"), push for specifics.
57
54
 
58
- **Questions must be about the experience, not the implementation.** Never ask "what auth provider?" ask "when someone logs in, what should that feel like?" Implementation is your job. Understanding what they want to experience is the discussion's job.
55
+ **Lead with experience, but ask implementation when it materially matters.** Default questions should target the experience and outcome. But when implementation choices materially change scope, proof, compliance, integration, deployment, or irreversible architecture, ask them directly instead of forcing a fake UX phrasing.
59
56
 
60
57
  **Position-first framing.** Have opinions. "I'd lean toward X because Y — does that match your thinking?" is better than "what do you think about X vs Y?"
61
58
 
@@ -95,6 +92,8 @@ Before moving to the wrap-up gate, verify you have covered:
95
92
 
96
93
  If they clarify, absorb the correction and re-verify.
97
94
 
95
+ The depth verification is the only required confirmation gate. Do not add a second "ready to proceed?" gate after it.
96
+
98
97
  ---
99
98
 
100
99
  ## Output
@@ -1,6 +1,6 @@
1
1
  You are interviewing the user to surface behavioural, UX, and usage grey areas for slice **{{sliceId}}: {{sliceTitle}}** of milestone **{{milestoneId}}**.
2
2
 
3
- Your goal is **not** to settle tech stack, naming conventions, or architecture — that happens during research and planning. Your goal is to produce a context file that captures the human decisions: what this slice should feel like, how it should behave, what edge cases matter, where scope begins and ends, and what the user cares about that won't be obvious from the roadmap entry alone.
3
+ Your goal is **not** to center the discussion on tech stack trivia, naming conventions, or speculative architecture. Your goal is to produce a context file that captures the human decisions: what this slice should feel like, how it should behave, what edge cases matter, where scope begins and ends, and what the user cares about that won't be obvious from the roadmap entry alone. If a technical choice materially changes scope, proof, or integration behavior, ask it directly and capture it.
4
4
 
5
5
  {{inlinedContext}}
6
6
 
@@ -27,17 +27,15 @@ Ask **1–3 questions per round** using `ask_user_questions`. Keep each question
27
27
 
28
28
  After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
29
29
 
30
- ### Check-in after each round
30
+ ### Round cadence
31
31
 
32
- After each round of answers, use `ask_user_questions` to ask:
32
+ After each round of answers, decide whether you already have enough signal to write the slice context cleanly.
33
33
 
34
- > "I think I have a solid picture of this slice. Ready to wrap up and write the context file, or is there more to cover?"
35
-
36
- Options:
37
- - "Wrap up — write the context file" *(recommended after ~2–3 rounds)*
38
- - "Keep going — more to discuss"
39
-
40
- If the user wants to keep going, keep asking. Stop when they say wrap up.
34
+ - If not, investigate any new unknowns and continue to the next round immediately. Do **not** ask a meta "ready to wrap up?" question after every round.
35
+ - Ask a single wrap-up question only when you genuinely believe the slice is well understood or the user signals they want to stop.
36
+ - When you do ask it, use `ask_user_questions` with:
37
+ - "Write the context file" *(recommended when the slice is well understood)*
38
+ - "One more pass"
41
39
 
42
40
  ---
43
41
 
@@ -1 +1 @@
1
- Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, then pick up from where you left off. Delete the continue file after reading it. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during execution, without relaxing required verification or artifact rules.
1
+ Resume interrupted work. Find the continue file (`{{sliceId}}-CONTINUE.md` or `continue.md`) in slice {{sliceId}} of milestone {{milestoneId}}, read it, and use it as the recovery contract for where to pick up. Do **not** delete the continue file immediately. Keep it until the task is successfully completed or you have written a newer summary/continue artifact that clearly supersedes it. If the resumed attempt fails again, update or replace the continue file so no recovery context is lost. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during execution, without relaxing required verification or artifact rules.
@@ -36,15 +36,11 @@ Don't go deep — just enough that your next question reflects what's actually t
36
36
  - How the new work relates to existing milestones — overlap, dependencies, prerequisites
37
37
  - If `.gsd/REQUIREMENTS.md` exists: which unmet Active or Deferred requirements this queued work advances
38
38
 
39
- **Then use ask_user_questions** to dig into gray areas — architecture choices, scope boundaries, tech preferences, what's in vs out. 1-3 questions per round.
39
+ **Then use ask_user_questions** to dig into gray areas — scope boundaries, proof expectations, integration choices, tech preferences when they materially matter, and what's in vs out. 1-3 questions per round.
40
40
 
41
41
  If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during discuss/planning work, but do not let it override the required discuss flow or artifact requirements.
42
42
 
43
- **Self-regulate:** After about 10-15 questions total (3-5 rounds), or when you feel you have a solid understanding, include a question like:
44
- "I think I have a good picture. Ready to queue this, or are there more things to discuss?"
45
- with options: "Ready to queue (Recommended)", "I have more to discuss"
46
-
47
- If the user wants to keep going, keep asking. If they're ready, proceed.
43
+ **Self-regulate:** Do **not** ask a meta "ready to queue?" question after every round. Keep going until you have enough depth to write the context well, then use a single wrap-up prompt if needed. If the user clearly keeps adding detail instead of objecting, treat that as permission to continue.
48
44
 
49
45
  ## Existing Milestone Awareness
50
46
 
@@ -88,7 +84,7 @@ For EACH milestone you are about to write context for, investigate the codebase
88
84
  1. **Read the actual code** — for every file or module you reference in "Existing Codebase / Prior Art", read enough to confirm your assumptions about what exists, what it does, and what it doesn't do. Do not guess from memory or training data.
89
85
  2. **Check for stale assumptions** — the codebase may have changed since the user's spec was written. Verify: do the APIs you reference still exist? Have modules been refactored? Has upstream merged features that change the landscape?
90
86
  3. **Identify phantom capabilities** — for every capability you list as "existing," confirm it actually works as described. Look for: functions that exist but are never called, fields that are set but never read, features that are piped but never connected.
91
- 4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section with "verified against v{version}" annotations.
87
+ 4. **Note what you found** — include verified findings in the context file's "Existing Codebase / Prior Art" section with annotations like "verified against current codebase state" or an actual concrete version/commit only if you truly have one.
92
88
 
93
89
  ### Step 2: Per-Milestone Depth Verification
94
90
 
@@ -103,7 +99,7 @@ This triggers the per-milestone write-gate. The question should present:
103
99
  - Key technical assumptions you verified (or couldn't verify)
104
100
  - Any risks or unknowns the investigation surfaced
105
101
 
106
- The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined.
102
+ The user confirms or corrects before you write. One depth verification per milestone — not one for all milestones combined. This is the required write-gate; do not add extra "ready to proceed?" prompts around it once you have enough signal.
107
103
 
108
104
  **If you skip this step, the system will block the CONTEXT.md write and return an error telling you to complete verification first.**
109
105
 
@@ -8,7 +8,7 @@
8
8
 
9
9
  You are executing **multiple tasks in parallel** for this slice. The task graph below shows which tasks are ready for simultaneous execution based on their input/output dependencies.
10
10
 
11
- **Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a self-contained execute-task prompt. After all subagents return, verify each task's outputs and write summaries.
11
+ **Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a full `execute-task` prompt and is responsible for its own implementation, verification, task summary, and checkbox updates. The parent batch agent orchestrates, verifies, and records failures only when a dispatched task failed before it could leave its own summary behind.
12
12
 
13
13
  ## Task Dependency Graph
14
14
 
@@ -24,15 +24,18 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
24
24
 
25
25
  1. **Dispatch all ready tasks** using `subagent` in parallel mode. Each subagent prompt is provided below.
26
26
  2. **Wait for all subagents** to complete.
27
- 3. **Verify each task's outputs** — check that expected files were created/modified and that verification commands pass.
28
- 4. **Write task summaries** for each completed task using the task-summary template.
29
- 5. **Mark completed tasks** as done in the slice plan (checkbox `[x]`).
30
- 6. **Commit** all changes with a clear message covering the parallel batch.
27
+ 3. **Verify each dispatched task's outputs** — check that expected files were created/modified, that verification commands pass where applicable, and that each task wrote its own `T##-SUMMARY.md`.
28
+ 4. **Do not rewrite successful task summaries or duplicate checkbox edits.** Treat a subagent-written summary as authoritative for that task.
29
+ 5. **If a failed task produced no summary, write a recovery summary for that task** with `blocker_discovered: true`, clear failure details, and leave the task unchecked so replan/retry has an authoritative record.
30
+ 6. **Preserve successful sibling tasks exactly as they landed.** Do not roll back good work because another parallel task failed.
31
+ 7. **Do NOT create a batch commit.** The surrounding unit lifecycle owns commits; this parent batch agent should not invent a second commit layer.
32
+ 8. **Report the batch outcome** — which tasks succeeded, which failed, and any output collisions or dependency surprises.
31
33
 
32
34
  If any subagent fails:
33
- - Write a summary for the failed task with `blocker_discovered: true`
34
- - Continue marking the successful tasks as done
35
- - The orchestrator will handle re-dispatch on the next iteration
35
+ - Keep successful task summaries and checkbox updates as-is
36
+ - Write a failure summary only when the failed task did not leave one behind
37
+ - Do not silently discard or overwrite another task's outputs
38
+ - The orchestrator will handle re-dispatch or replanning on the next iteration
36
39
 
37
40
  ## Subagent Prompts
38
41
 
@@ -18,32 +18,47 @@ If a `GSD Skill Preferences` block is present in system context, use it to decid
18
18
 
19
19
  **UAT file:** `{{uatPath}}`
20
20
  **Result file to write:** `{{uatResultPath}}`
21
+ **Detected UAT mode:** `{{uatType}}`
21
22
 
22
- You are the test runner. Execute every check defined in `{{uatPath}}` directly:
23
+ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply as this mode truthfully allows. Do not collapse live or subjective checks into cheap artifact checks just to get a PASS.
24
+
25
+ ### Automation rules by mode
26
+
27
+ - `artifact-driven` — verify with shell commands, scripts, file reads, and artifact structure checks.
28
+ - `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
29
+ - `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
30
+ - `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN` and use an overall verdict of `PARTIAL` unless every required check was objective and passed.
31
+
32
+ ### Evidence tools
33
+
34
+ Choose the lightest tool that proves the check honestly:
23
35
 
24
36
  - Run shell commands with `bash`
25
37
  - Run `grep` / `rg` checks against files
26
- - Run `node` / script invocations
38
+ - Run `node` / other script invocations
27
39
  - Read files and verify their contents
28
40
  - Check that expected artifacts exist and have correct structure
41
+ - For live/runtime/UI checks, exercise the real flow in the browser when applicable and inspect runtime/network/console state
42
+ - When a check cannot be honestly automated, gather the best objective evidence you can and mark it `NEEDS-HUMAN`
29
43
 
30
44
  For each check, record:
31
45
  - The check description (from the UAT file)
46
+ - The evidence mode used: `artifact`, `runtime`, or `human-follow-up`
32
47
  - The command or action taken
33
48
  - The actual result observed
34
- - PASS or FAIL verdict
49
+ - `PASS`, `FAIL`, or `NEEDS-HUMAN`
35
50
 
36
51
  After running all checks, compute the **overall verdict**:
37
- - `PASS` — all checks passed
52
+ - `PASS` — all required checks passed and no human-only checks remain
38
53
  - `FAIL` — one or more checks failed
39
- - `PARTIAL` — some checks passed, some failed or were skipped
54
+ - `PARTIAL` — some checks passed, but one or more checks were skipped, inconclusive, or still require human judgment
40
55
 
41
56
  Write `{{uatResultPath}}` with:
42
57
 
43
58
  ```markdown
44
59
  ---
45
60
  sliceId: {{sliceId}}
46
- uatType: artifact-driven
61
+ uatType: {{uatType}}
47
62
  verdict: PASS | FAIL | PARTIAL
48
63
  date: <ISO 8601 timestamp>
49
64
  ---
@@ -52,9 +67,9 @@ date: <ISO 8601 timestamp>
52
67
 
53
68
  ## Checks
54
69
 
55
- | Check | Result | Notes |
56
- |-------|--------|-------|
57
- | <check description> | PASS / FAIL | <observed output or reason> |
70
+ | Check | Mode | Result | Notes |
71
+ |-------|------|--------|-------|
72
+ | <check description> | artifact / runtime / human-follow-up | PASS / FAIL / NEEDS-HUMAN | <observed output, evidence, or reason> |
58
73
 
59
74
  ## Overall Verdict
60
75
 
@@ -62,7 +77,7 @@ date: <ISO 8601 timestamp>
62
77
 
63
78
  ## Notes
64
79
 
65
- <any additional context, errors encountered, or follow-up items>
80
+ <any additional context, errors encountered, screenshots/logs gathered, or manual follow-up still required>
66
81
  ```
67
82
 
68
83
  ---
@@ -14,7 +14,7 @@ You are executing a **{{templateName}}** workflow (template: `{{templateId}}`).
14
14
 
15
15
  ## Workflow Definition
16
16
 
17
- Follow the workflow defined below. Execute each phase in order, completing one before moving to the next. At each phase gate, confirm with the user before proceeding.
17
+ Follow the workflow defined below. Execute each phase in order, completing one before moving to the next. For low and medium complexity workflows, keep moving by default — pause only at true decision gates (user must choose between materially different directions, outward-facing actions need approval, or the workflow explicitly requires a human checkpoint). For high complexity workflows, confirm at phase transitions unless the workflow explicitly marks a gate as skip-safe.
18
18
 
19
19
  {{workflowContent}}
20
20
 
@@ -24,5 +24,5 @@ Follow the workflow defined below. Execute each phase in order, completing one b
24
24
  2. **Artifact discipline.** If an artifact directory is specified, write all planning/summary documents there.
25
25
  3. **Atomic commits.** Commit working code after each meaningful change. Use conventional commit format: `<type>(<scope>): <description>`.
26
26
  4. **Verify before shipping.** Run the project's test suite and build before marking the workflow complete.
27
- 5. **Gate between phases.** After each phase, summarize what was done and ask the user to confirm before moving to the next phase.
27
+ 5. **Decision gates, not ceremony.** After each phase, summarize what changed. For low/medium complexity, ask for confirmation only when the next phase depends on a real user choice or external approval. For high complexity, confirm before proceeding to each new phase.
28
28
  6. **Stay focused.** This is a {{complexity}}-complexity workflow. Match your ceremony level to the task — don't over-engineer or under-deliver.
@@ -12,6 +12,8 @@ import { existsSync, lstatSync, mkdirSync, readFileSync, realpathSync, rmSync, s
12
12
  import { homedir } from "node:os";
13
13
  import { join, resolve } from "node:path";
14
14
 
15
+ const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd");
16
+
15
17
  // ─── Repo Identity ──────────────────────────────────────────────────────────
16
18
 
17
19
  /**
@@ -90,14 +92,31 @@ function resolveGitRoot(basePath: string): string {
90
92
  }
91
93
  }
92
94
 
95
+ /**
96
+ * Validate a GSD_PROJECT_ID value.
97
+ *
98
+ * Must contain only alphanumeric characters, hyphens, and underscores.
99
+ * Call this once at startup so the user gets immediate feedback on bad values.
100
+ */
101
+ export function validateProjectId(id: string): boolean {
102
+ return /^[a-zA-Z0-9_-]+$/.test(id);
103
+ }
104
+
93
105
  /**
94
106
  * Compute a stable identity for a repository.
95
107
  *
96
- * SHA-256 of `${remoteUrl}\n${resolvedRoot}`, truncated to 12 hex chars.
97
- * Deterministic: same repo always produces the same hash regardless of
98
- * which worktree the caller is inside.
108
+ * If `GSD_PROJECT_ID` is set, returns it directly (validation is expected
109
+ * to have already happened at startup via `validateProjectId`).
110
+ *
111
+ * Otherwise returns SHA-256 of `${remoteUrl}\n${resolvedRoot}`, truncated
112
+ * to 12 hex chars. Deterministic: same repo always produces the same hash
113
+ * regardless of which worktree the caller is inside.
99
114
  */
100
115
  export function repoIdentity(basePath: string): string {
116
+ const projectId = process.env.GSD_PROJECT_ID;
117
+ if (projectId) {
118
+ return projectId;
119
+ }
101
120
  const remoteUrl = getRemoteUrl(basePath);
102
121
  const root = resolveGitRoot(basePath);
103
122
  const input = `${remoteUrl}\n${root}`;
@@ -113,7 +132,7 @@ export function repoIdentity(basePath: string): string {
113
132
  * otherwise `~/.gsd/projects/<hash>`.
114
133
  */
115
134
  export function externalGsdRoot(basePath: string): string {
116
- const base = process.env.GSD_STATE_DIR || join(homedir(), ".gsd");
135
+ const base = process.env.GSD_STATE_DIR || gsdHome;
117
136
  return join(base, "projects", repoIdentity(basePath));
118
137
  }
119
138
 
@@ -11,6 +11,8 @@ import { join } from "node:path";
11
11
  import { homedir } from "node:os";
12
12
  import { resolveProjectRoot } from "./worktree.js";
13
13
 
14
+ const gsdHome = process.env.GSD_HOME || join(homedir(), ".gsd");
15
+
14
16
  // ─── Resource Staleness ───────────────────────────────────────────────────
15
17
 
16
18
  /**
@@ -23,7 +25,7 @@ function isManifestWithVersion(data: unknown): data is { gsdVersion: string } {
23
25
  }
24
26
 
25
27
  export function readResourceVersion(): string | null {
26
- const agentDir = process.env.GSD_CODING_AGENT_DIR || join(homedir(), ".gsd", "agent");
28
+ const agentDir = process.env.GSD_CODING_AGENT_DIR || join(gsdHome, "agent");
27
29
  const manifestPath = join(agentDir, "managed-resources.json");
28
30
  const manifest = loadJsonFileOrNull(manifestPath, isManifestWithVersion);
29
31
  return manifest?.gsdVersion ?? null;
@@ -31,7 +31,7 @@ import {
31
31
  gsdRoot,
32
32
  } from './paths.js';
33
33
 
34
- import { milestoneIdSort, findMilestoneIds } from './guided-flow.js';
34
+ import { milestoneIdSort, findMilestoneIds } from './milestone-ids.js';
35
35
  import { nativeBatchParseGsdFiles, type BatchParsedFile } from './native-parser-bridge.js';
36
36
 
37
37
  import { join, resolve } from 'path';
@@ -1,9 +1,9 @@
1
1
  /**
2
- * agent-end-retry.test.ts — Regression checks for the post-#1419 agent_end model.
2
+ * agent-end-retry.test.ts — Regression checks for the agent_end model.
3
3
  *
4
- * The old recursive handleAgentEnd retry path is gone. The loop now keeps
5
- * pendingResolve + pendingAgentEndQueue on AutoSession, and handleAgentEnd is
6
- * only a thin compatibility wrapper around resolveAgentEnd().
4
+ * The per-unit one-shot resolve function lives at module level in auto-loop.ts
5
+ * (_currentResolve). handleAgentEnd is a thin compatibility wrapper around
6
+ * resolveAgentEnd().
7
7
  */
8
8
 
9
9
  import test from "node:test";
@@ -14,40 +14,43 @@ import { fileURLToPath } from "node:url";
14
14
 
15
15
  const __dirname = dirname(fileURLToPath(import.meta.url));
16
16
  const AUTO_TS_PATH = join(__dirname, "..", "auto.ts");
17
+ const AUTO_LOOP_TS_PATH = join(__dirname, "..", "auto-loop.ts");
17
18
  const SESSION_TS_PATH = join(__dirname, "..", "auto", "session.ts");
18
19
 
19
20
  function getAutoTsSource(): string {
20
21
  return readFileSync(AUTO_TS_PATH, "utf-8");
21
22
  }
22
23
 
24
+ function getAutoLoopTsSource(): string {
25
+ return readFileSync(AUTO_LOOP_TS_PATH, "utf-8");
26
+ }
27
+
23
28
  function getSessionTsSource(): string {
24
29
  return readFileSync(SESSION_TS_PATH, "utf-8");
25
30
  }
26
31
 
27
- test("AutoSession declares pending agent_end queue state", () => {
28
- const source = getSessionTsSource();
32
+ test("auto-loop.ts declares _currentResolve for per-unit one-shot promises", () => {
33
+ const source = getAutoLoopTsSource();
29
34
  assert.ok(
30
- source.includes("pendingResolve"),
31
- "AutoSession must declare pendingResolve for the in-flight unit promise",
35
+ source.includes("_currentResolve"),
36
+ "auto-loop.ts must declare _currentResolve for the per-unit resolve function",
32
37
  );
33
38
  assert.ok(
34
- source.includes("pendingAgentEndQueue"),
35
- "AutoSession must declare pendingAgentEndQueue for between-iteration agent_end events",
39
+ source.includes("_sessionSwitchInFlight"),
40
+ "auto-loop.ts must declare _sessionSwitchInFlight guard",
36
41
  );
37
42
  });
38
43
 
39
- test("AutoSession reset clears pending agent_end queue state", () => {
44
+ test("AutoSession no longer holds promise state (moved to auto-loop.ts module scope)", () => {
40
45
  const source = getSessionTsSource();
41
- const resetIdx = source.indexOf("reset(): void");
42
- assert.ok(resetIdx > -1, "AutoSession must have a reset() method");
43
- const resetBlock = source.slice(resetIdx, resetIdx + 4000);
46
+ // Properties should NOT exist as class fields
44
47
  assert.ok(
45
- resetBlock.includes("this.pendingResolve = null"),
46
- "reset() must clear pendingResolve",
48
+ !source.includes("pendingResolve:"),
49
+ "AutoSession must not declare pendingResolve (moved to auto-loop.ts)",
47
50
  );
48
51
  assert.ok(
49
- resetBlock.includes("this.pendingAgentEndQueue = []"),
50
- "reset() must clear pendingAgentEndQueue",
52
+ !source.includes("pendingAgentEndQueue:"),
53
+ "AutoSession must not declare pendingAgentEndQueue (removed — events are dropped)",
51
54
  );
52
55
  });
53
56
 
@@ -37,9 +37,6 @@ function makeMockSession(opts?: {
37
37
  const session = {
38
38
  active: true,
39
39
  verbose: false,
40
- sessionSwitchInFlight: false,
41
- pendingResolve: null,
42
- pendingAgentEndQueue: [],
43
40
  cmdCtx: {
44
41
  newSession: () => {
45
42
  opts?.onNewSessionStart?.(session);
@@ -96,7 +93,6 @@ test("resolveAgentEnd resolves a pending runUnit promise", async () => {
96
93
  const ctx = makeMockCtx();
97
94
  const pi = makeMockPi();
98
95
  const s = makeMockSession();
99
- _setActiveSession(s);
100
96
  const event = makeEvent();
101
97
 
102
98
  // Start runUnit — it will create the promise and send a message,
@@ -122,25 +118,21 @@ test("resolveAgentEnd resolves a pending runUnit promise", async () => {
122
118
  assert.deepEqual(result.event, event);
123
119
  });
124
120
 
125
- test("resolveAgentEnd queues event when no promise is pending", () => {
121
+ test("resolveAgentEnd drops event when no promise is pending", () => {
126
122
  _resetPendingResolve();
127
- const s = makeMockSession();
128
- _setActiveSession(s);
129
123
 
130
- // Should not throw — queues the event for the next runUnit
124
+ // Should not throw — event is dropped (logged as warning)
131
125
  assert.doesNotThrow(() => {
132
126
  resolveAgentEnd(makeEvent());
133
127
  });
134
- assert.equal(s.pendingAgentEndQueue.length, 1, "event should be queued");
135
128
  });
136
129
 
137
- test("double resolveAgentEnd only resolves once (second is queued)", async () => {
130
+ test("double resolveAgentEnd only resolves once (second is dropped)", async () => {
138
131
  _resetPendingResolve();
139
132
 
140
133
  const ctx = makeMockCtx();
141
134
  const pi = makeMockPi();
142
135
  const s = makeMockSession();
143
- _setActiveSession(s);
144
136
  const event1 = makeEvent([{ id: 1 }]);
145
137
  const event2 = makeEvent([{ id: 2 }]);
146
138
 
@@ -151,15 +143,10 @@ test("double resolveAgentEnd only resolves once (second is queued)", async () =>
151
143
  // First resolve — should work
152
144
  resolveAgentEnd(event1);
153
145
 
154
- // Second resolve — should be queued (no pending promise)
146
+ // Second resolve — should be dropped (no pending resolver)
155
147
  assert.doesNotThrow(() => {
156
148
  resolveAgentEnd(event2);
157
149
  });
158
- assert.equal(
159
- s.pendingAgentEndQueue.length,
160
- 1,
161
- "second event should be queued",
162
- );
163
150
 
164
151
  const result = await resultPromise;
165
152
  assert.equal(result.status, "completed");
@@ -211,29 +198,25 @@ test("runUnit returns cancelled when s.active is false before sendMessage", asyn
211
198
  assert.equal(pi.calls.length, 0);
212
199
  });
213
200
 
214
- test("runUnit only arms pendingResolve after newSession completes", async () => {
201
+ test("runUnit only arms resolve after newSession completes", async () => {
215
202
  _resetPendingResolve();
216
203
 
217
204
  let sawSwitchFlag = false;
218
- let sawPendingResolve: unknown = "unset";
219
205
 
220
206
  const ctx = makeMockCtx();
221
207
  const pi = makeMockPi();
222
208
  const s = makeMockSession({
223
209
  newSessionDelayMs: 20,
224
- onNewSessionStart: (session) => {
225
- sawSwitchFlag = session.sessionSwitchInFlight;
226
- sawPendingResolve = session.pendingResolve;
210
+ onNewSessionStart: () => {
211
+ sawSwitchFlag = isSessionSwitchInFlight();
227
212
  },
228
213
  });
229
- _setActiveSession(s);
230
214
 
231
215
  const resultPromise = runUnit(ctx, pi, s, "task", "T01", "prompt", undefined);
232
216
 
233
217
  await new Promise((r) => setTimeout(r, 30));
234
218
 
235
219
  assert.equal(sawSwitchFlag, true, "session switch guard should be active during newSession");
236
- assert.equal(sawPendingResolve, null, "pendingResolve should not be armed before newSession completes");
237
220
  assert.equal(isSessionSwitchInFlight(), false, "session switch guard should clear after newSession settles");
238
221
 
239
222
  resolveAgentEnd(makeEvent());
@@ -275,24 +258,23 @@ test("auto-loop.ts contains a while keyword", () => {
275
258
  );
276
259
  });
277
260
 
278
- test("auto-loop.ts one-shot pattern: pendingResolve is nulled before calling resolver", () => {
261
+ test("auto-loop.ts one-shot pattern: _currentResolve is nulled before calling resolver", () => {
279
262
  const src = readFileSync(
280
263
  resolve(import.meta.dirname, "..", "auto-loop.ts"),
281
264
  "utf-8",
282
265
  );
283
266
  // The one-shot pattern requires: save ref, null the variable, then call
284
- // Look for the pattern: s.pendingResolve = null appearing before r(
285
267
  const resolveBlock = src.slice(
286
268
  src.indexOf("export function resolveAgentEnd"),
287
269
  src.indexOf("export function resolveAgentEnd") + 600,
288
270
  );
289
- const nullIdx = resolveBlock.indexOf("pendingResolve = null");
271
+ const nullIdx = resolveBlock.indexOf("_currentResolve = null");
290
272
  const callIdx = resolveBlock.indexOf("r({");
291
- assert.ok(nullIdx > 0, "should null pendingResolve in resolveAgentEnd");
273
+ assert.ok(nullIdx > 0, "should null _currentResolve in resolveAgentEnd");
292
274
  assert.ok(callIdx > 0, "should call resolver in resolveAgentEnd");
293
275
  assert.ok(
294
276
  nullIdx < callIdx,
295
- "pendingResolve should be nulled before calling the resolver (one-shot)",
277
+ "_currentResolve should be nulled before calling the resolver (one-shot)",
296
278
  );
297
279
  });
298
280
 
@@ -462,8 +444,6 @@ function makeLoopSession(overrides?: Partial<Record<string, unknown>>) {
462
444
  pendingQuickTasks: [],
463
445
  sidecarQueue: [],
464
446
  autoModeStartModel: null,
465
- pendingResolve: null,
466
- pendingAgentEndQueue: [],
467
447
  unitDispatchCount: new Map<string, number>(),
468
448
  unitLifetimeDispatches: new Map<string, number>(),
469
449
  unitRecoveryCount: new Map<string, number>(),