gsd-pi 2.22.0 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. package/README.md +25 -1
  2. package/dist/cli.js +62 -4
  3. package/dist/headless.d.ts +21 -0
  4. package/dist/headless.js +346 -0
  5. package/dist/help-text.js +32 -0
  6. package/dist/mcp-server.d.ts +20 -3
  7. package/dist/mcp-server.js +21 -1
  8. package/dist/models-resolver.d.ts +32 -0
  9. package/dist/models-resolver.js +50 -0
  10. package/dist/resources/extensions/bg-shell/output-formatter.ts +36 -16
  11. package/dist/resources/extensions/bg-shell/process-manager.ts +6 -4
  12. package/dist/resources/extensions/bg-shell/types.ts +33 -1
  13. package/dist/resources/extensions/browser-tools/capture.ts +18 -16
  14. package/dist/resources/extensions/browser-tools/index.ts +20 -0
  15. package/dist/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +25 -0
  16. package/dist/resources/extensions/browser-tools/tools/action-cache.ts +216 -0
  17. package/dist/resources/extensions/browser-tools/tools/codegen.ts +274 -0
  18. package/dist/resources/extensions/browser-tools/tools/device.ts +183 -0
  19. package/dist/resources/extensions/browser-tools/tools/extract.ts +229 -0
  20. package/dist/resources/extensions/browser-tools/tools/injection-detect.ts +221 -0
  21. package/dist/resources/extensions/browser-tools/tools/network-mock.ts +244 -0
  22. package/dist/resources/extensions/browser-tools/tools/pdf.ts +92 -0
  23. package/dist/resources/extensions/browser-tools/tools/state-persistence.ts +202 -0
  24. package/dist/resources/extensions/browser-tools/tools/visual-diff.ts +209 -0
  25. package/dist/resources/extensions/browser-tools/tools/zoom.ts +104 -0
  26. package/dist/resources/extensions/gsd/auto-dashboard.ts +2 -0
  27. package/dist/resources/extensions/gsd/auto-recovery.ts +10 -0
  28. package/dist/resources/extensions/gsd/auto.ts +437 -11
  29. package/dist/resources/extensions/gsd/captures.ts +49 -0
  30. package/dist/resources/extensions/gsd/commands.ts +20 -3
  31. package/dist/resources/extensions/gsd/dashboard-overlay.ts +16 -2
  32. package/dist/resources/extensions/gsd/diff-context.ts +73 -80
  33. package/dist/resources/extensions/gsd/doctor.ts +20 -1
  34. package/dist/resources/extensions/gsd/forensics.ts +95 -52
  35. package/dist/resources/extensions/gsd/guided-flow.ts +10 -5
  36. package/dist/resources/extensions/gsd/mcp-server.ts +33 -12
  37. package/dist/resources/extensions/gsd/post-unit-hooks.ts +2 -1
  38. package/dist/resources/extensions/gsd/prompts/execute-task.md +5 -0
  39. package/dist/resources/extensions/gsd/prompts/guided-discuss-milestone.md +104 -1
  40. package/dist/resources/extensions/gsd/prompts/plan-milestone.md +1 -0
  41. package/dist/resources/extensions/gsd/prompts/system.md +2 -1
  42. package/dist/resources/extensions/gsd/prompts/validate-milestone.md +91 -0
  43. package/dist/resources/extensions/gsd/roadmap-slices.ts +41 -1
  44. package/dist/resources/extensions/gsd/session-forensics.ts +36 -2
  45. package/dist/resources/extensions/gsd/templates/milestone-validation.md +62 -0
  46. package/dist/resources/extensions/gsd/tests/auto-lock-creation.test.ts +186 -0
  47. package/dist/resources/extensions/gsd/tests/auto-recovery.test.ts +64 -0
  48. package/dist/resources/extensions/gsd/tests/auto-skip-loop.test.ts +123 -0
  49. package/dist/resources/extensions/gsd/tests/doctor.test.ts +58 -0
  50. package/dist/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts +17 -6
  51. package/dist/resources/extensions/gsd/tests/integration/headless-command.ts +534 -0
  52. package/dist/resources/extensions/gsd/tests/roadmap-slices.test.ts +43 -1
  53. package/dist/resources/extensions/gsd/tests/triage-dispatch.test.ts +120 -0
  54. package/dist/resources/extensions/gsd/tests/triage-resolution.test.ts +203 -2
  55. package/dist/resources/extensions/gsd/tests/visualizer-overlay.test.ts +8 -3
  56. package/dist/resources/extensions/gsd/triage-resolution.ts +83 -0
  57. package/dist/resources/extensions/gsd/visualizer-overlay.ts +8 -1
  58. package/dist/resources/extensions/gsd/workspace-index.ts +34 -6
  59. package/package.json +1 -1
  60. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.d.ts +10 -0
  61. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.d.ts.map +1 -0
  62. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.js +79 -0
  63. package/packages/pi-coding-agent/dist/core/tools/bash-background.test.js.map +1 -0
  64. package/packages/pi-coding-agent/dist/core/tools/bash.d.ts +18 -0
  65. package/packages/pi-coding-agent/dist/core/tools/bash.d.ts.map +1 -1
  66. package/packages/pi-coding-agent/dist/core/tools/bash.js +77 -1
  67. package/packages/pi-coding-agent/dist/core/tools/bash.js.map +1 -1
  68. package/packages/pi-coding-agent/dist/core/tools/index.d.ts +1 -1
  69. package/packages/pi-coding-agent/dist/core/tools/index.d.ts.map +1 -1
  70. package/packages/pi-coding-agent/dist/core/tools/index.js +1 -1
  71. package/packages/pi-coding-agent/dist/core/tools/index.js.map +1 -1
  72. package/packages/pi-coding-agent/dist/index.d.ts +1 -1
  73. package/packages/pi-coding-agent/dist/index.d.ts.map +1 -1
  74. package/packages/pi-coding-agent/dist/index.js +1 -1
  75. package/packages/pi-coding-agent/dist/index.js.map +1 -1
  76. package/packages/pi-coding-agent/src/core/tools/bash-background.test.ts +91 -0
  77. package/packages/pi-coding-agent/src/core/tools/bash.ts +83 -1
  78. package/packages/pi-coding-agent/src/core/tools/index.ts +1 -0
  79. package/packages/pi-coding-agent/src/index.ts +1 -0
  80. package/src/resources/extensions/bg-shell/output-formatter.ts +36 -16
  81. package/src/resources/extensions/bg-shell/process-manager.ts +6 -4
  82. package/src/resources/extensions/bg-shell/types.ts +33 -1
  83. package/src/resources/extensions/browser-tools/capture.ts +18 -16
  84. package/src/resources/extensions/browser-tools/index.ts +20 -0
  85. package/src/resources/extensions/browser-tools/tests/browser-tools-unit.test.cjs +25 -0
  86. package/src/resources/extensions/browser-tools/tools/action-cache.ts +216 -0
  87. package/src/resources/extensions/browser-tools/tools/codegen.ts +274 -0
  88. package/src/resources/extensions/browser-tools/tools/device.ts +183 -0
  89. package/src/resources/extensions/browser-tools/tools/extract.ts +229 -0
  90. package/src/resources/extensions/browser-tools/tools/injection-detect.ts +221 -0
  91. package/src/resources/extensions/browser-tools/tools/network-mock.ts +244 -0
  92. package/src/resources/extensions/browser-tools/tools/pdf.ts +92 -0
  93. package/src/resources/extensions/browser-tools/tools/state-persistence.ts +202 -0
  94. package/src/resources/extensions/browser-tools/tools/visual-diff.ts +209 -0
  95. package/src/resources/extensions/browser-tools/tools/zoom.ts +104 -0
  96. package/src/resources/extensions/gsd/auto-dashboard.ts +2 -0
  97. package/src/resources/extensions/gsd/auto-recovery.ts +10 -0
  98. package/src/resources/extensions/gsd/auto.ts +437 -11
  99. package/src/resources/extensions/gsd/captures.ts +49 -0
  100. package/src/resources/extensions/gsd/commands.ts +20 -3
  101. package/src/resources/extensions/gsd/dashboard-overlay.ts +16 -2
  102. package/src/resources/extensions/gsd/diff-context.ts +73 -80
  103. package/src/resources/extensions/gsd/doctor.ts +20 -1
  104. package/src/resources/extensions/gsd/forensics.ts +95 -52
  105. package/src/resources/extensions/gsd/guided-flow.ts +10 -5
  106. package/src/resources/extensions/gsd/mcp-server.ts +33 -12
  107. package/src/resources/extensions/gsd/post-unit-hooks.ts +2 -1
  108. package/src/resources/extensions/gsd/prompts/execute-task.md +5 -0
  109. package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md +104 -1
  110. package/src/resources/extensions/gsd/prompts/plan-milestone.md +1 -0
  111. package/src/resources/extensions/gsd/prompts/system.md +2 -1
  112. package/src/resources/extensions/gsd/prompts/validate-milestone.md +91 -0
  113. package/src/resources/extensions/gsd/roadmap-slices.ts +41 -1
  114. package/src/resources/extensions/gsd/session-forensics.ts +36 -2
  115. package/src/resources/extensions/gsd/templates/milestone-validation.md +62 -0
  116. package/src/resources/extensions/gsd/tests/auto-lock-creation.test.ts +186 -0
  117. package/src/resources/extensions/gsd/tests/auto-recovery.test.ts +64 -0
  118. package/src/resources/extensions/gsd/tests/auto-skip-loop.test.ts +123 -0
  119. package/src/resources/extensions/gsd/tests/doctor.test.ts +58 -0
  120. package/src/resources/extensions/gsd/tests/in-flight-tool-tracking.test.ts +17 -6
  121. package/src/resources/extensions/gsd/tests/integration/headless-command.ts +534 -0
  122. package/src/resources/extensions/gsd/tests/roadmap-slices.test.ts +43 -1
  123. package/src/resources/extensions/gsd/tests/triage-dispatch.test.ts +120 -0
  124. package/src/resources/extensions/gsd/tests/triage-resolution.test.ts +203 -2
  125. package/src/resources/extensions/gsd/tests/visualizer-overlay.test.ts +8 -3
  126. package/src/resources/extensions/gsd/triage-resolution.ts +83 -0
  127. package/src/resources/extensions/gsd/visualizer-overlay.ts +8 -1
  128. package/src/resources/extensions/gsd/workspace-index.ts +34 -6
@@ -1,15 +1,24 @@
1
- // @ts-ignore — @modelcontextprotocol/sdk types may not be in extensions tsconfig
2
- import { Server } from '@modelcontextprotocol/sdk/server'
3
- // @ts-ignore
4
- import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio'
5
- // @ts-ignore
6
- import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types'
1
+ /**
2
+ * MCP (Model Context Protocol) server for the GSD extension.
3
+ *
4
+ * This module provides the same MCP server functionality as src/mcp-server.ts
5
+ * but can be loaded via jiti in the extension runtime context. It enables
6
+ * GSD's tools to be used by external AI clients (Claude Desktop, VS Code
7
+ * Copilot, etc.) via the MCP standard protocol over stdin/stdout.
8
+ */
7
9
 
8
10
  interface McpTool {
9
11
  name: string
10
12
  description: string
11
13
  parameters: Record<string, unknown>
12
- execute(toolCallId: string, params: Record<string, unknown>, signal?: AbortSignal, onUpdate?: unknown): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }>
14
+ execute(
15
+ toolCallId: string,
16
+ params: Record<string, unknown>,
17
+ signal?: AbortSignal,
18
+ onUpdate?: unknown,
19
+ ): Promise<{
20
+ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>
21
+ }>
13
22
  }
14
23
 
15
24
  export async function startMcpServer(options: {
@@ -18,6 +27,16 @@ export async function startMcpServer(options: {
18
27
  }): Promise<void> {
19
28
  const { tools, version = '0.0.0' } = options
20
29
 
30
+ // Dynamic imports — MCP SDK subpath exports use a "./*" wildcard pattern
31
+ // that cannot be statically resolved by all TypeScript configurations.
32
+ // @ts-ignore
33
+ const { Server } = await import('@modelcontextprotocol/sdk/server')
34
+ // @ts-ignore
35
+ const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js')
36
+ // @ts-ignore
37
+ const sdkTypes = await import('@modelcontextprotocol/sdk/types')
38
+ const { ListToolsRequestSchema, CallToolRequestSchema } = sdkTypes
39
+
21
40
  const toolMap = new Map<string, McpTool>()
22
41
  for (const tool of tools) {
23
42
  toolMap.set(tool.name, tool)
@@ -28,9 +47,10 @@ export async function startMcpServer(options: {
28
47
  { capabilities: { tools: {} } },
29
48
  )
30
49
 
50
+ // tools/list — return every registered GSD tool with its JSON Schema parameters
31
51
  server.setRequestHandler(ListToolsRequestSchema, async () => {
32
52
  return {
33
- tools: tools.map((t) => ({
53
+ tools: tools.map((t: McpTool) => ({
34
54
  name: t.name,
35
55
  description: t.description,
36
56
  inputSchema: t.parameters,
@@ -38,6 +58,7 @@ export async function startMcpServer(options: {
38
58
  }
39
59
  })
40
60
 
61
+ // tools/call — execute the requested tool and return content blocks
41
62
  server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
42
63
  const { name, arguments: args } = request.params
43
64
  const tool = toolMap.get(name)
@@ -56,15 +77,15 @@ export async function startMcpServer(options: {
56
77
  undefined,
57
78
  )
58
79
 
59
- const content = result.content.map((block) => {
80
+ const content = result.content.map((block: any) => {
60
81
  if (block.type === 'text') {
61
- return { type: 'text' as const, text: block.text }
82
+ return { type: 'text' as const, text: block.text ?? '' }
62
83
  }
63
84
  if (block.type === 'image') {
64
85
  return {
65
86
  type: 'image' as const,
66
- data: block.data,
67
- mimeType: block.mimeType,
87
+ data: block.data ?? '',
88
+ mimeType: block.mimeType ?? 'image/png',
68
89
  }
69
90
  }
70
91
  return { type: 'text' as const, text: JSON.stringify(block) }
@@ -60,7 +60,8 @@ export function checkPostUnitHooks(
60
60
 
61
61
  // Don't trigger hooks for other hook units (prevent hook-on-hook chains)
62
62
  // Don't trigger hooks for triage units (prevent hook-on-triage chains)
63
- if (completedUnitType.startsWith("hook/") || completedUnitType === "triage-captures") return null;
63
+ // Don't trigger hooks for quick-task units (lightweight one-offs from captures)
64
+ if (completedUnitType.startsWith("hook/") || completedUnitType === "triage-captures" || completedUnitType === "quick-task") return null;
64
65
 
65
66
  // Check if any hooks are configured for this unit type
66
67
  const hooks = resolvePostUnitHooks().filter(h =>
@@ -31,6 +31,11 @@ Then:
31
31
  3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature.
32
32
  4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail).
33
33
  5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply.
34
+
35
+ **Background process rule:** Never use bare `command &` to run background processes. The shell's `&` operator leaves stdout/stderr attached to the parent, which causes the Bash tool to hang indefinitely waiting for those streams to close. Always redirect output before backgrounding:
36
+ - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &`
37
+ - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`)
38
+ - Preferred: use the `bg_shell` tool if available — it manages process lifecycle correctly without stream-inheritance issues
34
39
  6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
35
40
  7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
36
41
  8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:
@@ -1,5 +1,108 @@
1
1
  Discuss milestone {{milestoneId}} ("{{milestoneTitle}}"). Identify gray areas, ask the user about them, and write `{{milestoneId}}-CONTEXT.md` in the milestone directory with the decisions. Use the **Context** output template below. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow; do not override required artifact rules.
2
2
 
3
+ **Structured questions available: {{structuredQuestionsAvailable}}**
4
+
3
5
  {{inlinedTemplates}}
4
6
 
5
- **Investigate between question rounds to make your questions smarter.** Before each round of questions, do enough lightweight research that your questions are grounded in reality — not guesses about what exists or what's possible. Check library docs (`resolve_library`/`get_library_docs`) when tech choices are relevant, search the web (`search-the-web` with `freshness`/`domain` filters, then `fetch_page` for full content) to verify the landscape, scout the codebase (`rg`, `find`, `scout`) to understand what already exists. Don't go deep — just enough that your next question reflects what's actually true. The goal is to ask questions the user can't answer by saying "did you check the docs?" or "look at the code."
7
+ ---
8
+
9
+ ## Interview Protocol
10
+
11
+ ### Before your first question round
12
+
13
+ Do a lightweight targeted investigation so your questions are grounded in reality:
14
+ - Scout the codebase (`rg`, `find`, or `scout`) to understand what already exists that this milestone touches or builds on
15
+ - Check the roadmap context above (if present) to understand what surrounds this milestone
16
+ - Identify the 3–5 biggest behavioural and architectural unknowns: things where the user's answer will materially change what gets built
17
+
18
+ Do **not** go deep — just enough that your questions reflect what's actually true rather than what you assume.
19
+
20
+ ### Question rounds
21
+
22
+ Ask **1–3 questions per round**. Keep each question focused on one of:
23
+ - **What they're building** — concrete enough to explain to a stranger
24
+ - **Why it needs to exist** — the problem it solves or the desire it fulfills
25
+ - **Who it's for** — user, team, themselves
26
+ - **What "done" looks like** — observable outcomes, not abstract goals
27
+ - **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
28
+ - **What external systems/services this touches** — APIs, databases, third-party services
29
+
30
+ **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions.
31
+
32
+ **If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.
33
+
34
+ After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
35
+
36
+ ### Check-in after each round
37
+
38
+ After each round of answers, ask:
39
+
40
+ > "I think I have a solid picture of this milestone. Ready to wrap up and write the context file, or is there more to cover?"
41
+
42
+ **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with options:
43
+ - "Wrap up — write the context file" *(recommended after ~2–3 rounds)*
44
+ - "Keep going — more to discuss"
45
+
46
+ **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text.
47
+
48
+ If the user wants to keep going, keep asking. Stop when they say wrap up.
49
+
50
+ ---
51
+
52
+ ## Questioning philosophy
53
+
54
+ **Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper.
55
+
56
+ **Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "good UX"), push for specifics.
57
+
58
+ **Questions must be about the experience, not the implementation.** Never ask "what auth provider?" — ask "when someone logs in, what should that feel like?" Implementation is your job. Understanding what they want to experience is the discussion's job.
59
+
60
+ **Position-first framing.** Have opinions. "I'd lean toward X because Y — does that match your thinking?" is better than "what do you think about X vs Y?"
61
+
62
+ **Negative constraints.** Ask what would disappoint them. What they explicitly don't want. Negative constraints are sharper than positive wishes.
63
+
64
+ **Anti-patterns — never do these:**
65
+ - Checklist walking through predetermined topics regardless of what the user said
66
+ - Canned generic questions that could apply to any project
67
+ - Corporate speak ("What are your key success metrics?")
68
+ - Rapid-fire questions without acknowledging answers
69
+ - Asking about technical skill level
70
+
71
+ ---
72
+
73
+ ## Depth Verification
74
+
75
+ Before moving to the wrap-up gate, verify you have covered:
76
+
77
+ - [ ] What they're building — concrete enough to explain to a stranger
78
+ - [ ] Why it needs to exist
79
+ - [ ] Who it's for
80
+ - [ ] What "done" looks like
81
+ - [ ] The biggest technical unknowns / risks
82
+ - [ ] What external systems/services this touches
83
+
84
+ **Print a structured depth summary in chat first** — using the user's own terminology. Cover what you understood, what shaped your understanding, and any areas of remaining uncertainty.
85
+
86
+ **Then confirm:**
87
+
88
+ **If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with:
89
+ - header: "Depth Check"
90
+ - question: "Did I capture the depth right?"
91
+ - options: "Yes, you got it (Recommended)", "Not quite — let me clarify"
92
+ - **The question ID must contain `depth_verification`** (e.g. `depth_verification_confirm`) — this enables the write-gate downstream.
93
+
94
+ **If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text: "Did I capture that correctly? Anything I missed?" Wait for confirmation before proceeding.
95
+
96
+ If they clarify, absorb the correction and re-verify.
97
+
98
+ ---
99
+
100
+ ## Output
101
+
102
+ Once the user confirms depth:
103
+
104
+ 1. Use the **Context** output template inlined above
105
+ 2. `mkdir -p` the milestone directory if needed
106
+ 3. Write `{{milestoneId}}-CONTEXT.md` — preserve the user's exact terminology, emphasis, and framing. Do not paraphrase nuance into generic summaries. The context file is downstream agents' only window into this conversation.
107
+ 4. Commit: `git add {{milestoneId}}-CONTEXT.md && git commit -m "docs({{milestoneId}}): milestone context from discuss"`
108
+ 5. Say exactly: `"{{milestoneId}} context written."` — nothing else.
@@ -51,6 +51,7 @@ Apply these when decomposing and ordering slices:
51
51
  - **Completion must imply capability.** If every slice in this roadmap were completed exactly as written, the milestone's promised outcome should actually work at the proof level claimed. Do not write slices that can all be checked off while the user-visible capability still does not exist.
52
52
  - **Don't invent risks.** If the project is straightforward, skip the proof strategy and just ship value in smart order. Not everything has major unknowns.
53
53
  - **Ship features, not proofs.** A completed slice should leave the product in a state where the new capability is actually usable through its real interface. A login flow slice ends with a working login page, not a middleware function. An API slice ends with endpoints that return real data from a real store, not hardcoded fixtures. A dashboard slice ends with a real dashboard rendering real data, not a component that renders mock props. If a slice can't ship the real thing yet because a dependency isn't built, it should ship with realistic stubs that are clearly marked for replacement — but the user-facing surface must be real.
54
+ - **Dependency format is comma-separated, never range syntax.** Write `depends:[S01,S02,S03]` — not `depends:[S01-S03]`. Range syntax is not a valid format and permanently blocks the slice.
54
55
  - **Ambition matches the milestone.** The number and depth of slices should match the milestone's ambition. A milestone promising "core platform with auth, data model, and primary user loop" should have enough slices to actually deliver all three as working features — not two proof-of-concept slices and a note that "the rest will come in the next milestone." If the milestone's context promises an outcome, the roadmap must deliver it.
55
56
  - **Right-size the decomposition.** Match slice count to actual complexity. If the work is small enough to build and verify in one pass, it's one slice — don't split it into three just because you can identify sub-steps. Multiple requirements can share a single slice. Conversely, don't cram genuinely independent capabilities into one slice just to keep the count low. Let the work dictate the structure.
56
57
 
@@ -154,7 +154,7 @@ Templates showing the expected format for each artifact type are in:
154
154
 
155
155
  **External facts:** Use `search-the-web` + `fetch_page`, or `search_and_read` for one-call extraction. Use `freshness` for recency. Never state current facts from training data without verification.
156
156
 
157
- **Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging.
157
+ **Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging.
158
158
 
159
159
  **One-shot commands:** Use `async_bash` for builds, tests, and installs. The result is pushed to you when the command exits — no polling needed. Use `await_job` to block on a specific job.
160
160
 
@@ -169,6 +169,7 @@ Templates showing the expected format for each artifact type are in:
169
169
  - Never use `cat` to read a file you might edit — `read` gives you the exact text `edit` needs.
170
170
  - Never `grep` for a function definition when `lsp` go-to-definition is available.
171
171
  - Never poll a server with `sleep 1 && curl` loops — use `bg_shell` `wait_for_ready`.
172
+ - Never use `bash` with `&` to background a process — it hangs because the child inherits stdout. Use `bg_shell` `start` instead.
172
173
  - Never use `bg_shell` `output` for a status check — use `digest`.
173
174
  - Never read files one-by-one to understand a subsystem — use `rg` or `scout` first.
174
175
  - Never guess at library APIs from training data — use `get_library_docs`.
@@ -0,0 +1,91 @@
1
+ You are executing GSD auto-mode.
2
+
3
+ ## UNIT: Validate Milestone {{milestoneId}} ("{{milestoneTitle}}") — Remediation Round {{remediationRound}}
4
+
5
+ ## Working Directory
6
+
7
+ Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
8
+
9
+ ## Your Role in the Pipeline
10
+
11
+ All slices are done. Before the **complete-milestone agent** closes this milestone, you reconcile planned work against what was actually delivered. You audit success criteria against evidence, inventory deferred work across all slice summaries and UAT results, and classify gaps. If auto-remediable gaps exist on the first pass, you append remediation slices to the roadmap so the pipeline can execute them before completion. After remediation slices run, you re-validate. The milestone only proceeds to completion once validation passes.
12
+
13
+ This is a gate, not a formality. But most milestones pass — bias toward "pass" unless you find concrete evidence of unmet criteria or meaningful gaps.
14
+
15
+ All relevant context has been preloaded below — the roadmap, all slice summaries, UAT results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files.
16
+
17
+ {{inlinedContext}}
18
+
19
+ If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during validation, without relaxing required verification or artifact rules.
20
+
21
+ Then:
22
+
23
+ ### Step 1: Audit Success Criteria
24
+
25
+ Enumerate each success criterion from the roadmap's `## Success Criteria` section. For each criterion, map it to concrete evidence from slice summaries, UAT results, or observable behavior.
26
+
27
+ Format each criterion as:
28
+
29
+ - `Criterion text` — **MET** — evidence: {{specific slice summary, UAT result, test output, or observable behavior}}
30
+ - `Criterion text` — **NOT MET** — gap: {{what's missing and why}}
31
+
32
+ Every criterion must have a definitive verdict. Do not mark a criterion as MET without specific evidence.
33
+
34
+ ### Step 2: Inventory Deferred Work
35
+
36
+ Scan ALL slice summaries for:
37
+ - `Known Limitations` sections
38
+ - `Follow-ups` sections
39
+ - `Deviations` sections
40
+
41
+ Scan ALL UAT results for:
42
+ - `Not Proven By This UAT` sections
43
+ - Any PARTIAL or FAIL verdicts
44
+
45
+ Check:
46
+ - `.gsd/REQUIREMENTS.md` for Active requirements not yet Validated
47
+ - `.gsd/CAPTURES.md` for unresolved deferred captures
48
+
49
+ Collect every item into a single inventory. Do not skip items because they seem minor — the classification step handles prioritization.
50
+
51
+ ### Step 3: Classify Each Gap
52
+
53
+ For every unmet criterion and every deferred work item, classify it as one of:
54
+
55
+ - **auto-remediable** — can be fixed by adding a new slice (missing feature, unfixed bug, untested path, incomplete integration)
56
+ - **human-required** — needs the user's input (design decision, external service dependency, manual verification, judgment call, ambiguous requirement)
57
+ - **acceptable** — known limitation that's OK to ship (documented trade-off, explicitly scoped for a future milestone, minor rough edge with no user impact)
58
+
59
+ Be conservative with **auto-remediable**. Only classify a gap as auto-remediable if you're confident a slice can resolve it without human judgment. When in doubt, classify as **human-required**.
60
+
61
+ ### Step 4: Act on Gaps
62
+
63
+ **If this is remediation round 0 AND auto-remediable gaps exist:**
64
+
65
+ 1. Define remediation slices to address auto-remediable gaps. Follow the exact roadmap slice format:
66
+ `- [ ] **S0X: Title** \`risk:medium\` \`depends:[]\``
67
+ Include a brief description of what each slice must accomplish.
68
+ 2. Append these slices to `{{roadmapPath}}` after existing slices (do not modify completed slices).
69
+ 3. Update the boundary map in the roadmap if the new slices introduce new integration points.
70
+ 4. Set verdict to `needs-remediation`.
71
+
72
+ **If this is remediation round 1 or higher:**
73
+
74
+ Do NOT add more slices. At this point either:
75
+ - All remaining gaps are acceptable — set verdict to `pass`
76
+ - Remaining gaps need the user's input — set verdict to `needs-attention`
77
+
78
+ Never add remediation slices after round 0. If round 0 remediation didn't close the gaps, escalate.
79
+
80
+ **If no auto-remediable gaps exist (any round):**
81
+
82
+ - If all criteria are MET and deferred items are acceptable or human-required only — set verdict to `pass` (with human-required items noted)
83
+ - If human-required items are blocking — set verdict to `needs-attention`
84
+
85
+ ### Step 5: Write Validation Report
86
+
87
+ Write `{{validationPath}}` using the milestone-validation template. Fill all frontmatter fields and every section. The report must be a complete record of the validation — a future agent reading only this file should understand what was checked, what passed, and what remains.
88
+
89
+ **You MUST write `{{validationPath}}` before finishing.**
90
+
91
+ When done, say: "Milestone {{milestoneId}} validated."
@@ -1,5 +1,45 @@
1
1
  import type { RoadmapSliceEntry, RiskLevel } from "./types.js";
2
2
 
3
/**
 * Expand dependency shorthand into individual slice IDs.
 *
 * Handles two common LLM-generated patterns that the roadmap parser
 * previously treated as single literal IDs (silently blocking slices):
 *
 *   "S01-S04"  → ["S01", "S02", "S03", "S04"]  (range syntax)
 *   "S01..S04" → ["S01", "S02", "S03", "S04"]  (dot-range syntax)
 *
 * Rules:
 *  - Every entry is trimmed; entries that are empty after trimming are dropped.
 *  - Plain IDs ("S01", "S02") are emitted unchanged (apart from trimming).
 *  - A range is only expanded when both prefixes match (case-insensitively),
 *    start <= end, and the range is not absurdly large. Expanded IDs use the
 *    upper-cased prefix and the zero-padding width of the range's start.
 *  - Anything that looks like a range but fails those checks is emitted as
 *    the literal (trimmed) token, exactly like a plain ID.
 *
 * @param deps Raw dependency tokens (typically the comma-split contents of
 *             a `depends:[...]` tag).
 * @returns Flat list of dependency IDs in input order.
 */
export function expandDependencies(deps: string[]): string[] {
  // Guard against runaway expansion: a malformed token such as
  // "S01-S999999999" would otherwise allocate hundreds of millions of strings.
  // Oversized ranges fall back to the literal token instead.
  const maxRangeSize = 1000;

  const result: string[] = [];
  for (const dep of deps) {
    const trimmed = dep.trim();
    if (!trimmed) continue;

    // Match range syntax: S01-S04 or S01..S04 (case-insensitive prefix)
    const rangeMatch = trimmed.match(/^([A-Za-z]+)(\d+)(?:-|\.\.)+([A-Za-z]+)(\d+)$/);
    if (rangeMatch) {
      const prefixA = rangeMatch[1]!.toUpperCase();
      const startNum = parseInt(rangeMatch[2]!, 10);
      const prefixB = rangeMatch[3]!.toUpperCase();
      const endNum = parseInt(rangeMatch[4]!, 10);
      const rangeSize = endNum - startNum + 1;

      // Only expand when both prefixes match and the range is valid and bounded
      if (prefixA === prefixB && startNum <= endNum && rangeSize <= maxRangeSize) {
        const width = rangeMatch[2]!.length; // preserve zero-padding (S01 not S1)
        for (let i = startNum; i <= endNum; i++) {
          result.push(`${prefixA}${String(i).padStart(width, "0")}`);
        }
        continue;
      }
    }

    // Plain ID, or a range-like token we refused to expand: keep it literally.
    result.push(trimmed);
  }
  return result;
}
42
+
3
43
  function extractSlicesSection(content: string): string {
4
44
  const headingMatch = /^## Slices\s*$/m.exec(content);
5
45
  if (!headingMatch || headingMatch.index == null) return "";
@@ -33,7 +73,7 @@ export function parseRoadmapSlices(content: string): RoadmapSliceEntry[] {
33
73
 
34
74
  const depsMatch = rest.match(/`depends:\[([^\]]*)\]`/);
35
75
  const depends = depsMatch && depsMatch[1]!.trim()
36
- ? depsMatch[1]!.split(",").map(s => s.trim())
76
+ ? expandDependencies(depsMatch[1]!.split(",").map(s => s.trim()))
37
77
  : [];
38
78
 
39
79
  currentSlice = { id, title, risk, depends, done, demo: "" };
@@ -22,6 +22,7 @@ import { readFileSync, readdirSync, existsSync, statSync } from "node:fs";
22
22
  import { basename, join } from "node:path";
23
23
  import { nativeParseJsonlTail } from "./native-parser-bridge.js";
24
24
  import { nativeWorkingTreeStatus, nativeDiffStat } from "./native-git-bridge.js";
25
+ import { getAutoWorktreePath } from "./auto-worktree.js";
25
26
 
26
27
  // ─── Types ────────────────────────────────────────────────────────────────────
27
28
 
@@ -296,12 +297,45 @@ export function synthesizeCrashRecovery(
296
297
  * Replaces the old shallow getLastActivityDiagnostic().
297
298
  */
298
299
export function getDeepDiagnostic(basePath: string): string | null {
  let latest: ExecutionTrace | null = null;

  // Prefer the activity log inside the auto-worktree of the active milestone,
  // when both an active milestone ID and a worktree path can be resolved.
  try {
    const milestoneId = readActiveMilestoneId(basePath);
    const worktreeRoot = milestoneId ? getAutoWorktreePath(basePath, milestoneId) : null;
    if (worktreeRoot) {
      latest = readLastActivityLog(join(worktreeRoot, ".gsd", "activity"));
    }
  } catch {
    // Best effort only: any failure here just means we use the root logs below.
  }

  // No usable worktree trace (missing, or recorded zero tool calls):
  // read the activity log under basePath instead.
  if (!latest || latest.toolCallCount === 0) {
    latest = readLastActivityLog(join(basePath, ".gsd", "activity"));
  }

  // Nothing worth summarizing if there is still no trace with tool calls.
  return !latest || latest.toolCallCount === 0 ? null : formatTraceSummary(latest);
}
304
322
 
323
/**
 * Synchronously look up the active milestone ID in `<basePath>/.gsd/STATE.md`,
 * avoiding the async deriveState() path.
 *
 * The file is scanned for a `**Active Milestone:** <id>` marker (label matched
 * case-insensitively); the first run of non-whitespace characters after the
 * marker is returned as-is.
 *
 * @param basePath Directory that contains the `.gsd` folder.
 * @returns The captured token, or `null` when STATE.md does not exist, the
 *          marker is absent, or reading the file throws.
 */
function readActiveMilestoneId(basePath: string): string | null {
  try {
    const stateFile = join(basePath, ".gsd", "STATE.md");
    if (!existsSync(stateFile)) {
      return null;
    }

    const text = readFileSync(stateFile, "utf-8");
    const found = /\*\*Active Milestone:\*\*\s*(\S+)/i.exec(text);
    if (found === null) {
      return null;
    }
    return found[1] ?? null;
  } catch {
    // Any filesystem error is treated the same as "no active milestone".
    return null;
  }
}
338
+
305
339
  // ─── Formatting ───────────────────────────────────────────────────────────────
306
340
 
307
341
  function formatRecoveryPrompt(
@@ -0,0 +1,62 @@
1
+ ---
2
+ id: {{milestoneId}}
3
+ remediation_round: {{round}}
4
+ verdict: pass | needs-remediation | needs-attention
5
+ slices_added: []
6
+ human_required_items: 0
7
+ validated_at: {{date}}
8
+ ---
9
+
10
+ # {{milestoneId}}: Milestone Validation
11
+
12
+ ## Success Criteria Audit
13
+
14
+ <!-- For each success criterion from the roadmap, list the criterion text,
15
+ verdict (MET / NOT MET), and the specific evidence or gap.
16
+ Every criterion must appear here with a definitive verdict. -->
17
+
18
+ - **Criterion:** {{criterionText}}
19
+ **Verdict:** {{MET or NOT MET}}
20
+ **Evidence:** {{sliceSummary, UATResult, testOutput, or observableBehavior}}
21
+
22
+ ## Deferred Work Inventory
23
+
24
+ <!-- Every deferred, incomplete, or flagged item found across all slice summaries
25
+ and UAT results. Include the source so a reader can trace back to the original. -->
26
+
27
+ | Item | Source | Classification | Disposition |
28
+ |------|--------|----------------|-------------|
29
+ | {{itemDescription}} | {{sliceId or UAT reference}} | {{auto-remediable / human-required / acceptable}} | {{what happens with this item}} |
30
+
31
+ ## Requirement Coverage
32
+
33
+ <!-- Active requirements from REQUIREMENTS.md that are not yet Validated.
34
+ If no REQUIREMENTS.md exists, write "No requirements tracking active." -->
35
+
36
+ - **{{requirementId}}**: {{status}} — {{disposition: covered by remediation slice / acceptable gap / needs attention}}
37
+
38
+ ## Remediation Slices
39
+
40
+ <!-- New slices appended to the roadmap to address auto-remediable gaps.
41
+ Include the full slice definition as written to the roadmap.
42
+ If no slices were added, write "None required." -->
43
+
44
+ {{remediationSliceDefinitions OR "None required."}}
45
+
46
+ ## Requires Attention
47
+
48
+ <!-- Items classified as human-required, with enough context for the user to make a decision.
49
+ Ordered by priority (blocking items first).
50
+ If none, write "None." -->
51
+
52
+ - **{{itemTitle}}** ({{priority: blocking / non-blocking}})
53
+ Context: {{whatTheItemIs, whereItCameFrom, whyItNeedsHumanInput}}
54
+
55
+ ## Verdict
56
+
57
+ <!-- One-paragraph summary assessment.
58
+ State the verdict (pass / needs-remediation / needs-attention),
59
+ the number of criteria met vs total, and the key finding
60
+ that determined the verdict. -->
61
+
62
+ {{verdictSummary}}