npm - gsd-pi - Versions diffs - 2.22.0 → 2.23.0 - Mend

gsd-pi 2.22.0 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128) hide show

package/src/resources/extensions/gsd/mcp-server.ts CHANGED Viewed

@@ -1,15 +1,24 @@
-// @ts-ignore — @modelcontextprotocol/sdk types may not be in extensions tsconfig
-import { Server } from '@modelcontextprotocol/sdk/server'
-// @ts-ignore
-import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio'
-// @ts-ignore
-import { ListToolsRequestSchema, CallToolRequestSchema } from '@modelcontextprotocol/sdk/types'
+/**
+ * MCP (Model Context Protocol) server for the GSD extension.
+ *
+ * This module provides the same MCP server functionality as src/mcp-server.ts
+ * but can be loaded via jiti in the extension runtime context. It enables
+ * GSD's tools to be used by external AI clients (Claude Desktop, VS Code
+ * Copilot, etc.) via the MCP standard protocol over stdin/stdout.
+ */
 interface McpTool {
   name: string
   description: string
   parameters: Record<string, unknown>
-  execute(toolCallId: string, params: Record<string, unknown>, signal?: AbortSignal, onUpdate?: unknown): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }>
+  execute(
+    toolCallId: string,
+    params: Record<string, unknown>,
+    signal?: AbortSignal,
+    onUpdate?: unknown,
+  ): Promise<{
+    content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>
+  }>
 }
 export async function startMcpServer(options: {
@@ -18,6 +27,16 @@ export async function startMcpServer(options: {
 }): Promise<void> {
   const { tools, version = '0.0.0' } = options
+  // Dynamic imports — MCP SDK subpath exports use a "./*" wildcard pattern
+  // that cannot be statically resolved by all TypeScript configurations.
+  // @ts-ignore
+  const { Server } = await import('@modelcontextprotocol/sdk/server')
+  // @ts-ignore
+  const { StdioServerTransport } = await import('@modelcontextprotocol/sdk/server/stdio.js')
+  // @ts-ignore
+  const sdkTypes = await import('@modelcontextprotocol/sdk/types')
+  const { ListToolsRequestSchema, CallToolRequestSchema } = sdkTypes
   const toolMap = new Map<string, McpTool>()
   for (const tool of tools) {
     toolMap.set(tool.name, tool)
@@ -28,9 +47,10 @@ export async function startMcpServer(options: {
     { capabilities: { tools: {} } },
   )
+  // tools/list — return every registered GSD tool with its JSON Schema parameters
   server.setRequestHandler(ListToolsRequestSchema, async () => {
     return {
-      tools: tools.map((t) => ({
+      tools: tools.map((t: McpTool) => ({
         name: t.name,
         description: t.description,
         inputSchema: t.parameters,
@@ -38,6 +58,7 @@ export async function startMcpServer(options: {
     }
   })
+  // tools/call — execute the requested tool and return content blocks
   server.setRequestHandler(CallToolRequestSchema, async (request: any) => {
     const { name, arguments: args } = request.params
     const tool = toolMap.get(name)
@@ -56,15 +77,15 @@ export async function startMcpServer(options: {
         undefined,
       )
-      const content = result.content.map((block) => {
+      const content = result.content.map((block: any) => {
         if (block.type === 'text') {
-          return { type: 'text' as const, text: block.text }
+          return { type: 'text' as const, text: block.text ?? '' }
         }
         if (block.type === 'image') {
           return {
             type: 'image' as const,
-            data: block.data,
-            mimeType: block.mimeType,
+            data: block.data ?? '',
+            mimeType: block.mimeType ?? 'image/png',
           }
         }
         return { type: 'text' as const, text: JSON.stringify(block) }

package/src/resources/extensions/gsd/post-unit-hooks.ts CHANGED Viewed

@@ -60,7 +60,8 @@ export function checkPostUnitHooks(
   // Don't trigger hooks for other hook units (prevent hook-on-hook chains)
   // Don't trigger hooks for triage units (prevent hook-on-triage chains)
-  if (completedUnitType.startsWith("hook/") || completedUnitType === "triage-captures") return null;
+  // Don't trigger hooks for quick-task units (lightweight one-offs from captures)
+  if (completedUnitType.startsWith("hook/") || completedUnitType === "triage-captures" || completedUnitType === "quick-task") return null;
   // Check if any hooks are configured for this unit type
   const hooks = resolvePostUnitHooks().filter(h =>

package/src/resources/extensions/gsd/prompts/execute-task.md CHANGED Viewed

@@ -31,6 +31,11 @@ Then:
 3. Build the real thing. If the task plan says "create login endpoint", build an endpoint that actually authenticates against a real store, not one that returns a hardcoded success response. If the task plan says "create dashboard page", build a page that renders real data from the API, not a component with hardcoded props. Stubs and mocks are for tests, not for the shipped feature.
 4. Write or update tests as part of execution — tests are verification, not an afterthought. If the slice plan defines test files in its Verification section and this is the first task, create them (they should initially fail).
 5. When implementing non-trivial runtime behavior (async flows, API boundaries, background processes, error paths), add or preserve agent-usable observability. Skip this for simple changes where it doesn't apply.
+   **Background process rule:** Never use bare `command &` to run background processes. The shell's `&` operator leaves stdout/stderr attached to the parent, which causes the Bash tool to hang indefinitely waiting for those streams to close. Always redirect output before backgrounding:
+   - Correct: `command > /dev/null 2>&1 &` or `nohup command > /dev/null 2>&1 &`
+   - Example: `python -m http.server 8080 > /dev/null 2>&1 &` (NOT `python -m http.server 8080 &`)
+   - Preferred: use the `bg_shell` tool whenever it is available — it manages process lifecycle correctly without stream-inheritance issues. Fall back to the redirected forms above only when `bg_shell` is not available.
 6. Verify must-haves are met by running concrete checks (tests, commands, observable behaviors)
 7. Run the slice-level verification checks defined in the slice plan's Verification section. Track which pass. On the final task of the slice, all must pass before marking done. On intermediate tasks, partial passes are expected — note which ones pass in the summary.
 8. If the task touches UI, browser flows, DOM behavior, or user-visible web state:

package/src/resources/extensions/gsd/prompts/guided-discuss-milestone.md CHANGED Viewed

@@ -1,5 +1,108 @@
 Discuss milestone {{milestoneId}} ("{{milestoneTitle}}"). Identify gray areas, ask the user about them, and write `{{milestoneId}}-CONTEXT.md` in the milestone directory with the decisions. Use the **Context** output template below. If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow; do not override required artifact rules.
+**Structured questions available: {{structuredQuestionsAvailable}}**
 {{inlinedTemplates}}
-**Investigate between question rounds to make your questions smarter.** Before each round of questions, do enough lightweight research that your questions are grounded in reality — not guesses about what exists or what's possible. Check library docs (`resolve_library`/`get_library_docs`) when tech choices are relevant, search the web (`search-the-web` with `freshness`/`domain` filters, then `fetch_page` for full content) to verify the landscape, scout the codebase (`rg`, `find`, `scout`) to understand what already exists. Don't go deep — just enough that your next question reflects what's actually true. The goal is to ask questions the user can't answer by saying "did you check the docs?" or "look at the code."
+---
+## Interview Protocol
+### Before your first question round
+Do a lightweight targeted investigation so your questions are grounded in reality:
+- Scout the codebase (`rg`, `find`, or `scout`) to understand what already exists that this milestone touches or builds on
+- Check the roadmap context above (if present) to understand what surrounds this milestone
+- Identify the 3–5 biggest behavioural and architectural unknowns: things where the user's answer will materially change what gets built
+Do **not** go deep — just enough that your questions reflect what's actually true rather than what you assume.
+### Question rounds
+Ask **1–3 questions per round**. Keep each question focused on one of:
+- **What they're building** — concrete enough to explain to a stranger
+- **Why it needs to exist** — the problem it solves or the desire it fulfills
+- **Who it's for** — user, team, themselves
+- **What "done" looks like** — observable outcomes, not abstract goals
+- **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
+- **What external systems/services this touches** — APIs, databases, third-party services
+**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` for each round. 1–3 questions per call, each as a separate question object. Keep option labels short (3–5 words). Always include a freeform "Other / let me explain" option. When the user picks that option or writes a long freeform answer, switch to plain text follow-up for that thread before resuming structured questions.
+**If `{{structuredQuestionsAvailable}}` is `false`:** ask questions in plain text. Keep each round to 1–3 focused questions. Wait for answers before asking the next round.
+After the user answers, investigate further if any answer opens a new unknown, then ask the next round.
+### Check-in after each round
+After each round of answers, ask:
+> "I think I have a solid picture of this milestone. Ready to wrap up and write the context file, or is there more to cover?"
+**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with options:
+- "Wrap up — write the context file" *(recommended after ~2–3 rounds)*
+- "Keep going — more to discuss"
+**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text.
+If the user wants to keep going, keep asking. Stop when they say wrap up.
+---
+## Questioning philosophy
+**Start open, follow energy.** Let the user's enthusiasm guide where you dig deeper.
+**Challenge vagueness, make abstract concrete.** When the user says something abstract ("it should be smart" / "good UX"), push for specifics.
+**Questions must be about the experience, not the implementation.** Never ask "what auth provider?" — ask "when someone logs in, what should that feel like?" Implementation is your job. Understanding what they want to experience is the discussion's job.
+**Position-first framing.** Have opinions. "I'd lean toward X because Y — does that match your thinking?" is better than "what do you think about X vs Y?"
+**Negative constraints.** Ask what would disappoint them. What they explicitly don't want. Negative constraints are sharper than positive wishes.
+**Anti-patterns — never do these:**
+- Checklist walking through predetermined topics regardless of what the user said
+- Canned generic questions that could apply to any project
+- Corporate speak ("What are your key success metrics?")
+- Rapid-fire questions without acknowledging answers
+- Asking about technical skill level
+---
+## Depth Verification
+Before moving to the wrap-up gate, verify you have covered:
+- [ ] What they're building — concrete enough to explain to a stranger
+- [ ] Why it needs to exist
+- [ ] Who it's for
+- [ ] What "done" looks like
+- [ ] The biggest technical unknowns / risks
+- [ ] What external systems/services this touches
+**Print a structured depth summary in chat first** — using the user's own terminology. Cover what you understood, what shaped your understanding, and any areas of remaining uncertainty.
+**Then confirm:**
+**If `{{structuredQuestionsAvailable}}` is `true`:** use `ask_user_questions` with:
+- header: "Depth Check"
+- question: "Did I capture the depth right?"
+- options: "Yes, you got it (Recommended)", "Not quite — let me clarify"
+- **The question ID must contain `depth_verification`** (e.g. `depth_verification_confirm`) — this enables the write-gate downstream.
+**If `{{structuredQuestionsAvailable}}` is `false`:** ask in plain text: "Did I capture that correctly? Anything I missed?" Wait for confirmation before proceeding.
+If they clarify, absorb the correction and re-verify.
+---
+## Output
+Once the user confirms depth:
+1. Use the **Context** output template inlined above
+2. `mkdir -p` the milestone directory if needed
+3. Write `{{milestoneId}}-CONTEXT.md` — preserve the user's exact terminology, emphasis, and framing. Do not paraphrase nuance into generic summaries. The context file is downstream agents' only window into this conversation.
+4. Commit: `git add {{milestoneId}}-CONTEXT.md && git commit -m "docs({{milestoneId}}): milestone context from discuss"`
+5. Say exactly: `"{{milestoneId}} context written."` — nothing else.

package/src/resources/extensions/gsd/prompts/plan-milestone.md CHANGED Viewed

@@ -51,6 +51,7 @@ Apply these when decomposing and ordering slices:
 - **Completion must imply capability.** If every slice in this roadmap were completed exactly as written, the milestone's promised outcome should actually work at the proof level claimed. Do not write slices that can all be checked off while the user-visible capability still does not exist.
 - **Don't invent risks.** If the project is straightforward, skip the proof strategy and just ship value in smart order. Not everything has major unknowns.
 - **Ship features, not proofs.** A completed slice should leave the product in a state where the new capability is actually usable through its real interface. A login flow slice ends with a working login page, not a middleware function. An API slice ends with endpoints that return real data from a real store, not hardcoded fixtures. A dashboard slice ends with a real dashboard rendering real data, not a component that renders mock props. If a slice can't ship the real thing yet because a dependency isn't built, it should ship with realistic stubs that are clearly marked for replacement — but the user-facing surface must be real.
+- **Dependency format is comma-separated, never range syntax.** Write `depends:[S01,S02,S03]` — not `depends:[S01-S03]`. Range syntax is not a valid format and permanently blocks the slice.
 - **Ambition matches the milestone.** The number and depth of slices should match the milestone's ambition. A milestone promising "core platform with auth, data model, and primary user loop" should have enough slices to actually deliver all three as working features — not two proof-of-concept slices and a note that "the rest will come in the next milestone." If the milestone's context promises an outcome, the roadmap must deliver it.
 - **Right-size the decomposition.** Match slice count to actual complexity. If the work is small enough to build and verify in one pass, it's one slice — don't split it into three just because you can identify sub-steps. Multiple requirements can share a single slice. Conversely, don't cram genuinely independent capabilities into one slice just to keep the count low. Let the work dictate the structure.

package/src/resources/extensions/gsd/prompts/system.md CHANGED Viewed

@@ -154,7 +154,7 @@ Templates showing the expected format for each artifact type are in:
 **External facts:** Use `search-the-web` + `fetch_page`, or `search_and_read` for one-call extraction. Use `freshness` for recency. Never state current facts from training data without verification.
-**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging.
+**Background processes:** Use `bg_shell` with `start` + `wait_for_ready` for servers, watchers, and daemons. Never use `bash` with `&` or `nohup` to background a process — the `bash` tool waits for stdout to close, so backgrounded children that inherit the file descriptors cause it to hang indefinitely. Never poll with `sleep`/retry loops — `wait_for_ready` exists for this. For status checks, use `digest` (~30 tokens), not `output` (~2000 tokens). Use `highlights` (~100 tokens) when you need significant lines only. Use `output` only when actively debugging.
 **One-shot commands:** Use `async_bash` for builds, tests, and installs. The result is pushed to you when the command exits — no polling needed. Use `await_job` to block on a specific job.
@@ -169,6 +169,7 @@ Templates showing the expected format for each artifact type are in:
 - Never use `cat` to read a file you might edit — `read` gives you the exact text `edit` needs.
 - Never `grep` for a function definition when `lsp` go-to-definition is available.
 - Never poll a server with `sleep 1 && curl` loops — use `bg_shell` `wait_for_ready`.
+- Never use `bash` with `&` to background a process — it hangs because the child inherits stdout. Use `bg_shell` `start` instead.
 - Never use `bg_shell` `output` for a status check — use `digest`.
 - Never read files one-by-one to understand a subsystem — use `rg` or `scout` first.
 - Never guess at library APIs from training data — use `get_library_docs`.

package/src/resources/extensions/gsd/prompts/validate-milestone.md ADDED Viewed

@@ -0,0 +1,91 @@
+You are executing GSD auto-mode.
+## UNIT: Validate Milestone {{milestoneId}} ("{{milestoneTitle}}") — Remediation Round {{remediationRound}}
+## Working Directory
+Your working directory is `{{workingDirectory}}`. All file reads, writes, and shell commands MUST operate relative to this directory. Do NOT `cd` to any other directory.
+## Your Role in the Pipeline
+All slices are done. Before the **complete-milestone agent** closes this milestone, you reconcile planned work against what was actually delivered. You audit success criteria against evidence, inventory deferred work across all slice summaries and UAT results, and classify gaps. If auto-remediable gaps exist on the first pass, you append remediation slices to the roadmap so the pipeline can execute them before completion. After remediation slices run, you re-validate. The milestone only proceeds to completion once validation passes.
+This is a gate, not a formality. But most milestones pass — bias toward "pass" unless you find concrete evidence of unmet criteria or meaningful gaps.
+All relevant context has been preloaded below — the roadmap, all slice summaries, UAT results, requirements, decisions, and project context are inlined. Start working immediately without re-reading these files.
+{{inlinedContext}}
+If a `GSD Skill Preferences` block is present in system context, use it to decide which skills to load and follow during validation, without relaxing required verification or artifact rules.
+Then:
+### Step 1: Audit Success Criteria
+Enumerate each success criterion from the roadmap's `## Success Criteria` section. For each criterion, map it to concrete evidence from slice summaries, UAT results, or observable behavior.
+Format each criterion as:
+- `Criterion text` — **MET** — evidence: {{specific slice summary, UAT result, test output, or observable behavior}}
+- `Criterion text` — **NOT MET** — gap: {{what's missing and why}}
+Every criterion must have a definitive verdict. Do not mark a criterion as MET without specific evidence.
+### Step 2: Inventory Deferred Work
+Scan ALL slice summaries for:
+- `Known Limitations` sections
+- `Follow-ups` sections
+- `Deviations` sections
+Scan ALL UAT results for:
+- `Not Proven By This UAT` sections
+- Any PARTIAL or FAIL verdicts
+Check:
+- `.gsd/REQUIREMENTS.md` for Active requirements not yet Validated
+- `.gsd/CAPTURES.md` for unresolved deferred captures
+Collect every item into a single inventory. Do not skip items because they seem minor — the classification step handles prioritization.
+### Step 3: Classify Each Gap
+For every unmet criterion and every deferred work item, classify it as one of:
+- **auto-remediable** — can be fixed by adding a new slice (missing feature, unfixed bug, untested path, incomplete integration)
+- **human-required** — needs the user's input (design decision, external service dependency, manual verification, judgment call, ambiguous requirement)
+- **acceptable** — known limitation that's OK to ship (documented trade-off, explicitly scoped for a future milestone, minor rough edge with no user impact)
+Be conservative with **auto-remediable**. Only classify a gap as auto-remediable if you're confident a slice can resolve it without human judgment. When in doubt, classify as **human-required**.
+### Step 4: Act on Gaps
+**If this is remediation round 0 AND auto-remediable gaps exist:**
+1. Define remediation slices to address auto-remediable gaps. Follow the exact roadmap slice format:
+   `` - [ ] **S0X: Title** `risk:medium` `depends:[]` ``
+   Include a brief description of what each slice must accomplish.
+2. Append these slices to `{{roadmapPath}}` after existing slices (do not modify completed slices).
+3. Update the boundary map in the roadmap if the new slices introduce new integration points.
+4. Set verdict to `needs-remediation`.
+**If this is remediation round 1 or higher:**
+Do NOT add more slices. At this point either:
+- All remaining gaps are acceptable — set verdict to `pass`
+- Remaining gaps need the user's input — set verdict to `needs-attention`
+Never add remediation slices after round 0. If round 0 remediation didn't close the gaps, escalate.
+**If no auto-remediable gaps exist (any round):**
+- If all criteria are MET and deferred items are acceptable or human-required only — set verdict to `pass` (with human-required items noted)
+- If human-required items are blocking — set verdict to `needs-attention`
+### Step 5: Write Validation Report
+Write `{{validationPath}}` using the milestone-validation template. Fill all frontmatter fields and every section. The report must be a complete record of the validation — a future agent reading only this file should understand what was checked, what passed, and what remains.
+**You MUST write `{{validationPath}}` before finishing.**
+When done, say: "Milestone {{milestoneId}} validated."

package/src/resources/extensions/gsd/roadmap-slices.ts CHANGED Viewed

@@ -1,5 +1,45 @@
 import type { RoadmapSliceEntry, RiskLevel } from "./types.js";
+/**
+ * Expand dependency shorthand into individual slice IDs.
+ *
+ * Handles two common LLM-generated patterns that the roadmap parser
+ * previously treated as single literal IDs (silently blocking slices):
+ *
+ *   "S01-S04"  → ["S01", "S02", "S03", "S04"]  (range syntax)
+ *   "S01..S04" → ["S01", "S02", "S03", "S04"]  (dot-range syntax)
+ *
+ * Plain IDs ("S01", "S02") are trimmed and otherwise kept as written.
+ * Empty or whitespace-only entries are dropped, not passed through.
+ *
+ * A range is expanded only when both endpoints share the same alphabetic
+ * prefix (compared after upper-casing) and start <= end. Anything else that
+ * looks like a range ("S01-T04", "S04-S01") is kept as a single literal ID.
+ *
+ * Expanded IDs use the upper-cased prefix and are zero-padded to the digit
+ * count of the range's start number; padStart never truncates, so
+ * "S8-S10" yields "S8", "S9", "S10".
+ *
+ * NOTE(review): only expanded IDs are upper-cased; a plain "s01" stays
+ * lower-case. Confirm whether callers compare slice IDs case-sensitively.
+ * NOTE(review): the separator group is quantified with "+", so repeated or
+ * mixed separators such as "S01--S04" or "S01-..S04" are accepted as ranges.
+ * NOTE(review): the range size is not capped; "S01-S99999999" would allocate
+ * one entry per number in the range.
+ *
+ * @param deps Raw dependency tokens, e.g. the comma-split contents of a
+ *             `depends:[...]` tag.
+ * @returns A new array of slice IDs in input order, with ranges expanded
+ *          in ascending order.
+ */
+export function expandDependencies(deps: string[]): string[] {
+  const result: string[] = [];
+  for (const dep of deps) {
+    const trimmed = dep.trim();
+    if (!trimmed) continue; // skip empty / whitespace-only tokens
+    // Match range syntax: S01-S04 or S01..S04 (either letter case is accepted for the prefix)
+    const rangeMatch = trimmed.match(/^([A-Za-z]+)(\d+)(?:-|\.\.)+([A-Za-z]+)(\d+)$/);
+    if (rangeMatch) {
+      const prefixA = rangeMatch[1]!.toUpperCase();
+      const startNum = parseInt(rangeMatch[2]!, 10);
+      const prefixB = rangeMatch[3]!.toUpperCase();
+      const endNum = parseInt(rangeMatch[4]!, 10);
+      // Only expand when both prefixes match and the range is ascending (start <= end)
+      if (prefixA === prefixB && startNum <= endNum) {
+        const width = rangeMatch[2]!.length; // preserve zero-padding (S01 not S1)
+        for (let i = startNum; i <= endNum; i++) {
+          result.push(`${prefixA}${String(i).padStart(width, "0")}`);
+        }
+        continue;
+      }
+    }
+    result.push(trimmed); // plain ID, or a range that failed validation: keep the trimmed token as-is
+  }
+  return result;
+}
 function extractSlicesSection(content: string): string {
   const headingMatch = /^## Slices\s*$/m.exec(content);
   if (!headingMatch || headingMatch.index == null) return "";
@@ -33,7 +73,7 @@ export function parseRoadmapSlices(content: string): RoadmapSliceEntry[] {
       const depsMatch = rest.match(/`depends:\[([^\]]*)\]`/);
       const depends = depsMatch && depsMatch[1]!.trim()
-        ? depsMatch[1]!.split(",").map(s => s.trim())
+        ? expandDependencies(depsMatch[1]!.split(",").map(s => s.trim()))
         : [];
       currentSlice = { id, title, risk, depends, done, demo: "" };

package/src/resources/extensions/gsd/session-forensics.ts CHANGED Viewed

@@ -22,6 +22,7 @@ import { readFileSync, readdirSync, existsSync, statSync } from "node:fs";
 import { basename, join } from "node:path";
 import { nativeParseJsonlTail } from "./native-parser-bridge.js";
 import { nativeWorkingTreeStatus, nativeDiffStat } from "./native-git-bridge.js";
+import { getAutoWorktreePath } from "./auto-worktree.js";
 // ─── Types ────────────────────────────────────────────────────────────────────
@@ -296,12 +297,45 @@ export function synthesizeCrashRecovery(
  * Replaces the old shallow getLastActivityDiagnostic().
  */
 export function getDeepDiagnostic(basePath: string): string | null {
-  const activityDir = join(basePath, ".gsd", "activity");
-  const trace = readLastActivityLog(activityDir);
+  // Try worktree activity logs first if an auto-worktree is active
+  let trace: ExecutionTrace | null = null;
+  try {
+    const mid = readActiveMilestoneId(basePath);
+    if (mid) {
+      const wtPath = getAutoWorktreePath(basePath, mid);
+      if (wtPath) {
+        const wtActivityDir = join(wtPath, ".gsd", "activity");
+        trace = readLastActivityLog(wtActivityDir);
+      }
+    }
+  } catch { /* non-fatal — fall through to root */ }
+  // Fall back to root activity logs
+  if (!trace || trace.toolCallCount === 0) {
+    const activityDir = join(basePath, ".gsd", "activity");
+    trace = readLastActivityLog(activityDir);
+  }
   if (!trace || trace.toolCallCount === 0) return null;
   return formatTraceSummary(trace);
 }
+/**
+ * Read the active milestone ID directly from STATE.md without async deriveState().
+ * Looks for `**Active Milestone:** M001` pattern (matched case-insensitively)
+ * in `<basePath>/.gsd/STATE.md` and returns the first whitespace-delimited
+ * token that follows the label.
+ *
+ * NOTE(review): the `\s*` after the label also matches line breaks, so when
+ * the label has no value on its own line the next non-blank token in the
+ * file is returned instead. Confirm that downstream lookups tolerate that.
+ *
+ * @param basePath Directory that contains the `.gsd` folder.
+ * @returns The captured token, or null when STATE.md is missing, has no
+ *          matching label, or cannot be read (any thrown error is swallowed).
+ */
+function readActiveMilestoneId(basePath: string): string | null {
+  try {
+    const statePath = join(basePath, ".gsd", "STATE.md");
+    if (!existsSync(statePath)) return null;
+    const content = readFileSync(statePath, "utf-8");
+    const match = /\*\*Active Milestone:\*\*\s*(\S+)/i.exec(content);
+    return match?.[1] ?? null;
+  } catch {
+    return null; // best-effort lookup: read failures are treated as "no active milestone"
+  }
+}
 // ─── Formatting ───────────────────────────────────────────────────────────────
 function formatRecoveryPrompt(

package/src/resources/extensions/gsd/templates/milestone-validation.md ADDED Viewed

@@ -0,0 +1,62 @@
+---
+id: {{milestoneId}}
+remediation_round: {{round}}
+verdict: pass | needs-remediation | needs-attention
+slices_added: []
+human_required_items: 0
+validated_at: {{date}}
+---
+# {{milestoneId}}: Milestone Validation
+## Success Criteria Audit
+<!-- For each success criterion from the roadmap, list the criterion text,
+     verdict (MET / NOT MET), and the specific evidence or gap.
+     Every criterion must appear here with a definitive verdict. -->
+- **Criterion:** {{criterionText}}
+  **Verdict:** {{MET or NOT MET}}
+  **Evidence:** {{sliceSummary, UATResult, testOutput, or observableBehavior}}
+## Deferred Work Inventory
+<!-- Every deferred, incomplete, or flagged item found across all slice summaries
+     and UAT results. Include the source so a reader can trace back to the original. -->
+| Item | Source | Classification | Disposition |
+|------|--------|----------------|-------------|
+| {{itemDescription}} | {{sliceId or UAT reference}} | {{auto-remediable / human-required / acceptable}} | {{what happens with this item}} |
+## Requirement Coverage
+<!-- Active requirements from REQUIREMENTS.md that are not yet Validated.
+     If no REQUIREMENTS.md exists, write "No requirements tracking active." -->
+- **{{requirementId}}**: {{status}} — {{disposition: covered by remediation slice / acceptable gap / needs attention}}
+## Remediation Slices
+<!-- New slices appended to the roadmap to address auto-remediable gaps.
+     Include the full slice definition as written to the roadmap.
+     If no slices were added, write "None required." -->
+{{remediationSliceDefinitions OR "None required."}}
+## Requires Attention
+<!-- Items classified as human-required, with enough context for the user to make a decision.
+     Ordered by priority (blocking items first).
+     If none, write "None." -->
+- **{{itemTitle}}** ({{priority: blocking / non-blocking}})
+  Context: {{whatTheItemIs, whereItCameFrom, whyItNeedsHumanInput}}
+## Verdict
+<!-- One-paragraph summary assessment.
+     State the verdict (pass / needs-remediation / needs-attention),
+     the number of criteria met vs total, and the key finding
+     that determined the verdict. -->
+{{verdictSummary}}