npm - gsd-pi - Versions diffs - 2.48.0 → 2.49.0-dev.de3d9f6 - Mend

gsd-pi 2.48.0 → 2.49.0-dev.de3d9f6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (170)

package/src/resources/extensions/gsd/commands/handlers/auto.ts CHANGED Viewed

@@ -1,10 +1,33 @@
 import type { ExtensionAPI, ExtensionCommandContext } from "@gsd/pi-coding-agent";
+import { existsSync, readFileSync } from "node:fs";
+import { resolve } from "node:path";
 import { enableDebug } from "../../debug-logger.js";
 import { getAutoDashboardData, isAutoActive, isAutoPaused, pauseAuto, startAuto, stopAuto, stopAutoRemote } from "../../auto.js";
 import { handleRate } from "../../commands-rate.js";
 import { guardRemoteSession, projectRoot } from "../context.js";
+/**
+ * Parse --yolo flag and optional file path from the auto command string.
+ * Supports: `/gsd auto --yolo path/to/file.md` or `/gsd auto -y path/to/file.md`
+ */
+function parseYoloFlag(trimmed: string): { yoloSeedFile: string | null; rest: string } {
+  const yoloRe = /(?:--yolo|-y)\s+("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*'|\S+)/;
+  const match = trimmed.match(yoloRe);
+  if (!match) return { yoloSeedFile: null, rest: trimmed };
+  // Strip quotes if present
+  let filePath = match[1];
+  if ((filePath.startsWith('"') && filePath.endsWith('"')) ||
+      (filePath.startsWith("'") && filePath.endsWith("'"))) {
+    filePath = filePath.slice(1, -1);
+  }
+  const rest = trimmed.replace(match[0], "").replace(/\s+/g, " ").trim();
+  return { yoloSeedFile: filePath, rest };
+}
 export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandContext, pi: ExtensionAPI): Promise<boolean> {
   if (trimmed === "next" || trimmed.startsWith("next ")) {
     if (trimmed.includes("--dry-run")) {
@@ -21,11 +44,31 @@ export async function handleAutoCommand(trimmed: string, ctx: ExtensionCommandCo
   }
   if (trimmed === "auto" || trimmed.startsWith("auto ")) {
-    const verboseMode = trimmed.includes("--verbose");
-    const debugMode = trimmed.includes("--debug");
+    const { yoloSeedFile, rest } = parseYoloFlag(trimmed);
+    const verboseMode = rest.includes("--verbose");
+    const debugMode = rest.includes("--debug");
     if (debugMode) enableDebug(projectRoot());
     if (!(await guardRemoteSession(ctx, pi))) return true;
-    await startAuto(ctx, pi, projectRoot(), verboseMode);
+    if (yoloSeedFile) {
+      const resolved = resolve(projectRoot(), yoloSeedFile);
+      if (!existsSync(resolved)) {
+        ctx.ui.notify(`Yolo seed file not found: ${resolved}`, "error");
+        return true;
+      }
+      const seedContent = readFileSync(resolved, "utf-8").trim();
+      if (!seedContent) {
+        ctx.ui.notify(`Yolo seed file is empty: ${resolved}`, "error");
+        return true;
+      }
+      // Headless path: bootstrap project, dispatch non-interactive discuss,
+      // then auto-mode starts automatically via checkAutoStartAfterDiscuss
+      // when the LLM says "Milestone X ready."
+      const { showHeadlessMilestoneCreation } = await import("../../guided-flow.js");
+      await showHeadlessMilestoneCreation(ctx, pi, projectRoot(), seedContent);
+    } else {
+      await startAuto(ctx, pi, projectRoot(), verboseMode);
+    }
     return true;
   }

package/src/resources/extensions/gsd/git-service.ts CHANGED Viewed

@@ -102,23 +102,25 @@ export interface TaskCommitContext {
 /**
  * Build a meaningful conventional commit message from task execution context.
- * Format: `{type}({sliceId}/{taskId}): {description}`
+ * Format: `{type}: {description}` (clean conventional commit — no GSD IDs in subject).
+ *
+ * GSD metadata is placed in a `GSD-Task:` git trailer at the end of the body,
+ * following the same convention as `Signed-off-by:` or `Co-Authored-By:`.
  *
  * The description is the task summary one-liner if available (it describes
  * what was actually built), falling back to the task title (what was planned).
  */
 export function buildTaskCommitMessage(ctx: TaskCommitContext): string {
-  const scope = ctx.taskId; // e.g. "S01/T02" or just "T02"
   const description = ctx.oneLiner || ctx.taskTitle;
   const type = inferCommitType(ctx.taskTitle, ctx.oneLiner);
-  // Truncate description to ~72 chars for subject line
-  const maxDescLen = 68 - type.length - scope.length;
+  // Truncate description to ~72 chars for subject line (full budget without scope)
+  const maxDescLen = 70 - type.length;
   const truncated = description.length > maxDescLen
     ? description.slice(0, maxDescLen - 1).trimEnd() + "…"
     : description;
-  const subject = `${type}(${scope}): ${truncated}`;
+  const subject = `${type}: ${truncated}`;
   // Build body with key files if available
   const bodyParts: string[] = [];
@@ -131,15 +133,14 @@ export function buildTaskCommitMessage(ctx: TaskCommitContext): string {
     bodyParts.push(fileLines);
   }
+  // Trailers: GSD-Task first, then Resolves
+  bodyParts.push(`GSD-Task: ${ctx.taskId}`);
   if (ctx.issueNumber) {
     bodyParts.push(`Resolves #${ctx.issueNumber}`);
   }
-  if (bodyParts.length > 0) {
-    return `${subject}\n\n${bodyParts.join("\n\n")}`;
-  }
-  return subject;
+  return `${subject}\n\n${bodyParts.join("\n\n")}`;
 }
 /**
@@ -538,7 +539,7 @@ export class GitServiceImpl {
     const message = taskContext
       ? buildTaskCommitMessage(taskContext)
-      : `chore(${unitId}): auto-commit after ${unitType}`;
+      : `chore: auto-commit after ${unitType}\n\nGSD-Unit: ${unitId}`;
     nativeCommit(this.basePath, message, { allowEmpty: false });
     return message;
   }

package/src/resources/extensions/gsd/prompts/discuss-headless.md CHANGED Viewed

@@ -1,86 +1,253 @@
 # Headless Milestone Creation
-You are creating a GSD milestone from a provided specification document. This is a **headless** (non-interactive) flow — do NOT ask the user any questions. Work entirely from the provided specification.
+You are creating a GSD milestone from a provided specification document. This is a **headless** (non-interactive) flow — do NOT ask the user any questions. Wherever the interactive flow would ask the user, make your best-judgment call and document it as an assumption.
 ## Provided Specification
 {{seedContext}}
-## Your Task
+## Reflection Step
-### Step 1: Reflect
+Summarize your understanding of the specification concretely — not abstractly:
-Summarize your understanding of the specification concretely:
-- What is being built
-- Major capabilities/features
-- Scope estimate (how many milestones × slices)
-- Any ambiguities or gaps you notice
+1. Summarize what is being built in your own words.
+2. Give an honest size read: roughly how many milestones, roughly how many slices in the first one. Base this on the actual work involved, not a classification label.
+3. Include scope honesty — a bullet list of the major capabilities: "Here's what I'm reading from the spec: [bullet list of major capabilities]."
+4. Note any ambiguities, gaps, or areas where the spec is vague.
-### Step 2: Investigate (brief)
+Print this reflection in chat. Do not skip this step.
-Quickly scout the codebase to understand what already exists — spend no more than 5-6 tool calls here:
-- `ls` the project root and key directories
-- Search for relevant existing code, patterns, dependencies
-- Check library docs if needed (`resolve_library` / `get_library_docs`)
+## Vision Mapping
-Then move on to writing artifacts. Do not explore exhaustively — the research phase will do deeper investigation later.
+Decide the approach based on the actual scope:
-### Step 3: Make Decisions
+**If the work spans multiple milestones:** Map the full landscape:
+1. Propose a milestone sequence — names, one-line intents, rough dependencies
+2. Print this in chat as the working milestone sequence
-For any ambiguities or gaps in the specification:
-- Make your best-guess decision based on the spec's intent, codebase patterns, and domain conventions
-- Document each assumption clearly in the Context file
+**If the work fits in a single milestone:** Proceed directly to investigation.
-### Step 4: Assess Scope
+**Anti-reduction rule:** If the spec describes a big vision, plan the big vision. Do not reduce scope. Phase complex/risky work into later milestones — do not cut it. The spec's ambition is the target, and your job is to sequence it intelligently, not shrink it.
-Based on reflection + investigation:
-- Is this a single milestone or multiple milestones?
-- If multi-milestone: plan the full sequence with dependencies
+## Mandatory Investigation
-### Step 5: Write Artifacts
+Do a mandatory investigation pass before making any decisions. This is not optional.
-**Milestone ID**: {{milestoneId}}
+1. **Scout the codebase** — `ls`, `find`, `rg`, or `scout` for broad unfamiliar areas. Understand what already exists, what patterns are established, what constraints current code imposes.
+2. **Check library docs** — `resolve_library` / `get_library_docs` for any tech mentioned in the spec. Get current facts about capabilities, constraints, API shapes, version-specific behavior.
+3. **Web search** — `search-the-web` if the domain is unfamiliar, if you need current best practices, or if the spec references external services/APIs you need facts about. Use `fetch_page` for full content when snippets aren't enough.
-Use these templates exactly:
+**Web search budget:** Budget carefully across investigation + focused research:
+- Prefer `resolve_library` / `get_library_docs` over `web_search` for library documentation.
+- Prefer `search_and_read` for one-shot topic research.
+- Target 2-3 web searches in this investigation pass. Save remaining budget for focused research.
+- Do NOT repeat the same or similar queries.
-{{inlinedTemplates}}
+The goal: your decisions should reflect what's actually true in the codebase and ecosystem, not what you assume.
+## Autonomous Decision-Making
+For every area where the spec is ambiguous, vague, or silent:
+- Apply the depth checklist (below) to identify what needs resolution
+- Make your best-judgment call based on: the spec's intent, codebase patterns, domain conventions, and investigation findings
+- **Document every assumption** in the Context file under an "Assumptions" section
+- For each assumption, note: what the spec said (or didn't say), what you decided, and why
+### Depth Checklist
+Ensure ALL of these are resolved before writing artifacts — from the spec + investigation, not by asking:
+- [ ] **What is being built** — concrete enough that you could explain it to a stranger
+- [ ] **Why it needs to exist** — the problem it solves or the desire it fulfills
+- [ ] **Who it's for** — even if just the spec author
+- [ ] **What "done" looks like** — observable outcomes, not abstract goals
+- [ ] **The biggest technical unknowns / risks** — what could fail, what hasn't been proven
+- [ ] **What external systems/services this touches** — APIs, databases, third-party services, hardware
+If the spec leaves any of these unresolved, make your best-judgment call and document it.
+## Depth Verification
+Print a structured depth summary in chat covering:
+- What you understood the spec to describe
+- Key technical findings from investigation
+- Assumptions you made and why
+- Areas where you're least confident
+This is your audit trail. Print it — do not skip it.
+## Focused Research
+Do a focused research pass before roadmap creation.
+Research is advisory, not auto-binding. Use the spec + investigation to identify:
+- table stakes the product space usually expects
+- domain-standard behaviors that may be implied but not stated
+- likely omissions that would make the product feel incomplete
+- plausible anti-features or scope traps
+- differentiators worth preserving
+For multi-milestone visions, research should cover the full landscape, not just the first milestone. Research findings may affect milestone sequencing, not just slice ordering within M001.
+**Key difference from interactive flow:** Where the interactive flow would present research-surfaced candidate requirements for the user to confirm/defer/reject, you instead apply your best judgment. If a research finding clearly aligns with the spec's intent, include it. If it's tangential or would expand scope beyond what the spec describes, defer it or mark it out of scope. Document the reasoning.
+## Capability Contract
+Before writing a roadmap, produce `.gsd/REQUIREMENTS.md`.
+Use it as the project's explicit capability contract.
+Requirements must be organized into:
+- Active
+- Validated
+- Deferred
+- Out of Scope
+- Traceability
+Each requirement should include:
+- stable ID (`R###`)
+- title
+- class
+- status
+- description
+- why it matters
+- source (`spec`, `inferred`, `research`, or `execution`)
+- primary owning slice
+- supporting slices
+- validation status
+- notes
+Rules:
+- Keep requirements capability-oriented, not a giant feature inventory
+- Every Active requirement must either be mapped to a roadmap owner, explicitly deferred, blocked with reason, or moved out of scope
+- Product-facing work should capture launchability, primary user loop, continuity, and failure visibility when relevant
+- Later milestones may have provisional ownership, but the first planned milestone should map requirements to concrete slices wherever possible
+For multi-milestone projects, requirements should span the full vision. Requirements owned by later milestones get provisional ownership. The full requirement set captures the spec's complete vision — milestones are the sequencing strategy, not the scope boundary.
+**Print the requirements in chat before writing the roadmap.** Print a markdown table with columns: ID, Title, Status, Owner, Source. Group by status (Active, Deferred, Out of Scope).
+## Scope Assessment
+Confirm the size estimate from your reflection still holds. Investigation and research often reveal hidden complexity or simplify things. If the scope grew or shrank significantly, adjust the milestone and slice counts accordingly.
+## Output Phase
+### Roadmap Preview
+Before writing any files, **print the planned roadmap in chat**. Print a markdown table with columns: Slice, Title, Risk, Depends, Demo. One row per slice. Below the table, print the milestone definition of done as a bullet list.
+This is the user's audit trail in the TUI scrollback — do not skip it.
+### Naming Convention
-**For single milestone**, write in this order:
+Directories use bare IDs. Files use ID-SUFFIX format. Titles live inside file content, not in names.
+- Milestone dir: `.gsd/milestones/{{milestoneId}}/`
+- Milestone files: `{{milestoneId}}-CONTEXT.md`, `{{milestoneId}}-ROADMAP.md`
+- Slice dirs: `S01/`, `S02/`, etc.
+### Single Milestone
+In a single pass:
 1. `mkdir -p .gsd/milestones/{{milestoneId}}/slices`
-2. Write `.gsd/PROJECT.md` (using Project template)
-3. Write `.gsd/REQUIREMENTS.md` (using Requirements template)
-4. Write `{{contextPath}}` (using Context template) — preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. Document assumptions under an "Assumptions" section.
-5. Write `{{roadmapPath}}` (using Roadmap template) — decompose into demoable vertical slices with checkboxes, risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice.
-6. Seed `.gsd/DECISIONS.md` (using Decisions template)
+2. Write or update `.gsd/PROJECT.md` — use the **Project** output template below. Describe what the project is, its current state, and list the milestone sequence.
+3. Write or update `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Confirm requirement states, ownership, and traceability before roadmap creation.
+**Depth-Preservation Guidance for context.md:**
+Preserve the specification's exact terminology, emphasis, and specific framing. Do not paraphrase domain-specific language into generics. If the spec said "craft feel," write "craft feel" — not "high-quality user experience." The context file is downstream agents' only window into this conversation — flattening specifics into generics loses the signal that shaped every decision.
+4. Write `{{contextPath}}` — use the **Context** output template below. Preserve key risks, unknowns, existing codebase constraints, integration points, and relevant requirements surfaced during research. Include an "Assumptions" section documenting every judgment call.
+5. Call `gsd_plan_milestone` to create the roadmap. Decompose into demoable vertical slices with risk, depends, demo sentences, proof strategy, verification classes, milestone definition of done, requirement coverage, and a boundary map. If the milestone crosses multiple runtime boundaries, include an explicit final integration slice that proves the assembled system works end-to-end in a real environment. Use the **Roadmap** output template below to structure the tool call parameters.
+6. For each architectural or pattern decision, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
 7. {{commitInstruction}}
-9. Say exactly: "Milestone {{milestoneId}} ready."
-**For multi-milestone**, write in this order:
+After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically.
+### Multi-Milestone
+#### Phase 1: Shared artifacts
 1. For each milestone, call `gsd_milestone_generate_id` to get its ID — never invent milestone IDs manually. Then `mkdir -p .gsd/milestones/<ID>/slices` for each.
-2. Write `.gsd/PROJECT.md` — full vision across ALL milestones (using Project template)
-3. Write `.gsd/REQUIREMENTS.md` — full capability contract (using Requirements template)
-4. Seed `.gsd/DECISIONS.md` (using Decisions template)
-5. Write PRIMARY `{{contextPath}}` — full context with all assumptions documented
-6. Write PRIMARY `{{roadmapPath}}` — detailed slices for the first milestone only
-7. For each remaining milestone, write full CONTEXT.md with `depends_on` frontmatter:
-   ```yaml
-   ---
-   depends_on: [M001, M002]
-   ---
-   # M003: Title
-   ```
-   Each context file should be rich enough that a future agent — with no memory of this conversation — can understand the intent, constraints, dependencies, what the milestone unlocks, and what "done" looks like.
-8. {{multiMilestoneCommitInstruction}}
-10. Say exactly: "Milestone {{milestoneId}} ready."
+2. Write `.gsd/PROJECT.md` — use the **Project** output template below.
+3. Write `.gsd/REQUIREMENTS.md` — use the **Requirements** output template below. Capture Active, Deferred, Out of Scope, and any already Validated requirements. Later milestones may have provisional ownership where slice plans do not exist yet.
+4. For any architectural or pattern decisions, call `gsd_decision_save` — the tool auto-assigns IDs and regenerates `.gsd/DECISIONS.md` automatically.
+#### Phase 2: Primary milestone
+5. Write a full `CONTEXT.md` for the primary milestone (the first in sequence). Include an "Assumptions" section.
+6. Call `gsd_plan_milestone` for **only the primary milestone** — detail-planning later milestones now is waste because the codebase will change. Include requirement coverage and a milestone definition of done.
+#### MANDATORY: depends_on Frontmatter in CONTEXT.md
+Every CONTEXT.md for a milestone that depends on other milestones MUST have YAML frontmatter with `depends_on`. The auto-mode state machine reads this field to determine execution order — without it, milestones may execute out of order or in parallel when they shouldn't.
+```yaml
+---
+depends_on: [M001, M002]
+---
+# M003: Title
+```
+If a milestone has no dependencies, omit the frontmatter. Do NOT rely on QUEUE.md or PROJECT.md for dependency tracking — the state machine only reads CONTEXT.md frontmatter.
+#### Phase 3: Remaining milestones
+For each remaining milestone, in dependency order, autonomously decide the best readiness mode:
+- **Write full context** — if the spec provides enough detail for this milestone and investigation confirms feasibility. Write a full `CONTEXT.md` with technical assumptions verified against the actual codebase.
+- **Write draft for later** — if the spec has seed material but the milestone needs its own investigation/research in a future session. Write a `CONTEXT-DRAFT.md` capturing seed material, key ideas, provisional scope, and open questions. **Downstream:** Auto-mode pauses at this milestone and prompts the user to discuss.
+- **Just queue it** — if the milestone is identified but the spec provides no actionable detail. No context file written. **Downstream:** Auto-mode pauses and starts a full discussion from scratch.
+**Default to writing full context** when the spec is detailed enough. Default to draft when the spec mentions the milestone but is vague. Default to queue when the milestone is implied by the vision but not described.
+**Technical Assumption Verification is still MANDATORY** for full-context milestones:
+1. Read the actual code for every file or module you reference. Confirm APIs exist, check what functions actually do.
+2. Check for stale assumptions — verify referenced modules still work as described.
+3. Print findings in chat before writing each milestone's CONTEXT.md.
+Each context file (full or draft) should be rich enough that a future agent encountering it fresh — with no memory of this conversation — can understand the intent, constraints, dependencies, what this milestone unlocks, and what "done" looks like.
+#### Milestone Gate Tracking (MANDATORY for multi-milestone)
+After deciding each milestone's readiness, immediately write or update `.gsd/DISCUSSION-MANIFEST.json`:
+```json
+{
+  "primary": "M001",
+  "milestones": {
+    "M001": { "gate": "discussed", "context": "full" },
+    "M002": { "gate": "discussed", "context": "full" },
+    "M003": { "gate": "queued",    "context": "none" }
+  },
+  "total": 3,
+  "gates_completed": 3
+}
+```
+Write this file AFTER each gate decision, not just at the end. Update `gates_completed` incrementally. The system reads this file and BLOCKS auto-start if `gates_completed < total`.
+For single-milestone projects, do NOT write this file.
+#### Phase 4: Finalize
+7. {{multiMilestoneCommitInstruction}}
+After writing the files, say exactly: "Milestone {{milestoneId}} ready." — nothing else. Auto-mode will start automatically.
 ## Critical Rules
-- **DO NOT ask the user any questions** — this is headless mode
+- **DO NOT ask the user any questions** — this is headless mode. Make judgment calls and document them.
 - **Preserve the specification's terminology** — don't paraphrase domain-specific language
-- **Document assumptions** — when you make a judgment call, note it in CONTEXT.md under "Assumptions"
-- **Investigate before writing** — always scout the codebase first
-- **Use depends_on frontmatter** for multi-milestone sequences (the state machine reads this field to determine execution order)
-- **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Do not ask "what's the minimum viable version?" or reduce scope. Phase complex/risky work into later milestones — do not cut it.
-- **Naming convention** — always use `gsd_milestone_generate_id` to get milestone IDs. Directories use bare IDs (e.g. `M001/` or `M001-r5jzab/`), files use ID-SUFFIX format (e.g. `M001-CONTEXT.md` or `M001-r5jzab-CONTEXT.md`). Never invent milestone IDs manually.
+- **Document assumptions** — every judgment call gets noted in CONTEXT.md under "Assumptions" with reasoning
+- **Investigate thoroughly** — scout codebase, check library docs, web search. Same rigor as interactive mode.
+- **Do focused research** — identify table stakes, domain standards, omissions, scope traps. Same rigor as interactive mode.
+- **Use proper tools** — `gsd_plan_milestone` for roadmaps, `gsd_decision_save` for decisions, `gsd_milestone_generate_id` for IDs
+- **Print artifacts in chat** — requirements table, roadmap preview, depth summary. The TUI scrollback is the user's audit trail.
+- **Use depends_on frontmatter** for multi-milestone sequences
+- **Anti-reduction rule** — if the spec describes a big vision, plan the big vision. Phase complexity — don't cut it.
+- **Naming convention** — always use `gsd_milestone_generate_id` for IDs. Directories use bare IDs, files use ID-SUFFIX format.
 - **End with "Milestone {{milestoneId}} ready."** — this triggers auto-start detection
+{{inlinedTemplates}}

package/src/resources/extensions/gsd/prompts/run-uat.md CHANGED Viewed

@@ -29,7 +29,7 @@ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply a
 - `runtime-executable` — execute the specified command or script. Capture stdout/stderr as evidence. Record pass/fail based on exit code and output.
 - `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
 - `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
-- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN` and use an overall verdict of `PARTIAL` unless every required check was objective and passed.
+- `human-experience` — automate setup, preconditions, screenshots, logs, and objective checks, but do **not** invent subjective PASS results. Mark taste-based, experiential, or purely human-judgment checks as `NEEDS-HUMAN`. Use an overall verdict of `PASS` when all automatable checks succeed (even if human-only checks remain as `NEEDS-HUMAN`). Use `PARTIAL` only when automatable checks themselves were inconclusive.
 ### Evidence tools
@@ -51,9 +51,9 @@ For each check, record:
 - `PASS`, `FAIL`, or `NEEDS-HUMAN`
 After running all checks, compute the **overall verdict**:
-- `PASS` — all required checks passed and no human-only checks remain
-- `FAIL` — one or more checks failed
-- `PARTIAL` — some checks passed, but one or more checks were skipped, inconclusive, or still require human judgment
+- `PASS` — all automatable checks passed. Any remaining checks that honestly require human judgment are marked `NEEDS-HUMAN` with clear instructions for the human reviewer. (This is the correct verdict for mixed/human-experience/live-runtime modes when all automatable checks succeed.)
+- `FAIL` — one or more automatable checks failed
+- `PARTIAL` — one or more automatable checks were skipped or returned inconclusive results (not the same as `NEEDS-HUMAN` — use PARTIAL only when the agent itself could not determine pass/fail for a check it was supposed to automate)
 Call `gsd_summary_save` with `milestone_id: {{milestoneId}}`, `slice_id: {{sliceId}}`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content` — the tool computes the file path and persists to both DB and disk. The content should follow this format:

package/src/resources/extensions/gsd/tests/all-milestones-complete-merge.test.ts CHANGED Viewed

@@ -183,8 +183,8 @@ test("single milestone worktree is merged to main when all complete (#962)", (t)
     "milestone branch should be deleted",
   );
-  // Verify squash commit on main
-  const log = run("git log --oneline -3", tempDir);
+  // Verify squash commit on main (milestone ID is in trailer, not subject)
+  const log = run("git log -3", tempDir);
   assert.ok(
     log.includes("M001"),
     "squash commit on main should reference M001",

package/src/resources/extensions/gsd/tests/auto-stash-merge.test.ts CHANGED Viewed

@@ -76,7 +76,7 @@ test("#2151 bug 1: auto-stash unblocks merge when unrelated files are dirty", ()
     // Should succeed — the dirty README.md is auto-stashed before merge.
     const result = mergeMilestoneToMain(repo, "M200", roadmap);
-    assert.ok(result.commitMessage.includes("feat(M200)"), "merge succeeds with dirty unrelated file");
+    assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M200"), "merge succeeds with dirty unrelated file");
     assert.ok(existsSync(join(repo, "stash-test.ts")), "milestone code merged to main");
     // Verify the dirty file was restored (stash popped).

package/src/resources/extensions/gsd/tests/auto-worktree-milestone-merge.test.ts CHANGED Viewed

@@ -160,15 +160,17 @@ describe("auto-worktree-milestone-merge", () => {
     const result = mergeMilestoneToMain(repo, "M020", roadmap);
-    assert.match(result.commitMessage, /^feat\(M020\):/, "subject has conventional commit prefix");
+    assert.match(result.commitMessage, /^feat:/, "subject has conventional commit prefix without milestone ID");
     assert.ok(result.commitMessage.includes("Backend foundation"), "subject includes milestone title");
     assert.ok(result.commitMessage.includes("- S01: Core API"), "body lists S01");
     assert.ok(result.commitMessage.includes("- S02: Error handling"), "body lists S02");
     assert.ok(result.commitMessage.includes("- S03: Logging infra"), "body lists S03");
+    assert.ok(result.commitMessage.includes("GSD-Milestone: M020"), "body has GSD-Milestone trailer");
     assert.ok(result.commitMessage.includes("Branch: milestone/M020"), "body has branch metadata");
     const gitMsg = run("git log -1 --format=%B main", repo).trim();
-    assert.match(gitMsg, /^feat\(M020\):/, "git commit message starts with feat(M020):");
+    assert.match(gitMsg, /^feat:/, "git commit message starts with feat:");
+    assert.ok(gitMsg.includes("GSD-Milestone: M020"), "git commit has GSD-Milestone trailer");
     assert.ok(gitMsg.includes("- S01: Core API"), "git commit body has S01");
   });
@@ -213,11 +215,11 @@ describe("auto-worktree-milestone-merge", () => {
     const result = mergeMilestoneToMain(repo, "M040", roadmap);
     const mainLog = run("git log --oneline main", repo);
-    assert.ok(mainLog.includes("feat(M040)"), "milestone commit on main");
+    assert.ok(mainLog.includes("feat:"), "milestone commit on main");
     run("git push origin main", repo);
     const remoteLog = run("git log --oneline main", bareDir);
-    assert.ok(remoteLog.includes("feat(M040)"), "milestone commit reachable on remote after manual push");
+    assert.ok(remoteLog.includes("feat:"), "milestone commit reachable on remote after manual push");
     assert.strictEqual(typeof result.pushed, "boolean", "pushed flag remains boolean");
   });
@@ -248,7 +250,7 @@ describe("auto-worktree-milestone-merge", () => {
     let threw = false;
     try {
       const result = mergeMilestoneToMain(repo, "M050", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M050)"), "merge commit created despite .gsd conflict");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M050"), "merge commit created despite .gsd conflict");
     } catch (err) {
       threw = true;
     }
@@ -274,7 +276,7 @@ describe("auto-worktree-milestone-merge", () => {
     let threw = false;
     try {
       const result = mergeMilestoneToMain(repo, "M060", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M060)"), "merge commit created");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M060"), "merge commit created");
     } catch (err) {
       threw = true;
     }
@@ -312,7 +314,7 @@ describe("auto-worktree-milestone-merge", () => {
     let errMsg = "";
     try {
       const result = mergeMilestoneToMain(dir, "M070", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M070)"), "merge commit created on master");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M070"), "merge commit created on master");
     } catch (err) {
       threw = true;
       errMsg = err instanceof Error ? err.message : String(err);
@@ -392,7 +394,7 @@ describe("auto-worktree-milestone-merge", () => {
     let threw = false;
     try {
       const result = mergeMilestoneToMain(repo, "M090", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M090)"), "#1738 merge succeeds after cleaning synced dirs");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M090"), "#1738 merge succeeds after cleaning synced dirs");
     } catch (err: unknown) {
       threw = true;
     }
@@ -419,7 +421,7 @@ describe("auto-worktree-milestone-merge", () => {
     let threw = false;
     try {
       const result = mergeMilestoneToMain(repo, "M100", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M100)"), "#2151: merge succeeds after stashing dirty files");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M100"), "#2151: merge succeeds after stashing dirty files");
     } catch {
       threw = true;
     }
@@ -519,7 +521,7 @@ describe("auto-worktree-milestone-merge", () => {
     let errMsg = "";
     try {
       const result = mergeMilestoneToMain(repo, "M140", roadmap);
-      assert.ok(result.commitMessage.includes("feat(M140)"), "merge commit created");
+      assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M140"), "merge commit created");
     } catch (err) {
       threw = true;
       errMsg = err instanceof Error ? err.message : String(err);
@@ -589,7 +591,7 @@ describe("auto-worktree-milestone-merge", () => {
     assert.ok(existsSync(squashMsgPath), "SQUASH_MSG planted before merge");
     const result = mergeMilestoneToMain(repo, "M160", roadmap);
-    assert.ok(result.commitMessage.includes("feat(M160)"), "merge commit created");
+    assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M160"), "merge commit created");
     assert.ok(!existsSync(squashMsgPath), "#1853: SQUASH_MSG must not persist after successful squash-merge");
   });
@@ -609,7 +611,7 @@ describe("auto-worktree-milestone-merge", () => {
     ]);
     const result = mergeMilestoneToMain(repo, "M170", roadmap);
-    assert.ok(result.commitMessage.includes("feat(M170)"), "merge commit created");
+    assert.ok(result.commitMessage.includes("feat:") && result.commitMessage.includes("GSD-Milestone: M170"), "merge commit created");
     assert.ok(
       existsSync(join(repo, "uncommitted-agent-code.ts")),

package/src/resources/extensions/gsd/tests/feature-branch-lifecycle-integration.test.ts CHANGED Viewed

@@ -252,7 +252,7 @@ describe('feature-branch-lifecycle-integration', async () => {
       // Exactly one new commit on feature branch (the squash merge)
       const featureLog = run(`git log --oneline ${featureBranch}`, repo);
       assert.ok(
-        featureLog.includes(`feat(${milestoneId})`),
+        featureLog.includes("feat:"),
         "feature branch has milestone merge commit",
       );