npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580 - Mend

@opengsd/gsd-pi 1.1.1-dev.9bb7453 → 1.1.1-dev.9f86580

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (219) hide show

package/src/resources/extensions/gsd/tests/worktree-reentry.test.ts ADDED Viewed

@@ -0,0 +1,102 @@
+/**
+ * worktree-reentry.test.ts — Unit tests for reenterActiveWorktreeIfNeeded.
+ *
+ * Covers the cold-start (/quit + relaunch) path where cwd lands at the project
+ * root instead of the active milestone's worktree. The helper should chdir back
+ * into the worktree deterministically, and no-op when it shouldn't act.
+ */
+import { describe, test, beforeEach } from "node:test";
+import assert from "node:assert/strict";
+import { mkdtempSync, mkdirSync, writeFileSync, rmSync, realpathSync } from "node:fs";
+import { join } from "node:path";
+import { tmpdir } from "node:os";
+import { execFileSync } from "node:child_process";
+import { createAutoWorktree, _resetAutoWorktreeOriginalBaseForTests } from "../auto-worktree.ts";
+import { reenterActiveWorktreeIfNeeded } from "../worktree-reentry.ts";
+// Safe: all inputs below are hardcoded test strings, not user input.
+function git(subArgs: string[], cwd: string): void { // Test helper: runs `git <subArgs>` synchronously with `cwd` as the working directory.
+  execFileSync("git", subArgs, { cwd, stdio: ["ignore", "pipe", "pipe"] }); // stdin is ignored, stdout/stderr are piped instead of inherited; the output is discarded. execFileSync throws when git exits non-zero.
+}
+function createTempRepo( // Builds a throwaway git repo with one commit containing README.md and .gsd/milestones/M001/CONTEXT.md, and returns its path.
+  t: { after: (fn: () => void) => void }, // test context; only its `after` hook is used, to register cleanup
+  opts: { isolation?: "worktree" | "none" } = {}, // "worktree" writes a PREFERENCES.md selecting worktree isolation; "none" or omitted writes no preferences file
+): string {
+  const dir = realpathSync(mkdtempSync(join(tmpdir(), "wt-reentry-"))); // realpath'd — presumably so exact path assertions survive a symlinked tmpdir
+  t.after(() => rmSync(dir, { recursive: true, force: true })); // remove the whole repo once the calling test finishes
+  git(["init"], dir);
+  git(["config", "user.email", "test@test.com"], dir); // identity is set in this repo's own config before the commit below
+  git(["config", "user.name", "Test"], dir);
+  writeFileSync(join(dir, "README.md"), "# test\n");
+  mkdirSync(join(dir, ".gsd"), { recursive: true });
+  if (opts.isolation === "worktree") { // only the explicit "worktree" option produces a preferences file
+    writeFileSync(join(dir, ".gsd", "PREFERENCES.md"), "---\ngit:\n  isolation: worktree\n---\n", "utf-8");
+  }
+  const msDir = join(dir, ".gsd", "milestones", "M001"); // milestone id M001 is the one the tests pass to createAutoWorktree
+  mkdirSync(msDir, { recursive: true });
+  writeFileSync(join(msDir, "CONTEXT.md"), "# M001 Context\n");
+  git(["add", "."], dir);
+  git(["commit", "-m", "init"], dir);
+  git(["branch", "-M", "main"], dir); // force the branch name to "main" whatever git's default initial branch is
+  return dir;
+}
+describe("reenterActiveWorktreeIfNeeded", () => { // Suite: each test builds its own temp repo and asserts on both the helper's return value and process.cwd().
+  const savedCwd = process.cwd(); // captured once when the suite body runs; every test restores cwd to it
+  beforeEach(() => { // before each test: call the test-only reset exported by auto-worktree.ts, then restore cwd
+    _resetAutoWorktreeOriginalBaseForTests();
+    process.chdir(savedCwd);
+  });
+  test("re-enters the sole live worktree when sitting at the project root", async (t) => { // worktree exists but cwd is the project root -> helper should move cwd into it
+    const dir = createTempRepo(t, { isolation: "worktree" });
+    t.after(() => process.chdir(savedCwd)); // put cwd back when this test ends
+    // createAutoWorktree chdir's INTO the worktree; simulate a cold start by
+    // returning to the project root with a clean workspace registry.
+    createAutoWorktree(dir, "M001");
+    process.chdir(dir);
+    _resetAutoWorktreeOriginalBaseForTests();
+    const entered = await reenterActiveWorktreeIfNeeded(dir);
+    assert.ok(entered, "re-entry returned a worktree path");
+    assert.strictEqual(realpathSync(process.cwd()), realpathSync(entered!), "cwd moved into the worktree"); // both sides realpath'd before comparing
+    assert.strictEqual(entered, join(dir, ".gsd", "worktrees", "M001")); // the returned path itself must be exactly <repo>/.gsd/worktrees/M001
+  });
+  test("no-op when already inside a worktree", async (t) => { // cwd is left inside the worktree -> helper must return null and not move
+    const dir = createTempRepo(t, { isolation: "worktree" });
+    t.after(() => process.chdir(savedCwd));
+    createAutoWorktree(dir, "M001"); // leaves cwd inside the worktree
+    const cwdBefore = process.cwd();
+    const entered = await reenterActiveWorktreeIfNeeded(dir);
+    assert.strictEqual(entered, null, "no re-entry when already in a worktree");
+    assert.strictEqual(process.cwd(), cwdBefore, "cwd unchanged");
+  });
+  test("no-op when isolation is not worktree", async (t) => { // with isolation "none" createTempRepo writes no PREFERENCES.md
+    const dir = createTempRepo(t, { isolation: "none" });
+    t.after(() => process.chdir(savedCwd));
+    process.chdir(dir);
+    const entered = await reenterActiveWorktreeIfNeeded(dir);
+    assert.strictEqual(entered, null, "isolation=none never re-enters");
+    assert.strictEqual(realpathSync(process.cwd()), realpathSync(dir), "cwd stays at project root");
+  });
+  test("no-op when there are no worktrees", async (t) => { // worktree isolation is configured but createAutoWorktree is never called
+    const dir = createTempRepo(t, { isolation: "worktree" });
+    t.after(() => process.chdir(savedCwd));
+    process.chdir(dir);
+    const entered = await reenterActiveWorktreeIfNeeded(dir);
+    assert.strictEqual(entered, null, "nothing to re-enter");
+    assert.strictEqual(realpathSync(process.cwd()), realpathSync(dir), "cwd stays at project root");
+  });
+});

package/src/resources/extensions/gsd/tests/write-gate-planning-unit.test.ts CHANGED Viewed

@@ -9,6 +9,7 @@ import test from 'node:test';
 import assert from 'node:assert/strict';
 import { join, sep } from 'node:path';
+import { GSD_PHASE_SCOPE_DISPLAY_REASON } from '../auto-unit-tool-scope.ts';
 import { ALLOWED_PLANNING_DISPATCH_AGENTS, shouldBlockPlanningUnit } from '../bootstrap/write-gate.ts';
 import { extractSubagentAgentClasses } from '../bootstrap/subagent-input.ts';
 import { isDeterministicPolicyError } from '../auto-tool-tracking.ts';
@@ -65,6 +66,19 @@ test('planning-unit: deterministic block reason is suitable for retry short-circ
   assert.strictEqual(isDeterministicPolicyError(r.reason!), true);
 });
+test('planning-unit: blocked tool-policy calls include UI-safe display reason', () => { // Checks that a blocked planning-unit call carries both the raw reason and a separate displayReason.
+  const r = shouldBlockPlanningUnit(
+    'edit',
+    'src/main.ts',
+    BASE, // BASE and PLANNING are defined outside this hunk — not visible here
+    'discuss-milestone',
+    PLANNING,
+  );
+  assert.strictEqual(r.block, true);
+  assert.match(r.reason!, /HARD BLOCK/); // the raw reason must contain the "HARD BLOCK" marker
+  assert.strictEqual(r.displayReason, GSD_PHASE_SCOPE_DISPLAY_REASON); // displayReason must equal the constant imported from auto-unit-tool-scope.ts
+});
 test('planning-unit: blocks write to user source via relative path', () => {
   const r = shouldBlockPlanningUnit('write', 'src/main.ts', BASE, 'plan-milestone', PLANNING);
   assert.strictEqual(r.block, true);
@@ -367,6 +381,7 @@ test('auto-unit scope: execute-task allows only its task completion lifecycle to
   assert.strictEqual(blocked.block, true);
   assert.match(blocked.reason!, /HARD BLOCK/);
   assert.match(blocked.reason!, /gsd_save_gate_result/);
+  assert.strictEqual(blocked.displayReason, GSD_PHASE_SCOPE_DISPLAY_REASON);
   assert.strictEqual(isDeterministicPolicyError(blocked.reason!), true);
 });

package/src/resources/extensions/gsd/tool-contract.ts CHANGED Viewed

@@ -8,12 +8,14 @@ import {
   type ToolsPolicy,
 } from "./unit-context-manifest.js";
 import { getRequiredWorkflowToolsForAutoUnit } from "./workflow-mcp.js";
+import { getUnitToolSurfaceContract } from "./unit-tool-contracts.js";
 export interface UnitToolContract {
   unitType: string;
   contextMode: ContextModePolicy;
   toolsPolicy: ToolsPolicy;
   requiredWorkflowTools: readonly string[];
+  forbiddenWorkflowTools: readonly { name: string; reason: string }[];
   promptObligations: readonly string[];
   validationRules: readonly string[];
   closeoutTools: readonly string[];
@@ -30,6 +32,7 @@ export type ToolContractResult =
 export function compileUnitToolContract(unitType: string): ToolContractResult {
   const manifest = resolveManifest(unitType);
+  const surfaceContract = getUnitToolSurfaceContract(unitType);
   if (!manifest) {
     return {
       ok: false,
@@ -39,8 +42,10 @@ export function compileUnitToolContract(unitType: string): ToolContractResult {
   }
   const requiredWorkflowTools = getRequiredWorkflowToolsForAutoUnit(unitType);
+  const forbiddenWorkflowTools = Object.entries(surfaceContract?.forbiddenGsdTools ?? {})
+    .map(([name, reason]) => ({ name, reason }));
   const closeoutTools = requiredWorkflowTools.filter((tool) =>
-    /^gsd_(?:task|slice|milestone|complete|validate|save|summary)/.test(tool),
+    /^gsd_(?:task|slice|milestone|complete|validate|save|summary|uat)/.test(tool),
   );
   if (requiresCloseoutTool(unitType) && closeoutTools.length === 0) {
@@ -58,6 +63,7 @@ export function compileUnitToolContract(unitType: string): ToolContractResult {
       contextMode: manifest.contextMode,
       toolsPolicy: manifest.tools,
       requiredWorkflowTools,
+      forbiddenWorkflowTools,
       promptObligations: [
         `context-mode:${manifest.contextMode}`,
         `tools-policy:${manifest.tools.mode}`,

package/src/resources/extensions/gsd/tool-presentation-plan.ts CHANGED Viewed

@@ -1,6 +1,20 @@
 // Project/App: gsd-pi
 // File Purpose: Resolve phase-aware tool surfaces for GSD model presentations.
+import {
+  RUN_UAT_BROWSER_TOOL_NAMES,
+  RUN_UAT_READ_ONLY_TOOL_NAMES,
+  RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
+  RUN_UAT_WORKFLOW_TOOL_NAMES,
+} from "./unit-tool-contracts.js";
+export {
+  RUN_UAT_BROWSER_TOOL_NAMES,
+  RUN_UAT_READ_ONLY_TOOL_NAMES,
+  RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
+  RUN_UAT_WORKFLOW_TOOL_NAMES,
+} from "./unit-tool-contracts.js";
 export type ToolPresentationSurface = "provider-tools" | "claude-code-sdk" | "mcp" | "hybrid";
 export interface ToolPresentationModel {
@@ -20,13 +34,12 @@ export interface ToolPresentationPlan {
   diagnostics: string[];
 }
-export const RUN_UAT_WORKFLOW_TOOL_NAMES = [
-  "gsd_uat_exec",
-  "gsd_uat_result_save",
-  "gsd_resume",
-  "gsd_milestone_status",
-  "gsd_journal_query",
-] as const;
+export interface RunUatResultPresentation { // Return shape of buildRunUatResultPresentation and buildRunUatPresentationForType.
+  surface: ToolPresentationSurface; // buildRunUatResultPresentation defaults this to "mcp"
+  presentedTools: string[]; // caller-supplied list (deduped) or, by default, the canonical run-uat tool names
+  blockedTools: Array<{ name: string; reason: string }>; // one entry per blocked tool name with its reason
+  toolPresentationPlanId: string; // buildRunUatResultPresentation always sets RUN_UAT_TOOL_PRESENTATION_PLAN_ID
+}
 export const RUN_UAT_FORBIDDEN_TOOL_NAMES = [
   "edit",
@@ -105,10 +118,72 @@ function addBlockedTool(
 export function buildRunUatCanonicalToolNames(options: { includeBrowserTools?: readonly string[] } = {}): string[] { // Concatenates workflow, read-only and optional browser tool names, then passes the list through dedupe (defined outside this hunk).
   return dedupe([
     ...RUN_UAT_WORKFLOW_TOOL_NAMES,
+    ...RUN_UAT_READ_ONLY_TOOL_NAMES,
     ...(options.includeBrowserTools ?? []), // no browser tools are added unless the caller passes them
   ]);
 }
+// UAT modes whose run-uat instructions direct the runner to exercise the live
+// app in a browser. These modes receive the browser tool surface so the runner
+// can actually drive the page instead of silently deferring browser checks to a
+// human. See run-uat.md automation rules: `browser-executable`, `live-runtime`,
+// and `mixed` are all told to drive a browser/runtime path, and
+// `human-experience` is told to capture screenshots. Without this, a webpage
+// UAT classified as anything but `browser-executable` had no browser tools and
+// downgraded its live checks to NEEDS-HUMAN (M001/S03 regression).
+export const BROWSER_INCLUSIVE_UAT_TYPES: readonly string[] = [ // typed as readonly string[] so an arbitrary string can be tested with .includes()
+  "browser-executable",
+  "live-runtime",
+  "mixed",
+  "human-experience",
+];
+function uatTypeIncludesBrowser(uatType: string | undefined): boolean { // True only when uatType is defined and listed in BROWSER_INCLUSIVE_UAT_TYPES.
+  return uatType !== undefined && BROWSER_INCLUSIVE_UAT_TYPES.includes(uatType); // exact string match; undefined and unlisted types yield false
+}
+export function runUatBrowserToolsForType(uatType: string | undefined): readonly string[] { // Browser tool names for a UAT type.
+  return uatTypeIncludesBrowser(uatType) ? RUN_UAT_BROWSER_TOOL_NAMES : []; // RUN_UAT_BROWSER_TOOL_NAMES for browser-inclusive types, otherwise a fresh empty array
+}
+export function runUatPresentationSurfaceForType(uatType: string | undefined): ToolPresentationSurface { // Default presentation surface for a UAT type.
+  return uatTypeIncludesBrowser(uatType) ? "hybrid" : "mcp"; // "hybrid" for browser-inclusive types; "mcp" for every other value, including undefined
+}
+export function buildRunUatPresentationForType( // Builds the run-uat presentation for a UAT type by delegating to buildRunUatResultPresentation.
+  uatType: string | undefined, // selects the default surface and whether browser tools are offered
+  options: { // optional caller overrides
+    surface?: ToolPresentationSurface; // when set, wins over the type-derived surface
+    presentedTools?: readonly string[]; // when set, the delegate uses this list (deduped) instead of the canonical one
+  } = {},
+): RunUatResultPresentation {
+  return buildRunUatResultPresentation({
+    ...options,
+    surface: options.surface ?? runUatPresentationSurfaceForType(uatType),
+    includeBrowserTools: runUatBrowserToolsForType(uatType), // the delegate only reads this when options.presentedTools is not provided
+  });
+}
+export function buildRunUatResultPresentation(options: { // Assembles a RunUatResultPresentation; every option is optional and has a default.
+  surface?: ToolPresentationSurface;
+  includeBrowserTools?: readonly string[];
+  presentedTools?: readonly string[];
+} = {}): RunUatResultPresentation {
+  const presentedTools = options.presentedTools // an explicit list is deduped and used as-is; otherwise the canonical list is built
+    ? dedupe(options.presentedTools)
+    : buildRunUatCanonicalToolNames({ includeBrowserTools: options.includeBrowserTools });
+  const blockedTools = RUN_UAT_FORBIDDEN_TOOL_NAMES
+    .filter((toolName) => !toolName.includes("*")) // entries containing "*" are left out — presumably wildcard patterns rather than concrete tool names
+    .map((name) => ({ name, reason: "forbidden during run-uat" })); // every blocked entry gets the same fixed reason
+  return {
+    surface: options.surface ?? "mcp", // "mcp" is the fallback surface
+    presentedTools,
+    blockedTools,
+    toolPresentationPlanId: RUN_UAT_TOOL_PRESENTATION_PLAN_ID, // always the canonical plan id; there is no option to override it
+  };
+}
 export function resolveToolPresentationPlan(options: {
   phase: string;
   surface: ToolPresentationSurface;

package/src/resources/extensions/gsd/tools/complete-slice.ts CHANGED Viewed

@@ -34,7 +34,8 @@ import { getGatesForTurn } from "../gate-registry.js";
 import { gsdProjectionRoot, clearPathCache, resolveMilestoneFile } from "../paths.js";
 import { resolveCanonicalMilestoneRoot } from "../worktree-manager.js";
 import { checkOwnership, sliceUnitKey } from "../unit-ownership.js";
-import { saveFile, clearParseCache } from "../files.js";
+import { saveFile, clearParseCache, extractUatType } from "../files.js";
+import { hasBrowserRequiredText } from "../browser-evidence.js";
 import { invalidateStateCache } from "../state.js";
 import { renderRoadmapFromDb } from "../markdown-renderer.js";
 import { parseRoadmap } from "../parsers-legacy.js";
@@ -342,6 +343,33 @@ export async function handleCompleteSlice(
     return { error: `slice verification indicates blocked/failed state — do not complete a slice that has not passed verification. Address the blockers and re-verify first.` };
   }
+  // ── Browser/web UAT classification gate ────────────────────────────────
+  // A UAT that drives a running web UI (opening a page in a browser,
+  // navigating to a page/localhost) must declare a browser-capable mode so the
+  // run-uat runner surfaces browser tools and actually launches a browser.
+  // Otherwise the browser checks get silently deferred to a human and the slice
+  // passes on static checks alone (M001/S03 regression). `browser-executable`,
+  // `live-runtime`, `mixed`, and `human-experience` all receive browser tools
+  // (see BROWSER_INCLUSIVE_UAT_TYPES); of the rest, only `artifact-driven` is rejected here.
+  //
+  // Reuse the canonical hasBrowserRequiredText detector (also used by dispatch
+  // and milestone validation): it skips Not-Proven/Out-of-Scope disclaimer
+  // sections and only treats verbs like navigate/open as web when they sit next
+  // to browser/page/localhost — avoiding false positives on CLI/file/API steps.
+  //
+  // Only `artifact-driven` is gated. It is the one mode that performs no
+  // execution at all (static/file checks), so a browser-requiring UAT under it
+  // genuinely defers verification to a human. Every other mode has a real
+  // verification path: `runtime-executable` runs browser test commands like
+  // `npx playwright test` via gsd_uat_exec, and live-runtime/mixed/
+  // browser-executable receive browser tools (BROWSER_INCLUSIVE_UAT_TYPES).
+  const declaredUatMode = extractUatType(params.uatContent || "") ?? "artifact-driven";
+  if (declaredUatMode === "artifact-driven" && hasBrowserRequiredText(params.uatContent || "")) {
+    return {
+      error: `UAT requires browser verification (opening a page in a browser, navigating to a page or localhost, screenshots) but declares "UAT mode: artifact-driven", which only runs static/file checks and would defer the browser work to a human. Use a mode that actually verifies the UI: "browser-executable" (interactive browser tools), "runtime-executable" (a browser test command such as playwright), or a browser-inclusive "mixed"/"live-runtime". Re-author the UAT Type section and complete the slice again.`,
+    };
+  }
   // ── Guards + DB writes inside a single transaction (prevents TOCTOU) ───
   const completedAt = new Date().toISOString();
   let guardError: string | null = null;

package/src/resources/extensions/gsd/tools/workflow-tool-executors.ts CHANGED Viewed

@@ -17,8 +17,9 @@ import {
 } from "../gsd-db.js";
 import { GATE_REGISTRY } from "../gate-registry.js";
 import { generateRequirementsMd, saveArtifactToDb } from "../db-writer.js";
-import { clearPathCache, resolveGsdPathContract, resolveMilestoneFile, resolveSliceFile } from "../paths.js";
+import { clearPathCache, relSliceFile, resolveGsdPathContract, resolveMilestoneFile, resolveSliceFile } from "../paths.js";
 import { saveFile, clearParseCache } from "../files.js";
+import { buildManualValidationGuidance, resolveCanonicalMilestoneRoot } from "../worktree-manager.js";
 import { existsSync, readdirSync, readFileSync, unlinkSync } from "node:fs";
 import { isAbsolute, join, resolve } from "node:path";
 import type { CompleteMilestoneParams } from "./complete-milestone.js";
@@ -48,9 +49,11 @@ import { loadEffectiveGSDPreferences } from "../preferences.js";
 import { parseProject } from "../schemas/parsers.js";
 import { getAutoRuntimeSnapshot } from "../auto-runtime-state.js";
 import {
+  buildRunUatPresentationForType,
   canonicalWorkflowToolName,
   parseMcpToolName,
   RUN_UAT_FORBIDDEN_TOOL_NAMES,
+  RUN_UAT_TOOL_PRESENTATION_PLAN_ID,
   RUN_UAT_WORKFLOW_TOOL_NAMES,
 } from "../tool-presentation-plan.js";
@@ -90,7 +93,7 @@ function blockIfWrongAutoUnit(requiredUnitType: string, operation: string): Tool
   if (!snapshot.active || !snapshot.currentUnit) return null;
   if (snapshot.currentUnit.type === requiredUnitType) return null;
-  const error = `HARD BLOCK: ${operation} may only run from ${requiredUnitType}; active unit is ${snapshot.currentUnit.type}. The orchestrator owns phase transitions.`;
+  const error = `HARD BLOCK: Tool Contract failure: ${operation} may only run from ${requiredUnitType}; active unit is ${snapshot.currentUnit.type}. Fix unit-tool-contracts.ts or the active Unit prompt. The orchestrator owns phase transitions.`;
   return {
     content: [{ type: "text", text: error }],
     details: { operation, error },
@@ -178,7 +181,11 @@ export async function executeSummarySave(
   if (rootArtifactGuard.block) {
     return {
       content: [{ type: "text", text: `Error saving artifact: ${rootArtifactGuard.reason ?? "root artifact write blocked"}` }],
-      details: { operation: "save_summary", error: "root_artifact_write_blocked" },
+      details: {
+        operation: "save_summary",
+        error: "root_artifact_write_blocked",
+        displayReason: "Approval confirmation required before saving final project setup artifacts.",
+      },
       isError: true,
     };
   }
@@ -191,9 +198,13 @@ export async function executeSummarySave(
   if (contextGuard.block) {
     return {
       content: [{ type: "text", text: `Error saving artifact: ${contextGuard.reason ?? "context write blocked"}` }],
-      details: { operation: "save_summary", error: "context_write_blocked" },
-    isError: true,
-      };
+      details: {
+        operation: "save_summary",
+        error: "context_write_blocked",
+        displayReason: "Depth check required before writing milestone context.",
+      },
+      isError: true,
+    };
   }
   try {
     let relativePath: string;
@@ -441,6 +452,9 @@ export interface UatEvidenceRef {
   kind: "gsd_uat_exec" | "gsd_exec" | "screenshot" | "log" | "url" | "browser";
   ref: string;
   note?: string;
+  unitType?: string;
+  tool?: string;
+  executionId?: string;
 }
 export interface UatCheckResultInput {
@@ -1008,10 +1022,68 @@ function isNonEmptyString(value: unknown): value is string {
   return typeof value === "string" && value.trim().length > 0;
 }
+function mergeBlockedTools( // Merges caller-supplied and canonical blocked-tool entries into one list with a single entry per canonical tool name.
+  current: UatPresentationInput["blockedTools"] | undefined, // caller-supplied entries; undefined is treated as empty
+  canonical: UatPresentationInput["blockedTools"], // canonical entries, applied after `current`
+): UatPresentationInput["blockedTools"] {
+  const merged = new Map<string, { name: string; reason: string }>();
+  for (const entry of [...(current ?? []), ...canonical]) { // canonical entries come last, so on a key collision they replace the caller's entry
+    merged.set(canonicalWorkflowToolName(parseMcpToolName(entry.name)?.tool ?? entry.name), entry); // key: the parsed MCP tool part when parsing succeeds, else the raw name, passed through canonicalWorkflowToolName
+  }
+  return [...merged.values()]; // Map iteration follows first-insertion order of each key
+}
+function mergePresentedTools(current: readonly string[] | undefined, canonical: readonly string[]): string[] { // Union of caller and canonical tool names, caller entries first.
+  return [...new Set([...(current ?? []), ...canonical])]; // exact-string dedupe via Set; names are not canonicalized in this function
+}
+function normalizeUatVerdict(params: UatResultSaveParams): UatResultSaveParams { // Returns a copy with `verdict` upper-cased when it is a string; otherwise returns params as-is.
+  const raw = params as Partial<UatResultSaveParams> & Record<string, unknown>; // widened view so a missing or non-string verdict can be inspected at runtime
+  if (typeof raw.verdict === "string") {
+    return { ...params, verdict: raw.verdict.toUpperCase() as UatVerdict }; // unchecked cast and no trimming — the allowed values are enforced in ensureUatRequiredFields
+  }
+  return params; // same object, not a copy
+}
+function supplyDefaultPresentation(params: UatResultSaveParams): UatResultSaveParams { // Fills in the canonical presentation for params.uatType when none was provided.
+  const raw = params as Partial<UatResultSaveParams> & Record<string, unknown>; // widened view so an absent presentation can be detected at runtime
+  if (!raw.presentation) { // any falsy value (undefined, null, ...) counts as "not provided"
+    return { ...params, presentation: buildRunUatPresentationForType(params.uatType) };
+  }
+  return params; // a provided presentation is left untouched here
+}
+function mergeCanonicalPresentation(params: UatResultSaveParams): UatResultSaveParams { // Returns a copy whose presentation is the provided one overlaid with the canonical run-uat presentation for params.uatType.
+  const canonicalPresentation = buildRunUatPresentationForType(params.uatType);
+  const providedPresentation = params.presentation as Partial<UatPresentationInput>; // NOTE(review): the property reads below would throw on an undefined presentation — executeUatResultSave validates it first
+  return {
+    ...params,
+    presentation: {
+      ...providedPresentation, // keeps any extra fields the caller sent
+      surface: providedPresentation.surface ?? canonicalPresentation.surface, // the caller's surface wins when present
+      presentedTools: mergePresentedTools(providedPresentation.presentedTools, canonicalPresentation.presentedTools),
+      blockedTools: mergeBlockedTools(providedPresentation.blockedTools, canonicalPresentation.blockedTools),
+      toolPresentationPlanId: RUN_UAT_TOOL_PRESENTATION_PLAN_ID, // always overwritten with the canonical plan id
+    } as UatPresentationInput,
+  };
+}
+const VALID_UAT_TYPES: readonly UatType[] = [ // allow-list checked by ensureUatRequiredFields and echoed in its error message
+  "artifact-driven",
+  "browser-executable",
+  "runtime-executable",
+  "live-runtime",
+  "mixed",
+  "human-experience",
+];
 function ensureUatRequiredFields(params: UatResultSaveParams): string | null {
   if (!isNonEmptyString(params.milestoneId)) return "milestoneId is required";
   if (!isNonEmptyString(params.sliceId)) return "sliceId is required";
   if (!isNonEmptyString(params.uatType)) return "uatType is required";
+  if (!(VALID_UAT_TYPES as readonly string[]).includes(params.uatType)) {
+    return `uatType must be one of: ${VALID_UAT_TYPES.join(", ")}`;
+  }
   if (!["PASS", "FAIL", "PARTIAL"].includes(params.verdict)) return "verdict must be PASS, FAIL, or PARTIAL";
   if (!Array.isArray(params.checks) || params.checks.length === 0) return "checks must contain at least one UAT check";
   if (!params.presentation || !Array.isArray(params.presentation.presentedTools)) return "presentation.presentedTools is required";
@@ -1147,6 +1219,15 @@ function validateUatChecks(basePath: string, params: UatResultSaveParams): strin
   return null;
 }
+function validateFreshUatOwnedEvidence(params: UatResultSaveParams): string | null { // Returns null when at least one check has evidence of kind "gsd_uat_exec", otherwise an error message.
+  const hasFreshUatEvidence = params.checks.some((check) =>
+    (check.evidence ?? []).some((evidence) => evidence.kind === "gsd_uat_exec") // a check with no evidence array counts as having none; only `kind` is inspected in this function
+  );
+  return hasFreshUatEvidence
+    ? null
+    : "UAT Assessment requires at least one fresh gsd_uat_exec evidence reference from run-uat";
+}
 function validateUatMode(params: UatResultSaveParams): string | null {
   const modes = new Set(params.checks.map((check) => check.mode));
   const hasHuman = params.checks.some((check) => check.result === "NEEDS-HUMAN");
@@ -1257,7 +1338,12 @@ function escapeMarkdownTableCell(value: unknown): string {
     .replace(/\r?\n/g, "<br>");
 }
-function renderUatAssessment(params: UatResultSaveParams, attempt: number, gateVerdict: "pass" | "flag"): string {
+function renderUatAssessment(
+  params: UatResultSaveParams,
+  attempt: number,
+  gateVerdict: "pass" | "flag",
+  basePath: string,
+): string {
   const lines = [
     "---",
     `sliceId: ${params.sliceId}`,
@@ -1292,6 +1378,27 @@ function renderUatAssessment(params: UatResultSaveParams, attempt: number, gateV
     "",
     `Aggregate UAT gate saved as ${gateVerdict}.`,
   ];
+  // When any check still needs a human, point them at the exact checkout to
+  // validate — critical for worktree milestones whose code sits under a hidden
+  // `.gsd/worktrees/` path the reviewer would otherwise have to hunt for.
+  const hasHuman = params.checks.some((check) => check.result === "NEEDS-HUMAN");
+  if (hasHuman) {
+    const guidance = buildManualValidationGuidance(basePath, params.milestoneId, {
+      uatPath: relSliceFile(basePath, params.milestoneId, params.sliceId, "UAT"),
+    });
+    if (guidance) {
+      lines.push(
+        "",
+        "## Manual Validation",
+        "",
+        "One or more checks are marked `NEEDS-HUMAN` and require a person to validate:",
+        "",
+        ...guidance.split("\n").map((line) => `- ${line}`),
+      );
+    }
+  }
   return `${lines.join("\n")}\n`;
 }
@@ -1306,15 +1413,30 @@ export async function executeUatResultSave(
   params: UatResultSaveParams,
   basePath: string = process.cwd(),
 ): Promise<ToolExecutionResult> {
+  const unitGuard = blockIfWrongAutoUnit("run-uat", "save_uat_result");
+  if (unitGuard) return unitGuard;
+  // Phase 1: normalize verdict and supply the canonical presentation when none was provided.
+  params = normalizeUatVerdict(params);
+  params = supplyDefaultPresentation(params);
   const dbAvailable = await ensureDbOpen(basePath);
   if (!dbAvailable) return errorResult("save_uat_result", "GSD database is not available.", "db_unavailable");
+  // Phase 2: validate the submitted presentation before the canonical merge so that
+  // presentations missing required workflow tools are rejected rather than silently patched.
   const requiredError = ensureUatRequiredFields(params);
   if (requiredError) return errorResult("save_uat_result", requiredError, "invalid_params");
   const presentationError = validateCanonicalPresentation(params);
   if (presentationError) return errorResult("save_uat_result", presentationError, "alias_tool_name");
+  // Phase 3: merge in the canonical plan ID and read-only audit tools so the persisted
+  // artifact always carries the full audit surface even when the provider omitted them.
+  params = mergeCanonicalPresentation(params);
   const checkError = validateUatChecks(basePath, params);
   if (checkError) return errorResult("save_uat_result", checkError, "invalid_evidence");
+  const freshEvidenceError = validateFreshUatOwnedEvidence(params);
+  if (freshEvidenceError) return errorResult("save_uat_result", freshEvidenceError, "missing_fresh_uat_evidence");
   const modeError = validateUatMode(params);
   if (modeError) return errorResult("save_uat_result", modeError, "uat_mode_mismatch");
@@ -1329,7 +1451,7 @@ export async function executeUatResultSave(
     }
     const gateVerdict = params.verdict === "PASS" ? "pass" : "flag";
     const rationale = params.notes ?? `UAT ${params.verdict} for ${params.sliceId}.`;
-    const assessment = renderUatAssessment(params, attempt, gateVerdict);
+    const assessment = renderUatAssessment(params, attempt, gateVerdict, basePath);
     const summary = await executeSummarySave(
       {
         milestone_id: params.milestoneId,
@@ -1373,8 +1495,20 @@ export async function executeUatResultSave(
       evaluatedAt,
     });
     invalidateStateCache();
+    // Surface where to validate when checks are left for a human, so the path
+    // (often a buried worktree checkout) reaches the reviewer, not just the file.
+    const hasHuman = params.checks.some((check) => check.result === "NEEDS-HUMAN");
+    const manualGuidance = hasHuman
+      ? buildManualValidationGuidance(basePath, params.milestoneId, {
+          uatPath: relSliceFile(basePath, params.milestoneId, params.sliceId, "UAT"),
+        })
+      : null;
+    const savedText = `UAT result saved for ${params.milestoneId}/${params.sliceId}: ${params.verdict}`;
     return {
-      content: [{ type: "text", text: `UAT result saved for ${params.milestoneId}/${params.sliceId}: ${params.verdict}` }],
+      content: [{
+        type: "text",
+        text: manualGuidance ? `${savedText}\n\nManual validation needed:\n${manualGuidance}` : savedText,
+      }],
       details: {
         operation: "save_uat_result",
         milestoneId: params.milestoneId,
@@ -1384,6 +1518,9 @@ export async function executeUatResultSave(
         attempt,
         attemptPath,
         recommendedNextUnit: params.verdict === "PASS" ? null : "reactive-execute",
+        ...(hasHuman
+          ? { manualValidationPath: resolveCanonicalMilestoneRoot(basePath, params.milestoneId) }
+          : {}),
       },
     };
   } catch (err) {