npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.74e8dd1 → 1.1.1-dev.75048e7 - Mend

@opengsd/gsd-pi 1.1.1-dev.74e8dd1 → 1.1.1-dev.75048e7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (329)

package/src/resources/extensions/gsd/mcp-project-config.ts CHANGED

@@ -1,14 +1,17 @@
-import { createHash } from "node:crypto";
 import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
-import { createRequire } from "node:module";
-import { basename, resolve } from "node:path";
+import { resolve } from "node:path";
 import { fileURLToPath } from "node:url";
+import {
+  GSD_BROWSER_MCP_SERVER_NAME,
+  resolveBundledGsdBrowserCliPath,
+  resolveGsdBrowserMcpLaunchConfig,
+} from "../shared/gsd-browser-cli.js";
 import { assertSafeDirectory } from "./validate-directory.js";
 import { detectWorkflowMcpLaunchConfig } from "./workflow-mcp.js";
 export const GSD_WORKFLOW_MCP_SERVER_NAME = "gsd-workflow";
-export const GSD_BROWSER_MCP_SERVER_NAME = "gsd-browser";
+export { GSD_BROWSER_MCP_SERVER_NAME, resolveBundledGsdBrowserCliPath };
 export interface ProjectMcpServerConfig {
   command?: string;
@@ -59,31 +62,6 @@ export function resolveBundledGsdCliPath(env: NodeJS.ProcessEnv = process.env):
   return null;
 }
-export function resolveBundledGsdBrowserCliPath(env: NodeJS.ProcessEnv = process.env): string | null {
-  const explicit = env.GSD_BROWSER_CLI_PATH?.trim() || env.GSD_BROWSER_BIN_PATH?.trim();
-  if (explicit) return explicit;
-  try {
-    const requireFromHere = createRequire(import.meta.url);
-    const packageJsonPath = requireFromHere.resolve("@opengsd/gsd-browser/package.json");
-    const candidate = resolve(packageJsonPath, "..", "bin", "gsd-browser");
-    if (existsSync(candidate)) return candidate;
-  } catch {
-    // Fall through to path candidates for source/dist layouts.
-  }
-  const candidates = [
-    resolve(fileURLToPath(new URL("../../../../node_modules/@opengsd/gsd-browser/bin/gsd-browser", import.meta.url))),
-    resolve(fileURLToPath(new URL("../../../../node_modules/.bin/gsd-browser", import.meta.url))),
-  ];
-  for (const candidate of candidates) {
-    if (existsSync(candidate)) return candidate;
-  }
-  return null;
-}
 export function buildProjectWorkflowMcpServerConfig(
   projectRoot: string,
   env: NodeJS.ProcessEnv = process.env,
@@ -119,31 +97,12 @@ function buildProjectWorkflowMcpServerSpec(
   };
 }
-function parseJsonEnv<T>(env: NodeJS.ProcessEnv, name: string): T | undefined {
-  const raw = env[name];
-  if (!raw) return undefined;
-  try {
-    return JSON.parse(raw) as T;
-  } catch {
-    throw new Error(`Invalid JSON in ${name}`);
-  }
-}
 function isEnvDisabled(value: string | undefined): boolean {
   if (!value) return false;
   const normalized = value.trim().toLowerCase();
   return normalized === "0" || normalized === "false" || normalized === "off";
 }
-function buildBrowserSessionName(projectRoot: string): string {
-  const resolvedProjectRoot = resolve(projectRoot);
-  const base = basename(resolvedProjectRoot)
-    .replace(/[^a-zA-Z0-9._-]+/g, "-")
-    .replace(/^-+|-+$/g, "") || "project";
-  const hash = createHash("sha1").update(resolvedProjectRoot).digest("hex").slice(0, 8);
-  return `gsd-${base}-${hash}`;
-}
 export function buildProjectBrowserMcpServerConfig(
   projectRoot: string,
   env: NodeJS.ProcessEnv = process.env,
@@ -157,39 +116,15 @@ function buildProjectBrowserMcpServerSpec(
 ): ProjectMcpServerSpec | null {
   if (isEnvDisabled(env.GSD_BROWSER_MCP_ENABLED)) return null;
-  const resolvedProjectRoot = resolve(projectRoot);
-  const serverName = env.GSD_BROWSER_MCP_NAME?.trim() || GSD_BROWSER_MCP_SERVER_NAME;
-  const explicitArgs = parseJsonEnv<unknown>(env, "GSD_BROWSER_MCP_ARGS");
-  const explicitEnv = parseJsonEnv<Record<string, string>>(env, "GSD_BROWSER_MCP_ENV");
-  const explicitCommand = env.GSD_BROWSER_MCP_COMMAND?.trim();
-  const explicitCliPath = env.GSD_BROWSER_CLI_PATH?.trim() || env.GSD_BROWSER_BIN_PATH?.trim();
-  const bundledCliPath = !explicitCommand && !explicitCliPath ? resolveBundledGsdBrowserCliPath(env) : null;
-  const command =
-    explicitCommand
-    || explicitCliPath
-    || (bundledCliPath ? process.execPath : undefined)
-    || "gsd-browser";
-  const args = Array.isArray(explicitArgs) && explicitArgs.length > 0
-    ? explicitArgs.map(String)
-    : [
-        ...(bundledCliPath ? [bundledCliPath] : []),
-        "mcp",
-        "--session",
-        buildBrowserSessionName(resolvedProjectRoot),
-        "--identity-scope",
-        "project",
-        "--identity-project",
-        resolvedProjectRoot,
-      ];
-  const cwd = env.GSD_BROWSER_MCP_CWD?.trim() || resolvedProjectRoot;
+  const launch = resolveGsdBrowserMcpLaunchConfig(projectRoot, env);
   return {
-    serverName,
+    serverName: launch.serverName,
     server: {
-      command,
-      args,
-      cwd,
-      ...(explicitEnv ? { env: explicitEnv } : {}),
+      command: launch.command,
+      args: launch.args,
+      cwd: launch.cwd,
+      ...(launch.env ? { env: launch.env } : {}),
     },
   };
 }

package/src/resources/extensions/gsd/memory-store.ts CHANGED

@@ -777,7 +777,10 @@ export function decayStaleMemories(thresholdUnits = 20): string[] {
     const cutoff = row['processed_at'] as string;
     const affected = adapter.prepare(
       `SELECT id FROM memories
-       WHERE superseded_by IS NULL AND updated_at < :cutoff AND confidence > 0.1`,
+       WHERE superseded_by IS NULL
+         AND updated_at < :cutoff
+         AND confidence > 0.1
+         AND (structured_fields IS NULL OR structured_fields NOT LIKE '%"sourceDecisionId"%')`,
     ).all({ ':cutoff': cutoff }).map((r) => r['id'] as string);
     decayMemoriesBefore(cutoff, new Date().toISOString());

package/src/resources/extensions/gsd/milestone-closeout.ts CHANGED

@@ -16,6 +16,7 @@ import { extractVerdict, isAcceptableUatVerdict } from "./verdict-parser.js";
 import { logWarning } from "./workflow-logger.js";
 import { hasImplementationArtifacts } from "./milestone-implementation-evidence.js";
 import { buildCompleteMilestonePrompt } from "./auto-prompts.js";
+import { checkCloseoutConsistencyGate } from "./closeout-consistency-gate.js";
 import type { DispatchAction, DispatchContext } from "./auto-dispatch.js";
 import {
   commitPendingMilestoneCloseoutChanges,
@@ -37,7 +38,8 @@ export async function isMilestoneCloseoutSettled(mid: string, basePath: string):
     if (isDbAvailable()) {
       const milestone = getMilestone(mid);
       if (milestone && isClosedStatus(milestone.status)) {
-        if (verifyExpectedArtifact("complete-milestone", mid, basePath)) {
+        const closeoutGate = checkCloseoutConsistencyGate(mid, { refreshFromDisk: true });
+        if (closeoutGate.ok && verifyExpectedArtifact("complete-milestone", mid, basePath)) {
           return true;
         }
       }

package/src/resources/extensions/gsd/pending-auto-start.ts CHANGED

@@ -14,7 +14,6 @@ export interface PendingAutoStartEntry {
   createdAt: number;
   readyRejectCount?: number;
   scope: MilestoneScope;
-  planBlockedRecoveryCount: number;
   r3bRecoveryCount: number;
 }
@@ -51,7 +50,6 @@ export function setPendingAutoStart(basePath: string, entry: PendingAutoStartInp
   const scope = scopeMilestone(ws, entry.milestoneId);
   pendingAutoStartMap.set(basePath, {
     createdAt: Date.now(),
-    planBlockedRecoveryCount: 0,
     r3bRecoveryCount: 0,
     ...entry,
     scope,

package/src/resources/extensions/gsd/post-unit-hooks.ts CHANGED

@@ -9,6 +9,7 @@ import type {
   HookDispatchResult,
   PreDispatchResult,
   HookStatusEntry,
+  PostUnitGateBlock,
 } from "./types.js";
 import { getOrCreateRegistry, resolveHookArtifactPath } from "./rule-registry.js";
@@ -33,10 +34,22 @@ export function isRetryPending(): boolean {
   return getOrCreateRegistry().isRetryPending();
 }
-export function consumeRetryTrigger(): { unitType: string; unitId: string; retryArtifact: string } | null {
+export function consumeRetryTrigger(): { unitType: string; unitId: string; retryArtifact?: string } | null {
   return getOrCreateRegistry().consumeRetryTrigger();
 }
+export function consumeHookFailure(): { hookName: string; unitType: string; unitId: string; reason: string } | null {
+  return getOrCreateRegistry().consumeHookFailure();
+}
+export function isGateBlockPending(): boolean {
+  return getOrCreateRegistry().isGateBlockPending();
+}
+export function consumeGateBlock(): PostUnitGateBlock | null {
+  return getOrCreateRegistry().consumeGateBlock();
+}
 export function resetHookState(): void {
   getOrCreateRegistry().resetState();
 }

package/src/resources/extensions/gsd/preferences-validation.ts CHANGED

@@ -29,6 +29,14 @@ const VALID_UOK_TURN_ACTIONS = new Set<"commit" | "snapshot" | "status-only">([
   "snapshot",
   "status-only",
 ]);
+const VALID_POST_UNIT_HOOK_CRITICALITIES = new Set(["advisory", "blocking"]);
+const VALID_POST_UNIT_HOOK_ON_BLOCK_ACTIONS = new Set([
+  "retry-unit",
+  "retry-task",
+  "queue-task",
+  "queue-slice",
+  "pause",
+]);
 export function validatePreferences(preferences: GSDPreferences): {
   preferences: GSDPreferences;
@@ -486,9 +494,37 @@ export function validatePreferences(preferences: GSDPreferences): {
       if (typeof hook.artifact === "string" && hook.artifact.trim()) {
         validHook.artifact = hook.artifact.trim();
       }
+      if (hook.criticality !== undefined) {
+        const criticality = typeof hook.criticality === "string" ? hook.criticality.trim() : "";
+        if (VALID_POST_UNIT_HOOK_CRITICALITIES.has(criticality)) {
+          validHook.criticality = criticality as PostUnitHookConfig["criticality"];
+        } else {
+          errors.push(`post_unit_hooks "${name}" invalid criticality: ${String(hook.criticality)}`);
+        }
+      }
       if (typeof hook.retry_on === "string" && hook.retry_on.trim()) {
         validHook.retry_on = hook.retry_on.trim();
       }
+      if (hook.on_block !== undefined) {
+        if (!hook.on_block || typeof hook.on_block !== "object") {
+          errors.push(`post_unit_hooks "${name}" on_block must be an object`);
+        } else {
+          const onBlock = hook.on_block as unknown as Record<string, unknown>;
+          const action = typeof onBlock.action === "string" ? onBlock.action.trim() : "";
+          if (!VALID_POST_UNIT_HOOK_ON_BLOCK_ACTIONS.has(action)) {
+            errors.push(`post_unit_hooks "${name}" invalid on_block action: ${String(onBlock.action)}`);
+          } else {
+            validHook.on_block = { action: action as NonNullable<PostUnitHookConfig["on_block"]>["action"] };
+            if (typeof onBlock.artifact === "string" && onBlock.artifact.trim()) {
+              validHook.on_block.artifact = onBlock.artifact.trim();
+            }
+          }
+        }
+      }
+      if (validHook.criticality === "blocking" && !validHook.artifact) {
+        errors.push(`post_unit_hooks "${name}" criticality blocking requires artifact`);
+        continue;
+      }
       if (typeof hook.agent === "string" && hook.agent.trim()) {
         validHook.agent = hook.agent.trim();
       }

package/src/resources/extensions/gsd/prompt-loader.ts CHANGED

@@ -33,6 +33,10 @@ function hasRequiredExtensionAssets(rootDir: string, exists: ExistsFn = existsSy
   );
 }
+function isSourceExtensionDir(moduleDir: string): boolean {
+  return moduleDir.replaceAll("\\", "/").endsWith("/src/resources/extensions/gsd");
+}
 export function resolveExtensionDirFromCandidates(
   moduleDir: string,
   agentGsdDir: string,
@@ -41,6 +45,10 @@ export function resolveExtensionDirFromCandidates(
   const moduleUsable = hasRequiredExtensionAssets(moduleDir, exists);
   const agentUsable = hasRequiredExtensionAssets(agentGsdDir, exists);
+  // Source checkouts must use their own prompt tree. Otherwise local tests and
+  // dev runs can silently render stale prompts from ~/.gsd/agent/extensions/gsd.
+  if (moduleUsable && isSourceExtensionDir(moduleDir)) return moduleDir;
   // Prefer the user-local extension tree when both are valid. This avoids
   // leaking npm/global-install paths into prompts on Windows.
   if (agentUsable) return agentGsdDir;

package/src/resources/extensions/gsd/prompts/forensics.md CHANGED

@@ -12,6 +12,8 @@ Debug GSD itself. Trace the symptom to root cause in current source and produce
 GSD extension source: `{{gsdSourceDir}}`
+{{toolingSection}}
 ### Source Map by Domain
 | Domain | Files |
@@ -101,7 +103,7 @@ Then **offer GitHub issue creation**: "Would you like me to create a GitHub issu
 **CRITICAL:** The `github_issues` tool targets only the current user's repository and has no `repo` parameter. Use `gh issue create --repo open-gsd/gsd-pi` via `bash`. Do NOT use the `github_issues` tool.
-If yes, create using the `bash` tool:
+If yes and `bash` is available, create using the `bash` tool:
 ```bash
 ISSUE_BODY_FILE="${TMPDIR:-${TEMP:-${TMP:-.}}}/gsd-forensic-issue.md"
@@ -142,6 +144,64 @@ TYPE_ID=$(gh api graphql -f query='{ repository(owner:"open-gsd",name:"gsd-pi")
 gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
 ```
+If `bash` is unavailable, do not attempt `bash`, `write`, or `github_issues` tool calls. Instead, provide exactly one paste-once shell script for the user to run locally and say that the live duplicate check / issue creation must be run by the user:
+```bash
+KEYWORDS="..."
+echo "Searching closed issues for possible duplicates..."
+gh issue list --repo open-gsd/gsd-pi --state closed --search "$KEYWORDS" --limit 20
+echo "Searching open PRs for possible fixes..."
+gh pr list --repo open-gsd/gsd-pi --state open --search "$KEYWORDS" --limit 10
+echo "Searching merged PRs for possible fixes..."
+gh pr list --repo open-gsd/gsd-pi --state merged --search "$KEYWORDS" --limit 10
+read -r -p "Review the duplicate search above. Continue filing a new issue? [y/N] " SHOULD_FILE
+case "$SHOULD_FILE" in
+  y|Y|yes|YES) ;;
+  *) echo "Issue filing aborted."; exit 0 ;;
+esac
+ISSUE_BODY_FILE="${TMPDIR:-${TEMP:-${TMP:-.}}}/gsd-forensic-issue.md"
+cat > "$ISSUE_BODY_FILE" << 'GSD_ISSUE_BODY'
+## Problem
+[1-2 sentence summary]
+## Root Cause
+[Specific file:line in GSD source, with code snippet showing the bug]
+## Expected Behavior
+[What the code should do instead — concrete fix suggestion]
+## Environment
+- GSD version: [from report]
+- Model: [from report]
+- Unit: [type/id that failed]
+## Reproduction Context
+[Phase, milestone, slice, what was happening when it failed]
+## Forensic Evidence
+[Key anomalies, error traces, relevant tool call sequences from the report]
+---
+*Auto-generated by `/gsd forensics`*
+GSD_ISSUE_BODY
+ISSUE_URL=$(gh issue create --repo open-gsd/gsd-pi \
+  --title "..." \
+  --label "auto-generated" \
+  --body-file "$ISSUE_BODY_FILE")
+rm -f "$ISSUE_BODY_FILE"
+ISSUE_NUM=$(echo "$ISSUE_URL" | grep -oE '[0-9]+$')
+ISSUE_ID=$(gh api graphql -f query='{ repository(owner:"open-gsd",name:"gsd-pi") { issue(number:'"$ISSUE_NUM"') { id } } }' --jq '.data.repository.issue.id')
+TYPE_ID=$(gh api graphql -f query='{ repository(owner:"open-gsd",name:"gsd-pi") { issueTypes(first:20) { nodes { id name } } } }' --jq '.data.repository.issueTypes.nodes[] | select(.name=="Bug") | .id')
+gh api graphql -f query='mutation { updateIssue(input:{id:"'"$ISSUE_ID"'",issueTypeId:"'"$TYPE_ID"'"}) { issue { number } } }'
+echo "$ISSUE_URL"
+```
 ### Redaction Rules (CRITICAL)
 Before creating the issue, you MUST:

package/src/resources/extensions/gsd/prompts/gate-evaluate.md CHANGED

@@ -8,6 +8,8 @@
 You are evaluating **quality gates in parallel** for this slice. Each gate is an independent question that must be answered before task execution begins. Use the `subagent` tool to dispatch all gate evaluations simultaneously.
+**Tool call format:** Call `subagent` with `tasks: [...]` as a **native JSON array** — one object per gate. Do NOT JSON.stringify the array into a string; the tool validates that `tasks` is an array, and a serialized string will be rejected with "must be array".
 ## Slice Plan Context
 {{slicePlanContent}}
@@ -20,7 +22,7 @@ You are evaluating **quality gates in parallel** for this slice. Each gate is an
 ## Execution Protocol
-1. **Dispatch all gates** using `subagent` in parallel mode. Each subagent prompt is provided below.
+1. **Dispatch all gates** using `subagent` in parallel mode. Call `subagent` with `tasks: [{ agent: "tester", task: "<prompt>" }, ...]` — one object per gate. Each subagent prompt is provided below.
    Pass `tasks` as a **JSON array**, not a string. Example shape:
    ```json

package/src/resources/extensions/gsd/prompts/parallel-research-slices.md CHANGED

@@ -12,9 +12,11 @@ You are dispatching parallel research agents for **{{sliceCount}} slices** in mi
 Dispatch ALL slices simultaneously using the `subagent` tool in **parallel mode**. Each subagent will independently research its slice and write a RESEARCH file.
+**Tool call format:** Call `subagent` with `tasks: [...]` as a **native JSON array** — one object per slice. Do NOT JSON.stringify the array into a string; the tool validates that `tasks` is an array, and a serialized string will be rejected with "must be array".
 ## Execution Protocol
-1. Call `subagent` with `tasks: [...]` containing one entry per slice below
+1. Call `subagent` with `tasks: [{ agent: "scout", task: "<prompt>" }, ...]` containing one entry per slice below
 2. Wait for ALL subagents to complete
 3. Verify each slice's RESEARCH file was written (check `.gsd/milestones/{{mid}}/slices/<slice-id>/`)
 4. If a subagent failed to write its RESEARCH file, retry it **once** individually

package/src/resources/extensions/gsd/prompts/plan-slice.md CHANGED

@@ -43,7 +43,7 @@ If slice research is inlined, trust its architectural findings, but verify every
 5. Define slice verification before tasks. Non-trivial slices need real tests or executable assertions; boundary contracts need contract-exercising checks. Tests must not read .gitignore/gitignored paths such as `.gsd/`, `.planning/`, or `.audits/`.
 6. Include Threat Surface (Q3), Requirement Impact (Q4), proof level, observability, integration closure, Failure Modes (Q5), Load Profile (Q6), and Negative Tests (Q7) only where applicable.
 7. Right-size tasks. Simple slices can be one task; split only when context, ownership, or verification boundaries justify it.
-8. Task `verify` commands must be safe, simple commands. Do not use shell pipes, redirects, semicolons, backticks, command substitution, output trimming, or grep regex alternation with `|`. If multiple checks are needed, create a small test file and run it with `node --test` or a package test script, or use separate simple commands joined only with `&&`.
+8. Task `verify` commands must be safe, simple commands. Do not use shell pipes, redirects, semicolons, backticks, command substitution, output trimming, or grep regex alternation with `|`. If multiple checks are needed, create a small test file and run it with `node --test` or a package test script, or use separate simple commands joined only with `&&`. For absence checks, verify a pattern does not exist with `! grep -q 'pattern' file` or `! rg -q 'pattern' file`; do not use `grep -c` or `rg -c` to assert zero matches because count commands exit 1 when they find zero matches, and the verification gate treats that as failure.
 9. Each task needs the exact `gsd_plan_slice.tasks[]` shape: `taskId`, `title`, `description`, `estimate`, `files`, `verify`, `inputs`, `expectedOutput`, and optional `observabilityImpact`. `description` should contain the Why / Do / Done-when narrative. `files`, `inputs`, and `expectedOutput` must be JSON arrays of strings, even when there is only one path (for example, `"inputs": ["src/index.ts"]`, never `"inputs": "src/index.ts"`). Use paths relative to `{{workingDirectory}}`; do not put absolute paths to the original checkout or any directory outside `{{workingDirectory}}` in `files`, `inputs`, `expectedOutput`, or verification commands. **`expectedOutput` must only list files the task actually creates or overwrites on disk.** Do NOT include files the task merely reads, verifies, or tests — those belong only in `inputs`. If a task is a pure verification or test task that produces no new files, `expectedOutput` may be `[]` or limited to test-result artifacts (e.g. a log or assertion output). A file that does not yet exist on disk and is needed as an `input` must be produced by an earlier task's `expectedOutput` — if no prior task creates it, add a task before this one that does.
 10. Persist with `gsd_plan_slice` using `milestoneId`, `sliceId`, `goal`, optional `successCriteria`/`proofLevel`/`integrationClosure`/`observabilityImpact`, and `tasks`. `gsd_plan_slice` handles task persistence transactionally and renders `{{outputPath}}` plus task plans; do not call `gsd_plan_task`. The DB-backed tool is the canonical write path. Do **not** rely on direct `PLAN.md` writes as the source of truth.
 11. Self-audit before finishing: goal/demo closure, requirement coverage, deliverable coverage audit (cross-check every file listed in CONTEXT.md `## Scope` / `### In Scope` against task `files` or `expectedOutput`), locked decisions, concrete paths, dependency order, wiring, scope size, proof truthfulness, feature completeness, and quality gates. Quality gates: non-trivial slices/tasks include specific Q3-Q7 coverage where applicable.

package/src/resources/extensions/gsd/prompts/reactive-execute.md CHANGED

@@ -10,6 +10,8 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
 **Critical rule:** Use the `subagent` tool in **parallel mode** to dispatch all ready tasks simultaneously. Each subagent gets a task-specific execution packet (task plan + dependency carry-forward + completion contract) and is responsible for its own implementation, verification, task summary, and completion tool calls. The parent batch agent orchestrates, verifies, and records failures only when a dispatched task failed before it could leave its own summary behind.
+**Tool call format:** Call `subagent` with `tasks: [...]` as a **native JSON array** — one object per ready task. Do NOT JSON.stringify the array into a string; the tool validates that `tasks` is an array, and a serialized string will be rejected with "must be array".
 ## Task Dependency Graph
 {{graphContext}}
@@ -22,7 +24,7 @@ You are executing **multiple tasks in parallel** for this slice. The task graph
 ## Execution Protocol
-1. **Dispatch all ready tasks** using `subagent` in parallel mode. Each subagent prompt is provided below.
+1. **Dispatch all ready tasks** using `subagent` in parallel mode. Call `subagent` with `tasks: [{ agent: "worker", task: "<prompt>" }, ...]` — one object per ready task. Each subagent prompt is provided below.
 2. **Wait for all subagents** to complete.
 3. **Verify each dispatched task's outputs** — check that expected files were created/modified, that verification commands pass where applicable, and that each task wrote its own `T##-SUMMARY.md`.
 4. **Do not rewrite successful task summaries or duplicate completion tool calls.** Treat a subagent-written summary as authoritative for that task.

package/src/resources/extensions/gsd/prompts/run-uat.md CHANGED

@@ -63,35 +63,39 @@ After running all checks, compute the **overall verdict**:
 - `FAIL` — one or more automatable checks failed
 - `PARTIAL` — one or more automatable checks were skipped or returned inconclusive results (not the same as `NEEDS-HUMAN` — use PARTIAL only when the agent itself could not determine pass/fail for a check it was supposed to automate)
-Call `gsd_summary_save` with `milestone_id: "{{milestoneId}}"`, `slice_id: "{{sliceId}}"`, `artifact_type: "ASSESSMENT"`, and the full UAT result markdown as `content`. The tool computes the assessment path, persists to DB/disk, and saves the aggregate UAT gate. The content should follow this logical shape:
+Call `gsd_uat_result_save` once after all checks are complete. The tool computes the assessment path, persists to DB/disk, saves attempt history, and saves the aggregate UAT gate.
-```markdown
----
-sliceId: {{sliceId}}
-uatType: {{uatType}}
-verdict: PASS | FAIL | PARTIAL
-date: <ISO 8601 timestamp>
----
-# UAT Result — {{sliceId}}
-## Checks
+Pass these top-level fields:
-| Check | Mode | Result | Notes |
-|-------|------|--------|-------|
-| <check description> | artifact / runtime / human-follow-up | PASS / FAIL / NEEDS-HUMAN | <observed output, evidence, or reason> |
-## Overall Verdict
+```ts
+milestoneId: "{{milestoneId}}",
+sliceId: "{{sliceId}}",
+uatType: "{{uatType}}",
+verdict: "PASS" | "FAIL" | "PARTIAL",
+notes: "<one sentence overall verdict rationale>",
+```
-<PASS / FAIL / PARTIAL> — <one sentence summary>
+Use this canonical `presentation` object in the save call so the audit can verify the run-uat tool surface without retrying missing fields one by one. Keep `toolPresentationPlanId` as `{{toolPresentationPlanId}}`. If browser tools were actually presented for this run, add those concrete browser tool names to `presentedTools`; otherwise reuse this object exactly:
-## Notes
+```json
+{{canonicalPresentation}}
+```
-<any additional context, errors encountered, screenshots/logs gathered, or manual follow-up still required>
+Pass `checks` with this logical shape:
+```ts
+checks: [{
+  id: "<stable check id>",
+  description: "<check description from the UAT file>",
+  mode: "artifact" | "runtime" | "browser" | "human-follow-up",
+  result: "PASS" | "FAIL" | "NEEDS-HUMAN",
+  evidence: [{ kind: "gsd_uat_exec", ref: "<evidence id>" }],
+  notes: "<observed output, evidence, reason, or manual follow-up>",
+}]
 ```
 ---
-**You MUST call `gsd_summary_save` with `artifact_type: "ASSESSMENT"` and the UAT result content before finishing. Do not write the assessment file directly.**
+**You MUST call `gsd_uat_result_save` before finishing. Do not write the assessment file directly, and do not call `gsd_summary_save` as a substitute.**
 When done, say: "UAT {{sliceId}} complete."

package/src/resources/extensions/gsd/prompts/validate-milestone.md CHANGED

@@ -33,7 +33,7 @@ Prompt: "Review milestone {{milestoneId}} requirements coverage. Working directo
 Prompt: "Review milestone {{milestoneId}} cross-slice integration. Working directory: {{workingDirectory}}. Read `{{roadmapPath}}` and find the boundary map (produces/consumes contracts). For each boundary, confirm producer SUMMARY produced the artifact and consumer SUMMARY consumed it. Output table: Boundary | Producer Summary | Consumer Summary | Status. End with one-line verdict: PASS if all boundaries honored, NEEDS-ATTENTION if any gaps."
 **Reviewer C - Assessment & Acceptance Criteria**
-Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` for criteria. Check slice SUMMARY and ASSESSMENT files under `.gsd/milestones/{{milestoneId}}/slices/`; UAT files are specs, not evidence. Verify each criterion maps to passing evidence. Then review inlined milestone verification classes. For each non-empty planned class, output table: Class | Planned Check | Evidence | Verdict. Use the exact class names `Contract`, `Integration`, `Operational`, and `UAT` whenever those classes are present. If a planned browser/UAT class has no ASSESSMENT with browser/runtime actions and assertions, return NEEDS-ATTENTION. If no verification classes were planned, say that explicitly. Output sections `Acceptance Criteria` with checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with one-line verdict: PASS if all criteria and classes are covered by evidence, NEEDS-ATTENTION if gaps exist."
+Prompt: "Review milestone {{milestoneId}} assessment evidence and acceptance criteria. Working directory: {{workingDirectory}}. Read `.gsd/milestones/{{milestoneId}}/{{milestoneId}}-CONTEXT.md` for criteria. Check slice SUMMARY and ASSESSMENT files under `.gsd/milestones/{{milestoneId}}/slices/`; UAT files are specs, not evidence. Verify each criterion maps to passing evidence. Then review the inlined `Verification Classes (from planning)` table. For every planned row in that table, output a `Verification Classes` table with columns `Class | Planned Check | Evidence | Verdict`. Preserve every planned non-empty class row; do not summarize, rename, combine, or omit planned classes. The first cell of each row must be exactly `Contract`, `Integration`, `Operational`, or `UAT` when that class is present in planning. If a planned class lacks evidence, still include its canonical row and mark the verdict NEEDS-ATTENTION or FAIL. If a planned browser/UAT class has no ASSESSMENT with browser/runtime actions and assertions, return NEEDS-ATTENTION. If no verification classes were planned, say that explicitly. Output sections `Acceptance Criteria` with checklist `[ ] Criterion | Evidence`, and `Verification Classes` with the table. End with one-line verdict: PASS if all criteria and classes are covered by evidence, NEEDS-ATTENTION if gaps exist."
 ### Step 2 - Synthesize Findings
@@ -71,8 +71,8 @@ reviewers: 3
 <if verdict is not pass: specific actions required>
 ```
-Call `gsd_validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If you include verification-class analysis, pass it in `verificationClasses`.
-Extract the `Verification Classes` subsection from Reviewer C and pass it verbatim in `verificationClasses` so the persisted validation output uses the canonical class names `Contract`, `Integration`, `Operational`, and `UAT`.
+Call `gsd_validate_milestone` with the camelCase fields `milestoneId`, `verdict`, `remediationRound`, `successCriteriaChecklist`, `sliceDeliveryAudit`, `crossSliceIntegration`, `requirementCoverage`, `verdictRationale`, and `remediationPlan` when needed. If planning included verification classes, pass a complete canonical table in `verificationClasses`.
+Set `verificationClasses` to the `Verification Classes` subsection from Reviewer C. It must include one canonical row for every non-empty planned class from `Verification Classes (from planning)`: `Contract`, `Integration`, `Operational`, and/or `UAT`. If Reviewer C omitted a planned class, reconstruct the missing row from the planning table, set Evidence to the gap, and use NEEDS-ATTENTION or FAIL. Do not call `gsd_validate_milestone` with a partial `verificationClasses` table.
 **DB access safety:** Do NOT query `.gsd/gsd.db` directly via `sqlite3` or `node -e require('better-sqlite3')` - the engine owns the WAL connection. Use `gsd_milestone_status` for milestone and slice state. Data is already inlined or available via `gsd_*` tools. Direct DB access risks WAL corruption and bypasses validation.

package/src/resources/extensions/gsd/recovery-classification.ts CHANGED

@@ -6,7 +6,9 @@ import { ReconciliationFailedError } from "./state-reconciliation.js";
 export type RecoveryFailureKind =
   | "tool-schema"
+  | "tool-contract"
   | "deterministic-policy"
+  | "lifecycle-progression"
   | "stale-worker"
   | "worktree-invalid"
   | "verification-drift"
@@ -52,6 +54,14 @@ export function classifyFailure(input: RecoveryClassificationInput): RecoveryCla
         exitReason: "tool-schema",
         remediation: "Fix the Unit Tool Contract or tool schema before retrying.",
       };
+    case "tool-contract":
+      return {
+        failureKind,
+        action: "stop",
+        reason: `Tool Contract failure${unitSuffix(input)}: ${message}`,
+        exitReason: "tool-contract",
+        remediation: "Fix the Unit Tool Contract or prompt so the Unit is only asked to use tools owned by its phase.",
+      };
     case "deterministic-policy":
       return {
         failureKind,
@@ -60,6 +70,14 @@ export function classifyFailure(input: RecoveryClassificationInput): RecoveryCla
         exitReason: "deterministic-policy",
         remediation: "Resolve the policy blocker; retrying the same Unit will repeat the failure.",
       };
+    case "lifecycle-progression":
+      return {
+        failureKind,
+        action: "stop",
+        reason: `Lifecycle progression failure${unitSuffix(input)}: ${message}`,
+        exitReason: "lifecycle-progression",
+        remediation: "Route to the required owning Unit or restore the missing artifact before advancing lifecycle state.",
+      };
     case "stale-worker":
       return {
         failureKind,
@@ -118,6 +136,8 @@ export function classifyFailure(input: RecoveryClassificationInput): RecoveryCla
 }
 function inferFailureKind(message: string): RecoveryFailureKind {
+  if (/tool contract|auto-unit tool scope|phase-boundary gate|not permitted.*own/i.test(message)) return "tool-contract";
+  if (/lifecycle progression|required artifact|missing .*assessment|missing .*closeout|cannot legally (?:advance|progress)/i.test(message)) return "lifecycle-progression";
   if (/schema|invalid.*tool|tool.*invalid|enum/i.test(message)) return "tool-schema";
   if (/deterministic policy|policy rejection|write gate|blocked by policy/i.test(message)) return "deterministic-policy";
   if (/stale worker|stale lock|worker.*stale/i.test(message)) return "stale-worker";