npm - @opengsd/gsd-pi - Versions diffs - 1.1.1-dev.a5a2de8 → 1.1.1-dev.b2556262 - Mend

@opengsd/gsd-pi 1.1.1-dev.a5a2de8 → 1.1.1-dev.b2556262

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (325) hide show

package/dist/resources/extensions/gsd/preferences-models.js CHANGED Viewed

@@ -20,82 +20,103 @@ export function resolveModelForUnit(unitType) {
     return resolved?.primary;
 }
 /**
- * Resolve model and fallbacks for a given auto-mode unit type.
- * Returns the primary model and ordered fallbacks, or undefined if not configured.
+ * Ordered phase-bucket chain a unit type resolves against, most-specific
+ * first. The first chain entry with a configured value wins; later entries
+ * are siblings the unit falls back to (e.g. `discuss → planning`).
  *
- * Supports both legacy string format and extended object format:
- * - Legacy: `planning: claude-opus-4-6`
- * - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
+ * Single source of truth for the unit-type → phase mapping, shared by model
+ * resolution (`resolveModelWithFallbacksForUnit`) and thinking resolution
+ * (`resolveThinkingLevelForUnit`) so the two never drift (ADR-026).
  */
-export function resolveModelWithFallbacksForUnit(unitType) {
-    const prefs = loadEffectiveGSDPreferences(undefined, { availableModelIds: [] });
-    const models = prefs?.preferences?.models;
-    if (!models)
-        return undefined;
-    const m = models;
-    let phaseConfig;
+export function phaseChainForUnit(unitType) {
     switch (unitType) {
         case "research-milestone":
         case "research-slice":
-            phaseConfig = m.research;
-            break;
+        // Deep-mode project research orchestrator. Reads PROJECT.md / REQUIREMENTS.md
+        // and fans out research subagents. Routes to the research bucket.
+        case "research-project":
+            return ["research"];
         case "plan-milestone":
         case "plan-slice":
         case "refine-slice":
         case "replan-slice":
-            phaseConfig = m.planning;
-            break;
+            return ["planning"];
+        // Deep-mode project-level discussion units route to the same model bucket
+        // as milestone-level discussion (interactive interview style). Workflow
+        // preferences and research-decision are tiny ask_user_questions style units
+        // that share the discuss bucket because they are conversational. All fall
+        // back to planning when no `discuss` bucket is set.
         case "discuss-milestone":
         case "discuss-slice":
-        // Deep-mode project-level discussion units route to the same model
-        // bucket as milestone-level discussion (interactive interview style).
         case "discuss-project":
         case "discuss-requirements":
-        // Workflow preferences and research-decision are tiny ask_user_questions
-        // style units; they share the discuss bucket because they are
-        // conversational rather than research/execution. Falling back to planning
-        // when no `discuss` bucket is set keeps parity with the milestone units.
         case "workflow-preferences":
         case "research-decision":
-            phaseConfig = m.discuss ?? m.planning;
-            break;
-        // Deep-mode project research orchestrator. Reads PROJECT.md / REQUIREMENTS.md
-        // and fans out research subagents. Routes to the research bucket so it
-        // gets the research-tier model when one is configured.
-        case "research-project":
-            phaseConfig = m.research;
-            break;
+            return ["discuss", "planning"];
         case "execute-task":
         case "reactive-execute":
-            phaseConfig = m.execution;
-            break;
+            return ["execution"];
         case "execute-task-simple":
-            phaseConfig = m.execution_simple ?? m.execution;
-            break;
+            return ["execution_simple", "execution"];
         case "complete-slice":
         case "complete-milestone":
         case "worktree-merge":
-            phaseConfig = m.completion;
-            break;
+            return ["completion"];
         case "run-uat":
-            phaseConfig = m.uat ?? m.completion;
-            break;
+            return ["uat", "completion"];
         case "reassess-roadmap":
         case "rewrite-docs":
         case "gate-evaluate":
         case "validate-milestone":
-            phaseConfig = m.validation ?? m.planning;
-            break;
+            return ["validation", "planning"];
         default:
             // Subagent unit types (e.g., "subagent", "subagent/scout")
             if (unitType === "subagent" || unitType.startsWith("subagent/")) {
-                phaseConfig = m.subagent;
-                break;
+                return ["subagent"];
             }
             return undefined;
     }
-    if (!phaseConfig)
+}
+/**
+ * Find the phase bucket whose `models` entry wins the chain for a unit, plus
+ * that entry. Returns undefined when no phase in the chain is configured.
+ */
+function resolveWinningPhase(models, chain) {
+    if (!models)
         return undefined;
+    for (const key of chain) {
+        const config = models[key];
+        // Falsy check (not `!= null`) so an empty-string model is treated as
+        // unconfigured, like the pre-refactor `if (!phaseConfig)` bail-out; unlike
+        // the old `??` fallbacks, the chain then falls through to the next sibling.
+        if (!config)
+            continue;
+        // An object entry only "wins" if it provides a usable model. A model-less
+        // object (e.g. `{ provider: x }`, or `{}` left after stripping an invalid
+        // `thinking`) must not shadow sibling fallback or yield `{ primary: undefined }`.
+        if (typeof config === "object" && !config.model)
+            continue;
+        return { phase: key, config };
+    }
+    return undefined;
+}
+/**
+ * Resolve model and fallbacks for a given auto-mode unit type.
+ * Returns the primary model and ordered fallbacks, or undefined if not configured.
+ *
+ * Supports both legacy string format and extended object format:
+ * - Legacy: `planning: claude-opus-4-6`
+ * - Extended: `planning: { model: claude-opus-4-6, fallbacks: [glm-5, minimax-m2.5] }`
+ */
+export function resolveModelWithFallbacksForUnit(unitType) {
+    const prefs = loadEffectiveGSDPreferences(undefined, { availableModelIds: [] });
+    const chain = phaseChainForUnit(unitType);
+    if (!chain)
+        return undefined;
+    const winner = resolveWinningPhase(prefs?.preferences?.models, chain);
+    if (!winner)
+        return undefined;
+    const phaseConfig = winner.config;
     // Normalize: string -> { model, fallbacks: [] }
     if (typeof phaseConfig === "string") {
         return { primary: phaseConfig, fallbacks: [] };
@@ -110,6 +131,52 @@ export function resolveModelWithFallbacksForUnit(unitType) {
         fallbacks: phaseConfig.fallbacks ?? [],
     };
 }
+/**
+ * Resolve the explicitly configured reasoning effort for a unit type (ADR-026).
+ *
+ * Thinking travels with the model. The chain is walked most-specific-first up to
+ * and including the phase whose model won; at each level inline
+ * `models.<phase>.thinking` is preferred, then the same phase's `thinking` block
+ * entry. This means:
+ * - a more-specific block key (`thinking.execution_simple`) surfaces even when
+ *   the model only resolves on a less-specific sibling (`models.execution`);
+ * - inline thinking is honored even on a model-less `models.<phase>` entry
+ *   (e.g. `{ thinking: "high" }` with no `model`);
+ * - a unit that claimed its own model bucket never borrows a *less*-specific
+ *   sibling's thinking (the walk stops at the winning phase).
+ * When no model is configured anywhere in the chain, the walk spans the full
+ * chain, so inline thinking or a `thinking` block entry on any phase in the
+ * chain can still resolve.
+ *
+ * Returns undefined when nothing explicit is configured — the dispatch path
+ * then falls back to the session/default level and applies the code-writing
+ * floor. Session level, defaults, the floor, and capability clamping are NOT
+ * applied here.
+ */
+export function resolveThinkingLevelForUnit(unitType) {
+    const prefs = loadEffectiveGSDPreferences(undefined, { availableModelIds: [] })?.preferences;
+    if (!prefs)
+        return undefined;
+    const chain = phaseChainForUnit(unitType);
+    if (!chain)
+        return undefined;
+    const models = prefs.models;
+    const block = prefs.thinking;
+    // Walk most-specific-first, up to and including the winning model phase (or
+    // the full chain when no model is configured), checking inline then block.
+    const winner = resolveWinningPhase(models, chain);
+    const limit = winner ? chain.indexOf(winner.phase) + 1 : chain.length;
+    for (let i = 0; i < limit; i++) {
+        const key = chain[i];
+        const entry = models?.[key];
+        if (typeof entry === "object" && entry?.thinking)
+            return entry.thinking; // inline (incl. model-less)
+        const blockLevel = block?.[key];
+        if (blockLevel)
+            return blockLevel; // block
+    }
+    return undefined;
+}
 /**
  * Resolve the default session model from GSD preferences.
  *
@@ -339,6 +406,7 @@ export function resolveAutoSupervisorConfig() {
         soft_timeout_minutes: configured.soft_timeout_minutes ?? 20,
         idle_timeout_minutes: configured.idle_timeout_minutes ?? 10,
         hard_timeout_minutes: configured.hard_timeout_minutes ?? 30,
+        stalled_tool_timeout_minutes: configured.stalled_tool_timeout_minutes ?? 5,
         ...(configured.model ? { model: configured.model } : {}),
     };
 }

package/dist/resources/extensions/gsd/preferences-types.js CHANGED Viewed

@@ -46,6 +46,7 @@ export const KNOWN_PREFERENCE_KEYS = new Set([
     "skill_rules",
     "custom_instructions",
     "models",
+    "thinking",
     "skill_discovery",
     "skill_staleness_days",
     "auto_supervisor",
@@ -133,6 +134,18 @@ export const KNOWN_UNIT_LABELS = [
     "research-decision", "research-project",
 ];
 export const SKILL_ACTIONS = new Set(["use", "prefer", "avoid"]);
+/** The nine model-routing phase buckets. */
+export const GSD_MODEL_PHASE_KEYS = [
+    "research",
+    "planning",
+    "discuss",
+    "execution",
+    "execution_simple",
+    "completion",
+    "validation",
+    "subagent",
+    "uat",
+];
 /**
  * Format a skill reference for the system prompt.
  * If resolved, shows the path so the agent knows exactly where to read.

package/dist/resources/extensions/gsd/preferences-validation.js CHANGED Viewed

@@ -8,8 +8,18 @@
 import { isAbsolute } from "node:path";
 import { VALID_BRANCH_NAME } from "./git-service.js";
 import { normalizeStringArray } from "../shared/format-utils.js";
-import { KNOWN_PREFERENCE_KEYS, KNOWN_UNIT_LABELS, SKILL_ACTIONS, } from "./preferences-types.js";
+import { getGateIdsForTurn } from "./gate-registry.js";
+import { KNOWN_PREFERENCE_KEYS, KNOWN_UNIT_LABELS, GSD_MODEL_PHASE_KEYS, SKILL_ACTIONS, } from "./preferences-types.js";
 const VALID_TOKEN_PROFILES = new Set(["budget", "balanced", "quality", "burn-max"]);
+const VALID_THINKING_LEVELS = new Set([
+    "off",
+    "minimal",
+    "low",
+    "medium",
+    "high",
+    "xhigh",
+]);
+const KNOWN_MODEL_PHASE_KEYS = new Set(GSD_MODEL_PHASE_KEYS);
 const VALID_UOK_TURN_ACTIONS = new Set([
     "commit",
     "snapshot",
@@ -23,6 +33,7 @@ const VALID_POST_UNIT_HOOK_ON_BLOCK_ACTIONS = new Set([
     "queue-slice",
     "pause",
 ]);
+const VALID_GATE_EVALUATE_SLICE_GATES = new Set(getGateIdsForTurn("gate-evaluate"));
 export function validatePreferences(preferences) {
     const errors = [];
     const warnings = [];
@@ -375,12 +386,60 @@ export function validatePreferences(preferences) {
     // ─── Models ─────────────────────────────────────────────────────────
     if (preferences.models !== undefined) {
         if (preferences.models && typeof preferences.models === "object") {
-            validated.models = preferences.models;
+            // Static check for inline per-phase thinking (ADR-026). The resolved
+            // model isn't known until dispatch, so capability is clamped there; here
+            // we warn on illegal level strings AND strip them, so a typo can't reach
+            // resolveThinkingLevelForUnit and be treated as explicit configuration.
+            const sanitizedModels = {};
+            for (const [phase, entry] of Object.entries(preferences.models)) {
+                if (entry && typeof entry === "object" && "thinking" in entry) {
+                    const level = entry.thinking;
+                    if (level !== undefined && !VALID_THINKING_LEVELS.has(level)) {
+                        warnings.push(`models.${phase}.thinking "${String(level)}" is not a valid thinking level ` +
+                            `(off, minimal, low, medium, high, xhigh) — ignored`);
+                        const { thinking: _ignored, ...rest } = entry;
+                        // If stripping the bad thinking leaves no usable model, drop the
+                        // phase entirely rather than storing a hollow `{}` / `{ provider }`
+                        // entry that resolveWinningPhase would otherwise treat as configured.
+                        if (rest.model) {
+                            sanitizedModels[phase] = rest;
+                        }
+                        continue;
+                    }
+                }
+                sanitizedModels[phase] = entry;
+            }
+            validated.models = sanitizedModels;
         }
         else {
             errors.push("models must be an object");
         }
     }
+    // ─── Thinking (separate per-phase block, ADR-026) ───────────────────
+    if (preferences.thinking !== undefined) {
+        if (preferences.thinking && typeof preferences.thinking === "object" && !Array.isArray(preferences.thinking)) {
+            const validatedThinking = {};
+            for (const [phase, level] of Object.entries(preferences.thinking)) {
+                if (!KNOWN_MODEL_PHASE_KEYS.has(phase)) {
+                    warnings.push(`unknown thinking phase "${phase}" — must be one of: ` +
+                        `${[...KNOWN_MODEL_PHASE_KEYS].join(", ")} — ignored`);
+                    continue;
+                }
+                if (!VALID_THINKING_LEVELS.has(level)) {
+                    warnings.push(`thinking.${phase} "${String(level)}" is not a valid thinking level ` +
+                        `(off, minimal, low, medium, high, xhigh) — ignored`);
+                    continue;
+                }
+                validatedThinking[phase] = level;
+            }
+            if (Object.keys(validatedThinking).length > 0) {
+                validated.thinking = validatedThinking;
+            }
+        }
+        else {
+            errors.push("thinking must be an object");
+        }
+    }
     // ─── Auto Supervisor ────────────────────────────────────────────────
     if (preferences.auto_supervisor !== undefined) {
         if (preferences.auto_supervisor && typeof preferences.auto_supervisor === "object") {
@@ -931,7 +990,13 @@ export function validatePreferences(preferences) {
             }
             if (ge.slice_gates !== undefined) {
                 if (Array.isArray(ge.slice_gates) && ge.slice_gates.every((g) => typeof g === "string")) {
-                    validGe.slice_gates = ge.slice_gates;
+                    const invalid = ge.slice_gates.filter((g) => !VALID_GATE_EVALUATE_SLICE_GATES.has(g));
+                    if (invalid.length === 0) {
+                        validGe.slice_gates = ge.slice_gates;
+                    }
+                    else {
+                        errors.push(`gate_evaluation.slice_gates must contain only gate-evaluate slice gates: ${[...VALID_GATE_EVALUATE_SLICE_GATES].join(", ")}`);
+                    }
                 }
                 else {
                     errors.push("gate_evaluation.slice_gates must be an array of strings");

package/dist/resources/extensions/gsd/preferences.js CHANGED Viewed

@@ -37,7 +37,7 @@ export function resolveSkillStalenessDays(basePath) {
     return prefs?.preferences.skill_staleness_days ?? 60;
 }
 // ─── Re-exports: models ─────────────────────────────────────────────────────
-export { resolveModelForUnit, resolveModelWithFallbacksForUnit, getNextFallbackModel, isTransientNetworkError, validateModelId, updatePreferencesModels, resolveDynamicRoutingConfig, resolveAutoSupervisorConfig, resolveProfileDefaults, getProfileTierMap, resolveEffectiveProfile, resolveInlineLevel, resolveContextSelection, resolveSearchProviderFromPreferences, resolveDisabledModelProvidersFromPreferences, } from "./preferences-models.js";
+export { resolveModelForUnit, resolveModelWithFallbacksForUnit, resolveThinkingLevelForUnit, phaseChainForUnit, getNextFallbackModel, isTransientNetworkError, validateModelId, updatePreferencesModels, resolveDynamicRoutingConfig, resolveAutoSupervisorConfig, resolveProfileDefaults, getProfileTierMap, resolveEffectiveProfile, resolveInlineLevel, resolveContextSelection, resolveSearchProviderFromPreferences, resolveDisabledModelProvidersFromPreferences, } from "./preferences-models.js";
 // ─── Re-exports: MCP ────────────────────────────────────────────────────────
 export { resolveModelMcpConfig } from "./preferences-mcp.js";
 // ─── Path Constants & Getters ───────────────────────────────────────────────
@@ -330,6 +330,9 @@ function mergePreferences(base, override) {
         skill_rules: [...(base.skill_rules ?? []), ...(override.skill_rules ?? [])],
         custom_instructions: mergeStringLists(base.custom_instructions, override.custom_instructions),
         models: { ...(base.models ?? {}), ...(override.models ?? {}) },
+        thinking: (base.thinking || override.thinking)
+            ? { ...(base.thinking ?? {}), ...(override.thinking ?? {}) }
+            : undefined,
         skill_discovery: override.skill_discovery ?? base.skill_discovery,
         skill_staleness_days: override.skill_staleness_days ?? base.skill_staleness_days,
         auto_supervisor: { ...(base.auto_supervisor ?? {}), ...(override.auto_supervisor ?? {}) },

package/dist/resources/extensions/gsd/prompts/gate-evaluate.md CHANGED Viewed

@@ -38,7 +38,7 @@ You are evaluating **quality gates in parallel** for this slice. Each gate is an
 3. **Verify each gate wrote its result** by checking that `gsd_save_gate_result` was called for each gate ID.
    - Call it **directly** — do **not** use `ToolSearch` (it is not available in GSD).
    - Inside Claude Code use the active MCP-scoped workflow name for `gsd_save_gate_result`; otherwise use `gsd_save_gate_result`.
-   - Always pass all required fields (camelCase): `milestoneId`, `sliceId`, `gateId`, `verdict`, `rationale`. Never call with an empty `{}` object.
+   - Always pass all required fields (camelCase): `milestoneId`, `sliceId`, `gateId`, `verdict`, `rationale`, and `findings` (empty string if none). Never call with an empty `{}` object.
 4. **Report the batch outcome** — which gates passed, which flagged concerns, and which were omitted as not applicable.
 Gate agents may return `verdict: "omitted"` if the gate question is not applicable to this slice (e.g., no auth surface for Q3, no existing requirements touched for Q4). This is expected for simple slices.

package/dist/resources/extensions/gsd/prompts/plan-milestone.md CHANGED Viewed

@@ -14,7 +14,7 @@ All relevant context is preloaded below. Start immediately without re-reading th
 ## Already Planned? Soft Brake
-If `{{outputPath}}` exists with at least one slice line (e.g. `- [ ] **S01:`) AND `gsd_query` reports slice rows for this milestone, a prior `gsd_plan_milestone` call already persisted the plan. Do **not** re-call it; its UPSERT could overwrite existing planning. Skip to the ready phrase.
+If `{{outputPath}}` exists with at least one slice line (e.g. `- [ ] **S01:`) AND `gsd_milestone_status` reports slice rows for this milestone, a prior `gsd_plan_milestone` call already persisted the plan. Do **not** re-call it; its UPSERT could overwrite existing planning. Skip to the ready phrase.
 If only the file or only DB rows exist, the prior write was incomplete; plan normally so the tool reconciles both.

package/dist/resources/extensions/gsd/prompts/plan-slice.md CHANGED Viewed

@@ -44,7 +44,7 @@ If slice research is inlined, trust its architectural findings, but verify every
 6. Include Threat Surface (Q3), Requirement Impact (Q4), proof level, observability, integration closure, Failure Modes (Q5), Load Profile (Q6), and Negative Tests (Q7) only where applicable.
 7. Right-size tasks. Simple slices can be one task; split only when context, ownership, or verification boundaries justify it.
 8. Task `verify` commands must be safe, simple commands. Do not use shell pipes, redirects, semicolons, backticks, command substitution, output trimming, or grep regex alternation with `|`. If multiple checks are needed, create a small test file and run it with `node --test` or a package test script, or use separate simple commands joined only with `&&`. For absence checks, verify a pattern does not exist with `! grep -q 'pattern' file` or `! rg -q 'pattern' file`; do not use `grep -c` or `rg -c` to assert zero matches because count commands exit 1 when they find zero matches, and the verification gate treats that as failure.
-9. Each task needs the exact `gsd_plan_slice.tasks[]` shape: `taskId`, `title`, `description`, `estimate`, `files`, `verify`, `inputs`, `expectedOutput`, and optional `observabilityImpact`. `description` should contain the Why / Do / Done-when narrative. `files`, `inputs`, and `expectedOutput` must be JSON arrays of strings, even when there is only one path (for example, `"inputs": ["src/index.ts"]`, never `"inputs": "src/index.ts"`). Use paths relative to `{{workingDirectory}}`; do not put absolute paths to the original checkout or any directory outside `{{workingDirectory}}` in `files`, `inputs`, `expectedOutput`, or verification commands. **`expectedOutput` must only list files the task actually creates or overwrites on disk.** Do NOT include files the task merely reads, verifies, or tests — those belong only in `inputs`. If a task is a pure verification or test task that produces no new files, `expectedOutput` may be `[]` or limited to test-result artifacts (e.g. a log or assertion output). A file that does not yet exist on disk and is needed as an `input` must be produced by an earlier task's `expectedOutput` — if no prior task creates it, add a task before this one that does.
+9. Each task needs the exact `gsd_plan_slice.tasks[]` shape: `taskId`, `title`, `description`, `estimate`, `files`, `verify`, `inputs`, `expectedOutput`, and optional `observabilityImpact`. `description` should contain the Why / Do / Done-when narrative. `files`, `inputs`, and `expectedOutput` must be JSON arrays of strings, even when there is only one path (for example, `"inputs": ["src/index.ts"]`, never `"inputs": "src/index.ts"`). Use paths relative to `{{workingDirectory}}`; do not put absolute paths to the original checkout or any directory outside `{{workingDirectory}}` in `files`, `inputs`, `expectedOutput`, or verification commands. **`expectedOutput` must only list files the task actually creates or overwrites on disk.** Do NOT include files the task merely reads, verifies, tests, or describes — those belong in `inputs`, `verify`, `description`, or slice success criteria. If a task is a pure verification or test task that produces no new files, `expectedOutput` must be `[]`; if it writes a test-result log or assertion output file, list only that concrete file path. A file that does not yet exist on disk and is needed as an `input` must be produced by an earlier task's `expectedOutput` — if no prior task creates it, add a task before this one that does.
 10. Persist with `gsd_plan_slice` using `milestoneId`, `sliceId`, `goal`, optional `successCriteria`/`proofLevel`/`integrationClosure`/`observabilityImpact`, and `tasks`. `gsd_plan_slice` handles task persistence transactionally and renders `{{outputPath}}` plus task plans; do not call `gsd_plan_task`. The DB-backed tool is the canonical write path. Do **not** rely on direct `PLAN.md` writes as the source of truth.
 11. Self-audit before finishing: goal/demo closure, requirement coverage, deliverable coverage audit (cross-check every file listed in CONTEXT.md `## Scope` / `### In Scope` against task `files` or `expectedOutput`), locked decisions, concrete paths, dependency order, wiring, scope size, proof truthfulness, feature completeness, and quality gates. Quality gates: non-trivial slices/tasks include specific Q3-Q7 coverage where applicable.
 12. If planning creates structural decisions, call `gsd_decision_save` for each; the tool persists the decision and regenerates `.gsd/DECISIONS.md`.

package/dist/resources/extensions/gsd/prompts/refine-slice.md CHANGED Viewed

@@ -64,7 +64,7 @@ Then:
 2. {{skillActivation}} Record the installed skills you expect executors to use in each task plan's `skills_used` frontmatter.
 3. Define slice-level verification: the objective stopping condition. Plan real test files with real assertions; for simple slices, executable commands are fine.
 4. For non-trivial slices, plan observability / proof level / integration closure, threat surface, and requirement impact. Omit entirely for simple slices.
-5. Decompose the slice into tasks that fit one context window each. Every task passed to `gsd_plan_slice` must use the exact keys `taskId`, `title`, `description`, `estimate`, `files`, `verify`, `inputs`, `expectedOutput`, and optional `observabilityImpact`. Put Why / Do / Done-when detail in `description`. `files`, `inputs`, and `expectedOutput` must be JSON arrays of strings, even for one path (for example, `"expectedOutput": ["src/index.ts"]`, never `"expectedOutput": "src/index.ts"`).
+5. Decompose the slice into tasks that fit one context window each. Every task passed to `gsd_plan_slice` must use the exact keys `taskId`, `title`, `description`, `estimate`, `files`, `verify`, `inputs`, `expectedOutput`, and optional `observabilityImpact`. Put Why / Do / Done-when detail in `description`. `files`, `inputs`, and `expectedOutput` must be JSON arrays of strings, even for one path (for example, `"expectedOutput": ["src/index.ts"]`, never `"expectedOutput": "src/index.ts"`). `expectedOutput` is path-only: list only files the task creates or overwrites, and use `[]` for pure verification tasks.
 6. **Persist planning state through `gsd_plan_slice`.** Call it with the full payload. The tool writes to the DB and renders `{{outputPath}}` and `{{slicePath}}/tasks/T##-PLAN.md` automatically. Do NOT rely on direct `PLAN.md` writes.
 7. **Self-audit the plan.** If every task were completed exactly as written, the slice goal/demo should be true. Every must-have maps to a task. Inputs and Expected Output are backtick-wrapped file paths.
 8. If refinement produced structural decisions that diverge from the sketch, call `gsd_decision_save` for each; the tool persists the decision and regenerates `.gsd/DECISIONS.md`.

package/dist/resources/extensions/gsd/prompts/run-uat.md CHANGED Viewed

@@ -27,7 +27,7 @@ You are the UAT runner. Execute every check defined in `{{uatPath}}` as deeply a
 ### Automation rules by mode
 - `artifact-driven` — verify with shell commands, scripts, file reads, and artifact structure checks.
-- `browser-executable` — use gsd-browser tools to navigate to the target URL and verify expected behavior. Prefer `mcp__gsd-browser__browser_*` tools when namespaced, or direct `browser_*` tools when surfaced without a namespace. Capture screenshots as evidence. Record pass/fail with specific assertions.
+- `browser-executable` — use browser tools to navigate to the target URL and verify expected behavior. Prefer direct `browser_*` tools when available. Capture screenshots as evidence. Record pass/fail with specific assertions.
 - `runtime-executable` — execute the specified command or script. Capture stdout/stderr as evidence. Record pass/fail based on exit code and output.
 - `live-runtime` — exercise the real runtime path. Start or connect to the app/service if needed, use browser/runtime/network checks, and verify observable behavior.
 - `mixed` — run all automatable artifact-driven and live-runtime checks. Separate any remaining human-only checks explicitly.
@@ -48,7 +48,7 @@ Choose the lightest tool that proves the check honestly:
 - Run `node` / other script invocations
 - Read files and verify their contents
 - Check that expected artifacts exist and have correct structure
-- For live/runtime/UI checks, exercise the real flow with gsd-browser when applicable and inspect runtime/network/console state
+- For live/runtime/UI checks, exercise the real flow with browser tools when applicable and inspect runtime/network/console state
 - When a check cannot be honestly automated, gather the best objective evidence you can and mark it `NEEDS-HUMAN`
 For each check, record:

package/dist/resources/extensions/gsd/prompts/system.md CHANGED Viewed

@@ -118,7 +118,7 @@ Templates are in `{{templatesDir}}`.
 **Secrets:** Use `secure_env_collect`. Never ask the user to edit `.env` files or paste secrets.
-**Browser verification:** Verify frontend work against a running app with gsd-browser by default. Use `browser_find`/`browser_snapshot_refs` for discovery, refs/selectors -> `browser_batch` for actions, `browser_assert` for verification, and `browser_diff` -> console/network logs -> full inspection as last resort. If tools are MCP-namespaced, prefer `mcp__gsd-browser__browser_*`. Retry only with a new hypothesis.
+**Browser verification:** Verify frontend work against a running app with browser tools by default. Use `browser_find`/`browser_snapshot_refs` for discovery, refs/selectors -> `browser_batch` for actions, `browser_assert` for verification, and `browser_diff` -> console/network logs -> full inspection as last resort. If browser tools are MCP-namespaced, use that host-provided browser surface. Retry only with a new hypothesis.
 **Database:** Never query `.gsd/gsd.db` directly via `sqlite3`, `better-sqlite3`, or `node -e require('better-sqlite3')`; the engine owns a single-writer WAL connection. Use `gsd_milestone_status`, `gsd_journal_query`, or other `gsd_*` tools.

package/dist/resources/extensions/gsd/roadmap-slices.js CHANGED Viewed

@@ -194,7 +194,11 @@ export function parseRoadmapSlices(content) {
             const depends = depsMatch && depsMatch[1].trim()
                 ? expandDependencies(depsMatch[1].split(",").map(s => s.trim()))
                 : [];
-            currentSlice = { id, title, risk, depends, done, demo: "" };
+            // ADR-011: the renderer writes a `[sketch]` badge for sketch slices.
+            // Parse it back so the is_sketch flag survives a markdown → DB re-import
+            // (e.g. /gsd recover); otherwise the flag was silently lost.
+            const isSketch = /\[sketch\]/i.test(rest);
+            currentSlice = { id, title, risk, depends, done, demo: "", isSketch };
             continue;
         }
         if (currentSlice && line.trim().startsWith(">")) {

package/dist/resources/extensions/gsd/safety/content-validator.js CHANGED Viewed

@@ -29,6 +29,8 @@ export function validateContent(unitType, artifactPath) {
         return [];
     }
 }
+const TASK_MARKER_RE = /^\s*(?:-\s+\[[ xX]\]\s+\*\*T\d+:|#{2,4}\s+T\d+\b)/gm;
+const SLICE_MARKER_RE = /^\s*(?:-\s+\[[ xX]\]\s+\*\*S\d+:|#{2,4}\s+S\d+\b)/gm;
 const VALIDATORS = {
     "plan-slice": validatePlanSlice,
     "plan-milestone": validatePlanMilestone,
@@ -36,10 +38,10 @@ const VALIDATORS = {
 function validatePlanSlice(content) {
     const violations = [];
     // Must have at least 1 task entry — single-task slices are valid (#3649)
-    const taskCount = (content.match(/- \[[ x]\] \*\*T\d+/g) || []).length;
+    const taskCount = (content.match(TASK_MARKER_RE) || []).length;
     if (taskCount < 1) {
         violations.push({
-            severity: "warning",
+            severity: "error",
             reason: `Slice plan has ${taskCount} task(s) — expected at least 1`,
         });
     }
@@ -62,10 +64,10 @@ function validatePlanSlice(content) {
 function validatePlanMilestone(content) {
     const violations = [];
     // Must have at least 1 slice entry
-    const sliceCount = (content.match(/##\s+S\d+/g) || []).length;
+    const sliceCount = (content.match(SLICE_MARKER_RE) || []).length;
     if (sliceCount < 1) {
         violations.push({
-            severity: "warning",
+            severity: "error",
             reason: `Milestone roadmap has ${sliceCount} slice(s) — expected at least 1`,
         });
     }

package/dist/resources/extensions/gsd/skill-manifest.js CHANGED Viewed

@@ -114,6 +114,18 @@ const UNIT_TYPE_SKILL_MANIFEST = {
         "review",
         "accessibility",
     ],
+    // Slice closeout — the "closer" role: verify assembled task work, write the
+    // downstream-ready summary + UAT, optionally drive reviewer/security/tester
+    // subagents. Predictable skill set, mirrors `complete-milestone`.
+    "complete-slice": [
+        "verify-before-complete",
+        "test",
+        "review",
+        "security-review",
+        "write-docs",
+        "observability",
+        "handoff",
+    ],
     // `execute-task` intentionally omitted — implementation hot path covers a
     // wide surface of technologies; wildcard fallback preserves today's
     // behavior until per-task skill hints can be derived from task-plan