npm - @hegemonart/get-design-done - Versions diffs - 1.24.2 → 1.25.0 - Mend

@hegemonart/get-design-done 1.24.2 → 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +41 -0
package/README.de.md +679 -0
package/README.fr.md +679 -0
package/README.it.md +679 -0
package/README.ja.md +679 -0
package/README.ko.md +679 -0
package/README.md +396 -729
package/README.zh-CN.md +480 -133
package/SKILL.md +2 -0
package/agents/prototype-gate.md +122 -0
package/agents/quality-gate-runner.md +125 -0
package/hooks/budget-enforcer.ts +132 -7
package/hooks/gdd-decision-injector.js +183 -3
package/hooks/gdd-turn-closeout.js +238 -0
package/hooks/hooks.json +10 -0
package/package.json +5 -5
package/reference/STATE-TEMPLATE.md +41 -0
package/reference/config-schema.md +30 -0
package/scripts/lib/gdd-state/mutator.ts +454 -0
package/scripts/lib/gdd-state/parser.ts +351 -1
package/scripts/lib/gdd-state/types.ts +193 -0
package/scripts/lib/quality-gate-detect.cjs +126 -0
package/skills/quality-gate/SKILL.md +222 -0
package/skills/router/SKILL.md +29 -9
package/skills/sketch-wrap-up/SKILL.md +47 -2
package/skills/spike-wrap-up/SKILL.md +41 -2
package/skills/turn-closeout/SKILL.md +115 -0
package/skills/verify/SKILL.md +22 -0

package/scripts/lib/quality-gate-detect.cjs ADDED Viewed

@@ -0,0 +1,126 @@
+'use strict';
+// scripts/lib/quality-gate-detect.cjs — quality-gate detection chain.
+//
+// Phase 25 Plan 25-09: promotes the doc-only auto-detection logic from
+// skills/quality-gate/SKILL.md (Step 1, D-06) into a small testable
+// JS module. Pure function, no I/O, no clock.
+//
+// The 3-tier resolution order (D-06):
+//
+//   Tier 1 — Authoritative config:
+//     If the (already-loaded) `.design/config.json#quality_gate.commands`
+//     array is non-empty, return it verbatim. Skip all later tiers.
+//
+//   Tier 2 — Auto-detect from package.json#scripts:
+//     If the (already-loaded) package.json#scripts object exists and is
+//     non-empty, intersect its keys with the canonical allowlist and
+//     emit `npm run <script>` for each match. The allowlist (case-
+//     sensitive, exact match):
+//
+//       lint
+//       typecheck   (or `tsc` as a substitute when `typecheck` is absent)
+//       test
+//       chromatic
+//       test:visual
+//
+//     Hard exclusions (never included even if present):
+//
+//       test:e2e          (too slow for a Stage 4.5 gate)
+//       test:integration  (only excluded when a separate `test` exists)
+//
+//   Tier 3 — Skip with notice:
+//     Returns an empty array. Caller emits a `quality_gate_skipped`
+//     event and writes a `<run/>` with status="skipped".
+//
+// Mirrors the table in skills/quality-gate/SKILL.md verbatim. When the
+// SKILL.md prose changes, change this module in lockstep — the SKILL is
+// the design intent, this is the executable encoding consumers can test
+// against.
+/**
+ * Allowlisted script names (case-sensitive, exact match unless noted).
+ * Order matters: it determines the canonical command-list ordering, which
+ * in turn drives the deterministic `commands_run` field in events.jsonl
+ * and the STATE.md <run/> entry.
+ *
+ * `tsc` is deliberately absent from this list: autoDetect() emits it only
+ * as a substitute in the `typecheck` slot when no `typecheck` script
+ * exists, so the two are never both emitted.
+ */
+const ALLOWLIST = Object.freeze([
+  'lint',
+  'typecheck',
+  'test',
+  'chromatic',
+  'test:visual',
+]);
+/**
+ * Hard exclusions. Even when present in package.json#scripts, these are
+ * never run by the quality gate — they are too slow / orthogonal to the
+ * gate's purpose.
+ *
+ * `test:integration` is intentionally not listed here: autoDetect() only
+ * emits names taken from ALLOWLIST (plus the `tsc` substitute), and
+ * `test:integration` is not on that list, so it is never auto-detected
+ * whether or not a separate `test` script exists.
+ */
+const ALWAYS_EXCLUDED = Object.freeze(['test:e2e']);
+/**
+ * Detection chain.
+ *
+ * @param {object}  inputs
+ * @param {string[]|null|undefined} inputs.configCommands
+ *        Value of `.design/config.json#quality_gate.commands`. `null` or
+ *        empty array means "no config-side override; fall through to
+ *        auto-detect". The caller is responsible for reading the file.
+ * @param {Record<string, string>|null|undefined} inputs.scripts
+ *        Value of `package.json#scripts`. `null` means "no package.json".
+ * @returns {{commands: string[], tier: 1|2|3, reason?: string}}
+ *        Detection result. `tier` is the tier that produced the
+ *        commands (1 / 2 / 3 — see top of file). `reason` is populated
+ *        on tier 3 only ("no commands resolved").
+ */
+function detect(inputs) {
+  const configCommands = inputs && inputs.configCommands;
+  const scripts = inputs && inputs.scripts;
+  // --- Tier 1: authoritative config wins. ---
+  if (Array.isArray(configCommands) && configCommands.length > 0) {
+    return { commands: configCommands.slice(), tier: 1 };
+  }
+  // --- Tier 2: auto-detect from package.json#scripts. ---
+  if (scripts && typeof scripts === 'object') {
+    const detected = autoDetect(scripts);
+    if (detected.length > 0) {
+      return { commands: detected, tier: 2 };
+    }
+  }
+  // --- Tier 3: nothing resolved → skip with notice. ---
+  return { commands: [], tier: 3, reason: 'no commands resolved' };
+}
+/**
+ * Apply the allowlist to a `scripts` map. Pure.
+ */
+function autoDetect(scripts) {
+  const out = [];
+  for (const name of ALLOWLIST) {
+    if (name === 'typecheck') {
+      // `typecheck` preferred; fall through to `tsc` only if absent.
+      if (Object.prototype.hasOwnProperty.call(scripts, 'typecheck')) {
+        out.push('npm run typecheck');
+      } else if (Object.prototype.hasOwnProperty.call(scripts, 'tsc')) {
+        out.push('npm run tsc');
+      }
+      continue;
+    }
+    if (Object.prototype.hasOwnProperty.call(scripts, name) && !ALWAYS_EXCLUDED.includes(name)) {
+      out.push(`npm run ${name}`);
+    }
+  }
+  return out;
+}
+// Exported surface: the two frozen name lists and both detection functions.
+module.exports = {
+  ALLOWLIST,
+  ALWAYS_EXCLUDED,
+  detect,
+  autoDetect,
+};

package/skills/quality-gate/SKILL.md ADDED Viewed

@@ -0,0 +1,222 @@
+---
+name: quality-gate
+description: "Stage 4.5 of the pipeline. Detects, runs, and classifies project quality commands (lint / typecheck / test / visual-regression) between /gdd:design and /gdd:verify; writes the most recent run to STATE.md <quality_gate>. Non-blocking on timeout (warn + proceed); failures spawn design-fixer until the loop converges or max_iters is reached."
+tools: Read, Write, Edit, Bash, Grep, Glob, Task
+color: amber
+model: inherit
+default-tier: haiku
+tier-rationale: "Orchestration of pre-detected commands and a downstream Haiku classifier. The skill itself does no synthesis — Bash runs do all the work, the classifier agent owns the routing decision."
+size_budget: M
+parallel-safe: conditional-on-touches
+typical-duration-seconds: 180
+reads-only: false
+writes:
+  - ".design/STATE.md"
+  - ".design/events.jsonl"
+---
+@reference/shared-preamble.md
+# quality-gate
+## Role
+You are the Stage 4.5 gate that runs between `/gdd:design` and `/gdd:verify`. You answer one question: *does this project's own quality tooling pass against the current working tree?*
+You are NOT a design checker, an a11y checker, or a verifier. You are a thin façade over the project's existing `lint` / `typecheck` / `test` / visual-regression scripts. You exist so that the verify stage can refuse entry when those scripts fail (and so that the fix loop can be bounded and observable).
+You write exactly two artifacts:
+1. The `<quality_gate>` block in `.design/STATE.md` (one most-recent `<run/>` element).
+2. Lifecycle events in `.design/events.jsonl` (per Step 6 below).
+You never block on timeout. You never block on a "skipped" detection result. You only mark `status="fail"` when the fix loop reaches `max_iters` without converging — and even then it is the verify stage's job to refuse entry; YOU exit successfully so the user sees the report regardless.
+## Configuration Surface
+Read once at start, from `.design/config.json` (all keys optional; defaults documented):
+| Key | Default | Purpose |
+|-----|---------|---------|
+| `quality_gate.commands` | `null` | Authoritative list of commands. When provided, skips auto-detection. Each entry is a string the shell can run (e.g. `"npm run lint"`). |
+| `quality_gate.timeout_seconds` | `600` | Total wall-clock budget for Step 2. On timeout: warn + proceed (D-07). |
+| `quality_gate.max_iters` | `3` | Hard cap on Step 4 fix-loop iterations. |
+Missing config file is not an error — defaults apply.
+## Step 1 — Detection chain
+Per D-06, resolve the active command list with this 3-tier fallback. Stop at the first tier that produces ≥ 1 command:
+### Tier 1 — Authoritative config
+If `.design/config.json` carries `quality_gate.commands` and the array is non-empty, use it verbatim. Skip Tier 2 and Tier 3.
+### Tier 2 — Auto-detect from `package.json#scripts`
+If `package.json` exists at the project root, read its `scripts` object. Match script names against the following allowlist (case-sensitive, exact match unless noted):
+| Script name | Notes |
+|-------------|-------|
+| `lint` | Always include if present. |
+| `typecheck` | Always include if present. |
+| `tsc` | Include if `typecheck` is absent (substitute, not duplicate). |
+| `test` | Include if present. |
+| `chromatic` | Include if present (visual-regression). |
+| `test:visual` | Include if present (visual-regression). |
+**Excluded by name** (intentionally — too slow for a Stage 4.5 gate):
+- `test:e2e`
+- `test:integration` (only if a separate `test` exists)
+- Any script whose name starts with `dev:`, `build:`, `start:`.
+For each matched script, the command to run is `npm run <script-name>` (use `pnpm run` or `yarn` only if the project's root carries a corresponding lockfile and the user's `.design/config.json` lists `quality_gate.package_manager`; otherwise default to `npm run` for portability).
+If `package.json` does not exist, or `scripts` is empty, or no allowlisted name matches, advance to Tier 3.
+### Tier 3 — Skip with notice
+Emit a `quality_gate_skipped` event with `reason: "no commands resolved"` (Step 6). Write a `<run/>` element with `status="skipped"`, `commands_run=""`, `iteration=0`, `started_at` and `completed_at` set to the same timestamp. Exit successfully with status `skipped`. The verify-entry gate (Plan 25-07 territory) does NOT block on `skipped`.
+## Step 2 — Parallel run
+Open Step 2 by emitting `quality_gate_started` with the resolved command list (Step 6).
+For each command produced by Step 1, spawn a **separate** `Bash` invocation; collect `{command, exit_code, stdout, stderr}` for each. Run them concurrently — the gate's wall-clock time is then bounded by the slowest command, not by the sum of all commands.
+The combined wall-clock budget is `quality_gate.timeout_seconds` (default 600). If the budget elapses before all commands complete:
+1. Emit `quality_gate_timeout` with the names of commands that did not finish.
+2. Mark `status="timeout"`, `commands_run=<comma-joined attempted names>`, and treat unfinished commands as having no failure to classify.
+3. Skip Step 3 / Step 4 (no fix loop on timeout — it would just compound the slowness).
+4. Proceed to Step 5 (STATE write) and Step 6 (final event).
+5. **Exit successfully.** Verify entry treats `timeout` as a warn, not a block.
+If all commands complete within budget, advance to Step 3.
+## Step 3 — Classification
+Spawn the `quality-gate-runner` agent via the `Task` tool. Pass an input payload of the shape:
+```json
+{
+  "outputs": [
+    {"command": "npm run lint", "exit_code": 0, "stderr": ""},
+    {"command": "npm run typecheck", "exit_code": 1, "stderr": "<verbatim stderr>"},
+    {"command": "npm run test", "exit_code": 0, "stderr": ""}
+  ]
+}
+```
+The agent emits a single JSON object on stdout (see `agents/quality-gate-runner.md`):
+```json
+{
+  "status": "pass" | "fail",
+  "classified_failures": {
+    "lint": ["…"],
+    "type": ["…"],
+    "test": ["…"],
+    "visual": ["…"]
+  }
+}
+```
+When `status === "pass"`, advance directly to Step 5 with `iteration` equal to the current loop counter (starts at `1` on the first pass).
+When `status === "fail"`, advance to Step 4.
+## Step 4 — Fix loop (D-08)
+If `iteration >= quality_gate.max_iters` (default 3), the loop is exhausted:
+- Emit `quality_gate_fail` with the final classified failures.
+- Mark `status="fail"`, persist the final `iteration`, and proceed to Step 5.
+- **Exit successfully.** Verify entry refuses on `status="fail"`; YOU do not throw.
+Otherwise, increment `iteration` and emit `quality_gate_iteration` with the current value. Spawn the existing `design-fixer` agent (Phase 5) via `Task` with classified failures as context — pass the same shape produced by Step 3 plus the original `outputs[]` for verbatim error context. After the fixer returns, restart from Step 2 (re-run all commands; do not prune to "only the previously failing ones" — fixes can introduce regressions in formerly-clean commands).
+The loop terminates when either Step 3 returns `status="pass"` or `iteration` reaches `max_iters`.
+## Step 5 — STATE write
+Open `.design/STATE.md`. Mutate the parsed state's `quality_gate` field to:
+```ts
+{
+  run: {
+    started_at: <ISO 8601 — captured at Step 2 entry>,
+    completed_at: <ISO 8601 — now>,
+    status: <"pass" | "fail" | "timeout" | "skipped">,
+    iteration: <final loop counter>,
+    commands_run: <comma-joined names of commands that ran; on timeout, the attempted names (Step 2); "" when skipped>,
+    extra_attrs: {},
+  },
+}
+```
+Persist via `mcp__gdd_state__set_quality_gate` (the underlying mutator wiring is named in this contract; the SDK MCP layer wraps every mutator method, so the tool surface comes for free with the parser/mutator extension landed in this plan). Until the MCP tool exists (Plan 25-07 surfaces it in the verify-stage integration), use the `apply()` mutator from `scripts/lib/gdd-state/mutator.ts` directly:
+```ts
+apply(raw, (state) => {
+  state.quality_gate = { run };
+  return state;
+});
+```
+Either path is acceptable. The on-disk shape is identical.
+## Step 6 — Event emission (D-09)
+Emit lifecycle events to `.design/events.jsonl` via the existing `appendEvent()` surface exported from `scripts/lib/event-stream/index.ts` — the same module Phase 22 telemetry, the budget-enforcer, the read-injection scanner, and the gdd-state MCP server already write through. Do not roll a bespoke writer; the singleton in `event-stream/index.ts` is persist-first / broadcast-second and never throws on the persist path, which is the contract this skill relies on.
+Import shape:
+```ts
+import { appendEvent } from '../../scripts/lib/event-stream/index.ts';
+```
+Each emission is a single `appendEvent({...})` call with `type` set to one of the six names in the table below. Pass the event-specific payload fields verbatim — `appendEvent` stamps `_meta` (pid, host, source) and the JSONL writer captures the canonical `ts` from the writer surface. The `cycle` and `stage` fields are stamped by the same path used elsewhere in Phase 22+ (consumers match on `type`; treat `ts`, `cycle`, `stage` as injected, not caller-supplied).
+One event per JSONL line. Schema and lifecycle map:
+| Event | When (lifecycle position) | Required fields |
+|-------|---------------------------|-----------------|
+| `quality_gate_started` | Step 2 entry — fired ONCE per skill invocation, immediately before any `Bash` spawn. Carries the resolved command list from Step 1 so downstream telemetry can correlate `started` → terminal event. | `commands` (string[]), `timeout_seconds` (number), `max_iters` (number) |
+| `quality_gate_iteration` | Step 4 entry — fired ONCE per retry, with `iteration` set to the new (post-increment) loop counter. The first run is implicit (covered by `started`); only retries `≥ 2` emit `iteration`. | `iteration` (int ≥ 2) |
+| `quality_gate_pass` | Step 3 returned `status: "pass"` — terminal happy path. Fires before Step 5 (STATE write) so a consumer tailing the stream sees the verdict before the on-disk run record. | `iteration` (final loop counter), `commands_run` (string[]) |
+| `quality_gate_fail` | Step 4 reached `max_iters` without convergence — terminal failure path. The verify-entry gate (Step 2.5 of `skills/verify/SKILL.md`) is the sole consumer that *acts* on this; this skill exits successfully regardless. | `iteration` (final loop counter, equal to `max_iters`), `classified_failures` (object — same shape as `quality-gate-runner` agent output) |
+| `quality_gate_timeout` | Step 2 wall-clock budget elapsed — terminal warn path (per D-07 verify treats this as a warning, not a block). Fires before Step 5 STATE write, same ordering as `pass`/`fail`. | `unfinished_commands` (string[]) |
+| `quality_gate_skipped` | Step 1 Tier 3 fired (no commands resolved) — terminal no-op path. Fires before the synthetic `<run/>` is written to STATE.md. | `reason` (string — e.g. `"no commands resolved"`) |
+All six events carry the standard `ts`, `cycle`, `stage` fields injected by `appendEvent` / the writer. Do not invent additional event names — the verify-entry gate, reflector, and Phase 22 telemetry consumers match on this exact list. Do not emit any of these names from any path other than the lifecycle positions above (e.g., do not emit `quality_gate_started` again on a Step 4 retry — that's what `quality_gate_iteration` is for).
+**Failure-mode contract:** `appendEvent()` swallows persist failures internally. If the writer cannot open `.design/events.jsonl`, the skill MUST still proceed — the event stream is observability, not correctness. The STATE.md write in Step 5 is the durable record consumers MUST rely on; events.jsonl is the supplementary timeline.
+## Output Contract
+Emit a single JSON object on stdout summarizing the run for the caller:
+```json
+{
+  "status": "pass",
+  "iteration": 1,
+  "commands_run": ["npm run lint", "npm run typecheck", "npm run test"],
+  "started_at": "2026-04-29T10:00:00Z",
+  "completed_at": "2026-04-29T10:01:42Z"
+}
+```
+Schema:
+- `status` — `pass | fail | timeout | skipped`.
+- `iteration` — final loop counter; `0` for `skipped`.
+- `commands_run` — array of command strings actually executed.
+- `started_at` / `completed_at` — ISO 8601, copied from the STATE write.
+The skill exits with shell exit code `0` on every terminal status — including `fail`. The verify-entry gate is the sole consumer of the `fail` status; this skill never throws to the orchestrator.
+## Constraints
+- **Do not** prune the command list across iterations — always re-run everything in Step 2.
+- **Do not** spawn `quality-gate-runner` more than once per loop iteration. Spawn `design-fixer` more than once if and only if the loop iterates.
+- **Do not** read or write any STATE block other than `<quality_gate>` and `<position>` (the latter only as required by the standard write contract; the gate is a checkpoint, not a stage transition, so `<position>` updates are limited to `last_checkpoint`).
+- **Do not** invoke verify or design — Stage 4.5 sits strictly between them.
+- Treat exit codes via the standard convention: `0` = clean; non-zero = failure to be classified. Do not interpret stderr content for the pass/fail decision — the agent does that classification, you do not.

package/skills/router/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: gdd-router
-description: "Routes a /gdd command to fast|quick|full path and returns {path, model_tier_overrides, estimated_cost_usd, cache_hits}. Deterministic — no model call. Invoked once at command entry before any Agent spawn. Read by hooks/budget-enforcer.js."
+description: "Routes a /gdd command to fast|quick|full path + S|M|L|XL complexity_class and returns {path, complexity_class, model_tier_overrides, estimated_cost_usd, cache_hits}. Deterministic — no model call. Invoked once at command entry before any Agent spawn. Read by hooks/budget-enforcer.js."
 argument-hint: "<intent-string> [<target-artifacts-csv>]"
 tools: Read, Bash, Grep
 ---
@@ -18,25 +18,45 @@ You are a deterministic routing skill. You do not spawn agents. You read `.desig
   ```json
   {
     "path": "fast",
+    "complexity_class": "M",
     "model_tier_overrides": {"design-verifier": "haiku"},
     "estimated_cost_usd": 0.034,
     "cache_hits": ["design-context-builder:abc123"]
   }
   ```
-- `path` enum: `fast` (single Haiku + no checkers), `quick` (Sonnet mappers + Haiku verify), `full` (Opus planners + full quality gates).
+- `path` enum: `fast` (single Haiku + no checkers), `quick` (Sonnet mappers + Haiku verify), `full` (Opus planners + full quality gates). Stays unchanged for back-compat per D-05.
+- `complexity_class` enum: `S | M | L | XL` (Phase 25 / D-04, D-05). Additive to `path` — existing consumers reading only `path` keep working. Mapping is documented in the Path Selection Heuristic table below.
 - `model_tier_overrides` merges agent frontmatter `default-tier` with `.design/budget.json.tier_overrides` — budget.json wins per D-04.
 - `estimated_cost_usd` is the sum of per-spawn estimates using the D-06 formula and `reference/model-prices.md`.
 - `cache_hits` is a list of `{agent}:{input-hash}` strings that exist in `.design/cache-manifest.json` and are within TTL; emitting a hit lets the hook short-circuit that spawn per D-05.
 ## Path Selection Heuristic
-| Signal | path |
-|--------|------|
-| Command is `/gdd:scan`, `/gdd:stats`, `/gdd:health`, `/gdd:help` | `fast` |
-| Command spawns exactly one agent (no orchestration) | `fast` |
-| Command spawns parallel mappers but no planners/auditors (`/gdd:discover` in `--auto` mode) | `quick` |
-| Command spawns planners, auditors, verifiers, or integration-checkers (`/gdd:plan`, `/gdd:verify`, `/gdd:audit`) | `full` |
-| `--dry-run` flag present on any command | downgrade one tier (fast↔quick↔full) |
+The router emits both `path` (legacy 3-tier enum) and `complexity_class` (Phase 25 4-tier enum). The canonical mapping is:
+| complexity_class | path | Behavior |
+|------------------|------|----------|
+| `S` | `fast` (short-circuited) | Skip router itself, skip cache-manager, skip telemetry write. Deterministic no-op decision. |
+| `M` | `fast` | Single Haiku + no checkers. |
+| `L` | `quick` | Sonnet mappers + Haiku verify. |
+| `XL` | `full` | Opus planners + full quality gates. Recommends worktree-isolation default + mandatory inter-stage checkpoint + reflector auto-spawn. |
+Bucket assignment:
+| Signal | complexity_class | path |
+|--------|------------------|------|
+| Command is `/gdd:help`, `/gdd:stats`, `/gdd:note`, `/gdd:health`, single-Haiku skill | `S` | `fast` (short-circuited — see below) |
+| Command is `/gdd:scan`, `/gdd:brief`, `/gdd:sketch`, `/gdd:spike`, `/gdd:fast` | `M` | `fast` |
+| Command spawns exactly one agent (no orchestration), not in S list | `M` | `fast` |
+| Command is `/gdd:explore`, `/gdd:discover`, standalone `/gdd:verify`, standalone `/gdd:plan` | `L` | `quick` |
+| Command spawns parallel mappers but no planners/auditors (`/gdd:discover` in `--auto` mode) | `L` | `quick` |
+| Command is `/gdd:next`, `/gdd:do`, `/gdd:autonomous`, end-to-end Brief→Verify, anything spawning planners + auditors + verifiers in series | `XL` | `full` |
+| Command spawns planners, auditors, verifiers, or integration-checkers (`/gdd:plan`, `/gdd:verify`, `/gdd:audit`) and is not standalone | `XL` | `full` |
+| `--dry-run` flag present on any command | downgrade one tier (XL→L→M→S) | follows the mapping table above |
+### S-class short-circuit
+When `complexity_class` would be `S`, the router itself **does not run** for that invocation — the deterministic skip list is encoded in the `/gdd:*` SKILL.md entry of the matching command. The budget-enforcer hook treats "no router decision payload + matching command name" as the S-class signal and skips enforcement entirely (no telemetry row, no cache lookup, no event emission). When the router *is* invoked explicitly (e.g., debugging) it still emits `complexity_class: "S"` in the JSON for observability, but the runtime path is the no-op.
 ## Cost Estimation Algorithm

package/skills/sketch-wrap-up/SKILL.md CHANGED Viewed

@@ -62,11 +62,54 @@ Write `.design/sketches/<slug>/WINNER.md`:
 **Project skill written to**: ./.claude/skills/design-<area>-conventions.md
 ```
-## Step 7 — Update sketches SUMMARY.md
+## Step 7 — Append D-XX + `<prototyping>` outcome to STATE.md
+Two coupled writes to `.design/STATE.md`. Both must succeed together so the
+sketch resolution is discoverable from both `<decisions>` (read by all
+downstream stages) and `<prototyping>` (read by planner-specific context via
+the decision-injector).
+Compute `D-XX` as the highest existing `D-NN` in `<decisions>` plus 1
+(scan `<decisions>` for `D-\d+:` entries and take `max + 1`, zero-padded
+to two digits — e.g. existing `D-07` → new entry is `D-08`). Use the same
+`D-XX` value in both writes below.
+**Write 1 — append a numbered decision under `<decisions>`:**
+```
+D-XX: sketch/<slug> — winner: variant-N — <one-line rationale> (locked)
+  Source: .design/sketches/<slug>/WINNER.md
+```
+**Write 2 — append a `<sketch>` child element under `<prototyping>`:**
+```
+<sketch slug="<slug>" cycle="<cycle>" decision="D-XX" status="resolved"/>
+```
+`<cycle>` is the current cycle id from `.design/STATE.md` frontmatter
+(`cycle:` field; empty string is valid for Wave A single-cycle projects).
+If a `<prototyping>` block does not yet exist in STATE.md, materialize it
+between `<must_haves>` and `<connections>` per the STATE template, then
+append the `<sketch …/>` line as its first child. The block is omitted on
+fresh files and only appears once the first sketch / spike / skipped entry
+lands.
+If MCP `gdd_state` tools are available, prefer the typed mutators (these
+wrap `scripts/lib/gdd-state/mutator.ts` and emit byte-identical output to
+manual edits):
+```
+- mcp__gdd_state__add_decision({id: "D-XX", text: "sketch/<slug> — winner: variant-N — <rationale>", status: "locked"})
+- mcp__gdd_state__add_prototyping({type: "sketch", slug: "<slug>", cycle: "<cycle>", decision: "D-XX", status: "resolved"})
+```
+Without MCP, edit `.design/STATE.md` directly with `Read` + `Write`,
+inserting the two lines into the correct blocks.
+## Step 8 — Update sketches SUMMARY.md
 Append entry to `.design/sketches/SUMMARY.md` (create if missing):
 ```markdown
-- <slug> (YYYY-MM-DD) — winner: variant-N — area: <area> — <one-line rationale>
+- <slug> (YYYY-MM-DD) — winner: variant-N — area: <area> — D-XX — <one-line rationale>
 ```
 ## After writing
@@ -76,6 +119,8 @@ Append entry to `.design/sketches/SUMMARY.md` (create if missing):
 Slug: <slug>
 Winner: variant-N
 Area: <area>
+Decision recorded: D-XX
+Prototyping entry: <sketch slug="<slug>" cycle="<cycle>" decision="D-XX" status="resolved"/>
 Project skill: ./.claude/skills/design-<area>-conventions.md
 ━━━━━━━━━━━━━━━━━━━━━
 ```

package/skills/spike-wrap-up/SKILL.md CHANGED Viewed

@@ -53,9 +53,47 @@ D-XX: spike/<slug> — <verdict> — <recommendation>
   Source: .design/spikes/<slug>/FINDINGS.md
 ```
-(Increment D-XX from the highest existing number.)
+(Increment D-XX from the highest existing number — scan `<decisions>` for
+`D-\d+:` entries and take `max + 1`, zero-padded to two digits.)
-## Step 6 — Update spikes SUMMARY.md
+If MCP `gdd_state` tools are available, prefer the typed mutator:
+```
+- mcp__gdd_state__add_decision({id: "D-XX", text: "spike/<slug> — <verdict> — <recommendation> — <one-line rationale>", status: "locked"})
+```
+## Step 6 — Append `<prototyping>` outcome to STATE.md
+Coupled with the Step 5 decision write — both must succeed together so the
+spike resolution is discoverable from both `<decisions>` (read by all
+downstream stages) and `<prototyping>` (read by planner-specific context via
+the decision-injector). Use the **same `D-XX`** as Step 5.
+Append a `<spike>` child element under `<prototyping>` in `.design/STATE.md`:
+```
+<spike slug="<slug>" cycle="<cycle>" decision="D-XX" verdict="yes|no|partial" status="resolved"/>
+```
+`<cycle>` is the current cycle id from `.design/STATE.md` frontmatter
+(`cycle:` field; empty string is valid for Wave A single-cycle projects).
+`verdict` is the answer from Step 3 (`yes` / `no` / `partial`).
+If a `<prototyping>` block does not yet exist in STATE.md, materialize it
+between `<must_haves>` and `<connections>` per the STATE template, then
+append the `<spike …/>` line as its first child. The block is omitted on
+fresh files and only appears once the first sketch / spike / skipped entry
+lands.
+If MCP `gdd_state` tools are available, prefer the typed mutator (it wraps
+`scripts/lib/gdd-state/mutator.ts` and emits byte-identical output to manual
+edits):
+```
+- mcp__gdd_state__add_prototyping({type: "spike", slug: "<slug>", cycle: "<cycle>", decision: "D-XX", verdict: "<verdict>", status: "resolved"})
+```
+Without MCP, edit `.design/STATE.md` directly with `Read` + `Write`,
+inserting the line into the `<prototyping>` block.
+## Step 7 — Update spikes SUMMARY.md
 Append entry to `.design/spikes/SUMMARY.md` (create if missing):
 ```markdown
@@ -69,6 +107,7 @@ Append entry to `.design/spikes/SUMMARY.md` (create if missing):
 Slug: <slug>
 Verdict: <verdict>
 Decision recorded: D-XX
+Prototyping entry: <spike slug="<slug>" cycle="<cycle>" decision="D-XX" verdict="<verdict>" status="resolved"/>
 FINDINGS.md written.
 ━━━━━━━━━━━━━━━━━━━━
 ```

package/skills/turn-closeout/SKILL.md ADDED Viewed

@@ -0,0 +1,115 @@
+---
+name: gdd-turn-closeout
+description: "Portable mirror of the gdd-turn-closeout Stop hook (D-11). Closes the events.jsonl gap at turn-end and surfaces a stage-completion or paused-mid-task nudge. Tail-called by orchestrator skills (/gdd:next, /gdd:design, /gdd:verify) at exit on the 13 non-Claude runtimes that lack a Stop hook surface. Idempotent, non-blocking, ≤10ms typical."
+argument-hint: "(none — reads .design/STATE.md and .design/telemetry/events.jsonl from cwd)"
+tools: Read, Bash
+---
+# gdd-turn-closeout
+## Role
+You are a deterministic **closeout** skill. You close the per-turn telemetry gap on runtimes that don't expose a Stop event (codex, gemini, and 11 others). You are a code-level mirror of `hooks/gdd-turn-closeout.js` (D-10): same conditions, same idempotence, same emitted event shape. The only difference: the JS hook emits the nudge as `additionalContext` via the harness; this skill prints the nudge directly to the user.
+**When to invoke:** orchestrator skills (`/gdd:next`, `/gdd:design`, `/gdd:verify`) tail-call this skill as their final step before returning, so the user sees a closing nudge that matches what Claude Code users see via the Stop hook. Adoption is incremental — each orchestrator can wire the tail-call independently; the skill exists as a stable, callable surface today.
+## Invocation Contract
+- **Input**: none. Operates on `.design/STATE.md` and `.design/telemetry/events.jsonl` in the current working directory.
+- **Output**: at most one printed line — the nudge — or silent return.
+- **Latency budget**: ≤10ms typical (matches D-10). Read **only** STATE.md and the tail of events.jsonl; never load the full event stream.
+- **Idempotence**: if the most recent event line is already a `turn_end` for the current `(stage, task_progress)` tuple and is at least 60 seconds old (a fresher event triggers the silent return in Step 3), skip the append but still print the nudge.
+- **Non-blocking**: any I/O failure → silent return. This skill must never gate the user.
+## Algorithm
+Execute these steps **in order** and stop at the first early-return.
+### Step 1 — Try to read STATE.md
+Read `.design/STATE.md`. If the file is missing or unreadable: **return silently** (no print, no append). Mirrors the JS hook's "missing STATE.md" branch.
+### Step 2 — Parse the `<position>` block
+Lightweight-parse only the `<position>…</position>` block (the rest of STATE.md is irrelevant here). Extract `stage`, `status`, `task_progress`. A regex pass (`/<position>([\s\S]*?)<\/position>/` then per-line `key: value`) is sufficient — do **not** invoke the full STATE parser (it adds unnecessary cost against the latency budget). If the `<position>` block is absent or cannot be parsed: **return silently**.
+If `status != "in_progress"`: **return silently**. The pipeline is either initialized, completed, or blocked — no turn-end gap to close.
+### Step 3 — Tail the last event line
+Read **only the last 8 KiB** of `.design/telemetry/events.jsonl` (a single event line is ≪8 KiB, so the tail is expected to contain the complete last line). Treat all of these as "stale by definition":
+- The file is missing.
+- The file is empty.
+- The last line fails to parse as JSON.
+- The last line's `timestamp` is missing or unparseable.
+Otherwise compute `now - last_event.timestamp`. If the gap is **<60 seconds**, the user is actively mid-turn — **return silently** (the next real event will close the gap naturally).
+A reasonable Bash one-liner for the tail when running this skill in a runtime that lacks a Read-tail primitive: `tail -n 1 .design/telemetry/events.jsonl 2>/dev/null`.
+### Step 4 — Idempotence check, then append
+If the last event is already shaped `{type: "turn_end", stage: <same>, payload: {task_progress: <same>}}` for the **exact** `(stage, task_progress)` tuple from Step 2: **skip the append** but proceed to Step 5.
+Otherwise append a single JSONL line to `.design/telemetry/events.jsonl`:
+```json
+{"type":"turn_end","timestamp":"<ISO 8601 now>","sessionId":"<session-id-or-'turn-closeout'>","stage":"<stage>","payload":{"task_progress":"<N/M>"},"_meta":{"source":"gdd-turn-closeout-skill"}}
+```
+Create `.design/telemetry/` if missing. The append must be a single `appendFile`-equivalent call (the writer assumes append-atomicity per Plan 20-06).
+### Step 5 — Print the nudge
+Match `task_progress` against `^(\d+)/(\d+)$`:
+- **Numerator equals denominator and denominator > 0** (e.g. `5/5`, stage-complete):
+  > Stage `<stage>` complete — run `/gdd:next` or `/gdd:reflect`
+- **Otherwise** (mid-task, e.g. `3/7`, `0/0`, malformed):
+  > Stage `<stage>` paused mid-task — resume with `/gdd:resume`
+Print exactly one of these two lines. No additional commentary, no explanations of what the skill did — the nudge is the user-facing surface.
+## Failure Modes
+Every step above has an explicit non-blocking fallback on failure — a silent return in most cases, or a fall-through where the table below says so. The skill must remain non-blocking under all conditions:
+| Condition | Behavior |
+|-----------|----------|
+| `.design/STATE.md` missing or unreadable | Silent return |
+| `<position>` block absent or malformed | Silent return |
+| `status != "in_progress"` | Silent return |
+| `.design/telemetry/events.jsonl` missing | Treat as stale → fall through to append + nudge |
+| Last event line unparseable | Treat as stale → fall through |
+| Last event timestamp <60s old | Silent return |
+| Append fails (permission, disk full) | Print the nudge anyway; do not surface the I/O error |
+| Any uncaught throw at any step | Silent return |
+## Equivalence with the JS hook
+This skill and `hooks/gdd-turn-closeout.js` MUST stay code-level equivalent. Specifically:
+- Same four early-return branches (no STATE / not in_progress / fresh event / no-op).
+- Same staleness threshold: **60 seconds**.
+- Same idempotence guard: `(type=turn_end, stage, payload.task_progress)` triple.
+- Same emitted event shape (only `_meta.source` differs: `gdd-turn-closeout` vs `gdd-turn-closeout-skill`, so reflector telemetry can distinguish hook-driven vs skill-driven turn-ends).
+- Same nudge wording for both `N/N` and mid-task cases.
+If you change one, change the other in the same plan. Plan 25-09's `tests/turn-closeout-hook.test.cjs` covers the JS hook; the parallel coverage for this skill rides on Plan 25-09's Phase 25 baseline.
+## Non-Goals
+- **Not a state writer.** This skill never edits STATE.md. The events.jsonl append is the only side effect.
+- **Not a stage transition.** A `turn_end` event is a within-stage observation, not a state-machine move; downstream tools that gate on stage transitions ignore it.
+- **Not a Stop-event harness.** Cross-runtime Stop-event support at the harness level is explicitly out of scope for Phase 25 (see CONTEXT.md OOS section).
+## Integration Point
+The canonical tail-call sites (per D-11) are `/gdd:next`, `/gdd:design`, `/gdd:verify`. Each orchestrator's final step, immediately before returning to the user, should be:
+> Invoke skill `gdd-turn-closeout`.
+Tail-call wiring is intentionally not part of v1.25 (Plan 25-04 ships only the callable surface). Each orchestrator can adopt the wiring independently in a follow-up.