npm - @hegemonart/get-design-done - Versions diffs - 1.31.5 → 1.33.0 - Mend

@hegemonart/get-design-done 1.31.5 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +63 -0
package/NOTICE +81 -5
package/README.md +25 -0
package/SKILL.md +4 -0
package/hooks/hooks.json +9 -0
package/hooks/inject-using-gdd.sh +72 -0
package/hooks/run-hook.cmd +35 -0
package/package.json +2 -2
package/reference/schemas/events.schema.json +63 -1
package/reference/schemas/pressure-scenario.schema.json +69 -0
package/scripts/lib/health-mirror/index.cjs +79 -1
package/scripts/lib/skill-behavior/runner.cjs +187 -0
package/scripts/lib/skill-behavior/stub-invoker.cjs +95 -0
package/scripts/lib/skill-behavior/telemetry.cjs +379 -0
package/sdk/mcp/gdd-mcp/server.js +42 -0
package/skills/audit/SKILL.md +13 -0
package/skills/brief/SKILL.md +25 -0
package/skills/design/SKILL.md +17 -0
package/skills/discuss/SKILL.md +13 -0
package/skills/explore/SKILL.md +17 -0
package/skills/health/SKILL.md +6 -0
package/skills/plan/SKILL.md +25 -0
package/skills/router/SKILL.md +4 -0
package/skills/router/router-pick-emitter.md +78 -0
package/skills/using-gdd/SKILL.md +78 -0
package/skills/verify/SKILL.md +17 -0
package/scripts/lib/cli/index.ts +0 -29
package/scripts/lib/error-classifier.cjs +0 -29
package/scripts/lib/event-stream/index.ts +0 -29
package/scripts/lib/gdd-errors/index.ts +0 -29
package/scripts/lib/gdd-state/index.ts +0 -29
package/scripts/lib/iteration-budget.cjs +0 -29
package/scripts/lib/jittered-backoff.cjs +0 -29
package/scripts/lib/lockfile.cjs +0 -29
package/scripts/mcp-servers/gdd-mcp/server.ts +0 -35
package/scripts/mcp-servers/gdd-state/server.ts +0 -34

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
-    "version": "1.31.5"
+    "version": "1.33.0"
   },
   "plugins": [
     {
       "name": "get-design-done",
       "source": "./",
       "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
-      "version": "1.31.5",
+      "version": "1.33.0",
       "author": {
         "name": "hegemonart"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "get-design-done",
   "short_name": "gdd",
-  "version": "1.31.5",
+  "version": "1.33.0",
   "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain. v1.27.7 ships gdd-mcp (Phase 27.7): 12 read-only MCP tools for sub-3s priming. 
v1.28.0 (Phase 28): Foundational References Tier 2 — 5 new reference files (color-theory, composition, proportion-systems, i18n, contrast-advanced), 2 verifier i18n probes + 1 explore i18n-readiness probe, 12 additive cross-link insertions across 10 existing references, 2 orthogonal audit-scoring lens-tags (composition_alignment + i18n_readiness).",
   "author": {
     "name": "hegemonart",

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,69 @@ All notable changes to get-design-done are documented here. Versions follow [sem
 ---
+## [1.33.0] - 2026-05-30
+### Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness)
+Adds a **behavior-test category** that complements the static validators (Phase 28.5 line/frontmatter) and static guardrails (Phase 32 `<HARD-GATE>` presence) with tests that verify skills hold UNDER PRESSURE. A manifest-driven runner drives a pressure scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and validates the response against a compliance/violation rubric with N-attempts + majority rule. Ships the harness + 8 baseline scenarios + synthetic RED baselines + the description-format A/B methodology + reflector telemetry integration. Ports the TDD-for-skills methodology + the pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT). 6 plans across Waves A–C.
+### Added
+- **Manifest-driven pressure-scenario runner** — `scripts/lib/skill-behavior/runner.cjs` exposes an INJECTABLE `invokeAgent(prompt, opts) -> { text }` seam (no `@anthropic-ai/sdk` dependency — D-03): a deterministic STUB invoker (`scripts/lib/skill-behavior/stub-invoker.cjs`) for CI/tests, plus a documented real-invoker adapter for the opt-in keyed run. Runs each scenario N times and decides compliance by majority.
+- **Pressure-scenario schema** — `reference/schemas/pressure-scenario.schema.json` (wired into `validate:schemas`), with conformance tests for the 8 scenario manifests.
+- **8 pressure scenarios + synthetic RED baselines** — `test/suite/skill-behavior/scenarios/` (7 stage skills + `using-gdd`) with synthetic-from-observed-cycle-drift RED baselines at `test/fixtures/skill-behavior-baseline/` (D-02 — ROADMAP-sanctioned).
+- **Description-format A/B methodology** — `docs/research/description-format-ab.md` documents the trigger-only vs `<what>. Use when` counterfactual + the 7/10-run threshold (D-08), with a `pending: keyed run` marker. The empirical result is an opt-in maintainer follow-up (no API key in CI).
+- **Reflector telemetry** — `scripts/lib/skill-behavior/telemetry.cjs` emits to `.design/telemetry/skill-behavior.jsonl`; a sustained-failure signal (≥3 of last 10 runs failing for a scenario) feeds an `apply-reflections` proposal (stub-tested integration — D-07).
+- **`npm run test:behavior` (opt-in, D-06).** A new script that runs the behavior tests ONLY when `ANTHROPIC_API_KEY` is set (a clear skip message + exit 0 otherwise). The default `npm test` is UNCHANGED — the structural stub tests stay CI-green (LLM non-determinism keeps live behavior runs out of the default suite).
+- **Docs** — `CONTRIBUTING.md` gains a "How to add a pressure scenario" section + the keyed `ANTHROPIC_API_KEY=… npm run test:behavior` procedure; `README.md` gains a "Skill behavior tests" subsection.
+### Removed
+- **BREAKING: the Phase-31.5 deprecation shims are removed (D-04).** The 10 `GDD-DEPRECATION-SHIM` re-exports re-created at the OLD SDK paths in v1.31.5 — `scripts/lib/{cli,event-stream,gdd-state,gdd-errors}/index.ts`, `scripts/lib/{error-classifier,iteration-budget,jittered-backoff,lockfile}.cjs`, and `scripts/mcp-servers/{gdd-state,gdd-mcp}/server.ts` — are deleted. The grace window elapsed (v1.31.5 shipped with shims → v1.32.0 still had them → v1.33.0 removes them). The now-empty `scripts/mcp-servers/` is dropped from the `package.json` `files` allowlist. **If you imported `scripts/lib/…` or `scripts/mcp-servers/…` directly, import from `sdk/…` instead** (e.g. `scripts/lib/cli` → `sdk/cli`, `scripts/lib/error-classifier.cjs` → `sdk/primitives/error-classifier.cjs`, `scripts/mcp-servers/gdd-state/server.ts` → `sdk/mcp/gdd-state/server.ts`). Internal callers were all repointed to `sdk/` in 31.5 + the Phase-32 gdd-events fix; the `gdd-state-mcp` / `gdd-mcp` bins target `sdk/`, so deletion drops only the external re-export — proven by the `no-stale-internal-refs` guard + the full suite + the 31.5 headless pack→install→run E2E.
+### Attribution
+- **Methodology + pattern ported from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT).** The TDD-for-skills cycle (RED: agent fails without the skill → GREEN: skill counters the rationalizations → REFACTOR: close new loopholes) and the pressure-scenario pattern. See `NOTICE`. We port the methodology, not the content — GDD's scenarios, rubrics, and skills are GDD-specific.
+### Notes
+- The behavioral evidence (real RED baselines from live agent runs + the empirical A/B result) is NOT capturable autonomously (no API key / SDK in CI). RED baselines are authored synthetic-from-observed-cycle-drift (D-02); the A/B evidence file documents methodology + expected-signal + a `pending: keyed run` marker. A Phase-28.5 feedback note points at `docs/research/description-format-ab.md`; **Phase 28.5's description-format validator regex is unchanged** (33-06 emits the pointer only — D-08).
+- The 31.5 tarball golden (`test/fixtures/baselines/phase-31-5/tarball-manifest.txt`) was regenerated as a reviewed delta: **+4** skill-behavior paths (`reference/schemas/pressure-scenario.schema.json` + the 3 `scripts/lib/skill-behavior/*.cjs`) and **−10** removed shim paths (618 paths).
+- 6-manifest lockstep at **v1.33.0** (`package.json` + `package-lock.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` (metadata.version + plugins[0].version) + `.cursor-plugin/plugin.json` + `.codex-plugin/plugin.json`). Version-sync hygiene done upfront (D-09): `OFF_CADENCE_VERSIONS.add('1.33.0')` + prior `manifests-version.txt` baselines forward-propagated 1.32.0 → 1.33.0.
+---
+## [1.32.0] - 2026-05-30
+### Phase 32 — Skill Auto-Trigger Discipline + Defensive Guardrails
+Closes the auto-trigger gap between GDD's 70+ skills and the harness's description-match skill-discovery layer. GDD had zero forcing functions — agents consulted skills opportunistically, not disciplinedly. This release ports the skill-discipline **mechanism** (not content) from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT): a SessionStart-injected bootstrap contract, defensive guardrails at every stage transition, and two lightweight skill-discovery instruments that feed Phase 33's behavioral A/B. 9 plans across Waves A–C.
+### Added
+- **`using-gdd` SessionStart bootstrap (the forcing function GDD lacked).** A new `skills/using-gdd/SKILL.md` discipline contract — the **1%-rule** ("if you think there is even a 1% chance a skill applies, you ABSOLUTELY MUST invoke it"), a ≥10-row **red-flags table** (Thought → Reality), a skill-priority order (Process → Implementation → Audit), an instruction-priority precedence (user CLAUDE.md > GDD skill > defaults), and the GDD pipeline flow. Carries `disable-model-invocation: true` (it is injected, not model-invoked) and a pure-trigger description (no `<what>` clause, per superpowers' shortcut finding — proof-by-implementation; Phase 28.5's description-format validator stays open pending Phase 33's A/B evidence).
+- **Per-harness SessionStart inject emitter.** `hooks/inject-using-gdd.sh` is a single polyglot script that reads `using-gdd` and emits it as the host harness's SessionStart `additionalContext` shape — Cursor (`additional_context`), Claude Code (`hookSpecificOutput.additionalContext`), and SDK-standard (top-level `additionalContext`) branches via env-var detection, with a pure-bash JSON escaper (no jq/python dependency). A `hooks/run-hook.cmd` polyglot Windows wrapper and a 5th `hooks/hooks.json` SessionStart entry (matcher `startup|clear|compact`) wire it in.
+- **`<SUBAGENT-STOP>` no-cascade structural guarantee.** The inject is wired ONLY under the SessionStart hook event; subagent spawns do not fire SessionStart, so the bootstrap contract cannot cascade into a subagent's context. The `using-gdd` body opens with a `<SUBAGENT-STOP>` tag. (Structural guarantee here; the behavioral proof under pressure is deferred to Phase 33.)
+- **`<HARD-GATE>` at the 5 stage transitions.** `skills/{brief,explore,plan,design,verify}/SKILL.md` each gain a `<HARD-GATE>` block that refuses to advance the pipeline until the stage's required artifact (`.design/BRIEF.md`, `DESIGN.md` + `DESIGN-CONTEXT.md`, etc.) exists and is approved — reading the artifact path from `.design/STATE.md` when a project uses a custom location.
+- **Rationalization tables in the 7 stage-orchestrator skills.** `brief / explore / plan / design / verify / discuss / audit` each carry a `| Thought | Reality |` rationalization table (≥6 rows) that names the common "skip the stage" justifications and rebuts each.
+- **Inline self-review blocks** in `brief` and `plan` (the 2 spec-producing transitions) — a 4-line inline checklist (Phase 28.5 progressive-disclosure: a short check belongs at the transition surface, not behind a skill-discovery hop).
+- **Portable discipline blocks** in `AGENTS.md` + `GEMINI.md` so non-Claude-Code harnesses (Codex, Gemini, etc.) inherit the same skill-discipline contract.
+- **`router_pick` skill-discovery telemetry** — a new `router_pick` event in `reference/schemas/events.schema.json` plus an emit point (`skills/router/router-pick-emitter.md`). Records a sha256 `context_hash` (never the raw intent — no PII) so Phase 33 can measure which skill the router actually selected.
+- **`lint-skill-descriptions.cjs` drift detector** — a maintainer/CI script (not shipped to npm) that flags any skill whose one-line `description:` is stale while its body changed ≥3 times since (the D-02 heuristic).
+- **`gdd-health` `skill_discipline` check (#7).** `scripts/lib/health-mirror/index.cjs` gains a 7th read-only check reporting `skill-discipline: ready` (using-gdd present AND `hooks.json` SessionStart wires the inject), `skill-discipline: missing using-gdd`, or `skill-discipline: hook not wired`. Documented in `skills/health/SKILL.md`.
+### Attribution
+- **Mechanism ported from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT).** Three artifacts: the SessionStart hook-script structure, the 1%-rule + red-flags-table format, and the defensive-guardrail patterns (`<HARD-GATE>` / `<SUBAGENT-STOP>` / rationalization-table). See `NOTICE`. We port the MECHANISM, not the content — GDD's skills, gates, and tables are GDD-specific.
+### Notes
+- The pure-trigger `using-gdd` description ships as **proof-by-implementation** of superpowers' shortcut finding (a `<what>`-clause can make agents follow the description summary instead of reading the body). The counterfactual A/B description test and the pressure-scenario behavior runner are **deferred to Phase 33** (D-02); Phase 32 ships the `router_pick` events + drift-lint instruments that Phase 33 consumes. Phase 28.5's global description-format validator regex stays open until that evidence lands.
+- 4 stage skills (`brief`, `explore`, `plan`, `verify`) sit in the validator's advisory **warn** band (≥100 lines) after gaining the mandatory discipline blocks — well under the **block** threshold (250). Accepted by design: the gates + tables are the deliverable.
+- 6-manifest lockstep at **v1.32.0** (`package.json` + `package-lock.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` (metadata.version + plugins[0].version) + `.cursor-plugin/plugin.json` + `.codex-plugin/plugin.json`).
+---
 ## [1.31.5] - 2026-05-29
 ### Phase 31.5 — Repo Structure Consolidation

package/NOTICE CHANGED Viewed

@@ -211,14 +211,90 @@ See `.planning/phases/30.6-graphify-self-ownership/` for full phase
 documentation including the 10 architectural decisions (D-01 through D-10)
 and the migration of the 8 dispatching callsites to native `bin/gdd-graph`.
+──────────────────────────────────────────────────────────────────────────────
+Phase 32 — Skill Auto-Trigger Discipline + Defensive Guardrails (v1.32.0, 2026-05-30)
+──────────────────────────────────────────────────────────────────────────────
+The skill-discipline layer shipped in v1.32.0 ports the MECHANISM (not the
+content) from:
+  obra/superpowers (https://github.com/obra/superpowers)
+  License: MIT
+GDD had 70+ skills and zero forcing functions; superpowers ships exactly one
+(`using-superpowers` SessionStart inject) plus the `<HARD-GATE>` /
+`<SUBAGENT-STOP>` / rationalization-table guardrail patterns, and reliably
+auto-triggers its skills. We re-derive the mechanism in GDD's own runtime and
+skill set; the skill bodies, gates, tables, and pipeline flow are GDD-specific.
+Three ported artifacts:
+  hooks/inject-using-gdd.sh
+    └─ SessionStart hook-script structure adapted from superpowers'
+       `using-superpowers` inject: one polyglot script, env-var branch per
+       harness, pure-bash escape_for_json (no jq/python dependency).
+  skills/using-gdd/SKILL.md
+    └─ The 1%-rule ("even a 1% chance a skill applies → invoke it") + the
+       red-flags `| Thought | Reality |` table format adapted from
+       superpowers' using-superpowers discipline contract. GDD content:
+       GDD pipeline stages, skill-priority order, instruction-priority.
+  skills/{brief,explore,plan,design,verify,discuss,audit}/SKILL.md
+    └─ The defensive-guardrail patterns — `<HARD-GATE>` (refuse to advance a
+       stage without its artifact), `<SUBAGENT-STOP>` (no-cascade into
+       subagents), and the rationalization-table pattern — adapted from
+       superpowers. The specific gates, artifact paths, and table rows are
+       GDD-specific.
+The mechanism is the contribution being attributed; the discipline content is
+original to get-design-done.
+──────────────────────────────────────────────────────────────────────────────
+Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness) (v1.33.0, 2026-05-30)
+──────────────────────────────────────────────────────────────────────────────
+The skill-behavior pressure-scenario harness shipped in v1.33.0 ports the
+TDD-for-skills METHODOLOGY and the pressure-scenario PATTERN (not the content)
+from:
+  obra/superpowers/skills/writing-skills (https://github.com/obra/superpowers)
+  License: MIT
+writing-skills codifies the TDD-for-skills cycle (RED: an agent fails the task
+without the skill → GREEN: the skill counters those specific rationalizations →
+REFACTOR: close newly-discovered loopholes) and the pattern of testing a skill
+UNDER PRESSURE (time / sunk-cost / authority / exhaustion / scope-minimization)
+rather than only statically. We re-derive the methodology + pattern in GDD's own
+runtime and skill set:
+  scripts/lib/skill-behavior/runner.cjs
+    └─ The manifest-driven pressure-scenario runner (injectable agent-invoker
+       seam, N-attempts + majority rule, RED→GREEN structured result) adapts
+       writing-skills' TDD-for-skills test loop. GDD content: the injectable
+       invoker seam (no SDK dependency — D-03), the scenario-manifest schema,
+       and the stub-LLM CI path.
+  test/suite/skill-behavior/scenarios/*.json
+    └─ The pressure-scenario manifest pattern (a scenario applies a named
+       pressure to a skill and scores compliance vs violation against a rubric)
+       adapts writing-skills' pressure-test pattern. The specific scenarios,
+       pressures, rubrics, and the 8 covered skills are GDD-specific.
+  reference/schemas/pressure-scenario.schema.json
+    └─ The scenario-manifest contract formalizing the pattern. GDD original.
+The methodology + pattern are the contribution being attributed; the scenarios,
+rubrics, runner implementation, and skills are original to get-design-done.
 ────────────────────────────────────────────────────────────────────────
 Note on the broader codebase: get-design-done as a whole is licensed under
 the MIT License (see LICENSE). The Apache 2.0 attribution above applies
 specifically to the cc-multi-cli-derived files listed under the Phase 27
-block. The MIT attributions under Phase 28.5 and Phase 28.7 cover content
-adapted from mattpocock/skills (MIT) and gsd-build/get-shit-done (MIT)
-respectively — the MIT-to-MIT re-licensing is straightforward and the
-attributions above provide the required source citation. The MIT and
-Apache 2.0 licenses are compatible — see
+block. The MIT attributions under Phase 28.5, Phase 28.7, Phase 32, and
+Phase 33 cover content/mechanism/methodology adapted from mattpocock/skills
+(MIT), gsd-build/get-shit-done (MIT), obra/superpowers (MIT), and
+obra/superpowers/skills/writing-skills (MIT) respectively — the MIT-to-MIT
+re-licensing is straightforward and the attributions above provide the
+required source citation. The MIT and Apache 2.0 licenses are compatible — see
 https://www.apache.org/legal/resolved.html#category-a.

package/README.md CHANGED Viewed

@@ -276,6 +276,31 @@ node scripts/lib/figma-extract/digest.cjs --raw <cache>/raw/<key> --out .design/
 See [`skills/figma-extract/SKILL.md`](skills/figma-extract/SKILL.md) and [`figma-plugin/README.md`](figma-plugin/README.md) for the full flow.
+### Skill discipline bootstrap (v1.32.0+)
+GDD ships 70+ skills, but a description-match skill router consults them only opportunistically, so it is easy to skip a stage under pressure. v1.32.0 adds the forcing function GDD lacked, porting the skill-discipline **mechanism** (not content) from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT):
+- **SessionStart inject.** A `using-gdd` bootstrap contract is injected at every session start / `/clear` / compact (`hooks/inject-using-gdd.sh`, per-harness: Cursor / Claude Code / SDK). It carries the **1%-rule** ("even a 1% chance a skill applies → invoke it"), a red-flags `Thought → Reality` table, and the skill-priority + instruction-priority order — so the agent is primed to find the right skill before it acts.
+- **`<HARD-GATE>` at every stage transition.** Brief / Explore / Plan / Design / Verify each refuse to advance until the stage's artifact exists and is approved — no free-handing a stage.
+- **Rationalization tables** in all 7 stage skills name the common "skip it" justifications and rebut each; **inline self-review** blocks gate the brief and plan specs.
+- **`<SUBAGENT-STOP>` no-cascade.** The inject fires only on SessionStart, so the bootstrap never cascades into spawned subagents.
+- **Portable + health-aware.** `AGENTS.md` + `GEMINI.md` carry the same discipline block for non-Claude-Code harnesses, and `/gdd:health` reports a `skill-discipline` readiness line.
+See [`skills/using-gdd/SKILL.md`](skills/using-gdd/SKILL.md) and the `NOTICE` attribution for details.
+### Skill behavior tests (v1.33.0+)
+Static validators check a skill's shape; **behavior tests** check that it holds under pressure. v1.33.0 adds a manifest-driven pressure-scenario harness (porting the TDD-for-skills methodology + pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers), MIT): a runner drives a scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and scores the response against a compliance/violation rubric with N-attempts + majority rule. Ships 8 scenarios (7 stage skills + `using-gdd`) with synthetic RED baselines.
+Behavior tests are **opt-in** and key-gated — the default `npm test` stub suite covers the harness structurally and stays CI-green (LLM non-determinism keeps live runs out of CI). To run the live pass:
+```bash
+# Skips + exits 0 when ANTHROPIC_API_KEY is unset.
+ANTHROPIC_API_KEY=sk-... GDD_BEHAVIOR_INVOKER=./path/to/invoker.cjs npm run test:behavior
+```
+See [`docs/research/description-format-ab.md`](docs/research/description-format-ab.md) for the description-format A/B methodology and [`CONTRIBUTING.md`](CONTRIBUTING.md) ("How to add a pressure scenario").
 ## How It Works

package/SKILL.md CHANGED Viewed

@@ -243,6 +243,10 @@ If `$ARGUMENTS` is a stage or command name — invoke it directly, no state chec
 /gdd:sketch-wrap-up  → Skill("get-design-done:gdd-sketch-wrap-up")
 /gdd:spike           → Skill("get-design-done:gdd-spike")
 /gdd:spike-wrap-up   → Skill("get-design-done:gdd-spike-wrap-up")
+# --- Bootstrap (not slash-routed) ---
+# using-gdd → injected at SessionStart by hooks/inject-using-gdd.sh
+#   (disable-model-invocation: true). The skill-discipline contract;
+#   not a user-invoked command — see skills/using-gdd/SKILL.md.
 ```
 Pass remaining arguments through: `/gdd:explore --skip-interview` → `Skill("get-design-done:gdd-explore", "--skip-interview")`.

package/hooks/hooks.json CHANGED Viewed

@@ -32,6 +32,15 @@
             "command": "node \"${CLAUDE_PLUGIN_ROOT}/hooks/gdd-sessionstart-recap.js\""
           }
         ]
+      },
+      {
+        "matcher": "startup|clear|compact",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/inject-using-gdd.sh\""
+          }
+        ]
       }
     ],
     "PreToolUse": [

package/hooks/inject-using-gdd.sh ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env bash
+# hooks/inject-using-gdd.sh — SessionStart per-harness context injector (D-07).
+#
+# The forcing function GDD lacked: on every session start / /clear / compact this
+# reads skills/using-gdd/SKILL.md (the bootstrap discipline contract) and emits it
+# as the host harness's SessionStart "additionalContext" shape so the agent is
+# primed with the 1%-rule + red-flags + skill-priority before it acts.
+#
+# Ported MECHANISM (not content) from obra/superpowers (MIT): one polyglot script,
+# env-var branch, pure-bash escape_for_json (no jq/python dependency). See NOTICE.
+#
+# Three emitted shapes (ONE JSON object on stdout, nothing else):
+#   Cursor       (CURSOR_PLUGIN_ROOT set)        -> {"additional_context": "<escaped>"}
+#   Claude Code  (CLAUDE_PLUGIN_ROOT set, no Cursor)
+#                                                -> {"hookSpecificOutput":
+#                                                     {"hookEventName":"SessionStart",
+#                                                      "additionalContext":"<escaped>"}}
+#   SDK-standard (neither; e.g. COPILOT_CLI)     -> {"additionalContext": "<escaped>"}
+#
+# Branch order: check Cursor BEFORE Claude Code — a Cursor session may also export
+# CLAUDE_PLUGIN_ROOT, and Cursor's own var must win.
+#
+# NO-CASCADE (D-06): this script is wired ONLY under the SessionStart hook event in
+# hooks/hooks.json. Subagent spawns do not fire SessionStart, so the inject cannot
+# cascade into a subagent's context. (Structural guarantee; behavioral proof = P33.)
+set -u
+# --- Resolve the plugin root so we can locate skills/using-gdd/SKILL.md ---------
+# Prefer the harness-provided roots; fall back to this script's parent dir so the
+# emitter is runnable straight from hooks/ in tests and in bare shells.
+SELF_DIR="$(cd "$(dirname "$0")" && pwd)"
+ROOT="${CURSOR_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-${SELF_DIR}/..}}"
+ROOT="${ROOT//\\//}"  # normalize Windows backslashes to forward slashes
+SKILL="${ROOT}/skills/using-gdd/SKILL.md"
+# Defensive: if the skill file is missing we must STILL emit a syntactically valid
+# JSON object (an empty additionalContext) so the SessionStart pipeline never
+# breaks on a partial install. Never crash the session start.
+if [[ -r "${SKILL}" ]]; then
+  CONTENT="$(cat "${SKILL}")"
+else
+  CONTENT=""
+fi
+# --- escape_for_json (superpowers pattern; pure bash param-substitution) --------
+# Order matters: backslash FIRST (so escapes we add next aren't re-escaped), then
+# double-quote, then the control chars. Tab / CR / LF get their short JSON escapes;
+# every other control char U+0001-U+001F is emitted as \u00XX, because JSON forbids
+# raw control characters inside a string (a stray form feed or ESC in SKILL.md
+# would otherwise make the whole SessionStart payload unparseable). NUL cannot
+# occur: bash variables cannot hold it. Emits the value WITH surrounding
+# double-quotes so callers can splice it directly.
+escape_for_json() {
+  local s="$1" code hex ch
+  s="${s//\\/\\\\}"   # \  -> \\
+  s="${s//\"/\\\"}"   # "  -> \"
+  s="${s//$'\t'/\\t}" # tab -> \t
+  s="${s//$'\r'/\\r}" # CR  -> \r
+  s="${s//$'\n'/\\n}" # LF  -> \n
+  # Remaining control chars. Tab/CR/LF are already gone, so their iterations are
+  # harmless no-ops; the backslashes added here are not re-escaped because the
+  # backslash pass has already run.
+  for ((code = 1; code < 32; code++)); do
+    printf -v hex '%02x' "${code}"
+    printf -v ch "\\x${hex}"
+    s="${s//${ch}/\\u00${hex}}"
+  done
+  printf '"%s"' "$s"
+}
+ESCAPED="$(escape_for_json "${CONTENT}")"
+# --- Branch on harness env vars and emit the matching single JSON object --------
+if [[ -n "${CURSOR_PLUGIN_ROOT:-}" ]]; then
+  # Cursor: top-level additional_context.
+  printf '{"additional_context": %s}\n' "${ESCAPED}"
+elif [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then
+  # Claude Code: hookSpecificOutput envelope (mirrors hooks/gdd-decision-injector.js).
+  printf '{"hookSpecificOutput": {"hookEventName": "SessionStart", "additionalContext": %s}}\n' "${ESCAPED}"
+else
+  # SDK-standard (COPILOT_CLI or none): top-level additionalContext.
+  printf '{"additionalContext": %s}\n' "${ESCAPED}"
+fi

package/hooks/run-hook.cmd ADDED Viewed

@@ -0,0 +1,35 @@
+@echo off
+REM hooks/run-hook.cmd — Windows polyglot wrapper that invokes a GDD .sh hook
+REM through bash.
+REM
+REM Workaround for Claude Code's Windows auto-bash bug: CC can mis-handle a
+REM SessionStart `command` that points directly at a `.sh` file on Windows
+REM shells. This .cmd shim locates bash and runs the script explicitly, so the
+REM SessionStart inject (hooks/inject-using-gdd.sh) fires on Windows too.
+REM
+REM Usage:  run-hook.cmd <script-name.sh> [args...]
+REM Default (no arg): inject-using-gdd.sh — the SessionStart using-gdd injector.
+REM The host harness's env (CLAUDE_PLUGIN_ROOT / CURSOR_PLUGIN_ROOT / COPILOT_CLI)
+REM is inherited by bash and drives the emitter's per-harness branch.
+setlocal
+REM Script to run, relative to this .cmd's own directory (%~dp0 ends with a backslash).
+REM HOOK_PATH is resolved BEFORE any shift, while parameter 0 still names this file.
+set "HOOK_SCRIPT=%~1"
+if "%HOOK_SCRIPT%"=="" set "HOOK_SCRIPT=inject-using-gdd.sh"
+set "HOOK_PATH=%~dp0%HOOK_SCRIPT%"
+REM Gather the arguments that follow the script name into HOOK_ARGS. The all-args
+REM parameter (percent-star) cannot be used for this: shift has no effect on it, so
+REM it would hand the script name to the hook again as its first argument.
+REM "shift /1" starts shifting at parameter 1 and leaves parameter 0 untouched.
+REM Limitation: collection stops at the first explicitly empty argument.
+set "HOOK_ARGS="
+if "%~1"=="" goto args_done
+shift /1
+:collect_args
+if "%~1"=="" goto args_done
+set HOOK_ARGS=%HOOK_ARGS% %1
+shift /1
+goto collect_args
+:args_done
+REM Prefer bash on PATH; fall back to a typical Git-for-Windows install location.
+where bash >nul 2>nul
+if %ERRORLEVEL%==0 (
+  bash "%HOOK_PATH%" %HOOK_ARGS%
+) else if exist "%ProgramFiles%\Git\bin\bash.exe" (
+  "%ProgramFiles%\Git\bin\bash.exe" "%HOOK_PATH%" %HOOK_ARGS%
+) else (
+  REM No bash available: emit a valid empty SDK-shape JSON object so the
+  REM SessionStart pipeline still receives parseable output and never breaks.
+  echo {"additionalContext": ""}
+)
+endlocal

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hegemonart/get-design-done",
-  "version": "1.31.5",
+  "version": "1.33.0",
   "description": "A design-quality pipeline for AI coding agents: brief, plan, implement, and verify UI work against your design system.",
   "author": "Hegemon",
   "homepage": "https://github.com/hegemonart/get-design-done",
@@ -24,7 +24,6 @@
     "recipes/",
     "docs/i18n/",
     "scripts/lib/",
-    "scripts/mcp-servers/",
     "scripts/cli/",
     "scripts/install.cjs",
     "SKILL.md",
@@ -51,6 +50,7 @@
     "prepack": "npm run build:sdk",
     "postpack": "node scripts/build-sdk-bins.cjs --clean",
     "test": "node --test --experimental-strip-types \"test/suite/**/*.test.cjs\" \"test/suite/**/*.test.ts\"",
+    "test:behavior": "node scripts/run-behavior-tests.cjs",
     "typecheck": "tsc --noEmit",
     "codegen:schemas": "node --experimental-strip-types scripts/codegen-schema-types.ts",
     "lint:md": "npx --yes markdownlint-cli2 \"**/*.md\" \"#node_modules\" \"#.planning\" \"#.claude\" \"#test/fixtures/baselines\"",

package/reference/schemas/events.schema.json CHANGED Viewed

@@ -10,7 +10,7 @@
     "type": {
       "type": "string",
       "minLength": 1,
-      "description": "Free-form event type identifier. Pre-registered seeds: state.mutation, state.transition, stage.entered, stage.exited, hook.fired, error, capability_gap."
+      "description": "Free-form event type identifier. Pre-registered seeds: state.mutation, state.transition, stage.entered, stage.exited, hook.fired, error, capability_gap, kfm-candidate, router_pick."
     },
     "timestamp": {
       "type": "string",
@@ -181,6 +181,57 @@
         }
       },
       "description": "Phase 30.5-03 D-06 kfm-candidate payload — 7 fields, additionalProperties: false. Validated when the envelope's type === 'kfm-candidate' via the allOf[1] conditional."
+    },
+    "RouterPickPayload": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": [
+        "event_id",
+        "source",
+        "picked_skill",
+        "context_hash",
+        "rank",
+        "alternatives",
+        "ts"
+      ],
+      "properties": {
+        "event_id": {
+          "type": "string",
+          "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
+          "description": "UUIDv4 identifying this router_pick event. Stable across emit + read cycles."
+        },
+        "source": {
+          "type": "string",
+          "const": "router",
+          "description": "Phase 32-08 D-02 — the router_pick event class is emitted EXCLUSIVELY by the gdd-router skill at its resolved-pick point. No other producer is authorised."
+        },
+        "picked_skill": {
+          "type": "string",
+          "minLength": 1,
+          "description": "The skill or agent the router auto-picked for this intent. Phase 33 baselines per-skill auto-pick rates from this field (pick-rate regression)."
+        },
+        "context_hash": {
+          "type": "string",
+          "minLength": 1,
+          "description": "sha256 of the intent/context that drove the pick — NEVER the raw prompt (no PII, mirrors CapabilityGapPayload.context_hash discipline). Used by Phase 33 aggregation to cluster picks for the same context."
+        },
+        "rank": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Rank of the picked_skill among the candidates considered (0 = top pick). Lets Phase 33 distinguish confident top picks from close calls."
+        },
+        "alternatives": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Other candidate skill/agent names the router considered (names only — no scores, no prompt text). May be empty when the router had a single match. Surfaces which skills the router weighs but does not reach for."
+        },
+        "ts": {
+          "type": "string",
+          "format": "date-time",
+          "description": "ISO-8601 timestamp of the pick emission."
+        }
+      },
+      "description": "Phase 32-08 D-02 router_pick payload — 7 fields, additionalProperties: false, NO PII (context_hash only). Records which skill the router auto-picked per intent — the instrument that surfaces under-reached skills. Validated when the envelope's type === 'router_pick' via the allOf[2] conditional."
     }
   },
   "allOf": [
@@ -205,6 +256,17 @@
           "payload": { "$ref": "#/definitions/KfmCandidatePayload" }
         }
       }
+    },
+    {
+      "if": {
+        "properties": { "type": { "const": "router_pick" } },
+        "required": ["type"]
+      },
+      "then": {
+        "properties": {
+          "payload": { "$ref": "#/definitions/RouterPickPayload" }
+        }
+      }
     }
   ]
 }

package/reference/schemas/pressure-scenario.schema.json ADDED Viewed

@@ -0,0 +1,69 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://get-design-done.example/schemas/pressure-scenario.schema.json",
+  "title": "Pressure Scenario Manifest",
+  "description": "Contract for a Phase-33 skill-behavior pressure-scenario manifest. The runner (scripts/lib/skill-behavior/runner.cjs) loads manifests conforming to this schema, spawns a subagent against `setup_prompt` under the named `pressures`, and validates the response against the `expected_compliance` / `expected_violations` regex sources (compiled with new RegExp(source)). The 5-value `pressures` enum and the required-field set come verbatim from ROADMAP Phase-33 SC#2.",
+  "type": "object",
+  "additionalProperties": false,
+  "required": [
+    "name",
+    "target_skill",
+    "pressures",
+    "setup_prompt",
+    "expected_compliance",
+    "expected_violations"
+  ],
+  "properties": {
+    "name": {
+      "type": "string",
+      "minLength": 1,
+      "description": "Unique scenario identifier, e.g. \"brief-time-pressure\"."
+    },
+    "target_skill": {
+      "type": "string",
+      "minLength": 1,
+      "description": "The skill under test, e.g. \"brief\", \"explore\", \"plan\", \"using-gdd\"."
+    },
+    "pressures": {
+      "type": "array",
+      "minItems": 1,
+      "description": "One or more pressure vectors applied in the setup_prompt.",
+      "items": {
+        "type": "string",
+        "enum": ["time", "sunk-cost", "authority", "exhaustion", "scope-minimization"]
+      }
+    },
+    "setup_prompt": {
+      "type": "string",
+      "minLength": 1,
+      "description": "The prompt handed to the subagent — embeds the pressure(s) and asks it to act."
+    },
+    "expected_compliance": {
+      "type": "array",
+      "minItems": 1,
+      "description": "Regex SOURCE strings the response MUST match to count as compliant (the runner compiles each with new RegExp(source)).",
+      "items": { "type": "string", "minLength": 1 }
+    },
+    "expected_violations": {
+      "type": "array",
+      "description": "Regex SOURCE strings that, if matched, count as a violation (the runner compiles each with new RegExp(source)). May be empty.",
+      "items": { "type": "string", "minLength": 1 }
+    },
+    "description": {
+      "type": "string",
+      "description": "Optional free-text scenario note (33-03 baselines reference it)."
+    },
+    "variant": {
+      "type": "string",
+      "description": "Optional A/B variant label, e.g. \"trigger-only\" | \"what-clause\" (33-04 description-format A/B)."
+    },
+    "variants": {
+      "type": "array",
+      "description": "Optional array of A/B variant descriptors for a single-manifest A/B pair (33-04). Each item is an object, e.g. { label, description }.",
+      "items": { "type": "object" }
+    },
+    "body_probe": {
+      "type": "string",
+      "description": "Optional body-only probe prompt the A/B scenario asks (33-04 description-format A/B)."
+    }
+  }
+}