@hegemonart/get-design-done 1.31.5 → 1.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +63 -0
  4. package/NOTICE +81 -5
  5. package/README.md +25 -0
  6. package/SKILL.md +4 -0
  7. package/hooks/hooks.json +9 -0
  8. package/hooks/inject-using-gdd.sh +72 -0
  9. package/hooks/run-hook.cmd +35 -0
  10. package/package.json +2 -2
  11. package/reference/schemas/events.schema.json +63 -1
  12. package/reference/schemas/pressure-scenario.schema.json +69 -0
  13. package/scripts/lib/health-mirror/index.cjs +79 -1
  14. package/scripts/lib/skill-behavior/runner.cjs +187 -0
  15. package/scripts/lib/skill-behavior/stub-invoker.cjs +95 -0
  16. package/scripts/lib/skill-behavior/telemetry.cjs +379 -0
  17. package/sdk/mcp/gdd-mcp/server.js +42 -0
  18. package/skills/audit/SKILL.md +13 -0
  19. package/skills/brief/SKILL.md +25 -0
  20. package/skills/design/SKILL.md +17 -0
  21. package/skills/discuss/SKILL.md +13 -0
  22. package/skills/explore/SKILL.md +17 -0
  23. package/skills/health/SKILL.md +6 -0
  24. package/skills/plan/SKILL.md +25 -0
  25. package/skills/router/SKILL.md +4 -0
  26. package/skills/router/router-pick-emitter.md +78 -0
  27. package/skills/using-gdd/SKILL.md +78 -0
  28. package/skills/verify/SKILL.md +17 -0
  29. package/scripts/lib/cli/index.ts +0 -29
  30. package/scripts/lib/error-classifier.cjs +0 -29
  31. package/scripts/lib/event-stream/index.ts +0 -29
  32. package/scripts/lib/gdd-errors/index.ts +0 -29
  33. package/scripts/lib/gdd-state/index.ts +0 -29
  34. package/scripts/lib/iteration-budget.cjs +0 -29
  35. package/scripts/lib/jittered-backoff.cjs +0 -29
  36. package/scripts/lib/lockfile.cjs +0 -29
  37. package/scripts/mcp-servers/gdd-mcp/server.ts +0 -35
  38. package/scripts/mcp-servers/gdd-state/server.ts +0 -34
@@ -5,14 +5,14 @@
5
5
  },
6
6
  "metadata": {
7
7
  "description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
8
- "version": "1.31.5"
8
+ "version": "1.33.0"
9
9
  },
10
10
  "plugins": [
11
11
  {
12
12
  "name": "get-design-done",
13
13
  "source": "./",
14
14
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
15
- "version": "1.31.5",
15
+ "version": "1.33.0",
16
16
  "author": {
17
17
  "name": "hegemonart"
18
18
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "get-design-done",
3
3
  "short_name": "gdd",
4
- "version": "1.31.5",
4
+ "version": "1.33.0",
5
5
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain. v1.27.7 ships gdd-mcp (Phase 27.7): 12 read-only MCP tools for sub-3s priming. 
v1.28.0 (Phase 28): Foundational References Tier 2 — 5 new reference files (color-theory, composition, proportion-systems, i18n, contrast-advanced), 2 verifier i18n probes + 1 explore i18n-readiness probe, 12 additive cross-link insertions across 10 existing references, 2 orthogonal audit-scoring lens-tags (composition_alignment + i18n_readiness).",
6
6
  "author": {
7
7
  "name": "hegemonart",
package/CHANGELOG.md CHANGED
@@ -4,6 +4,69 @@ All notable changes to get-design-done are documented here. Versions follow [sem
4
4
 
5
5
  ---
6
6
 
7
+ ## [1.33.0] - 2026-05-30
8
+
9
+ ### Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness)
10
+
11
+ Adds a **behavior-test category** that complements the static validators (Phase 28.5 line/frontmatter) and static guardrails (Phase 32 `<HARD-GATE>` presence) with tests that verify skills hold UNDER PRESSURE. A manifest-driven runner drives a pressure scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and validates the response against a compliance/violation rubric with N-attempts + majority rule. Ships the harness + 8 baseline scenarios + synthetic RED baselines + the description-format A/B methodology + reflector telemetry integration. Ports the TDD-for-skills methodology + the pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT). 6 plans across Waves A–C.
12
+
13
+ ### Added
14
+
15
+ - **Manifest-driven pressure-scenario runner** — `scripts/lib/skill-behavior/runner.cjs` exposes an INJECTABLE `invokeAgent(prompt, opts) -> { text }` seam (no `@anthropic-ai/sdk` dependency — D-03): a deterministic STUB invoker (`scripts/lib/skill-behavior/stub-invoker.cjs`) for CI/tests, plus a documented real-invoker adapter for the opt-in keyed run. Runs each scenario N times and decides compliance by majority.
16
+ - **Pressure-scenario schema** — `reference/schemas/pressure-scenario.schema.json` (wired into `validate:schemas`), with conformance tests for the 8 scenario manifests.
17
+ - **8 pressure scenarios + synthetic RED baselines** — `test/suite/skill-behavior/scenarios/` (7 stage skills + `using-gdd`) with synthetic-from-observed-cycle-drift RED baselines at `test/fixtures/skill-behavior-baseline/` (D-02 — ROADMAP-sanctioned).
18
+ - **Description-format A/B methodology** — `docs/research/description-format-ab.md` documents the trigger-only vs `<what>. Use when` counterfactual + the 7/10-run threshold (D-08), with a `pending: keyed run` marker. The empirical result is an opt-in maintainer follow-up (no API key in CI).
19
+ - **Reflector telemetry** — `scripts/lib/skill-behavior/telemetry.cjs` emits to `.design/telemetry/skill-behavior.jsonl`; a sustained-failure signal (≥3 of last 10 runs failing for a scenario) feeds an `apply-reflections` proposal (stub-tested integration — D-07).
20
+ - **`npm run test:behavior` (opt-in, D-06).** A new script that runs the behavior tests ONLY when `ANTHROPIC_API_KEY` is set (a clear skip message + exit 0 otherwise). The default `npm test` is UNCHANGED — the structural stub tests stay CI-green (LLM non-determinism keeps live behavior runs out of the default suite).
21
+ - **Docs** — `CONTRIBUTING.md` gains a "How to add a pressure scenario" section + the keyed `ANTHROPIC_API_KEY=… npm run test:behavior` procedure; `README.md` gains a "Skill behavior tests" subsection.
22
+
23
+ ### Removed
24
+
25
+ - **BREAKING: the Phase-31.5 deprecation shims are removed (D-04).** The 10 `GDD-DEPRECATION-SHIM` re-exports re-created at the OLD SDK paths in v1.31.5 — `scripts/lib/{cli,event-stream,gdd-state,gdd-errors}/index.ts`, `scripts/lib/{error-classifier,iteration-budget,jittered-backoff,lockfile}.cjs`, and `scripts/mcp-servers/{gdd-state,gdd-mcp}/server.ts` — are deleted. The grace window elapsed (v1.31.5 shipped with shims → v1.32.0 still had them → v1.33.0 removes them). The now-empty `scripts/mcp-servers/` is dropped from the `package.json` `files` allowlist. **If you imported `scripts/lib/…` or `scripts/mcp-servers/…` directly, import from `sdk/…` instead** (e.g. `scripts/lib/cli` → `sdk/cli`, `scripts/lib/error-classifier.cjs` → `sdk/primitives/error-classifier.cjs`, `scripts/mcp-servers/gdd-state/server.ts` → `sdk/mcp/gdd-state/server.ts`). Internal callers were all repointed to `sdk/` in 31.5 + the Phase-32 gdd-events fix; the `gdd-state-mcp` / `gdd-mcp` bins target `sdk/`, so deletion drops only the external re-export — proven by the `no-stale-internal-refs` guard + the full suite + the 31.5 headless pack→install→run E2E.
26
+
27
+ ### Attribution
28
+
29
+ - **Methodology + pattern ported from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers) (MIT).** The TDD-for-skills cycle (RED: agent fails without the skill → GREEN: skill counters the rationalizations → REFACTOR: close new loopholes) and the pressure-scenario pattern. See `NOTICE`. We port the methodology, not the content — GDD's scenarios, rubrics, and skills are GDD-specific.
30
+
31
+ ### Notes
32
+
33
+ - The behavioral evidence (real RED baselines from live agent runs + the empirical A/B result) is NOT capturable autonomously (no API key / SDK in CI). RED baselines are authored synthetic-from-observed-cycle-drift (D-02); the A/B evidence file documents methodology + expected-signal + a `pending: keyed run` marker. A Phase-28.5 feedback note points at `docs/research/description-format-ab.md`; **Phase 28.5's description-format validator regex is unchanged** (33-06 emits the pointer only — D-08).
34
+ - The 31.5 tarball golden (`test/fixtures/baselines/phase-31-5/tarball-manifest.txt`) was regenerated as a reviewed delta: **+4** skill-behavior paths (`reference/schemas/pressure-scenario.schema.json` + the 3 `scripts/lib/skill-behavior/*.cjs`) and **−10** removed shim paths (618 paths).
35
+ - 6-manifest lockstep at **v1.33.0** (`package.json` + `package-lock.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` (metadata.version + plugins[0].version) + `.cursor-plugin/plugin.json` + `.codex-plugin/plugin.json`). Version-sync hygiene done upfront (D-09): `OFF_CADENCE_VERSIONS.add('1.33.0')` + prior `manifests-version.txt` baselines forward-propagated 1.32.0 → 1.33.0.
36
+
37
+ ---
38
+
39
+ ## [1.32.0] - 2026-05-30
40
+
41
+ ### Phase 32 — Skill Auto-Trigger Discipline + Defensive Guardrails
42
+
43
+ Closes the auto-trigger gap between GDD's 70+ skills and the harness's description-match skill-discovery layer. GDD had zero forcing functions — agents consulted skills opportunistically, not disciplinedly. This release ports the skill-discipline **mechanism** (not content) from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT): a SessionStart-injected bootstrap contract, defensive guardrails at every stage transition, and two lightweight skill-discovery instruments that feed Phase 33's behavioral A/B. 9 plans across Waves A–C.
44
+
45
+ ### Added
46
+
47
+ - **`using-gdd` SessionStart bootstrap (the forcing function GDD lacked).** A new `skills/using-gdd/SKILL.md` discipline contract — the **1%-rule** ("if you think there is even a 1% chance a skill applies, you ABSOLUTELY MUST invoke it"), a ≥10-row **red-flags table** (Thought → Reality), a skill-priority order (Process → Implementation → Audit), an instruction-priority precedence (user CLAUDE.md > GDD skill > defaults), and the GDD pipeline flow. Carries `disable-model-invocation: true` (it is injected, not model-invoked) and a pure-trigger description (no `<what>` clause, per superpowers' shortcut finding — proof-by-implementation; Phase 28.5's description-format validator stays open pending Phase 33's A/B evidence).
48
+ - **Per-harness SessionStart inject emitter.** `hooks/inject-using-gdd.sh` is a single polyglot script that reads `using-gdd` and emits it as the host harness's SessionStart `additionalContext` shape — Cursor (`additional_context`), Claude Code (`hookSpecificOutput.additionalContext`), and SDK-standard (top-level `additionalContext`) branches via env-var detection, with a pure-bash JSON escaper (no jq/python dependency). A `hooks/run-hook.cmd` polyglot Windows wrapper and a 5th `hooks/hooks.json` SessionStart entry (matcher `startup|clear|compact`) wire it in.
49
+ - **`<SUBAGENT-STOP>` no-cascade structural guarantee.** The inject is wired ONLY under the SessionStart hook event; subagent spawns do not fire SessionStart, so the bootstrap contract cannot cascade into a subagent's context. The `using-gdd` body opens with a `<SUBAGENT-STOP>` tag. (Structural guarantee here; the behavioral proof under pressure is deferred to Phase 33.)
50
+ - **`<HARD-GATE>` at the 5 stage transitions.** `skills/{brief,explore,plan,design,verify}/SKILL.md` each gain a `<HARD-GATE>` block that refuses to advance the pipeline until the stage's required artifact (`.design/BRIEF.md`, `DESIGN.md` + `DESIGN-CONTEXT.md`, etc.) exists and is approved — reading the artifact path from `.design/STATE.md` when a project uses a custom location.
51
+ - **Rationalization tables in the 7 stage-orchestrator skills.** `brief / explore / plan / design / verify / discuss / audit` each carry a `| Thought | Reality |` rationalization table (≥6 rows) that names the common "skip the stage" justifications and rebuts each.
52
+ - **Inline self-review blocks** in `brief` and `plan` (the 2 spec-producing transitions) — a 4-line inline checklist (Phase 28.5 progressive-disclosure: a short check belongs at the transition surface, not behind a skill-discovery hop).
53
+ - **Portable discipline blocks** in `AGENTS.md` + `GEMINI.md` so non-Claude-Code harnesses (Codex, Gemini, etc.) inherit the same skill-discipline contract.
54
+ - **`router_pick` skill-discovery telemetry** — a new `router_pick` event in `reference/schemas/events.schema.json` plus an emit point (`skills/router/router-pick-emitter.md`). Records a sha256 `context_hash` (never the raw intent — no PII) so Phase 33 can measure which skill the router actually selected.
55
+ - **`lint-skill-descriptions.cjs` drift detector** — a maintainer/CI script (not shipped to npm) that flags any skill whose one-line `description:` is stale while its body changed ≥3 times since (the D-02 heuristic).
56
+ - **`gdd-health` `skill_discipline` check (#7).** `scripts/lib/health-mirror/index.cjs` gains a 7th read-only check reporting `skill-discipline: ready` (using-gdd present AND `hooks.json` SessionStart wires the inject), `skill-discipline: missing using-gdd`, or `skill-discipline: hook not wired`. Documented in `skills/health/SKILL.md`.
57
+
58
+ ### Attribution
59
+
60
+ - **Mechanism ported from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT).** Three artifacts: the SessionStart hook-script structure, the 1%-rule + red-flags-table format, and the defensive-guardrail patterns (`<HARD-GATE>` / `<SUBAGENT-STOP>` / rationalization-table). See `NOTICE`. We port the MECHANISM, not the content — GDD's skills, gates, and tables are GDD-specific.
61
+
62
+ ### Notes
63
+
64
+ - The pure-trigger `using-gdd` description ships as **proof-by-implementation** of superpowers' shortcut finding (a `<what>`-clause can make agents follow the description summary instead of reading the body). The counterfactual A/B description test and the pressure-scenario behavior runner are **deferred to Phase 33** (D-02); Phase 32 ships the `router_pick` events + drift-lint instruments that Phase 33 consumes. Phase 28.5's global description-format validator regex stays open until that evidence lands.
65
+ - 4 stage skills (`brief`, `explore`, `plan`, `verify`) sit in the validator's advisory **warn** band (≥100 lines) after gaining the mandatory discipline blocks — well under the **block** threshold (250). Accepted by design: the gates + tables are the deliverable.
66
+ - 6-manifest lockstep at **v1.32.0** (`package.json` + `package-lock.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` (metadata.version + plugins[0].version) + `.cursor-plugin/plugin.json` + `.codex-plugin/plugin.json`).
67
+
68
+ ---
69
+
7
70
  ## [1.31.5] - 2026-05-29
8
71
 
9
72
  ### Phase 31.5 — Repo Structure Consolidation
package/NOTICE CHANGED
@@ -211,14 +211,90 @@ See `.planning/phases/30.6-graphify-self-ownership/` for full phase
211
211
  documentation including the 10 architectural decisions (D-01 through D-10)
212
212
  and the migration of the 8 dispatching callsites to native `bin/gdd-graph`.
213
213
 
214
+ ──────────────────────────────────────────────────────────────────────────────
215
+ Phase 32 — Skill Auto-Trigger Discipline + Defensive Guardrails (v1.32.0, 2026-05-30)
216
+ ──────────────────────────────────────────────────────────────────────────────
217
+
218
+ The skill-discipline layer shipped in v1.32.0 ports the MECHANISM (not the
219
+ content) from:
220
+
221
+ obra/superpowers (https://github.com/obra/superpowers)
222
+ License: MIT
223
+
224
+ GDD had 70+ skills and zero forcing functions; superpowers ships exactly one
225
+ (`using-superpowers` SessionStart inject) plus the `<HARD-GATE>` /
226
+ `<SUBAGENT-STOP>` / rationalization-table guardrail patterns, and reliably
227
+ auto-triggers its skills. We re-derive the mechanism in GDD's own runtime and
228
+ skill set; the skill bodies, gates, tables, and pipeline flow are GDD-specific.
229
+ Three ported artifacts:
230
+
231
+ hooks/inject-using-gdd.sh
232
+ └─ SessionStart hook-script structure adapted from superpowers'
233
+ `using-superpowers` inject: one polyglot script, env-var branch per
234
+ harness, pure-bash escape_for_json (no jq/python dependency).
235
+
236
+ skills/using-gdd/SKILL.md
237
+ └─ The 1%-rule ("even a 1% chance a skill applies → invoke it") + the
238
+ red-flags `| Thought | Reality |` table format adapted from
239
+ superpowers' using-superpowers discipline contract. GDD content:
240
+ GDD pipeline stages, skill-priority order, instruction-priority.
241
+
242
+ skills/{brief,explore,plan,design,verify,discuss,audit}/SKILL.md
243
+ └─ The defensive-guardrail patterns — `<HARD-GATE>` (refuse to advance a
244
+ stage without its artifact), `<SUBAGENT-STOP>` (no-cascade into
245
+ subagents), and the rationalization-table pattern — adapted from
246
+ superpowers. The specific gates, artifact paths, and table rows are
247
+ GDD-specific.
248
+
249
+ The mechanism is the contribution being attributed; the discipline content is
250
+ original to get-design-done.
251
+
252
+ ──────────────────────────────────────────────────────────────────────────────
253
+ Phase 33 — Skill Behavior Tests (Pressure-Scenario Harness) (v1.33.0, 2026-05-30)
254
+ ──────────────────────────────────────────────────────────────────────────────
255
+
256
+ The skill-behavior pressure-scenario harness shipped in v1.33.0 ports the
257
+ TDD-for-skills METHODOLOGY and the pressure-scenario PATTERN (not the content)
258
+ from:
259
+
260
+ obra/superpowers/skills/writing-skills (https://github.com/obra/superpowers)
261
+ License: MIT
262
+
263
+ writing-skills codifies the TDD-for-skills cycle (RED: an agent fails the task
264
+ without the skill → GREEN: the skill counters those specific rationalizations →
265
+ REFACTOR: close newly-discovered loopholes) and the pattern of testing a skill
266
+ UNDER PRESSURE (time / sunk-cost / authority / exhaustion / scope-minimization)
267
+ rather than only statically. We re-derive the methodology + pattern in GDD's own
268
+ runtime and skill set:
269
+
270
+ scripts/lib/skill-behavior/runner.cjs
271
+ └─ The manifest-driven pressure-scenario runner (injectable agent-invoker
272
+ seam, N-attempts + majority rule, RED→GREEN structured result) adapts
273
+ writing-skills' TDD-for-skills test loop. GDD content: the injectable
274
+ invoker seam (no SDK dependency — D-03), the scenario-manifest schema,
275
+ and the stub-LLM CI path.
276
+
277
+ test/suite/skill-behavior/scenarios/*.json
278
+ └─ The pressure-scenario manifest pattern (a scenario applies a named
279
+ pressure to a skill and scores compliance vs violation against a rubric)
280
+ adapts writing-skills' pressure-test pattern. The specific scenarios,
281
+ pressures, rubrics, and the 8 covered skills are GDD-specific.
282
+
283
+ reference/schemas/pressure-scenario.schema.json
284
+ └─ The scenario-manifest contract formalizing the pattern. GDD original.
285
+
286
+ The methodology + pattern are the contribution being attributed; the scenarios,
287
+ rubrics, runner implementation, and skills are original to get-design-done.
288
+
214
289
  ────────────────────────────────────────────────────────────────────────
215
290
 
216
291
  Note on the broader codebase: get-design-done as a whole is licensed under
217
292
  the MIT License (see LICENSE). The Apache 2.0 attribution above applies
218
293
  specifically to the cc-multi-cli-derived files listed under the Phase 27
219
- block. The MIT attributions under Phase 28.5 and Phase 28.7 cover content
220
- adapted from mattpocock/skills (MIT) and gsd-build/get-shit-done (MIT)
221
- respectively — the MIT-to-MIT re-licensing is straightforward and the
222
- attributions above provide the required source citation. The MIT and
223
- Apache 2.0 licenses are compatible — see
294
+ block. The MIT attributions under Phase 28.5, Phase 28.7, Phase 32, and
295
+ Phase 33 cover content/mechanism/methodology adapted from mattpocock/skills
296
+ (MIT), gsd-build/get-shit-done (MIT), obra/superpowers (MIT), and
297
+ obra/superpowers/skills/writing-skills (MIT) respectively — the MIT-to-MIT
298
+ re-licensing is straightforward and the attributions above provide the
299
+ required source citation. The MIT and Apache 2.0 licenses are compatible — see
224
300
  https://www.apache.org/legal/resolved.html#category-a.
package/README.md CHANGED
@@ -276,6 +276,31 @@ node scripts/lib/figma-extract/digest.cjs --raw <cache>/raw/<key> --out .design/
276
276
 
277
277
  See [`skills/figma-extract/SKILL.md`](skills/figma-extract/SKILL.md) and [`figma-plugin/README.md`](figma-plugin/README.md) for the full flow.
278
278
 
279
+ ### Skill discipline bootstrap (v1.32.0+)
280
+
281
+ GDD ships 70+ skills, but a description-match skill router consults them opportunistically — easy to skip a stage under pressure. v1.32.0 adds the forcing function GDD lacked, porting the skill-discipline **mechanism** (not content) from [`obra/superpowers`](https://github.com/obra/superpowers) (MIT):
282
+
283
+ - **SessionStart inject.** A `using-gdd` bootstrap contract is injected at every session start / `/clear` / compact (`hooks/inject-using-gdd.sh`, per-harness: Cursor / Claude Code / SDK). It carries the **1%-rule** ("even a 1% chance a skill applies → invoke it"), a red-flags `Thought → Reality` table, and the skill-priority + instruction-priority order — so the agent is primed to find the right skill before it acts.
284
+ - **`<HARD-GATE>` at every stage transition.** Brief / Explore / Plan / Design / Verify each refuse to advance until the stage's artifact exists and is approved — no free-handing a stage.
285
+ - **Rationalization tables** in all 7 stage skills name the common "skip it" justifications and rebut each; **inline self-review** blocks gate the brief and plan specs.
286
+ - **`<SUBAGENT-STOP>` no-cascade.** The inject fires only on SessionStart, so the bootstrap never cascades into spawned subagents.
287
+ - **Portable + health-aware.** `AGENTS.md` + `GEMINI.md` carry the same discipline block for non-Claude-Code harnesses, and `/gdd:health` reports a `skill-discipline` readiness line.
288
+
289
+ See [`skills/using-gdd/SKILL.md`](skills/using-gdd/SKILL.md) and the `NOTICE` attribution for details.
290
+
291
+ ### Skill behavior tests (v1.33.0+)
292
+
293
+ Static validators check a skill's shape; **behavior tests** check that it holds under pressure. v1.33.0 adds a manifest-driven pressure-scenario harness (porting the TDD-for-skills methodology + pressure-scenario pattern from [`obra/superpowers/skills/writing-skills`](https://github.com/obra/superpowers), MIT): a runner drives a scenario (time / sunk-cost / authority / exhaustion / scope-minimization) through an injectable agent-invoker and scores the response against a compliance/violation rubric with N-attempts + majority rule. Ships 8 scenarios (7 stage skills + `using-gdd`) with synthetic RED baselines.
294
+
295
+ Behavior tests are **opt-in** and key-gated — the default `npm test` stub suite covers the harness structurally and stays CI-green (LLM non-determinism keeps live runs out of CI). To run the live pass:
296
+
297
+ ```bash
298
+ # Skips + exits 0 when ANTHROPIC_API_KEY is unset.
299
+ ANTHROPIC_API_KEY=sk-... GDD_BEHAVIOR_INVOKER=./path/to/invoker.cjs npm run test:behavior
300
+ ```
301
+
302
+ See [`docs/research/description-format-ab.md`](docs/research/description-format-ab.md) for the description-format A/B methodology and [`CONTRIBUTING.md`](CONTRIBUTING.md) ("How to add a pressure scenario").
303
+
279
304
 
280
305
  ## How It Works
281
306
 
package/SKILL.md CHANGED
@@ -243,6 +243,10 @@ If `$ARGUMENTS` is a stage or command name — invoke it directly, no state chec
243
243
  /gdd:sketch-wrap-up → Skill("get-design-done:gdd-sketch-wrap-up")
244
244
  /gdd:spike → Skill("get-design-done:gdd-spike")
245
245
  /gdd:spike-wrap-up → Skill("get-design-done:gdd-spike-wrap-up")
246
+ # --- Bootstrap (not slash-routed) ---
247
+ # using-gdd → injected at SessionStart by hooks/inject-using-gdd.sh
248
+ # (disable-model-invocation: true). The skill-discipline contract;
249
+ # not a user-invoked command — see skills/using-gdd/SKILL.md.
246
250
  ```
247
251
 
248
252
  Pass remaining arguments through: `/gdd:explore --skip-interview` → `Skill("get-design-done:gdd-explore", "--skip-interview")`.
package/hooks/hooks.json CHANGED
@@ -32,6 +32,15 @@
32
32
  "command": "node \"${CLAUDE_PLUGIN_ROOT}/hooks/gdd-sessionstart-recap.js\""
33
33
  }
34
34
  ]
35
+ },
36
+ {
37
+ "matcher": "startup|clear|compact",
38
+ "hooks": [
39
+ {
40
+ "type": "command",
41
+ "command": "bash \"${CLAUDE_PLUGIN_ROOT}/hooks/inject-using-gdd.sh\""
42
+ }
43
+ ]
35
44
  }
36
45
  ],
37
46
  "PreToolUse": [
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env bash
2
+ # hooks/inject-using-gdd.sh — SessionStart per-harness context injector (D-07).
3
+ #
4
+ # The forcing function GDD lacked: on every session start / /clear / compact this
5
+ # reads skills/using-gdd/SKILL.md (the bootstrap discipline contract) and emits it
6
+ # as the host harness's SessionStart "additionalContext" shape so the agent is
7
+ # primed with the 1%-rule + red-flags + skill-priority before it acts.
8
+ #
9
+ # Ported MECHANISM (not content) from obra/superpowers (MIT): one polyglot script,
10
+ # env-var branch, pure-bash escape_for_json (no jq/python dependency). See NOTICE.
11
+ #
12
+ # Three emitted shapes (ONE JSON object on stdout, nothing else):
13
+ # Cursor (CURSOR_PLUGIN_ROOT set) -> {"additional_context": "<escaped>"}
14
+ # Claude Code (CLAUDE_PLUGIN_ROOT set, no Cursor)
15
+ # -> {"hookSpecificOutput":
16
+ # {"hookEventName":"SessionStart",
17
+ # "additionalContext":"<escaped>"}}
18
+ # SDK-standard (neither; e.g. COPILOT_CLI) -> {"additionalContext": "<escaped>"}
19
+ #
20
+ # Branch order: check Cursor BEFORE Claude Code — a Cursor session may also export
21
+ # CLAUDE_PLUGIN_ROOT, and Cursor's own var must win.
22
+ #
23
+ # NO-CASCADE (D-06): this script is wired ONLY under the SessionStart hook event in
24
+ # hooks/hooks.json. Subagent spawns do not fire SessionStart, so the inject cannot
25
+ # cascade into a subagent's context. (Structural guarantee; behavioral proof = P33.)
26
+
27
+ set -u
28
+
29
+ # --- Resolve the plugin root so we can locate skills/using-gdd/SKILL.md ---------
30
+ # Prefer the harness-provided roots; fall back to this script's parent dir so the
31
+ # emitter is runnable straight from hooks/ in tests and in bare shells.
32
+ SELF_DIR="$(cd "$(dirname "$0")" && pwd)"
33
+ ROOT="${CURSOR_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-${SELF_DIR}/..}}"
34
+ ROOT="${ROOT//\\//}" # normalize Windows backslashes to forward slashes
35
+ SKILL="${ROOT}/skills/using-gdd/SKILL.md"
36
+
37
+ # Defensive: if the skill file is missing we must STILL emit a syntactically valid
38
+ # JSON object (an empty additionalContext) so the SessionStart pipeline never
39
+ # breaks on a partial install. Never crash the session start.
40
+ if [[ -r "${SKILL}" ]]; then
41
+ CONTENT="$(cat "${SKILL}")"
42
+ else
43
+ CONTENT=""
44
+ fi
45
+
46
+ # --- escape_for_json (superpowers pattern; pure bash param-substitution) --------
47
+ # Order matters: backslash FIRST (so escapes we add next aren't re-escaped), then
48
+ # double-quote, then the control chars tab / carriage-return / newline. Emits the
49
+ # value WITH surrounding double-quotes so callers can splice it directly.
50
+ escape_for_json() {
51
+ local s="$1"
52
+ s="${s//\\/\\\\}" # \ -> \\
53
+ s="${s//\"/\\\"}" # " -> \"
54
+ s="${s//$'\t'/\\t}" # tab -> \t
55
+ s="${s//$'\r'/\\r}" # CR -> \r
56
+ s="${s//$'\n'/\\n}" # LF -> \n (do last: newlines are the record separator)
57
+ printf '"%s"' "$s"
58
+ }
59
+
60
+ ESCAPED="$(escape_for_json "${CONTENT}")"
61
+
62
+ # --- Branch on harness env vars and emit the matching single JSON object --------
63
+ if [[ -n "${CURSOR_PLUGIN_ROOT:-}" ]]; then
64
+ # Cursor: top-level additional_context.
65
+ printf '{"additional_context": %s}\n' "${ESCAPED}"
66
+ elif [[ -n "${CLAUDE_PLUGIN_ROOT:-}" ]]; then
67
+ # Claude Code: hookSpecificOutput envelope (mirrors hooks/gdd-decision-injector.js).
68
+ printf '{"hookSpecificOutput": {"hookEventName": "SessionStart", "additionalContext": %s}}\n' "${ESCAPED}"
69
+ else
70
+ # SDK-standard (COPILOT_CLI or none): top-level additionalContext.
71
+ printf '{"additionalContext": %s}\n' "${ESCAPED}"
72
+ fi
@@ -0,0 +1,35 @@
1
+ @echo off
2
+ REM hooks/run-hook.cmd — Windows wrapper (.cmd shim) that invokes a GDD .sh hook
3
+ REM through bash.
4
+ REM
5
+ REM Workaround for Claude Code's Windows auto-bash bug: CC can mis-handle a
6
+ REM SessionStart `command` that points directly at a `.sh` file on Windows
7
+ REM shells. This .cmd shim locates bash and runs the script explicitly, so the
8
+ REM SessionStart inject (hooks/inject-using-gdd.sh) fires on Windows too.
9
+ REM
10
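+ REM Usage: run-hook.cmd <script-name.sh> [args...] (NOTE: SHIFT does not change the all-arguments parameter used below, so <script-name.sh> is also forwarded as the script's first argument.)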
11
+ REM Default (no arg): inject-using-gdd.sh — the SessionStart using-gdd injector.
12
+ REM The host harness's env (CLAUDE_PLUGIN_ROOT / CURSOR_PLUGIN_ROOT / COPILOT_CLI)
13
+ REM is inherited by bash and drives the emitter's per-harness branch.
14
+ setlocal
15
+
16
+ REM Script to run, relative to this .cmd's own directory (%~dp0 ends with a backslash).
17
+ set "HOOK_SCRIPT=%~1"
18
+ if "%HOOK_SCRIPT%"=="" set "HOOK_SCRIPT=inject-using-gdd.sh"
19
+ if not "%~1"=="" shift
20
+
21
+ set "HOOK_PATH=%~dp0%HOOK_SCRIPT%"
22
+
23
+ REM Prefer bash on PATH; fall back to a typical Git-for-Windows install location.
24
+ where bash >nul 2>nul
25
+ if %ERRORLEVEL%==0 (
26
+ bash "%HOOK_PATH%" %*
27
+ ) else if exist "%ProgramFiles%\Git\bin\bash.exe" (
28
+ "%ProgramFiles%\Git\bin\bash.exe" "%HOOK_PATH%" %*
29
+ ) else (
30
+ REM No bash available: emit a valid empty SDK-shape JSON object so the
31
+ REM SessionStart pipeline still receives parseable output and never breaks.
32
+ echo {"additionalContext": ""}
33
+ )
34
+
35
+ endlocal
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hegemonart/get-design-done",
3
- "version": "1.31.5",
3
+ "version": "1.33.0",
4
4
  "description": "A design-quality pipeline for AI coding agents: brief, plan, implement, and verify UI work against your design system.",
5
5
  "author": "Hegemon",
6
6
  "homepage": "https://github.com/hegemonart/get-design-done",
@@ -24,7 +24,6 @@
24
24
  "recipes/",
25
25
  "docs/i18n/",
26
26
  "scripts/lib/",
27
- "scripts/mcp-servers/",
28
27
  "scripts/cli/",
29
28
  "scripts/install.cjs",
30
29
  "SKILL.md",
@@ -51,6 +50,7 @@
51
50
  "prepack": "npm run build:sdk",
52
51
  "postpack": "node scripts/build-sdk-bins.cjs --clean",
53
52
  "test": "node --test --experimental-strip-types \"test/suite/**/*.test.cjs\" \"test/suite/**/*.test.ts\"",
53
+ "test:behavior": "node scripts/run-behavior-tests.cjs",
54
54
  "typecheck": "tsc --noEmit",
55
55
  "codegen:schemas": "node --experimental-strip-types scripts/codegen-schema-types.ts",
56
56
  "lint:md": "npx --yes markdownlint-cli2 \"**/*.md\" \"#node_modules\" \"#.planning\" \"#.claude\" \"#test/fixtures/baselines\"",
@@ -10,7 +10,7 @@
10
10
  "type": {
11
11
  "type": "string",
12
12
  "minLength": 1,
13
- "description": "Free-form event type identifier. Pre-registered seeds: state.mutation, state.transition, stage.entered, stage.exited, hook.fired, error, capability_gap."
13
+ "description": "Free-form event type identifier. Pre-registered seeds: state.mutation, state.transition, stage.entered, stage.exited, hook.fired, error, capability_gap, kfm-candidate, router_pick."
14
14
  },
15
15
  "timestamp": {
16
16
  "type": "string",
@@ -181,6 +181,57 @@
181
181
  }
182
182
  },
183
183
  "description": "Phase 30.5-03 D-06 kfm-candidate payload — 7 fields, additionalProperties: false. Validated when the envelope's type === 'kfm-candidate' via the allOf[1] conditional."
184
+ },
185
+ "RouterPickPayload": {
186
+ "type": "object",
187
+ "additionalProperties": false,
188
+ "required": [
189
+ "event_id",
190
+ "source",
191
+ "picked_skill",
192
+ "context_hash",
193
+ "rank",
194
+ "alternatives",
195
+ "ts"
196
+ ],
197
+ "properties": {
198
+ "event_id": {
199
+ "type": "string",
200
+ "pattern": "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
201
+ "description": "UUIDv4 (lowercase hex, 8-4-4-4-12; the pattern checks shape only, not the version or variant bits) identifying this router_pick event. Stable across emit + read cycles."
202
+ },
203
+ "source": {
204
+ "type": "string",
205
+ "const": "router",
206
+ "description": "Phase 32-08 D-02 — the router_pick event class is emitted EXCLUSIVELY by the gdd-router skill at its resolved-pick point. No other producer is authorised."
207
+ },
208
+ "picked_skill": {
209
+ "type": "string",
210
+ "minLength": 1,
211
+ "description": "The skill or agent the router auto-picked for this intent. Phase 33 baselines per-skill auto-pick rates from this field (pick-rate regression)."
212
+ },
213
+ "context_hash": {
214
+ "type": "string",
215
+ "minLength": 1,
216
+ "description": "sha256 of the intent/context that drove the pick — NEVER the raw prompt (no PII, mirrors CapabilityGapPayload.context_hash discipline). Used by Phase 33 aggregation to cluster picks for the same context."
217
+ },
218
+ "rank": {
219
+ "type": "integer",
220
+ "minimum": 0,
221
+ "description": "Rank of the picked_skill among the candidates considered (0 = top pick). Lets Phase 33 distinguish confident top picks from close calls."
222
+ },
223
+ "alternatives": {
224
+ "type": "array",
225
+ "items": { "type": "string" },
226
+ "description": "Other candidate skill/agent names the router considered (names only — no scores, no prompt text). May be empty when the router had a single match. Surfaces which skills the router weighs but does not reach for."
227
+ },
228
+ "ts": {
229
+ "type": "string",
230
+ "format": "date-time",
231
+ "description": "ISO-8601 timestamp of the pick emission."
232
+ }
233
+ },
234
+ "description": "Phase 32-08 D-02 router_pick payload — 7 fields, additionalProperties: false, NO PII (context_hash only). Records which skill the router auto-picked per intent — the instrument that surfaces under-reached skills. Validated when the envelope's type === 'router_pick' via the allOf[2] conditional."
184
235
  }
185
236
  },
186
237
  "allOf": [
@@ -205,6 +256,17 @@
205
256
  "payload": { "$ref": "#/definitions/KfmCandidatePayload" }
206
257
  }
207
258
  }
259
+ },
260
+ {
261
+ "if": {
262
+ "properties": { "type": { "const": "router_pick" } },
263
+ "required": ["type"]
264
+ },
265
+ "then": {
266
+ "properties": {
267
+ "payload": { "$ref": "#/definitions/RouterPickPayload" }
268
+ }
269
+ }
208
270
  }
209
271
  ]
210
272
  }
@@ -0,0 +1,69 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://get-design-done.example/schemas/pressure-scenario.schema.json",
4
+ "title": "Pressure Scenario Manifest",
5
+ "description": "Contract for a Phase-33 skill-behavior pressure-scenario manifest. The runner (scripts/lib/skill-behavior/runner.cjs) loads manifests conforming to this schema, spawns a subagent against `setup_prompt` under the named `pressures`, and validates the response against the `expected_compliance` / `expected_violations` regex sources (compiled with new RegExp(source)). The 5-value `pressures` enum and the required-field set come verbatim from ROADMAP Phase-33 SC#2.",
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "required": [
9
+ "name",
10
+ "target_skill",
11
+ "pressures",
12
+ "setup_prompt",
13
+ "expected_compliance",
14
+ "expected_violations"
15
+ ],
16
+ "properties": {
17
+ "name": {
18
+ "type": "string",
19
+ "minLength": 1,
20
+ "description": "Unique scenario identifier, e.g. \"brief-time-pressure\"."
21
+ },
22
+ "target_skill": {
23
+ "type": "string",
24
+ "minLength": 1,
25
+ "description": "The skill under test, e.g. \"brief\", \"explore\", \"plan\", \"using-gdd\"."
26
+ },
27
+ "pressures": {
28
+ "type": "array",
29
+ "minItems": 1,
30
+ "description": "One or more pressure vectors applied in the setup_prompt.",
31
+ "items": {
32
+ "enum": ["time", "sunk-cost", "authority", "exhaustion", "scope-minimization"]
33
+ }
34
+ },
35
+ "setup_prompt": {
36
+ "type": "string",
37
+ "minLength": 1,
38
+ "description": "The prompt handed to the subagent — embeds the pressure(s) and asks it to act."
39
+ },
40
+ "expected_compliance": {
41
+ "type": "array",
42
+ "minItems": 1,
43
+ "description": "Regex SOURCE strings the response MUST match to count as compliant (the runner compiles each with new RegExp(source)).",
44
+ "items": { "type": "string", "minLength": 1 }
45
+ },
46
+ "expected_violations": {
47
+ "type": "array",
48
+ "description": "Regex SOURCE strings that, if matched, count as a violation (the runner compiles each with new RegExp(source)). May be empty.",
49
+ "items": { "type": "string", "minLength": 1 }
50
+ },
51
+ "description": {
52
+ "type": "string",
53
+ "description": "Optional free-text scenario note (33-03 baselines reference it)."
54
+ },
55
+ "variant": {
56
+ "type": "string",
57
+ "description": "Optional A/B variant label, e.g. \"trigger-only\" | \"what-clause\" (33-04 description-format A/B)."
58
+ },
59
+ "variants": {
60
+ "type": "array",
61
+ "description": "Optional array of A/B variant descriptors for a single-manifest A/B pair (33-04). Each item is an object, e.g. { label, description }.",
62
+ "items": { "type": "object" }
63
+ },
64
+ "body_probe": {
65
+ "type": "string",
66
+ "description": "Optional body-only probe prompt the A/B scenario asks (33-04 description-format A/B)."
67
+ }
68
+ }
69
+ }