@hegemonart/get-design-done 1.25.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +96 -0
  4. package/README.md +12 -6
  5. package/SKILL.md +3 -0
  6. package/agents/README.md +89 -0
  7. package/agents/design-reflector.md +43 -0
  8. package/agents/gdd-intel-updater.md +34 -1
  9. package/hooks/budget-enforcer.ts +143 -4
  10. package/package.json +1 -1
  11. package/reference/model-prices.md +40 -19
  12. package/reference/peer-cli-capabilities.md +151 -0
  13. package/reference/peer-protocols.md +266 -0
  14. package/reference/prices/antigravity.md +21 -0
  15. package/reference/prices/augment.md +21 -0
  16. package/reference/prices/claude.md +42 -0
  17. package/reference/prices/cline.md +23 -0
  18. package/reference/prices/codebuddy.md +21 -0
  19. package/reference/prices/codex.md +25 -0
  20. package/reference/prices/copilot.md +21 -0
  21. package/reference/prices/cursor.md +21 -0
  22. package/reference/prices/gemini.md +25 -0
  23. package/reference/prices/kilo.md +21 -0
  24. package/reference/prices/opencode.md +23 -0
  25. package/reference/prices/qwen.md +25 -0
  26. package/reference/prices/trae.md +23 -0
  27. package/reference/prices/windsurf.md +21 -0
  28. package/reference/registry.json +121 -1
  29. package/reference/runtime-models.md +446 -0
  30. package/reference/schemas/runtime-models.schema.json +123 -0
  31. package/scripts/install.cjs +8 -0
  32. package/scripts/lib/bandit-router.cjs +214 -7
  33. package/scripts/lib/budget-enforcer.cjs +514 -0
  34. package/scripts/lib/cost-arbitrage.cjs +294 -0
  35. package/scripts/lib/event-stream/index.ts +14 -1
  36. package/scripts/lib/event-stream/types.ts +125 -1
  37. package/scripts/lib/install/installer.cjs +188 -11
  38. package/scripts/lib/install/parse-runtime-models.cjs +267 -0
  39. package/scripts/lib/install/runtimes.cjs +101 -0
  40. package/scripts/lib/peer-cli/acp-client.cjs +375 -0
  41. package/scripts/lib/peer-cli/adapters/codex.cjs +101 -0
  42. package/scripts/lib/peer-cli/adapters/copilot.cjs +79 -0
  43. package/scripts/lib/peer-cli/adapters/cursor.cjs +78 -0
  44. package/scripts/lib/peer-cli/adapters/gemini.cjs +81 -0
  45. package/scripts/lib/peer-cli/adapters/qwen.cjs +72 -0
  46. package/scripts/lib/peer-cli/asp-client.cjs +587 -0
  47. package/scripts/lib/peer-cli/broker-lifecycle.cjs +406 -0
  48. package/scripts/lib/peer-cli/registry.cjs +434 -0
  49. package/scripts/lib/peer-cli/spawn-cmd.cjs +149 -0
  50. package/scripts/lib/runtime-detect.cjs +96 -0
  51. package/scripts/lib/session-runner/index.ts +215 -0
  52. package/scripts/lib/session-runner/types.ts +60 -0
  53. package/scripts/lib/tier-resolver.cjs +311 -0
  54. package/scripts/validate-frontmatter.ts +297 -2
  55. package/skills/peer-cli-add/SKILL.md +170 -0
  56. package/skills/peer-cli-customize/SKILL.md +110 -0
  57. package/skills/peers/SKILL.md +101 -0
  58. package/skills/router/SKILL.md +51 -2
@@ -5,14 +5,14 @@
5
5
  },
6
6
  "metadata": {
7
7
  "description": "Get Design Done — 5-stage agent-orchestrated design pipeline with 9 connections, handoff-first workflow, bidirectional Figma write-back, 22+ specialized agents, queryable knowledge layer (intel store, dependency analysis, learnings extraction), and a self-improvement loop (reflector, frontmatter + budget feedback, global-skills layer). v1.20.0 ships the SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream, and resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) for rate-limit + 429 + context-overflow recovery. Full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation (auto-tag + GitHub Release + release-time smoke test).",
8
- "version": "1.25.0"
8
+ "version": "1.27.0"
9
9
  },
10
10
  "plugins": [
11
11
  {
12
12
  "name": "get-design-done",
13
13
  "source": "./",
14
14
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), Claude Design handoff, bidirectional Figma write-back, and a queryable intel store (.design/intel/) for dependency and learnings queries. Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows) and release automation. Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
15
- "version": "1.25.0",
15
+ "version": "1.27.0",
16
16
  "author": {
17
17
  "name": "hegemonart"
18
18
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "get-design-done",
3
3
  "short_name": "gdd",
4
- "version": "1.25.0",
4
+ "version": "1.27.0",
5
5
  "description": "Agent-orchestrated 5-stage design pipeline: Brief → Explore → Plan → Design → Verify. 22+ specialized agents, 9 connections (Figma, Refero, Preview, Storybook, Chromatic, Figma Writer, Graphify, Pinterest, Claude Design), handoff-first workflow via Claude Design bundles, bidirectional Figma write-back (annotations, Code Connect), queryable intel store (`.design/intel/`) for O(1) design surface lookups, and self-improvement loop (reflector agent, frontmatter + budget feedback, global-skills layer at `~/.claude/gdd/global-skills/`). Standalone commands: style, darkmode, compare, figma-write, graphify, handoff, analyze-dependencies, skill-manifest, extract-learnings, reflect, apply-reflections. Embeds NNG heuristics, WCAG thresholds, typographic systems, motion framework, and anti-pattern catalog. Ships with a full CI/CD pipeline (Node 22/24 × Linux/macOS/Windows, lint + schema + frontmatter + stale-ref + shellcheck + gitleaks + injection-scan + blocking size-budget) and release automation (auto-tag + GitHub Release + release-time smoke test). Optimization layer (v1.0.4.1, retroactive): gdd-router + gdd-cache-manager skills, PreToolUse budget-enforcer hook, tier-aware agent frontmatter, lazy checker gates, streaming synthesizer, /gdd:warm-cache + /gdd:optimize commands, and cost telemetry at .design/telemetry/costs.jsonl — targeting 50-70% per-task token-cost reduction with no quality-floor regression. v1.20.0 SDK foundation: gdd-state MCP server (11 typed tools), lockfile-safe STATE.md mutations, event stream at .design/telemetry/events.jsonl, resilience primitives (jittered-backoff, rate-guard, error-classifier, iteration-budget) with rate-limit + 429 + context-overflow recovery, and TypeScript toolchain.",
6
6
  "author": {
7
7
  "name": "hegemonart",
package/CHANGELOG.md CHANGED
@@ -4,6 +4,102 @@ All notable changes to get-design-done are documented here. Versions follow [sem
4
4
 
5
5
  ---
6
6
 
7
+ ## [1.27.0] — 2026-04-30
8
+
9
+ Phase 27 Peer-CLI Delegation Layer milestone — closes the **outbound** half of multi-runtime. Phase 24 made gdd installable on 14 runtimes; Phase 21 made the same pipeline run on each; Phase 26 made tier→model resolve correctly per runtime. v1.27.0 adds the missing piece: gdd agents OPTIONALLY delegate to local peer CLIs (Codex via App Server Protocol; Gemini/Cursor/Copilot/Qwen via Agent Client Protocol) when measurably cheaper or higher-quality for the role. Falls back to in-process Anthropic SDK when peer is unavailable. Honors Phase 26 tier maps + Phase 22 event chain + Phase 23.5 bandit posterior — `delegate?` becomes another arm in `(agent_type × tier × delegate)` Thompson sampling, no new ML.
10
+
11
+ ### Added
12
+
13
+ - **ACP client** — `scripts/lib/peer-cli/acp-client.cjs` (Plan 27-01, commit `4a2d201`). Line-delimited JSON-RPC over stdio for Gemini/Cursor/Copilot/Qwen. `initialize` handshake, `prompt` method, notification stream, 16 MiB line-buffer overflow guard. 7 tests pass.
14
+ - **ASP client** — `scripts/lib/peer-cli/asp-client.cjs` (Plan 27-02, commit `06fcdf6`). Codex App Server Protocol. `threadStart` + `threadResume` + `turn` lifecycle. `service_name = "gdd_peer_delegation"`. Error path resolves with `{status:"error"}` rather than throwing. 12 tests pass.
15
+ - **spawn-cmd Windows fix + broker-lifecycle** — `scripts/lib/peer-cli/spawn-cmd.cjs` + `broker-lifecycle.cjs` (Plan 27-03, commit `f9228cf`). `.cmd` EINVAL workaround per cc-multi-cli `transport-decisions.md` (D-04). Long-lived broker per `(peer, workspace)` over Unix socket on POSIX, named pipe on Windows (D-03). 18 tests pass.
16
+ - **Per-peer adapters** — `scripts/lib/peer-cli/adapters/{codex,gemini,cursor,copilot,qwen}.cjs` (Plan 27-04, commit `d58ab4f`). 5 thin wrappers with role→prompt-prefix maps + slash-command translation. 39 tests pass.
17
+ - **Registry + capability matrix** — `scripts/lib/peer-cli/registry.cjs` + `reference/peer-cli-capabilities.md` (Plan 27-05, commit `6ef4d27`). `findPeerFor(role, tier)` central dispatch, per-peer health probe, deterministic alphabetical tie-break. Per-peer capability matrix (D-05): codex→execute, gemini→research/exploration, cursor→debug/plan, copilot→review/research, qwen→write. 32 tests pass.
18
+ - **Agent `delegate_to:` frontmatter + session-runner peer-first dispatch** — `scripts/validate-frontmatter.ts` + `scripts/lib/session-runner/index.ts` (Plan 27-06, commit `4644d8e`). Optional additive frontmatter field; values are `<peer>-<role>` per capability matrix or `none` (explicit opt-out). Session-runner tries delegate first; transparent fallback on peer-absent OR peer-error per D-07. 8 tests pass.
19
+ - **Bandit posterior `delegate?` context dimension** — `scripts/lib/bandit-router.cjs` (Plan 27-07, commit `824bcf5`). Arm space expands from `(agent_type, touches_size_bin)` to `(agent_type, touches_size_bin, delegate)` where `delegate ∈ {none, gemini, codex, cursor, copilot, qwen}`. Bootstrap: existing priors carry forward as the `none` arm; 5 delegation arms start neutral. Reward signal unchanged (two-stage lexicographic). 12 tests pass.
20
+ - **Event chain `runtime_role` + `peer_id` + `peer_call_*` event types** — `scripts/lib/event-stream/types.ts` + `index.ts` + `scripts/lib/budget-enforcer.cjs` (Plan 27-08, commit `fd561ed`). Additive Phase 22 extension — every event optionally tags `runtime_role: "host" | "peer"` (defaults `"host"` for back-compat) and `peer_id`. 3 new event types: `peer_call_started`, `peer_call_complete`, `peer_call_failed`. costs.jsonl cost rows tag the same fields so Phase 26's reflector cost-arbitrage extends naturally. 8 tests pass.
21
+ - **`/gdd:peers` capability matrix command** — `skills/peers/SKILL.md` (Plan 27-09, commit `51ae40e`). Single-command discoverability — markdown table with peer × installed? × allowlisted? × claimed roles × posterior delta vs local.
22
+ - **`peer-cli-customize` + `peer-cli-add` skills** — `skills/peer-cli-customize/SKILL.md` + `skills/peer-cli-add/SKILL.md` (Plan 27-10, commit `4f07daf`). Customize rewires per-agent `delegate_to:` mappings; add walks the verification ladder for adding a brand-new peer (Apache 2.0 attribution comment in each — see NOTICE).
23
+ - **`peerBinary?` field on runtimes + detection helpers** — `scripts/lib/install/runtimes.cjs` (Plan 27-11, commit `0e2fb92`). 5 peer-capable runtimes (codex, gemini, cursor, copilot, qwen) gain platform-aware `peerBinary` paths. `listPeerCapableRuntimes()` and `detectInstalledPeers()` exported for `/gdd:peers` and the install-time nudge. 8 tests pass.
24
+
25
+ ### Tests
26
+
27
+ - 11 new test files (148 total new test cases) covering protocol clients, registry, adapters, frontmatter delegation, bandit dimension, event tagging, peer detection, end-to-end peer-call flow, and the phase-27 baseline.
28
+ - Phase 24/25/26 baseline tests refactored to be **version-agnostic** (D-12) — they read `package.json#version` dynamically and assert all 4 manifests align. No more literal-version hardcodes that break every closeout (Phase 26 lesson applied).
29
+
30
+ ### Decisions
31
+
32
+ D-01 through D-14 — see `.planning/phases/27-peer-cli-delegation/CONTEXT.md` for the full decision register. Highlights:
33
+
34
+ - **D-01 / D-02** — ACP for 4 peers, ASP for Codex. Port shapes from cc-multi-cli (Apache 2.0) with `NOTICE` attribution; do NOT vendor their hub (Claude-as-host assumption is incompatible with our any-runtime-as-host model).
35
+ - **D-03 / D-04** — Long-lived broker per `(peer, workspace)`. Windows `.cmd` EINVAL workaround documented in `spawn-cmd.cjs` so future maintainers don't "clean it up".
36
+ - **D-05** — Per-peer capability matrix is the dispatch source-of-truth. Roles a peer doesn't claim → registry refuses dispatch.
37
+ - **D-06 / D-07** — `delegate_to:` is additive optional; `none` is explicit opt-out; fallback on peer-absent OR peer-error is transparent to the calling skill.
38
+ - **D-08** — Bandit posterior gains `delegate?` dimension. 6× context expansion (~78 → ~468 contexts). Bootstrap discipline: existing priors carry forward as `delegate=none`; 5 delegation arms start neutral.
39
+ - **D-09** — Event chain extension is additive; `runtime_role` defaults `"host"` for back-compat; only the new `peer_call_*` events MUST carry `"peer"`.
40
+ - **D-10 / D-11** — `/gdd:peers` is single-command discoverability; install-time nudge is OPT-IN (default empty `enabled_peers`); `--no-peer-prompt` flag suppresses for CI.
41
+ - **D-12** — Version-agnostic baseline tests (Phase 26 closeout lesson). `phase-NN-baseline.test.cjs` no longer hardcodes literal versions.
42
+ - **D-14** — `NOTICE` Apache 2.0 attribution for cc-multi-cli ships with v1.27.0 (mandatory per Apache 2.0 §4 for derivative-shape code).
43
+
44
+ ### Documentation
45
+
46
+ - `docs/PEER-DELEGATION.md` (new) — ops guide covering when delegation fires, how to disable per-peer, fallback diagnostics, broker lifecycle troubleshooting, and Windows `.cmd` quirks.
47
+ - `reference/peer-protocols.md` (new) — ACP + ASP protocol cheat sheet for protocol authors and skill writers.
48
+ - `agents/README.md` — new `Peer-CLI delegation (delegate_to)` section documenting the field, valid values, opt-in gating, telemetry hookup, and cross-references.
49
+ - `NOTICE` (new) — Apache 2.0 attribution for cc-multi-cli's `acp-client.mjs`, `asp-client.mjs`, `transport-decisions.md`, and `customize` / `multi-cli-anything` skill patterns.
50
+
51
+ ### Known gaps
52
+
53
+ - Plan 27-11's interactive post-install peer-detection prompt (the `npx get-design-done` UX nudge: "I see you have Codex + Gemini installed — wire them as peers? [y/N]") is documented in CONTEXT.md D-11 but not yet wired into `scripts/install.cjs`. The detection helper (`detectInstalledPeers()`) IS shipped — what's missing is the `@clack/prompts` interactive integration. Users can manually populate `.design/config.json#peer_cli.enabled_peers` for now; `/gdd:peers` shows the current state. Tracked for Phase 28 hygiene or a 1.27.x patch if demand warrants.
54
+
55
+ ---
56
+
57
+ ## [1.26.0] — 2026-04-29
58
+
59
+ Phase 26 Headless Model Resolver milestone — closes the model-selection gap left by Phase 24's distribution headlessness. `default-tier: opus|sonnet|haiku` frontmatter now actually does something on the 13 non-Claude runtimes the multi-runtime installer ships to. Three layers gain runtime-awareness without a breaking change: the agent frontmatter (additive `reasoning-class` alias), the router output (additive `resolved_models` field), and the cost telemetry (per-runtime price tables + runtime-tagged events.jsonl rows). The phase ships **structure** — adapter layer, resolvers, schemas, contracts — not editorial picks for which model each runtime treats as opus/sonnet/haiku; those come from runtime adapter authors with provenance citations baked into `reference/runtime-models.md`.
60
+
61
+ ### Added
62
+
63
+ - **Per-runtime tier→model adapter source-of-truth** — `reference/runtime-models.md` ships the canonical map for all 14 runtimes (claude, codex, gemini, qwen, kilo, copilot, cursor, windsurf, antigravity, augment, trae, codebuddy, cline, opencode). Each row carries `tier_to_model` (`opus`/`sonnet`/`haiku`), `reasoning_class_to_model` (`high`/`medium`/`low`), and a `provenance` array (source URL + retrieval timestamp + last-validated cycle) per D-01. Schema lives at `reference/schemas/runtime-models.schema.json` with `$schema_version: 1` for forward-compatible bumps (D-03). Pure-JS strict validator at `scripts/lib/install/parse-runtime-models.cjs` — no `ajv` dependency at the parser layer; install-time validation catches typos before runtime. Canonical seed picks per D-02: `claude → claude-opus-4-7 / claude-sonnet-4-7 / claude-haiku-4-5`, `codex → gpt-5 / gpt-5-mini / gpt-5-nano`, `gemini → gemini-2.5-pro / gemini-2.5-flash / gemini-2.5-flash-lite`, `qwen → qwen3-max / qwen3-plus / qwen3-flash`. (Plan 26-01, commit `5541086`)
64
+
65
+ - **`tier-resolver.cjs` + `runtime-detect.cjs`** — `scripts/lib/tier-resolver.cjs` exports `resolve(runtime, tier, opts?) → model-string | null` translating frontmatter tier vocabulary into the concrete model name a specific runtime understands. Fallback chain per D-04: (1) runtime-specific entry → use; (2) claude row → use with `tier_resolution_fallback` event; (3) null + `tier_resolution_failed` event. Never throws — null is a valid output the consumer must handle. `scripts/lib/runtime-detect.cjs` exports `detect()` which reads the same `*_CONFIG_DIR` / `*_HOME` env-var chain Phase 24's installer uses (D-05); the env-var → runtime-ID mapping is owned by `scripts/lib/install/runtimes.cjs` and re-derived here so adding a runtime in one place automatically extends detection. Returns null when no recognized env-var is set (e.g. CI matrix, bare Node script). (Plan 26-02, commits `4bf7dea`, `c0bbae3`)
66
+
67
+ - **Installer emits `models.json` per runtime config-dir** — `scripts/lib/install/runtimes.cjs` gains a `tier_to_model` field; `installer.cjs` writes a `models.json` payload at install time per runtime config-dir per D-06: `{ tier_to_model, reasoning_class_to_model, runtime, schema_version: 1, generated_at: <ISO>, source: "reference/runtime-models.md" }`. `--dry-run` shows the same set without writing; `uninstall` removes the file (clean uninstall guarantee from Phase 24 carries forward). One file per config-dir means runtime harnesses can read it at session start without parsing markdown. (Plan 26-03, commit `2ab47cf`)
68
+
69
+ - **Router emits `resolved_models` field** — `skills/router/SKILL.md` JSON output gains `resolved_models: { "agent_name": "concrete-model-id", … }` next to the existing `model_tier_overrides` per D-07. Strict superset over v1.25.0: existing consumers reading `model_tier_overrides` keep working unchanged (enum stays `opus|sonnet|haiku` for back-compat across all 14 runtimes); new consumers (budget-enforcer cost computation, Phase 22 cost telemetry, Phase 23.5 bandit posterior store) read `resolved_models` for runtime-correct cost. Output schema versioning table bumped: `resolved_models` lands at v1.26.0 (26-04), `complexity_class` (Phase 25) and `model_tier_overrides` (legacy) preserved unchanged. (Plan 26-04, commit `eb38d4e`)
70
+
71
+ - **Per-runtime price tables + budget-enforcer shared backend** — `reference/model-prices.md` becomes a router that links to per-runtime sub-tables under `reference/prices/`: `claude.md` (Anthropic), `codex.md` (OpenAI Codex gpt-5 family), `gemini.md` (Google Gemini 2.5 family), `qwen.md` (Alibaba Qwen 3 family) carry confirmed prices; the remaining 10 runtimes ship as stubs with provenance citation TODOs per D-08. `scripts/lib/budget-enforcer.cjs` exports `computeCost({ model_id?, tier?, runtime, tokens_in, tokens_out, cache_hit? })` with the four-step lookup order (runtime price-table by model_id → runtime by tier → claude fallback by model_id → claude by tier → null + diagnostic reason). `hooks/budget-enforcer.ts` reaches into the shared backend via `createRequire` — same scheme as `rate-guard.cjs`. Cost telemetry events.jsonl rows tag `runtime` (Phase 22 event chain), so the cost-aggregator rolls up per-runtime AND per-tier for apples-to-apples comparison. (Plan 26-05, commit `57bf43e`)
72
+
73
+ - **Reflector cross-runtime cost-arbitrage** — `scripts/lib/cost-arbitrage.cjs` and reflector wiring surface a structured proposal when one runtime's spend exceeds another's by >50% on the same `(agent, tier)` per D-09. Mixed-runtime cycle history (some agent spawns ran in CC, others in Codex within the same cycle) is handled without crash or per-runtime double-count. Reflector emits `runtime_arbitrage_signal` events with both runtime IDs, the agent/tier pair, the observed spread, and the recommended cheaper-runtime tag. The 50% threshold is a starting heuristic — bandit-style learning over arbitrage outcomes is Phase 23.5+ territory. (Plan 26-06, commit `5de824c`)
74
+
75
+ - **`reasoning-class` runtime-neutral frontmatter alias** — `agents/README.md` documents `reasoning-class: high|medium|low` as an additive alias for `default-tier` per D-10. v1.26 ships the alias with full equivalence semantics (`high ↔ opus`, `medium ↔ sonnet`, `low ↔ haiku`) but does not deprecate `default-tier`. Both fields may coexist on the same agent; mismatched dual annotations are a validation error (D-11). Long-term winner is data-gated: alias adoption signal measured by `gdd-intel-updater` on `agents/*.md` changes; if alias share stays below 50% by Phase 28, `default-tier` is canonical and alias is deprecated; if alias wins majority, the reverse. Same evidence-gating discipline as Phase 23.5's deferred items. (Plan 26-07, commit `be3e590`)
76
+
77
+ - **Frontmatter validator + intel-updater integration** — `scripts/validate-frontmatter.ts` accepts optional `reasoning-class` enum; if both `default-tier` and `reasoning-class` are present, equivalence is enforced (`high+opus` / `medium+sonnet` / `low+haiku` — mismatch is a validation error per D-11). `gdd-intel-updater` re-runs on changes under `agents/*.md` to keep `.design/intel/agent-tiers.json` current with **both** fields populated for downstream tooling. Tests assert tier↔class equivalence across all 26 agents. (Plan 26-08, commit `14afa72`)
78
+
79
+ - **`docs/MULTI-RUNTIME-MODELS.md`** — Plan 26-09 ships an ops guide covering: how to add a new runtime tier-map (edit `reference/runtime-models.md`, follow schema, run the parser test), the `reasoning-class ↔ default-tier` equivalence table, the `tier-resolver.cjs` fallback chain (runtime entry → claude row + warn event → null + fail event), how cost telemetry rolls up (per-runtime + per-tier), and the future `budget.json#runtime_overrides.<runtime>.tier_to_model` per-runtime override hook.
80
+
81
+ ### Tests
82
+
83
+ - `tests/runtime-models-schema.test.cjs` (new) — calls `parseRuntimeModels()` from the dependency-free pure-JS parser at `scripts/lib/install/parse-runtime-models.cjs` (no `ajv` pulled in — the parser does strict validation natively), asserts `$schema_version === 1`, all 14 runtime IDs from `runtimes.cjs` present, canonical seed picks correct (claude→claude-opus-4-7, codex→gpt-5, gemini→gemini-2.5-pro, qwen→qwen3-max), and provenance fields present per row.
84
+ - `tests/router-resolved-models.test.cjs` (new) — content-level assertions on `skills/router/SKILL.md`: `resolved_models` mentioned in the JSON example, in the field docstring, and at v1.26.0 in the Output schema versioning table; `complexity_class` (Phase 25) still mentioned (no regression); `model_tier_overrides` still mentioned (back-compat).
85
+ - `tests/budget-enforcer-runtime-aware.test.cjs` (new) — pure-function tests of `scripts/lib/budget-enforcer.cjs#computeCost()`: codex/gpt-5-mini path returns cost from `reference/prices/codex.md`; claude/opus path returns cost from `reference/prices/claude.md`; missing-runtime / missing-tier falls back to claude with the `fallback: true` flag; cache-hit path swaps `cached_input_per_1m` for `input_per_1m`.
86
+ - `tests/phase-26-baseline.test.cjs` (new) — same shape as `phase-25-baseline.test.cjs`. Asserts all 9 plans landed (runtime-models source + tier-resolver + runtime-detect + installer models.json + router resolved_models + budget-enforcer + cost-arbitrage + reasoning-class alias + frontmatter validator extension) plus all 4 manifests align at 1.26.0 + CHANGELOG `## [1.26.0]` block exists.
87
+ - `tests/semver-compare.test.cjs` `OFF_CADENCE_VERSIONS` gains `1.26.0` with the milestone summary.
88
+
89
+ ### Decisions
90
+
91
+ D-01 through D-13 — see `.planning/phases/26-headless-model-resolver/CONTEXT.md` for the full register. Highlights:
92
+ - **D-01** — `reference/runtime-models.md` is the single source of truth for all 14 runtimes; each row carries provenance (URL + retrieval timestamp + last-validated cycle) so the future authority-watcher can flag drift.
93
+ - **D-04** — `tier-resolver.cjs` fallback chain is non-blocking: runtime entry → claude row + warning event → null + fail event. Never throws; null is a valid output the consumer must handle.
94
+ - **D-05** — `runtime-detect.cjs` reuses Phase 24's env-var → runtime-ID mapping verbatim; single source of truth lives in `runtimes.cjs`. Adding a new runtime extends both detection and installation.
95
+ - **D-07** — `resolved_models` is additive to `model_tier_overrides` — strict superset, same back-compat discipline as Phase 25's `complexity_class` next to `path`.
96
+ - **D-08** — Cost telemetry split: `reference/model-prices.md` becomes a router; per-runtime sub-tables under `reference/prices/<runtime>.md`. events.jsonl rows tag `runtime`. Aggregation rolls up per-runtime AND per-tier.
97
+ - **D-10** — `reasoning-class` is additive, NOT a replacement for `default-tier`. Both may coexist; equivalence is enforced. Deprecation is data-gated (Phase 28 measurement).
98
+ - **D-12** — All 9 plans land together with one CHANGELOG block. 4 manifests bump in lockstep (`package.json` + `.claude-plugin/plugin.json` + `.claude-plugin/marketplace.json` × 2 slots + `tests/semver-compare.test.cjs` `OFF_CADENCE_VERSIONS`).
99
+ - **D-13** — Plan boundary discipline: Wave A (26-01..26-03) builds the adapter; Wave B (26-04..26-06) wires the existing pipeline; Wave C (26-07..26-08) lands the runtime-neutral alias additively; Wave D (26-09) closes out.
100
+
101
+ ---
102
+
7
103
  ## [1.25.0] — 2026-04-29
8
104
 
9
105
  Phase 25 Pipeline Hardening milestone — converts four pipeline gaps surfaced in the post-Phase-24 retrospective from side roads into first-class pipeline citizens: a prototype gate that makes sketches/spikes a read/write member of the decision graph, an S/M/L/XL complexity refinement to the router that distinguishes trivial from full-pipeline work, a Stage 4.5 quality gate that runs lint/typecheck/test between Design and Verify, and a Stop-hook turn closeout that closes the events.jsonl gap at turn-end. All four sub-features are additive — no state-machine break (5 stages stay 5 stages), no breaking router contract (`path: fast|quick|full` is preserved alongside the new `complexity_class`), and the existing budget-enforcer / verify-entry / decision-injector consumers gain the new fields without a code change to their existing call sites.
package/README.md CHANGED
@@ -87,17 +87,23 @@ Use it when you care that tokens match, contrast passes WCAG, motion feels cohes
87
87
 
88
88
  You do not need to be a designer to benefit from it. The pipeline carries the design discipline into the agent workflow: it extracts context, asks only for missing decisions, grounds the work in references, and catches the issues people usually find too late.
89
89
 
90
- ### v1.25.0 Highlights — Pipeline Hardening
90
+ ### v1.27.0 Highlights — Peer-CLI Delegation Layer
91
91
 
92
- Four pipeline gaps surfaced in the post-Phase-24 retrospective land as first-class pipeline citizens. All four are additive — no state-machine break, no breaking router contract.
92
+ Closes the **outbound** half of multi-runtime: gdd agents now OPTIONALLY delegate to local peer CLIs (Codex via App Server Protocol; Gemini/Cursor/Copilot/Qwen via Agent Client Protocol) when measurably cheaper or higher-quality for the role. Falls back to in-process Anthropic SDK when peer is unavailable. Honors v1.26.0's tier maps + v1.22.0's event chain + v1.23.5's bandit posterior — `delegate?` becomes another arm in `(agent_type × tier × delegate)` Thompson sampling.
93
93
 
94
- - **Prototype gate** — sketches and spikes become read/write members of the decision graph. New `prototype-gate` Haiku agent emits a recommend/skip verdict at two firing points (post-explore for sketches, post-plan for spikes); `sketch-wrap-up` and `spike-wrap-up` close the loop with a coupled D-XX + `<prototyping>` outcome write. The `decision-injector` surfaces top-N prior outcomes when downstream agents read planning files. STATE.md gains a `<prototyping>` block that round-trips byte-identically through the parser/serializer.
95
- - **S/M/L/XL complexity buckets** — router heuristic table refined from 3 to 4 tiers so telemetry distinguishes trivial from full-pipeline. JSON output now carries `complexity_class` (`S | M | L | XL`) next to the existing `path` (`fast | quick | full`) — strict superset, no breaking change for existing consumers. Canonical mapping: `/gdd:help`→S (short-circuit), `/gdd:scan`→M, standalone `/gdd:plan`→L, `/gdd:next` autonomous flow→XL. Budget enforcer reads class-specific caps from `.design/budget.json#class_caps_usd` when present.
96
- - **Quality gate (Stage 4.5)** — new lint/typecheck/test/visual-regression gate sits between `/gdd:design` and `/gdd:verify`. Detects commands automatically from `package.json#scripts` (or honors `.design/config.json#quality_gate.commands` when declared); excludes `test:e2e` for speed. Bounded fix loop (default `max_iters: 3`) reuses `design-fixer`. Timeout warns and proceeds (non-blocking on slow suites); failures mark STATE `<quality_gate>` block status=fail and verify entry refuses. Six lifecycle events emitted to `events.jsonl`.
97
- - **Turn closeout (Stop hook)** — new `gdd-turn-closeout.js` Stop hook fires when the assistant turn ends. Reads STATE.md + tails events.jsonl; if mid-pipeline and the last event is stale, appends a `turn_end` event and surfaces a stage-completion or paused-mid-task nudge. ≤10ms typical latency budget. Portable Skill mirror at `skills/turn-closeout/SKILL.md` covers the 13 non-Claude runtimes that lack a Stop hook surface.
94
+ - **ACP + ASP protocol clients** — `scripts/lib/peer-cli/{acp-client,asp-client}.cjs` ship line-delimited JSON-RPC over stdio with 16 MiB line-buffer overflow guards. ACP serves Gemini/Cursor/Copilot/Qwen; ASP serves Codex (thread-oriented, multi-turn). Protocol shapes adapted from `greenpolo/cc-multi-cli` (Apache 2.0, see `NOTICE`). Long-lived broker per `(peer, workspace)` over Unix socket / named pipe — cold-spawn cost amortized across delegated calls in a cycle.
95
+ - **5 per-peer adapters + central registry** — `scripts/lib/peer-cli/adapters/{codex,gemini,cursor,copilot,qwen}.cjs` thin-wrap the protocol clients with role→prompt-prefix maps and slash-command translation. Central dispatch via `registry.cjs#findPeerFor(role, tier)` consults the locked capability matrix (codex→execute; gemini→research/exploration; cursor→debug/plan; copilot→review/research; qwen→write).
96
+ - **`delegate_to:` agent frontmatter** — additive optional field; values are `<peer>-<role>` per matrix or `none` for explicit opt-out. Session-runner tries delegate first; transparent fallback on peer-absent OR peer-error. `peer_call_failed` event logs to `events.jsonl` for reflector telemetry. v1.27.0 ships the field but no agent in the fleet uses it yet — opt-in per agent via `/gdd:run-skill peer-cli-customize`.
97
+ - **Bandit posterior `delegate?` dimension** — Phase 23.5's `(agent_type, touches_size_bin)` arm space expands to `(agent_type, touches_size_bin, delegate)` where `delegate ∈ {none, gemini, codex, cursor, copilot, qwen}`. Existing priors carry forward as the `none` arm (no behavior change for unconfigured agents); 5 delegation arms start neutral. Reward signal unchanged (two-stage lexicographic). The bandit measures and learns which delegations actually pay off over cycles.
98
+ - **Event chain `runtime_role` + `peer_id` tags** — additive Phase 22 extension. `events.jsonl` rows optionally tag `runtime_role: "host" | "peer"` (defaults `"host"` for back-compat) and `peer_id`. 3 new event types: `peer_call_started`, `peer_call_complete`, `peer_call_failed`. costs.jsonl threads the same fields so v1.26's cross-runtime cost-arbitrage reflector extends to host-vs-peer arbitrage naturally.
99
+ - **`/gdd:peers` discoverability + skills for setup** — `/gdd:peers` shows a single-command capability matrix (peer × installed? × allowlisted? × claimed roles × posterior delta vs local). `peer-cli-customize` rewires per-agent `delegate_to:` mappings; `peer-cli-add` walks the verification ladder for adding a brand-new peer (model-ID `-preview`-suffix trap, Windows `.cmd` quirks, 3-file footprint). Install-time peer-detection helpers (`detectInstalledPeers()`) ship in `runtimes.cjs` ready for the interactive nudge (deferred to a 1.27.x patch).
100
+
101
+ See [docs/PEER-DELEGATION.md](docs/PEER-DELEGATION.md) for the ops guide (when delegation fires, fallback diagnostics, broker lifecycle, Windows quirks) and [reference/peer-protocols.md](reference/peer-protocols.md) for the ACP + ASP protocol cheat sheet.
98
102
 
99
103
  ### Previous releases
100
104
 
105
+ - **v1.26.0** — Headless Model Resolver (per-runtime tier→model map, `resolved_models` router field, per-runtime price tables, `reasoning-class` runtime-neutral alias).
106
+ - **v1.25.0** — Pipeline Hardening (prototype gate + STATE `<prototyping>` block, router S/M/L/XL `complexity_class`, quality-gate Stage 4.5, Stop-hook turn closeout).
101
107
  - **v1.24.0** — Multi-Runtime Installer (`@clack/prompts` interactive multi-select for all 14 runtimes, idempotent + foreign-AGENTS.md-safe, scripted CI surface preserved 1:1).
102
108
  - **v1.23.5** — No-Regret Adaptive Layer (Thompson sampling bandit + AdaNormalHedge ensemble + MMR rerank; single-user via informed-prior bootstrap, no opt-in telemetry).
103
109
  - **v1.23.0** — SDK Domain Primitives (solidify-with-rollback gate, JSON output contracts, auto-crystallization of `Touches:` patterns).
package/SKILL.md CHANGED
@@ -89,6 +89,9 @@ Each stage produces artifacts in `.design/` inside the current project.
89
89
  | `skill-manifest [--refresh]` | `get-design-done:skill-manifest` | List or refresh the local skill manifest used by the router for discovery |
90
90
  | `quality-gate` | `get-design-done:quality-gate` | Phase 25 — parallel lint/type/test/visual command runner; classifies failures via quality-gate-runner agent |
91
91
  | `turn-closeout` | `get-design-done:turn-closeout` | Phase 25 — Stop-hook mirror skill; finalizes per-turn STATE blocks and emits closeout events |
92
+ | `peers` | `get-design-done:peers` | Phase 27 — `/gdd:peers` capability matrix command; shows installed peer-CLIs (codex/gemini/cursor/copilot/qwen), allowlist status, claimed roles, posterior delta vs local |
93
+ | `peer-cli-customize` | `get-design-done:peer-cli-customize` | Phase 27 — rewire role→peer mappings on a per-agent basis (edits frontmatter `delegate_to:` directly) |
94
+ | `peer-cli-add` | `get-design-done:peer-cli-add` | Phase 27 — guided ladder for adding a brand-new peer (verification ladder + adapter scaffolding + capability-matrix update) |
92
95
  | `watch-authorities [--refresh] [--since <date>] [--feed <name>] [--schedule <cadence>]` | `get-design-done:gdd-watch-authorities` | Run design-authority-watcher — fetch curated feeds, diff snapshot, classify new entries → `.design/authority-report.md` (consumed by `/gdd:reflect`) |
93
96
  | `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
94
97
  | `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
package/agents/README.md CHANGED
@@ -64,6 +64,95 @@ color: blue
64
64
 
65
65
  ---
66
66
 
67
+ ## Runtime-neutral reasoning class (alias for default-tier)
68
+
69
+ **Phase 26 (v1.26.0).** Agents may carry an optional `reasoning-class: high|medium|low` field as a runtime-neutral alias for `default-tier`. The alias exists because `default-tier`'s enum (`opus|sonnet|haiku`) hard-codes Anthropic model names, while the multi-runtime installer (Phase 24) ships agents to 14 runtimes whose authors do not all use those names. `reasoning-class` describes the *reasoning density* the agent needs without naming a vendor's model lineup.
70
+
71
+ **This field is additive, not a replacement.** `default-tier: opus|sonnet|haiku` remains the authoritative, required field for v1.26 and is the source of truth that `hooks/budget-enforcer.ts`, `skills/router/SKILL.md`, and `agents/gdd-intel-updater.md` read. Both fields may coexist on the same agent during the transition window. The long-term winner — which field is canonical and which is deprecated — is data-gated per Phase 28+ measurement of adoption rates (CONTEXT D-10); no deprecation lands in v1.26.
72
+
73
+ ### Frontmatter shape
74
+
75
+ | Field | Type | Accepted values | Required | Purpose |
76
+ |-------|------|-----------------|----------|---------|
77
+ | `reasoning-class` | enum | `high`, `medium`, `low` | optional | Runtime-neutral name for the reasoning-density tier this agent needs. Equivalent to `default-tier` per the equivalence table below. |
78
+
79
+ ### Equivalence (locked in CONTEXT D-10)
80
+
81
+ | `reasoning-class` | `default-tier` | Typical role classes |
82
+ |-------------------|----------------|----------------------|
83
+ | `high` | `opus` | Planners, critics, advisors, strategic reflectors. |
84
+ | `medium` | `sonnet` | Researchers, mappers, doc-writers, executors, fixers. |
85
+ | `low` | `haiku` | Verifiers and checkers with deterministic rubrics. |
86
+
87
+ The mapping is bidirectional and exhaustive — there is no `reasoning-class` value without a `default-tier` equivalent and vice versa. See `reference/model-tiers.md` for the per-class role rationale (the tier-selection guide that `default-tier` is keyed against — `reasoning-class` inherits the same semantics through the equivalence above).
88
+
89
+ ### Coexistence rule
90
+
91
+ Both fields may appear in the same agent's frontmatter:
92
+
93
+ ```yaml
94
+ ---
95
+ name: design-planner
96
+ default-tier: opus
97
+ reasoning-class: high
98
+ tier-rationale: "Authors DESIGN-PLAN.md — the contract every downstream agent follows"
99
+ ---
100
+ ```
101
+
102
+ When both are present, the values MUST be equivalent per the table above. Mismatched dual annotations (e.g. `default-tier: opus` paired with `reasoning-class: medium`) are a validation error — `scripts/validate-frontmatter.ts` (extended in Plan 26-08) enforces equivalence at lint time. If only one of the two is present, the validator accepts it and downstream consumers use the equivalence table to derive the missing field.
103
+
104
+ ### How runtime-aware tooling reads either field
105
+
106
+ Downstream consumers (`skills/router/SKILL.md`, `hooks/budget-enforcer.ts`, `scripts/lib/budget-enforcer.cjs`, `agents/gdd-intel-updater.md`) accept either field individually and map between them via the equivalence table:
107
+
108
+ - **`default-tier` only** — consumers read `default-tier` directly. This is the v1.26 baseline state for all 26 shipped agents.
109
+ - **`reasoning-class` only** — consumers map `high → opus`, `medium → sonnet`, `low → haiku` and feed the resulting tier into `tier-resolver.cjs` (Plan 26-02) for runtime-correct model resolution. Consumers that have not yet been updated to read `reasoning-class` natively still see a valid `default-tier` semantically (via the alias), so no consumer breaks when an agent author chooses the runtime-neutral name.
110
+ - **Both present** — consumers prefer `default-tier` for now (v1.26 canonical), with `reasoning-class` carried through to telemetry (`gdd-intel-updater` writes both fields to `.design/intel/agent-tiers.json` per Plan 26-08) so adoption can be measured for the Phase 28 deprecation gate.
111
+
112
+ ### Rollout policy for v1.26
113
+
114
+ - The 26 existing agents continue to carry `default-tier` only — **no per-agent retrofit lands in v1.26**. New agents (added in Phase 27+) MAY carry `reasoning-class` instead of, or alongside, `default-tier`.
115
+ - Validators, intel-updater, router, and budget-enforcer accept either field starting in v1.26 (Plans 26-04, 26-05, 26-08).
116
+ - Adoption is measured by `gdd-intel-updater` over `agents/*.md` changes; if alias adoption stays below 50% by Phase 28, `default-tier` remains canonical and the alias is deprecated. If alias wins majority share, the reverse. **No deprecation in v1.26.**
117
+
118
+ ### Cross-references
119
+
120
+ - `reference/model-tiers.md` — tier-selection guide and per-agent map for `default-tier`. The same role-class rationale applies to `reasoning-class` via the equivalence table.
121
+ - `reference/runtime-models.md` (Plan 26-01) — per-runtime tier→model adapter that consumes the resolved tier (whether sourced from `default-tier` or via `reasoning-class` alias).
122
+ - `scripts/validate-frontmatter.ts` (Plan 26-08) — validator extension that accepts the optional field and enforces equivalence when both are present.
123
+ - `.planning/phases/26-headless-model-resolver/CONTEXT.md` D-10, D-11 — decision lineage for additive-alias and equivalence-enforced semantics.
124
+
125
+ ---
126
+
127
+ ## Peer-CLI delegation (delegate_to)
128
+
129
+ Phase 27 introduces an **optional** frontmatter field `delegate_to:` that lets an agent OPT IN to running on a peer CLI (Codex via ASP; Gemini/Cursor/Copilot/Qwen via ACP) instead of the in-process Anthropic SDK call.
130
+
131
+ | Property | Value |
132
+ |----------|-------|
133
+ | Field | `delegate_to: <peer>-<role> \| none` |
134
+ | Required | NO — optional, additive |
135
+ | Default | absent = use local Anthropic call (existing behavior) |
136
+ | Valid values | `gemini-research`, `gemini-exploration`, `codex-execute`, `cursor-debug`, `cursor-plan`, `copilot-review`, `copilot-research`, `qwen-write`, or `none` (explicit opt-out) |
137
+ | Validator | `scripts/validate-frontmatter.ts` (Plan 27-06) — checks format + cross-references the capability matrix in `scripts/lib/peer-cli/registry.cjs`. Mismatched `<peer>-<role>` values that aren't in the matrix → validation error. |
138
+
139
+ **Behavior at runtime:**
140
+ - When session-runner spawns an agent with `delegate_to: gemini-research`, it tries `peer-cli/registry.dispatch('research', tier, prompt, opts)` first. On null result (peer absent OR peer error per D-07) it transparently falls back to the local Anthropic call. The skill never sees the peer failure.
141
+ - `delegate_to: none` explicitly skips registry dispatch (security-sensitive agents).
142
+ - Absent field = same as not setting it = local Anthropic call (unchanged behavior).
143
+
144
+ **Opt-in gating:** Even with `delegate_to:` set on an agent, dispatch only fires if the peer is in the `.design/config.json#peer_cli.enabled_peers` allowlist (populated by the install-time nudge in Plan 27-11; default empty). This prevents cost surprises — users explicitly authorize each peer.
145
+
146
+ **Telemetry:** Peer calls emit `peer_call_started` / `peer_call_complete` / `peer_call_failed` events in `events.jsonl`, tagged with `runtime_role: "peer"` and `peer_id` (Plan 27-08). Cost rows in `costs.jsonl` carry the same tags so reflector cross-runtime arbitrage (Phase 26) extends naturally.
147
+
148
+ **Cross-references:**
149
+ - `scripts/lib/peer-cli/registry.cjs` (Plan 27-05) — capability matrix + dispatch.
150
+ - `scripts/lib/peer-cli/adapters/{codex,gemini,cursor,copilot,qwen}.cjs` (Plan 27-04) — per-peer thin adapters.
151
+ - `reference/peer-cli-capabilities.md` (Plan 27-05) — full capability matrix doc.
152
+ - `.planning/phases/27-peer-cli-delegation/CONTEXT.md` D-06, D-07, D-11 — decision lineage.
153
+
154
+ ---
155
+
67
156
  ## Required Reading Pattern
68
157
 
69
158
  When an agent must read specific files before acting, the orchestrating stage embeds a `<required_reading>` block in the prompt it passes to `Task`. The block is part of the **prompt string**, not the agent file.
@@ -106,6 +106,49 @@ Read `.design/telemetry/costs.jsonl` (if exists). Aggregate per agent:
106
106
 
107
107
  If `.design/budget.json` doesn't exist: note "budget.json not found — Phase 10.1 budget governance required."
108
108
 
109
+ ### 7. Cross-runtime cost arbitrage (Phase 26 — D-09)
110
+
111
+ **Why this exists:** Phase 24 ships gdd to 14 runtimes (claude, codex, gemini, qwen, …). The same `(agent, tier)` pair can cost dramatically different amounts depending on which runtime executed the spawn — runtime-author pricing varies, and the user may already be paying for one runtime via subscription while paying per-token in another. This section surfaces those arbitrage opportunities as **structured, measurable signals** — never hand-wavy assumptions.
112
+
113
+ **Data source:** `.design/telemetry/events.jsonl` — filter entries where `type === 'cost.update'`. Each cost row is tagged with `payload.runtime` (Plan 26-05) so spawns from different runtimes are attributable apples-to-apples. The reflector reads cost events from this stream alongside Section 6's `costs.jsonl` rollup; events.jsonl is authoritative for runtime attribution.
114
+
115
+ **The rule:**
116
+
117
+ For each `(agent, tier)` pair observed in the last 5 cycles (D-09 default window):
118
+
119
+ 1. Bucket cost events by `(agent, tier, runtime, cycle)` and sum within each bucket. Sum-then-average is critical: a cycle that ran 4 design-verifier spawns in claude and 1 in codex must NOT inflate claude's per-cycle average by a factor of 4. Sum the 4 spawns into one cycle-sum, then average across the cycles where the runtime appeared.
120
+ 2. Compute `avg_cost_per_cycle` per `(agent, tier, runtime)` triple, restricted to the recency window.
121
+ 3. For each pair that has ≥2 runtimes in the window, find the cheapest and most expensive runtime. Compute `delta_pct = (max_avg - min_avg) / min_avg`.
122
+ 4. If `delta_pct > 0.5` (50%, D-09 starting heuristic), emit a structured `cost_arbitrage` proposal.
123
+
124
+ **Important guardrails (failure modes the rule must avoid):**
125
+
126
+ - **Mixed-runtime cycles must not crash or double-count.** A single cycle where some agent spawns ran in CC and others in Codex is normal — runtime attribution is per-spawn (`payload.runtime`), never per-cycle.
127
+ - **Single-runtime-only history is silent.** If only one runtime has events for an `(agent, tier)` pair in the window, no arbitrage can be computed — emit nothing rather than a misleading "no comparison available" proposal.
128
+ - **Zero-cost denominators are skipped.** A runtime that averaged $0 in the window would produce `delta_pct: Infinity`; skip the pair rather than emit a useless signal.
129
+ - **The 50% threshold is a starting heuristic.** Bandit-style learning over arbitrage outcomes (was the proposal applied? did costs drop?) is **Phase 23.5+ territory** — it lives in the bandit posterior, NOT here. This section's job is to surface measurement signals; tier-selection learning is a separate data product.
130
+
131
+ **Helper:** `scripts/lib/cost-arbitrage.cjs` exports `analyze(events, options) → proposals[]` implementing the above rule deterministically. The executor agent following this skill loads `events.jsonl`, parses each line as JSON (skipping malformed lines), and passes the array of envelopes to `analyze()`. No re-derivation of the rule in prose — call the helper.
132
+
133
+ **Proposal output shape** (one entry per arbitrage signal, JSON-serializable for `/gdd:apply-reflections`):
134
+
135
+ ```json
136
+ {
137
+ "type": "cost_arbitrage",
138
+ "agent": "design-reflector",
139
+ "tier": "opus",
140
+ "runtimes": {
141
+ "claude": { "avg_cost_per_cycle": 0.42, "n_cycles": 5 },
142
+ "codex": { "avg_cost_per_cycle": 1.10, "n_cycles": 5 }
143
+ },
144
+ "delta_pct": 1.619,
145
+ "proposal": "Switch design-reflector tier=opus invocations from codex to claude for ~62% cost saving",
146
+ "evidence_window": "last_5_cycles"
147
+ }
148
+ ```
149
+
150
+ Render each `cost_arbitrage` entry into the Proposals section as a `[BUDGET]`-tagged proposal carrying the structured payload verbatim — `/gdd:apply-reflections` will route it to the runtime-routing layer (Phase 26's tier-resolver / runtime-detect) rather than to `.design/budget.json`.
151
+
109
152
  ---
110
153
 
111
154
  ## Proposals
@@ -19,6 +19,7 @@ writes:
19
19
  - .design/intel/decisions.json
20
20
  - .design/intel/debt.json
21
21
  - .design/intel/graph.json
22
+ - .design/intel/agent-tiers.json
22
23
  ---
23
24
 
24
25
  @reference/shared-preamble.md
@@ -63,6 +64,38 @@ Expected: `components.json decisions.json debt.json dependencies.json exports.js
63
64
 
64
65
  Report any missing slices as warnings.
65
66
 
67
+ ### Step 3.5 — Sync `.design/intel/agent-tiers.json` (Plan 26-08)
68
+
69
+ Phase 26 introduced the runtime-neutral `reasoning-class` alias for `default-tier` (CONTEXT D-10/D-11). Downstream tooling that wants tier information without re-parsing markdown reads `.design/intel/agent-tiers.json`. Both fields MUST be populated per agent so consumers do not have to know the equivalence table — the intel-updater is the single source of truth that fills the missing field via the locked map:
70
+
71
+ | `reasoning-class` | `default-tier` |
72
+ |-------------------|----------------|
73
+ | `high` | `opus` |
74
+ | `medium` | `sonnet` |
75
+ | `low` | `haiku` |
76
+
77
+ Walk every `agents/*.md` file (skip `README.md`), parse its frontmatter, and emit one entry per agent into `.design/intel/agent-tiers.json` with the shape:
78
+
79
+ ```json
80
+ {
81
+ "schema_version": 1,
82
+ "generated_at": "<ISO-8601-UTC>",
83
+ "agents": {
84
+ "design-planner": { "default-tier": "opus", "reasoning-class": "high" },
85
+ "design-verifier": { "default-tier": "haiku", "reasoning-class": "low" }
86
+ }
87
+ }
88
+ ```
89
+
90
+ Population rules:
91
+
92
+ 1. If both `default-tier` and `reasoning-class` are present in the agent's frontmatter, write both verbatim (validator already enforced equivalence at lint time — see `scripts/validate-frontmatter.ts`).
93
+ 2. If only `default-tier` is present (the v1.26 baseline state for all 26 shipped agents), derive `reasoning-class` from the table above and write both.
94
+ 3. If only `reasoning-class` is present, derive `default-tier` from the table above and write both.
95
+ 4. If neither is present, omit the agent from the JSON and emit a warning — the upstream `validate-frontmatter` gate would have caught this at CI; the intel-updater stays non-throwing on lint-edges.
96
+
97
+ Validation is exclusively the validator's job; this step assumes the gate has passed and writes the queryable index. If a pre-existing `.design/intel/agent-tiers.json` is present, overwrite it atomically (write to a `.tmp` then `rename`).
98
+
66
99
  ### Step 4 — Report summary
67
100
 
68
101
  Print a concise update summary:
@@ -71,7 +104,7 @@ Print a concise update summary:
71
104
  ━━━ Intel store updated ━━━
72
105
  Files indexed: <N>
73
106
  Changed files: <N>
74
- Slices written: 10
107
+ Slices written: 11 (10 build-intel slices + agent-tiers.json from Step 3.5)
75
108
  Generated: <timestamp>
76
109
  ━━━━━━━━━━━━━━━━━━━━━━━━━━
77
110
  ```
@@ -72,6 +72,38 @@ function resolveHookPath(): string {
72
72
  const nodeRequire = createRequire(resolveHookPath());
73
73
  const rateGuard = nodeRequire('../scripts/lib/rate-guard.cjs') as typeof import('../scripts/lib/rate-guard.cjs');
74
74
  const iterationBudget = nodeRequire('../scripts/lib/iteration-budget.cjs') as typeof import('../scripts/lib/iteration-budget.cjs');
75
+ // Plan 26-05: shared cost-computation backend for the resolved_models
76
+ // consumer path. Pure module — takes (model_id, runtime, token_counts) →
77
+ // cost_usd by reading per-runtime price tables under reference/prices/.
78
+ // See scripts/lib/budget-enforcer.cjs for the lookup chain.
79
+ interface BudgetEnforcerBackend {
80
+ computeCost(args: {
81
+ model_id?: string | null;
82
+ tier?: string | null;
83
+ runtime: string;
84
+ tokens_in: number;
85
+ tokens_out: number;
86
+ cache_hit?: boolean;
87
+ }): {
88
+ cost_usd: number | null;
89
+ model: string | null;
90
+ tier: string | null;
91
+ runtime_used: string | null;
92
+ fallback: boolean;
93
+ reason: string | null;
94
+ };
95
+ modelFromResolved(resolved: unknown, agent: string): string | null;
96
+ }
97
+ const budgetBackend = nodeRequire('../scripts/lib/budget-enforcer.cjs') as BudgetEnforcerBackend;
98
+ // Plan 26-05: runtime detection for the cost-event runtime tag. Returns
99
+ // 'claude' for the CC hook context (CLAUDE_CONFIG_DIR is set when CC is
100
+ // the host), null when running outside any of the 14 runtime envs (e.g.
101
+ // CI matrix). The hook defaults the null case to 'claude' since the .ts
102
+ // hook only runs inside CC anyway.
103
+ interface RuntimeDetectModule {
104
+ detect(): string | null;
105
+ }
106
+ const runtimeDetect = nodeRequire('../scripts/lib/runtime-detect.cjs') as RuntimeDetectModule;
75
107
 
76
108
  // ── Types ───────────────────────────────────────────────────────────────────
77
109
 
@@ -92,6 +124,25 @@ export type ComplexityClass = 'S' | 'M' | 'L' | 'XL';
92
124
  interface RouterDecision {
93
125
  path?: 'fast' | 'quick' | 'full';
94
126
  complexity_class?: ComplexityClass;
127
+ /**
128
+ * Phase 26 / D-07: per-agent concrete model name resolved by the
129
+ * router via `scripts/lib/tier-resolver.cjs`. Strict superset of
130
+ * `model_tier_overrides` — existing consumers still read tier names
131
+ * from `model_tier_overrides`; new consumers read `resolved_models`
132
+ * for runtime-correct cost lookup.
133
+ */
134
+ resolved_models?: Record<string, string>;
135
+ /**
136
+ * Phase 26 / D-08: runtime ID the router computed `resolved_models`
137
+ * against. Optional; the hook falls back to `runtime-detect.cjs`
138
+ * when absent.
139
+ */
140
+ runtime?: string;
141
+ /**
142
+ * Phase 25 back-compat: tier-name overrides per agent. Phase 26 keeps
143
+ * this as the legacy fallback path when `resolved_models` is absent.
144
+ */
145
+ model_tier_overrides?: Record<string, string>;
95
146
  [key: string]: unknown;
96
147
  }
97
148
 
@@ -520,6 +571,53 @@ function emitHookFired(decision: HookDecision, cycle?: string): void {
520
571
  }
521
572
  }
522
573
 
574
+ /**
575
+ * Plan 26-05 / D-08: emit a `cost_recorded` event with runtime tag,
576
+ * concrete model, tier, token counts, and computed cost. Cost-aggregator
577
+ * downstream rolls these up per-runtime AND per-tier so reflector class-
578
+ * specific cost analysis (Phase 26-06) can compare apples-to-apples
579
+ * across runtimes.
580
+ *
581
+ * The event uses the BaseEvent envelope shape (free-form `type` per
582
+ * Phase 22 events.jsonl contract). Fail-open like every other emit in
583
+ * this hook — never block the spawn on a telemetry failure.
584
+ */
585
+ function emitCostRecorded(
586
+ payload: {
587
+ runtime: string;
588
+ agent: string;
589
+ model_id: string | null;
590
+ tier: string | null;
591
+ tokens_in: number;
592
+ tokens_out: number;
593
+ cost_usd: number | null;
594
+ },
595
+ cycle?: string,
596
+ ): void {
597
+ const ev = {
598
+ type: 'cost_recorded',
599
+ timestamp: new Date().toISOString(),
600
+ sessionId: getSessionId(),
601
+ ...(cycle !== undefined && cycle !== 'unknown' ? { cycle } : {}),
602
+ payload: {
603
+ runtime: payload.runtime,
604
+ agent: payload.agent,
605
+ model_id: payload.model_id,
606
+ tier: payload.tier,
607
+ tokens_in: payload.tokens_in,
608
+ tokens_out: payload.tokens_out,
609
+ cost_usd: payload.cost_usd,
610
+ },
611
+ };
612
+ try {
613
+ // BaseEvent shape; cost_recorded is a free-form subtype (the
614
+ // Phase 22 events stream is structurally validated, not enum-locked).
615
+ appendEvent(ev as unknown as HookFiredEvent);
616
+ } catch {
617
+ // Fail open.
618
+ }
619
+ }
620
+
523
621
  // ── main ────────────────────────────────────────────────────────────────────
524
622
 
525
623
  async function readStdin(): Promise<string> {
@@ -787,13 +885,54 @@ export async function main(): Promise<void> {
787
885
  toolInput._tier_override = budget.tier_overrides[agent];
788
886
  }
789
887
 
888
+ // Plan 26-05 / D-07 + D-08: resolved_models consumer path. When the
889
+ // router decision payload carries a concrete model ID for this agent
890
+ // under `resolved_models`, look up the cost in the per-runtime price
891
+ // table by model ID. Otherwise fall back to the legacy tier-name
892
+ // lookup (which still resolves through claude.md as the default
893
+ // runtime — back-compat with v1.25.x callers).
894
+ const resolvedModelId = budgetBackend.modelFromResolved(
895
+ routerDecision?.resolved_models,
896
+ agent,
897
+ );
898
+ const resolvedTier =
899
+ toolInput._tier_override ?? toolInput._default_tier ?? 'sonnet';
900
+ // Runtime tag: prefer the router's explicit `runtime` (D-08) field;
901
+ // fall back to env-var detection; default to 'claude' since the .ts
902
+ // hook itself only runs inside Claude Code.
903
+ const runtimeId =
904
+ (typeof routerDecision?.runtime === 'string' && routerDecision.runtime.length > 0
905
+ ? routerDecision.runtime
906
+ : runtimeDetect.detect()) ?? 'claude';
907
+
908
+ // Compute runtime-aware cost via the shared backend. Failures return
909
+ // null cost; we emit the event regardless so the cost-aggregator sees
910
+ // the lookup attempt (Phase 22 events.jsonl tagging).
911
+ const costLookup = budgetBackend.computeCost({
912
+ model_id: resolvedModelId,
913
+ tier: resolvedTier,
914
+ runtime: runtimeId,
915
+ tokens_in: Number(toolInput._tokens_in_est ?? 0),
916
+ tokens_out: Number(toolInput._tokens_out_est ?? 0),
917
+ cache_hit: false,
918
+ });
919
+ emitCostRecorded(
920
+ {
921
+ runtime: runtimeId,
922
+ agent,
923
+ model_id: resolvedModelId ?? costLookup.model,
924
+ tier: costLookup.tier ?? resolvedTier,
925
+ tokens_in: Number(toolInput._tokens_in_est ?? 0),
926
+ tokens_out: Number(toolInput._tokens_out_est ?? 0),
927
+ cost_usd: costLookup.cost_usd,
928
+ },
929
+ cycle,
930
+ );
931
+
790
932
  // Branch E: standard spawn-allowed (includes tier-downgraded path).
791
933
  writeTelemetry({
792
934
  agent,
793
- tier:
794
- toolInput._tier_override ??
795
- toolInput._default_tier ??
796
- 'sonnet',
935
+ tier: resolvedTier,
797
936
  tokens_in: Number(toolInput._tokens_in_est ?? 0),
798
937
  tokens_out: Number(toolInput._tokens_out_est ?? 0),
799
938
  cache_hit: false,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hegemonart/get-design-done",
3
- "version": "1.25.0",
3
+ "version": "1.27.0",
4
4
  "description": "A design-quality pipeline for AI coding agents: brief, plan, implement, and verify UI work against your design system.",
5
5
  "author": "Hegemon",
6
6
  "homepage": "https://github.com/hegemonart/get-design-done",