@hegemonart/get-design-done 1.24.2 → 1.25.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -87,6 +87,8 @@ Each stage produces artifacts in `.design/` inside the current project.
87
87
  | `analyze-dependencies [--slice <name>]` | `get-design-done:analyze-dependencies` | Query the `.design/intel/` store — dependency slices, graph queries, phase-scoped reads |
88
88
  | `extract-learnings [--cycle <slug>]` | `get-design-done:extract-learnings` | Extract decisions, lessons, patterns, and surprises from a completed cycle → `.design/cycles/<slug>/LEARNINGS.md` |
89
89
  | `skill-manifest [--refresh]` | `get-design-done:skill-manifest` | List or refresh the local skill manifest used by the router for discovery |
90
+ | `quality-gate` | `get-design-done:quality-gate` | Phase 25 — parallel lint/type/test/visual command runner; classifies failures via quality-gate-runner agent |
91
+ | `turn-closeout` | `get-design-done:turn-closeout` | Phase 25 — Stop-hook mirror skill; finalizes per-turn STATE blocks and emits closeout events |
90
92
  | `watch-authorities [--refresh] [--since <date>] [--feed <name>] [--schedule <cadence>]` | `get-design-done:gdd-watch-authorities` | Run design-authority-watcher — fetch curated feeds, diff snapshot, classify new entries → `.design/authority-report.md` (consumed by `/gdd:reflect`) |
91
93
  | `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
92
94
  | `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
@@ -0,0 +1,122 @@
1
+ ---
2
+ name: prototype-gate
3
+ description: "Cheap Haiku gate that scores sketch / spike signals from the active brief / context / plan and emits a JSON verdict recommending whether to prototype before continuing."
4
+ tools: Read, Bash, Grep
5
+ color: yellow
6
+ model: inherit
7
+ default-tier: haiku
8
+ tier-rationale: "Signal-counting rubric over a few small inputs — no synthesis, no writes, no agent spawning. Belongs on Haiku to keep gate latency cheap (≤ 2 s typical)."
9
+ size_budget: S
10
+ parallel-safe: always
11
+ typical-duration-seconds: 5
12
+ reads-only: true
13
+ writes: []
14
+ ---
15
+
16
+ @reference/shared-preamble.md
17
+
18
+ # prototype-gate
19
+
20
+ ## Role
21
+
22
+ You answer one question at a checkpoint: *should the pipeline pause to sketch or spike before continuing?*
23
+
24
+ You run at two firing points (Phase 25 D-02):
25
+ 1. **Post-`/gdd:explore`** — sketch territory. The question is "what visual / direction?".
26
+ 2. **Post-`/gdd:plan` plan-checker** — spike territory. The question is "can this work technically?".
27
+
28
+ You are read-only. You do not write STATE.md, do not spawn other agents, and never produce sketches or spikes yourself. Your only job is to score signals and emit a JSON verdict.
29
+
30
+ You also honor the cycle-scoped skip rule (D-02): if `STATE.md` `<prototyping>` already contains a `<skipped at="<your_firing_point>" cycle="<active_cycle>"/>` entry, recommend `none` immediately with `reasons: ["skipped this cycle at the prototype gate"]` (the exact payload is shown under "Cycle-skip short-circuit" below). Do not re-evaluate signals.
31
+
32
+ ## Input Contract
33
+
34
+ The orchestrator supplies these fields in the prompt context:
35
+
36
+ - `firing_point` — `"explore"` or `"plan"`. Determines which signal rubric you apply.
37
+ - `cycle` — the active cycle identifier from STATE frontmatter.
38
+ - `state_path` — absolute path to the active `.design/STATE.md`.
39
+ - `inputs` — paths to context the rubric scans:
40
+ - `brief_path` (always supplied) — `.design/BRIEF.md` or equivalent.
41
+ - `context_path` (firing_point=`"explore"`) — `.design/DESIGN-CONTEXT.md`.
42
+ - `design_path` (firing_point=`"explore"` if present) — `.design/DESIGN.md`.
43
+ - `plan_tasks_path` (firing_point=`"plan"`) — `.design/PLAN.md` or `.design/plans/*.md`.
44
+ - `decisions_snapshot` (always supplied) — newline-separated `D-NN: text (locked|tentative)` lines extracted from STATE `<decisions>`.
45
+
46
+ Missing input files are not an error — score the signals you can read; treat absent files as zero-signal contributions.
47
+
48
+ ## Cycle-skip short-circuit
49
+
50
+ Before scoring, scan `<prototyping>` in `state_path` for a `<skipped/>` entry whose `at` matches `firing_point` AND whose `cycle` matches the active `cycle`. If found, emit:
51
+
52
+ ```json
53
+ {"recommend": "none", "confidence": 1.0, "reasons": ["skipped this cycle at the prototype gate"]}
54
+ ```
55
+
56
+ Then exit. Do not score further.
57
+
58
+ ## Signal Rubric
59
+
60
+ ### Sketch signals (firing_point = `"explore"`)
61
+
62
+ Score 1 point per matched signal:
63
+
64
+ - **Hero / first-impression language** — BRIEF mentions "hero", "first impression", "novel surface", "landing", "above-the-fold", or names a single high-stakes screen.
65
+ - **DESIGN-CONTEXT visual gray areas** — DESIGN-CONTEXT.md contains an unresolved item tagged `visual:` or `direction:` (case-insensitive).
66
+ - **Empty design canvas** — DESIGN.md is missing or its scan returned no existing patterns to follow (no component references, no token references).
67
+ - **Decision conflict on the same surface** — at least two D-XX entries in `decisions_snapshot` discuss the same surface but disagree (look for paired references to the same component / page / area).
68
+ - **Open-ended language in interview answers** — BRIEF or DESIGN-CONTEXT contains "not sure", "open to", "??", "tbd", "we could" within answer regions.
69
+ - **Multiple viable patterns** — DESIGN-CONTEXT or a phase-researcher artifact lists more than one viable pattern for a single section without a chosen winner.
70
+
71
+ ### Spike signals (firing_point = `"plan"`)
72
+
73
+ Score 1 point per matched signal:
74
+
75
+ - **High-risk task** — a plan task carries `Risk: high` or `Confidence: low` (case-insensitive).
76
+ - **Tech outside the components mapper** — a plan task references a library, framework, API, or pattern not present in the project's components / mapper artifacts.
77
+ - **Failed required connection** — `<connections>` reports `unavailable` for a connection that a plan task explicitly depends on.
78
+ - **Experimental language** — a plan task description contains "experimental", "TBD", "unsure", "spike", "prove out", "validate that".
79
+ - **Probe deferred** — a plan task notes "will check at runtime" or similar deferred verification.
80
+
81
+ ## Threshold
82
+
83
+ | Score | recommend | confidence |
84
+ |-------|-----------|------------|
85
+ | ≥ 3 | `sketch` (explore) or `spike` (plan) | `0.9` |
86
+ | 1–2 | same as above | `0.5` |
87
+ | 0 | `none` | `0.95` |
88
+
89
+ Confidence is rubric-derived only — do not infer confidence from the size of the inputs or your own uncertainty. The values in the table above — plus `1.0` for the cycle-skip short-circuit — are the only valid values.
90
+
91
+ ## Output Contract
92
+
93
+ Emit exactly one JSON object on its own line. No prose wrapper, no code fence, no leading or trailing text.
94
+
95
+ ```json
96
+ {"recommend": "sketch", "confidence": 0.9, "reasons": ["BRIEF mentions hero", "DESIGN-CONTEXT visual gray area on home"]}
97
+ ```
98
+
99
+ Schema:
100
+
101
+ - `recommend` — string enum, one of `"sketch" | "spike" | "none"`.
102
+ - `confidence` — number in `[0, 1]`. One of `0.5`, `0.9`, `0.95` per the threshold table; or `1.0` for the cycle-skip short-circuit.
103
+ - `reasons` — array of short strings (≤ 80 chars each). One entry per matched signal, in match order. Empty array allowed when `recommend === "none"` from the threshold (not the skip path).
104
+
105
+ ## Constraints
106
+
107
+ - **Do not** propose what to sketch / spike — that's the wrap-up flow's job. Your reasons are evidence, not directives.
108
+ - **Do not** read or write STATE.md outside of the cycle-skip lookup described above.
109
+ - **Do not** consult external services or MCP tools. Signal scoring is purely a function of the supplied inputs.
110
+ - **Do not** exceed `size_budget: S`. If inputs are unexpectedly large, prefer to score signals on the first 8 KB of each file rather than refuse to answer.
111
+
112
+ ## Record
113
+
114
+ At run-end, append one JSONL line to `.design/intel/insights.jsonl`:
115
+
116
+ ```json
117
+ {"ts":"<ISO-8601>","agent":"<name>","cycle":"<cycle from STATE.md>","stage":"<stage from STATE.md>","one_line_insight":"<what was produced or learned>","artifacts_written":["<files written>"]}
118
+ ```
119
+
120
+ Schema: `reference/schemas/insight-line.schema.json`. Use an empty `artifacts_written` array for read-only agents.
121
+
122
+ ## GATE COMPLETE
@@ -0,0 +1,125 @@
1
+ ---
2
+ name: quality-gate-runner
3
+ description: "Cheap Haiku classifier that ingests {command, exit_code, stderr} tuples from the quality-gate skill's parallel run and emits a JSON verdict — pass/fail plus per-bucket failure groupings (lint / type / test / visual). Read-only. Does not run commands itself."
4
+ tools: Read, Bash, Grep
5
+ color: amber
6
+ model: inherit
7
+ default-tier: haiku
8
+ tier-rationale: "Pattern-match exit codes and bucket stderr into four named categories — no synthesis, no rewrites, no spawning. Belongs on Haiku to keep classification cost trivial relative to the actual command runs."
9
+ size_budget: S
10
+ parallel-safe: always
11
+ typical-duration-seconds: 5
12
+ reads-only: true
13
+ writes: []
14
+ ---
15
+
16
+ @reference/shared-preamble.md
17
+
18
+ # quality-gate-runner
19
+
20
+ ## Role
21
+
22
+ You answer one question for the `quality-gate` skill (Phase 25 Plan 25-03): *given the outputs of the parallel command run, did the gate pass — and if not, into which buckets do the failures fall?*
23
+
24
+ You are read-only. You do not re-run any commands, do not write STATE.md, do not spawn agents, do not produce fixes. Your only job is to classify the outputs and return JSON.
25
+
26
+ ## Input Contract
27
+
28
+ The skill supplies a JSON object on stdin (or as the first line of the prompt context — handle both). Shape:
29
+
30
+ ```json
31
+ {
32
+ "outputs": [
33
+ {"command": "npm run lint", "exit_code": 0, "stderr": ""},
34
+ {"command": "npm run typecheck", "exit_code": 1, "stderr": "<verbatim stderr>"},
35
+ {"command": "npm run test", "exit_code": 0, "stderr": ""},
36
+ {"command": "npm run chromatic", "exit_code": 1, "stderr": "<verbatim stderr>"}
37
+ ]
38
+ }
39
+ ```
40
+
41
+ Schema:
42
+ - `outputs` — array, one entry per command actually executed in Step 2 of the skill. Order is preserved from the skill (matches command-list order from Step 1).
43
+ - `command` — verbatim shell string the skill ran.
44
+ - `exit_code` — integer. `0` = clean; non-zero = failure to be classified.
45
+ - `stderr` — verbatim stderr capture. May be empty even on failure (some tools write to stdout) and may be non-empty on success (warnings); do not infer pass/fail from `stderr` in either direction — `exit_code` is the only pass/fail signal.
46
+
47
+ You may also receive a `stdout` field per entry (forward-compat — the skill plans to add it). Tolerate its absence.
48
+
49
+ ## Bucketing rule
50
+
51
+ Map each command to exactly one of four buckets based on the verbatim command string. Use case-insensitive substring match against the command line:
52
+
53
+ | Substring (case-insensitive) | Bucket |
54
+ |------------------------------|--------|
55
+ | `lint`, `eslint`, `stylelint`, `biome lint` | `lint` |
56
+ | `typecheck`, `tsc`, `tsc --noemit`, `flow check` | `type` |
57
+ | `test` (but NOT one of the visual matches below — visual wins) | `test` |
58
+ | `chromatic`, `test:visual`, `loki test`, `playwright test --grep visual` | `visual` |
59
+
60
+ When a command matches multiple substrings (e.g., `npm run test:visual` matches both `test` and `test:visual`), `visual` wins. If a command matches none, bucket it under `test` (catch-all — most user-supplied custom commands are test-like). Do not invent a fifth bucket.
61
+
62
+ ## Pass / fail rule
63
+
64
+ - `status === "pass"` if and only if **every** entry's `exit_code === 0`.
65
+ - `status === "fail"` if **any** entry's `exit_code !== 0`.
66
+
67
+ Empty `outputs` array means `status === "pass"` (no commands ran → nothing failed). The skill is responsible for emitting `quality_gate_skipped` in the no-commands path; you do not.
68
+
69
+ ## Failure summarization
70
+
71
+ For each failed entry (exit_code !== 0), produce one short summary string and add it to the bucket the command maps to. Summaries should:
72
+
73
+ - Quote the command name (the basename — e.g., `lint` from `npm run lint`).
74
+ - Include the first non-empty line of `stderr` truncated to 120 chars, if present.
75
+ - Otherwise include `exit_code=N` so the reader still sees something concrete.
76
+
77
+ Example summary strings:
78
+ - `"lint: 4 problems (3 errors, 1 warning)"` — when stderr's first line is informative.
79
+ - `"typecheck: error TS2304: Cannot find name 'foo' in src/x.ts"` — same.
80
+ - `"test: exit_code=1"` — when stderr is empty.
81
+
82
+ Do NOT inline full stderr — the bucket entries are summaries, not transcripts. The skill keeps the verbatim outputs for the fixer; your output is for routing only.
83
+
84
+ Buckets that have no failures are OMITTED from `classified_failures`. Do not emit empty arrays for unaffected buckets — the consumer relies on key-presence as a signal.
85
+
86
+ ## Output Contract
87
+
88
+ Emit exactly one JSON object on its own line. No prose wrapper, no code fence, no leading or trailing text.
89
+
90
+ Pass example:
91
+
92
+ ```json
93
+ {"status": "pass", "classified_failures": {}}
94
+ ```
95
+
96
+ Fail example:
97
+
98
+ ```json
99
+ {"status": "fail", "classified_failures": {"type": ["typecheck: error TS2304 in src/x.ts"], "visual": ["chromatic: 2 stories changed"]}}
100
+ ```
101
+
102
+ Schema:
103
+ - `status` — string enum, one of `"pass" | "fail"`. Note: this is NOT the same enum as the skill's STATE-block status (which also has `timeout` and `skipped`); those two cases are decided by the skill, not by you. You only emit `pass | fail`.
104
+ - `classified_failures` — object. Keys are a subset of `lint | type | test | visual`. Values are arrays of short summary strings (≤ 120 chars each). The object is `{}` (empty) when `status === "pass"`.
105
+
106
+ ## Constraints
107
+
108
+ - **Do not** read `stderr` content beyond the first non-empty line. The skill keeps the verbatim outputs for the design-fixer; your job is routing, not analysis.
109
+ - **Do not** invent buckets outside the four-name set.
110
+ - **Do not** ever emit `status: "timeout"` or `status: "skipped"` — those are skill-level statuses, not classifier outputs.
111
+ - **Do not** consult external services or MCP tools. Classification is a pure function of the supplied input.
112
+ - **Do not** exceed `size_budget: S`. If `outputs[*].stderr` is unexpectedly large, look for the first non-empty line within the first 4 KB of each stderr rather than refuse.
113
+ - The output JSON object must be parseable with `JSON.parse` — no trailing comma, no comments, no surrounding markdown.
114
+
115
+ ## Record
116
+
117
+ At run-end, append one JSONL line to `.design/intel/insights.jsonl`:
118
+
119
+ ```json
120
+ {"ts":"<ISO-8601>","agent":"<name>","cycle":"<cycle from STATE.md>","stage":"<stage from STATE.md>","one_line_insight":"<what was produced or learned>","artifacts_written":["<files written>"]}
121
+ ```
122
+
123
+ Schema: `reference/schemas/insight-line.schema.json`. Use an empty `artifacts_written` array for read-only agents.
124
+
125
+ ## GATE COMPLETE
@@ -80,6 +80,26 @@ const iterationBudget = nodeRequire('../scripts/lib/iteration-budget.cjs') as ty
80
80
  * for every hook invocation. The tool_input shape is tool-specific;
81
81
  * this hook only consumes Agent-shaped tool_input so we narrow here.
82
82
  */
83
+ /** Phase 25 / D-04, D-05: router complexity-class enum; these four literals are the only classes this hook recognises. */
84
+ export type ComplexityClass = 'S' | 'M' | 'L' | 'XL';
85
+
86
+ /**
87
+ * Phase 25 / D-05: router decision payload as surfaced on
88
+ * tool_input.context.router_decision. Only the fields this hook reads
89
+ * are typed; the router emits more (model_tier_overrides,
90
+ * estimated_cost_usd, cache_hits) but they are not consumed here.
91
+ * Both typed fields are optional, so an empty object is a valid payload.
92
+ */
92
+ interface RouterDecision {
93
+ path?: 'fast' | 'quick' | 'full';
94
+ complexity_class?: ComplexityClass;
95
+ [key: string]: unknown; // untyped pass-through for the router fields not consumed here
96
+ }
97
+
98
+ interface ToolInputContext { // shape of tool_input.context; only router_decision is typed
99
+ router_decision?: RouterDecision;
100
+ [key: string]: unknown; // any other context keys are tolerated but left untyped
101
+ }
102
+
83
103
  interface ToolInput {
84
104
  subagent_type?: string;
85
105
  agent?: string;
@@ -91,6 +111,7 @@ interface ToolInput {
91
111
  _default_tier?: string;
92
112
  _tier_downgraded?: boolean;
93
113
  lazy_skipped?: boolean;
114
+ context?: ToolInputContext;
94
115
  [key: string]: unknown;
95
116
  }
96
117
 
@@ -199,6 +220,46 @@ const BUDGET_DEFAULTS: Required<
199
220
  enforcement_mode: 'enforce',
200
221
  };
201
222
 
223
/**
 * Phase 25 / D-05: optional per-class cap map stored on .design/budget.json.
 * Documented in reference/config-schema.md as
 * `class_caps_usd?: { S?: number; M?: number; L?: number; XL?: number }`.
 * It is read through the BudgetSchema index signature so the schema does not
 * have to be regenerated for an additive optional field.
 */
type ClassCapsUsd = Partial<Record<ComplexityClass, number>>;

/**
 * Extract the usable per-class caps from a budget object.
 *
 * @param budget - budget object that may carry a `class_caps_usd` property.
 * @returns `undefined` when `class_caps_usd` is absent, null, or not an
 *   object; otherwise a map holding only the classes whose cap is a finite
 *   number greater than zero (possibly an empty map).
 */
function readClassCaps(budget: BudgetSchema): ClassCapsUsd | undefined {
  const candidate = (budget as { class_caps_usd?: unknown }).class_caps_usd;
  // `typeof undefined` is 'undefined', so the first test also covers absence.
  if (typeof candidate !== 'object' || candidate === null) {
    return undefined;
  }

  const table = candidate as Record<string, unknown>;
  const caps: ClassCapsUsd = {};
  (['S', 'M', 'L', 'XL'] as const).forEach((cls) => {
    const value = table[cls];
    // Non-numeric, non-finite, zero and negative entries are ignored.
    if (typeof value === 'number' && Number.isFinite(value) && value > 0) {
      caps[cls] = value;
    }
  });
  return caps;
}
245
+
246
/**
 * Phase 25 / D-05: pick the cap that applies to a single spawn.
 *
 * When a complexity class is supplied and `class_caps_usd` defines a usable
 * cap for that class, the class cap wins. In every other case the result is
 * `budget.per_task_cap_usd`.
 *
 * @param budget - resolved budget; also searched for `class_caps_usd`.
 * @param complexityClass - class from the router decision, if any.
 * @returns the cap in USD to enforce for this spawn.
 */
function resolvePerSpawnCap(
  budget: ResolvedBudget,
  complexityClass: ComplexityClass | undefined,
): number {
  // Only consult the class map when there is a class to look up.
  const classCap =
    complexityClass === undefined
      ? undefined
      : readClassCaps(budget)?.[complexityClass];
  return classCap === undefined ? budget.per_task_cap_usd : classCap;
}
262
+
202
263
  /**
203
264
  * Concrete budget shape after defaults-merge. Every field becomes
204
265
  * non-optional so downstream branches don't have to null-guard. Defined
@@ -490,6 +551,27 @@ export async function main(): Promise<void> {
490
551
  const inputHash =
491
552
  typeof toolInput._input_hash === 'string' ? toolInput._input_hash : null;
492
553
 
554
+ // Phase 25 / D-05: extract complexity_class from router decision.
555
+ // Absent payload → legacy per_task_cap behavior (no regression).
556
+ // Present payload with class === 'S' → skip enforcement entirely
557
+ // (defensive: the typical S path is upstream short-circuit where
558
+ // router never ran and this hook still applies legacy caps; an
559
+ // explicit S signal here means a caller bypassed the upstream skip
560
+ // and is asking us to honor the class).
561
+ const routerDecision: RouterDecision | undefined =
562
+ toolInput.context?.router_decision !== undefined &&
563
+ typeof toolInput.context.router_decision === 'object' &&
564
+ toolInput.context.router_decision !== null
565
+ ? toolInput.context.router_decision
566
+ : undefined;
567
+ const complexityClass: ComplexityClass | undefined =
568
+ routerDecision?.complexity_class !== undefined &&
569
+ (['S', 'M', 'L', 'XL'] as const).includes(
570
+ routerDecision.complexity_class as ComplexityClass,
571
+ )
572
+ ? (routerDecision.complexity_class as ComplexityClass)
573
+ : undefined;
574
+
493
575
  const { cycle, phase } = readCycleAndPhase();
494
576
  const cyclePhase = { cycle, phase };
495
577
 
@@ -513,6 +595,38 @@ export async function main(): Promise<void> {
513
595
 
514
596
  const budget = loadBudget();
515
597
 
598
+ // Phase 25 / D-05: explicit S-class short-circuit. The typical S path
599
+ // skips the router entirely and this hook never runs at all (the
600
+ // command's SKILL.md does the deterministic skip upstream). When we
601
+ // DO see complexity_class === 'S' in the payload it means a caller
602
+ // routed an S-class command through the hook anyway — honor the
603
+ // class by skipping enforcement (no cap check, no downgrade) but
604
+ // still write a telemetry row (caller-supplied estimates passed through) + emit an 'allow' event so
605
+ // observability stays consistent.
606
+ if (complexityClass === 'S') {
607
+ writeTelemetry({
608
+ agent,
609
+ tier:
610
+ toolInput._tier_override ??
611
+ toolInput._default_tier ??
612
+ 'haiku',
613
+ tokens_in: Number(toolInput._tokens_in_est ?? 0),
614
+ tokens_out: Number(toolInput._tokens_out_est ?? 0),
615
+ cache_hit: false,
616
+ est_cost_usd: Number(toolInput._est_cost_usd ?? 0),
617
+ enforcement_mode: budget.enforcement_mode,
618
+ _cyclePhase: cyclePhase,
619
+ });
620
+ emitHookFired('allow', cycle);
621
+ const response: ToolOutput = {
622
+ continue: true,
623
+ suppressOutput: true,
624
+ modified_tool_input: toolInput,
625
+ };
626
+ process.stdout.write(JSON.stringify(response));
627
+ return;
628
+ }
629
+
516
630
  // Branch B: cache short-circuit (D-05).
517
631
  if (inputHash !== null) {
518
632
  const cached = cacheLookup(agent, inputHash);
@@ -589,9 +703,15 @@ export async function main(): Promise<void> {
589
703
  const estCost = Number(toolInput._est_cost_usd ?? 0);
590
704
  const phaseSpend = currentPhaseSpend(phase);
591
705
 
706
+ // Phase 25 / D-05: per-spawn cap is class-specific when
707
+ // complexity_class is present and class_caps_usd[class] is defined.
708
+ // Falls back to per_task_cap_usd for backwards compatibility — when
709
+ // no router decision is supplied, behavior is identical to pre-25.
710
+ const perSpawnCap = resolvePerSpawnCap(budget, complexityClass);
711
+
592
712
  if (budget.enforcement_mode === 'enforce') {
593
- // Branch C: 100% per_task cap hard block.
594
- if (estCost >= budget.per_task_cap_usd) {
713
+ // Branch C: 100% per-spawn cap hard block (class-specific or per_task).
714
+ if (estCost >= perSpawnCap) {
595
715
  writeTelemetry({
596
716
  agent,
597
717
  tier:
@@ -607,10 +727,14 @@ export async function main(): Promise<void> {
607
727
  _cyclePhase: cyclePhase,
608
728
  });
609
729
  emitHookFired('block', cycle);
730
+ const capLabel =
731
+ complexityClass !== undefined && perSpawnCap !== budget.per_task_cap_usd
732
+ ? `class_caps_usd.${complexityClass}`
733
+ : 'per-task';
610
734
  const response: ToolOutput = {
611
735
  continue: false,
612
736
  suppressOutput: false,
613
- message: `Budget cap reached for per-task. Estimated: $${estCost.toFixed(4)}, cap: $${budget.per_task_cap_usd.toFixed(2)}. Raise cap in .design/budget.json or retry after next task.`,
737
+ message: `Budget cap reached for ${capLabel}. Estimated: $${estCost.toFixed(4)}, cap: $${perSpawnCap.toFixed(2)}. Raise cap in .design/budget.json or retry after next task.`,
614
738
  };
615
739
  process.stdout.write(JSON.stringify(response));
616
740
  return;
@@ -640,18 +764,19 @@ export async function main(): Promise<void> {
640
764
  process.stdout.write(JSON.stringify(response));
641
765
  return;
642
766
  }
643
- // 80% soft-threshold downgrade (D-03): task-scoped.
767
+ // 80% soft-threshold downgrade (D-03): task-scoped, against the
768
+ // resolved per-spawn cap so class-specific caps participate.
644
769
  if (
645
770
  budget.auto_downgrade_on_cap &&
646
- estCost >= 0.8 * budget.per_task_cap_usd
771
+ estCost >= 0.8 * perSpawnCap
647
772
  ) {
648
773
  toolInput._tier_override = 'haiku';
649
774
  toolInput._tier_downgraded = true;
650
775
  }
651
776
  } else if (budget.enforcement_mode === 'warn') {
652
- if (estCost >= budget.per_task_cap_usd) {
777
+ if (estCost >= perSpawnCap) {
653
778
  process.stderr.write(
654
- `gdd-budget-enforcer WARN: per-task cap will be exceeded ($${estCost.toFixed(4)} >= $${budget.per_task_cap_usd})\n`,
779
+ `gdd-budget-enforcer WARN: per-spawn cap will be exceeded ($${estCost.toFixed(4)} >= $${perSpawnCap})\n`,
655
780
  );
656
781
  }
657
782
  }
@@ -23,6 +23,7 @@ const { spawnSync } = require('child_process');
23
23
 
24
24
  const MIN_BYTES = 1500;
25
25
  const TOP_N = 15;
26
+ const PROTOTYPING_TOP_N = 5;
26
27
  const MATCHER_RE = /[\\/](?:\.design|reference|\.planning)[\\/][^\n]*\.md$/;
27
28
 
28
29
  // Phase 19.5: try FTS5 backend first; fall back to grep silently.
@@ -111,6 +112,174 @@ function sortKeyFor(tag) {
111
112
  return 0;
112
113
  }
113
114
 
115
/**
 * Parse a self-closing-tag attribute string (`a="x" b="y"`) into a key/value map.
 * Self-contained: avoids a TS-parser import to keep the hook hot path JS-only.
 *
 * Attribute names may contain hyphens. Matching the whole name matters: with a
 * bare `\w+` name pattern, `data-slug="x"` would be captured as `slug` and
 * could shadow a real `slug` attribute.
 *
 * Only double-quoted values are recognised; anything else is ignored. When the
 * same name appears twice, the last occurrence wins.
 *
 * @param {string} [attrStr] raw text between the tag name and `/>`.
 * @returns {Object<string, string>} attribute map; empty for falsy input.
 */
function parseAttrs(attrStr) {
  const out = {};
  if (!attrStr) return out;
  // Fresh regex per call: a /g regex keeps lastIndex state between exec() calls.
  const re = /([\w-]+)\s*=\s*"([^"]*)"/g;
  let m;
  while ((m = re.exec(attrStr)) !== null) out[m[1]] = m[2];
  return out;
}
127
+
128
/**
 * Read STATE.md once and pull out the two pieces the prototyping recall needs.
 *
 * @param {string} [stateFile] path to STATE.md.
 * @returns {{prototyping: string, decisionsMap: Object<string, string>}}
 *   `prototyping` is the text between `<prototyping>` and `</prototyping>`
 *   ('' when the block is absent). `decisionsMap` is a prototype-less
 *   `D-NN -> text` lookup built from the `<decisions>` block, with a trailing
 *   `(locked)` / `(tentative)` qualifier removed. A falsy path or an
 *   unreadable file yields the empty result rather than an error.
 */
function readStateForPrototyping(stateFile) {
  const result = { prototyping: '', decisionsMap: Object.create(null) };
  if (!stateFile) return result;

  let text;
  try {
    text = fs.readFileSync(stateFile, 'utf8');
  } catch {
    // Best effort: a missing or unreadable STATE.md means nothing to surface.
    return result;
  }

  const protoHit = /<prototyping>([\s\S]*?)<\/prototyping>/.exec(text);
  if (protoHit) result.prototyping = protoHit[1];

  const decisionsHit = /<decisions>([\s\S]*?)<\/decisions>/.exec(text);
  if (!decisionsHit) return result;

  // One decision per line: "D-NN: text", optionally ending in a qualifier.
  const lineRe = /^\s*(D-\d+)\s*:\s*(.+?)\s*$/gm;
  for (let hit = lineRe.exec(decisionsHit[1]); hit !== null; hit = lineRe.exec(decisionsHit[1])) {
    const [, id, rawText] = hit;
    result.decisionsMap[id] = rawText.replace(/\s*\((?:locked|tentative)\)\s*$/i, '').trim();
  }
  return result;
}
156
+
157
/**
 * Parse a `<prototyping>` body into typed entries.
 *
 * Recognises self-closing `<sketch …/>`, `<spike …/>` and `<skipped …/>` tags;
 * any other tag is ignored. HTML comments (`<!-- … -->`) are removed before
 * scanning, so a commented-out entry is not reported as a live one.
 *
 * @param {string} [body] inner text of the `<prototyping>` block.
 * @returns {Array<{type: string, attrs: Object<string, string>}>} entries in
 *   document order; empty for falsy input.
 */
function parsePrototypingEntries(body) {
  const entries = [];
  if (!body) return entries;
  // Strip comments first; otherwise the tag regex happily matches inside them.
  const live = String(body).replace(/<!--[\s\S]*?-->/g, '');
  const re = /<(sketch|spike|skipped)\b([^>]*?)\/>/g;
  let m;
  while ((m = re.exec(live)) !== null) {
    entries.push({ type: m[1], attrs: parseAttrs(m[2]) });
  }
  return entries;
}
172
+
173
/**
 * Break a slug / basename / path into lowercase tokens for fuzzy comparison.
 *
 * The input is lowercased and split on runs of hyphens, underscores, dots,
 * slashes, backslashes and whitespace. Tokens of length 0 or 1 and the
 * no-signal words (`md`, `txt`, `json`, `ts`, `js`, `plan`, `context`,
 * `state`) are discarded.
 *
 * @param {*} s value to tokenize; falsy values produce an empty list.
 * @returns {string[]} surviving tokens in their original order.
 */
function tokenize(s) {
  if (!s) return [];
  const noSignal = new Set(['md', 'txt', 'json', 'ts', 'js', 'plan', 'context', 'state']);
  const kept = [];
  for (const piece of String(s).toLowerCase().split(/[-_./\\\s]+/)) {
    // The length test also removes the empty strings split() can produce.
    if (piece.length <= 1) continue;
    if (noSignal.has(piece)) continue;
    kept.push(piece);
  }
  return kept;
}
184
+
185
/**
 * Decide whether a prototyping entry is relevant to the opened file.
 *
 * The matcher term is the entry's `slug` (sketch / spike) or `reason`
 * (skipped). The entry matches when any token of the term is also a token of
 * the file's basename or relative path, or failing that, when the whole
 * lowercased term is a substring of either. The substring fallback helps
 * free-form `reason` strings and slugs whose tokens do not survive the
 * stop-word filter.
 *
 * A missing `basename` or `relPath` is treated as an empty string, so the
 * substring fallback cannot throw when only one of the two is available.
 *
 * @param {{type: string, attrs: Object<string, string>}} entry parsed entry.
 * @param {string} [basename] basename of the opened file.
 * @param {string} [relPath] relative path of the opened file.
 * @returns {string|null} the matcher term on a match; `null` when the entry
 *   has no term, the file yields no tokens, or nothing matches.
 */
function matchPrototypingEntry(entry, basename, relPath) {
  let term;
  if (entry.type === 'sketch' || entry.type === 'spike') {
    term = entry.attrs.slug;
  } else if (entry.type === 'skipped') {
    term = entry.attrs.reason;
  }
  if (!term) return null;

  // Normalise once so both the token pass and the substring pass are safe.
  const base = basename ? String(basename) : '';
  const rel = relPath ? String(relPath) : '';

  const fileTokens = new Set([...tokenize(base), ...tokenize(rel)]);
  if (fileTokens.size === 0) return null;
  if (tokenize(term).some((t) => fileTokens.has(t))) return term;

  const needle = String(term).toLowerCase();
  if (base.toLowerCase().includes(needle) || rel.toLowerCase().includes(needle)) return term;
  return null;
}
216
+
217
/**
 * Format a single prototyping entry for the additionalContext block.
 *
 * Segments are joined with " — " in this order:
 *   1. `Prototyping outcome (cycle <cycle>): <type>/<slug-or-at>`
 *      (`?` stands in for a missing cycle or identifier)
 *   2. `<decision> — <rationale>` when the entry names a decision; just
 *      `<decision>` when no rationale is known for it
 *   3. exactly one of `verdict: <verdict>` (spike entries only),
 *      `status: <status>`, or `reason: <reason>` (skipped entries only),
 *      whichever applies first in that order
 * Segments 2 and 3 are omitted when their attributes are missing.
 *
 * The rationale is looked up as an own property only, so a plain-object map
 * cannot leak inherited members such as `constructor`, and a missing map is
 * treated as "no rationale known" instead of throwing.
 *
 * @param {{type: string, attrs: Object<string, string>}} entry parsed entry.
 * @param {Object<string, string>} [decisionsMap] `D-NN -> rationale` lookup.
 * @returns {string} the formatted single-line summary.
 */
function formatPrototypingEntry(entry, decisionsMap) {
  const a = entry.attrs;
  const cycle = a.cycle || '?';
  const ident = a.slug || a.at || '?';
  const segs = [`Prototyping outcome (cycle ${cycle}): ${entry.type}/${ident}`];
  if (a.decision) {
    const known =
      decisionsMap !== undefined &&
      decisionsMap !== null &&
      Object.prototype.hasOwnProperty.call(decisionsMap, a.decision);
    const rationale = known ? decisionsMap[a.decision] : undefined;
    segs.push(rationale ? `${a.decision} — ${rationale}` : a.decision);
  }
  if (entry.type === 'spike' && a.verdict) {
    segs.push(`verdict: ${a.verdict}`);
  } else if (a.status) {
    segs.push(`status: ${a.status}`);
  } else if (entry.type === 'skipped' && a.reason) {
    segs.push(`reason: ${a.reason}`);
  }
  return segs.join(' — ');
}
240
+
241
/**
 * Assemble the "Prior prototyping outcomes" section for the opened file.
 *
 * Reads the `<prototyping>` block from STATE.md, keeps the entries that match
 * the file (see matchPrototypingEntry), orders them by the first number found
 * in their `cycle` attribute (highest first, entries without a number last),
 * and renders at most PROTOTYPING_TOP_N of them followed by a "… (N more …)"
 * line when entries were cut.
 *
 * @param {string} [stateFile] path to STATE.md.
 * @param {string} basename basename of the opened file.
 * @param {string} relPath relative path of the opened file.
 * @returns {string|null} the rendered block, or `null` when there is no state
 *   file, no `<prototyping>` content, or no matching entry, so the caller can
 *   omit the heading entirely.
 */
function buildPrototypingBlock(stateFile, basename, relPath) {
  if (!stateFile) return null;

  const state = readStateForPrototyping(stateFile);
  if (!state.prototyping) return null;

  const relevant = parsePrototypingEntries(state.prototyping).filter(
    (entry) => matchPrototypingEntry(entry, basename, relPath),
  );
  if (relevant.length === 0) return null;

  // Recency: cycle is typically `cycle-N` or `N`; the first digit run is the rank.
  const rankOf = (entry) => {
    const digits = /(\d+)/.exec(String(entry.attrs.cycle || ''));
    return digits ? Number(digits[1]) : 0;
  };
  relevant.sort((x, y) => rankOf(y) - rankOf(x));

  const overflow = relevant.length - PROTOTYPING_TOP_N;
  const rendered = [
    '',
    '### Prior prototyping outcomes',
    ...relevant
      .slice(0, PROTOTYPING_TOP_N)
      .map((entry) => `> - ${formatPrototypingEntry(entry, state.decisionsMap)}`),
  ];
  if (overflow > 0) {
    rendered.push(`> … (${overflow} more prototyping entr${overflow === 1 ? 'y' : 'ies'})`);
  }
  rendered.push('');
  return rendered.join('\n');
}
282
+
114
283
  function buildRecallBlock(matches, basename, backendLabel) {
115
284
  if (!matches.length) return null;
116
285
  const uniq = [];
@@ -202,16 +371,27 @@ async function main() {
202
371
 
203
372
  const backendLabel = BACKEND || (useRgGlobal ? 'ripgrep' : 'node-grep');
204
373
  const block = buildRecallBlock(hits, basename, backendLabel);
205
- if (!block) {
374
+
375
+ // Phase 25 (plan 25-06): surface <prototyping> outcomes when an opened
376
+ // planning/design .md ≥1500 bytes shares a slug/reason token with a
377
+ // resolved sketch/spike/skipped entry. STATE.md is the canonical home for
378
+ // the block (D-01); we read it directly here rather than via the TS parser
379
+ // so the hook stays self-contained JS.
380
+ const stateFile = sources.find((p) => p.endsWith(path.sep + 'STATE.md') || p.endsWith('/STATE.md'));
381
+ const protoBlock = buildPrototypingBlock(stateFile, basename, relPath);
382
+
383
+ if (!block && !protoBlock) {
206
384
  try { require('./_hook-emit.js').emitHookFired('gdd-decision-injector', 'no-hits', { backend: backendLabel }); } catch { /* swallow */ }
207
385
  process.stdout.write(JSON.stringify({ continue: true }));
208
386
  return;
209
387
  }
210
388
 
211
- try { require('./_hook-emit.js').emitHookFired('gdd-decision-injector', 'inject', { backend: backendLabel, hit_count: hits.length }); } catch { /* swallow */ }
389
+ const additionalContext = [block, protoBlock].filter(Boolean).join('\n');
390
+
391
+ try { require('./_hook-emit.js').emitHookFired('gdd-decision-injector', 'inject', { backend: backendLabel, hit_count: hits.length, prototyping: !!protoBlock }); } catch { /* swallow */ }
212
392
  process.stdout.write(JSON.stringify({
213
393
  continue: true,
214
- hookSpecificOutput: { hookEventName: 'PreToolUse', additionalContext: block },
394
+ hookSpecificOutput: { hookEventName: 'PreToolUse', additionalContext },
215
395
  }));
216
396
  }
217
397