@hegemonart/get-design-done 1.24.2 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +41 -0
- package/README.de.md +679 -0
- package/README.fr.md +679 -0
- package/README.it.md +679 -0
- package/README.ja.md +679 -0
- package/README.ko.md +679 -0
- package/README.md +396 -729
- package/README.zh-CN.md +480 -133
- package/SKILL.md +2 -0
- package/agents/prototype-gate.md +122 -0
- package/agents/quality-gate-runner.md +125 -0
- package/hooks/budget-enforcer.ts +132 -7
- package/hooks/gdd-decision-injector.js +183 -3
- package/hooks/gdd-turn-closeout.js +238 -0
- package/hooks/hooks.json +10 -0
- package/package.json +5 -5
- package/reference/STATE-TEMPLATE.md +41 -0
- package/reference/config-schema.md +30 -0
- package/scripts/lib/gdd-state/mutator.ts +454 -0
- package/scripts/lib/gdd-state/parser.ts +351 -1
- package/scripts/lib/gdd-state/types.ts +193 -0
- package/scripts/lib/quality-gate-detect.cjs +126 -0
- package/skills/quality-gate/SKILL.md +222 -0
- package/skills/router/SKILL.md +29 -9
- package/skills/sketch-wrap-up/SKILL.md +47 -2
- package/skills/spike-wrap-up/SKILL.md +41 -2
- package/skills/turn-closeout/SKILL.md +115 -0
- package/skills/verify/SKILL.md +22 -0
package/SKILL.md
CHANGED
|
@@ -87,6 +87,8 @@ Each stage produces artifacts in `.design/` inside the current project.
|
|
|
87
87
|
| `analyze-dependencies [--slice <name>]` | `get-design-done:analyze-dependencies` | Query the `.design/intel/` store — dependency slices, graph queries, phase-scoped reads |
|
|
88
88
|
| `extract-learnings [--cycle <slug>]` | `get-design-done:extract-learnings` | Extract decisions, lessons, patterns, and surprises from a completed cycle → `.design/cycles/<slug>/LEARNINGS.md` |
|
|
89
89
|
| `skill-manifest [--refresh]` | `get-design-done:skill-manifest` | List or refresh the local skill manifest used by the router for discovery |
|
|
90
|
+
| `quality-gate` | `get-design-done:quality-gate` | Phase 25 — parallel lint/type/test/visual command runner; classifies failures via quality-gate-runner agent |
|
|
91
|
+
| `turn-closeout` | `get-design-done:turn-closeout` | Phase 25 — Stop-hook mirror skill; finalizes per-turn STATE blocks and emits closeout events |
|
|
90
92
|
| `watch-authorities [--refresh] [--since <date>] [--feed <name>] [--schedule <cadence>]` | `get-design-done:gdd-watch-authorities` | Run design-authority-watcher — fetch curated feeds, diff snapshot, classify new entries → `.design/authority-report.md` (consumed by `/gdd:reflect`) |
|
|
91
93
|
| `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
|
|
92
94
|
| `benchmark <component\|--wave N\|--list\|--refresh component>` | `get-design-done:gdd-benchmark` | Harvest + synthesize per-component design specs from 18 design systems → `reference/components/<name>.md` |
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prototype-gate
|
|
3
|
+
description: "Cheap Haiku gate that scores sketch / spike signals from the active brief / context / plan and emits a JSON verdict recommending whether to prototype before continuing."
|
|
4
|
+
tools: Read, Bash, Grep
|
|
5
|
+
color: yellow
|
|
6
|
+
model: inherit
|
|
7
|
+
default-tier: haiku
|
|
8
|
+
tier-rationale: "Signal-counting rubric over a few small inputs — no synthesis, no writes, no agent spawning. Belongs on Haiku to keep gate latency cheap (≤ 2 s typical)."
|
|
9
|
+
size_budget: S
|
|
10
|
+
parallel-safe: always
|
|
11
|
+
typical-duration-seconds: 5
|
|
12
|
+
reads-only: true
|
|
13
|
+
writes: []
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
@reference/shared-preamble.md
|
|
17
|
+
|
|
18
|
+
# prototype-gate
|
|
19
|
+
|
|
20
|
+
## Role
|
|
21
|
+
|
|
22
|
+
You answer one question at a checkpoint: *should the pipeline pause to sketch or spike before continuing?*
|
|
23
|
+
|
|
24
|
+
You run at two firing points (Phase 25 D-02):
|
|
25
|
+
1. **Post-`/gdd:explore`** — sketch territory. The question is "what visual / direction?".
|
|
26
|
+
2. **Post-`/gdd:plan` plan-checker** — spike territory. The question is "can this work technically?".
|
|
27
|
+
|
|
28
|
+
You are read-only. You do not write STATE.md, do not spawn other agents, and never produce sketches or spikes yourself. Your only job is to score signals and emit a JSON verdict.
|
|
29
|
+
|
|
30
|
+
You also honor the cycle-scoped skip rule (D-02): if `STATE.md` `<prototyping>` already contains a `<skipped at=<your_firing_point> cycle=<active_cycle>/>` entry, recommend `none` immediately with `reasons: ["skipped this cycle at the prototype gate"]` (the exact payload is given under "Cycle-skip short-circuit"). Do not re-evaluate signals.
|
|
31
|
+
|
|
32
|
+
## Input Contract
|
|
33
|
+
|
|
34
|
+
The orchestrator supplies these fields in the prompt context:
|
|
35
|
+
|
|
36
|
+
- `firing_point` — `"explore"` or `"plan"`. Determines which signal rubric you apply.
|
|
37
|
+
- `cycle` — the active cycle identifier from STATE frontmatter.
|
|
38
|
+
- `state_path` — absolute path to the active `.design/STATE.md`.
|
|
39
|
+
- `inputs` — paths to context the rubric scans:
|
|
40
|
+
- `brief_path` (always supplied) — `.design/BRIEF.md` or equivalent.
|
|
41
|
+
- `context_path` (firing_point=`"explore"`) — `.design/DESIGN-CONTEXT.md`.
|
|
42
|
+
- `design_path` (firing_point=`"explore"` if present) — `.design/DESIGN.md`.
|
|
43
|
+
- `plan_tasks_path` (firing_point=`"plan"`) — `.design/PLAN.md` or `.design/plans/*.md`.
|
|
44
|
+
- `decisions_snapshot` (always supplied) — newline-separated `D-NN: text (locked|tentative)` lines extracted from STATE `<decisions>`.
|
|
45
|
+
|
|
46
|
+
Missing input files are not an error — score the signals you can read; treat absent files as zero-signal contributions.
|
|
47
|
+
|
|
48
|
+
## Cycle-skip short-circuit
|
|
49
|
+
|
|
50
|
+
Before scoring, scan `<prototyping>` in `state_path` for a `<skipped/>` entry whose `at` matches `firing_point` AND whose `cycle` matches the active `cycle`. If found, emit:
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{"recommend": "none", "confidence": 1.0, "reasons": ["skipped this cycle at the prototype gate"]}
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Then exit. Do not score further.
|
|
57
|
+
|
|
58
|
+
## Signal Rubric
|
|
59
|
+
|
|
60
|
+
### Sketch signals (firing_point = `"explore"`)
|
|
61
|
+
|
|
62
|
+
Score 1 point per matched signal:
|
|
63
|
+
|
|
64
|
+
- **Hero / first-impression language** — BRIEF mentions "hero", "first impression", "novel surface", "landing", "above-the-fold", or names a single high-stakes screen.
|
|
65
|
+
- **DESIGN-CONTEXT visual gray areas** — DESIGN-CONTEXT.md contains an unresolved item tagged `visual:` or `direction:` (case-insensitive).
|
|
66
|
+
- **Empty design canvas** — DESIGN.md is missing or its scan returned no existing patterns to follow (no component references, no token references).
|
|
67
|
+
- **Decision conflict on the same surface** — at least two D-XX entries in `decisions_snapshot` discuss the same surface but disagree (look for paired references to the same component / page / area).
|
|
68
|
+
- **Open-ended language in interview answers** — BRIEF or DESIGN-CONTEXT contains "not sure", "open to", "??", "tbd", "we could" within answer regions.
|
|
69
|
+
- **Multiple viable patterns** — DESIGN-CONTEXT or a phase-researcher artifact lists more than one viable pattern for a single section without a chosen winner.
|
|
70
|
+
|
|
71
|
+
### Spike signals (firing_point = `"plan"`)
|
|
72
|
+
|
|
73
|
+
Score 1 point per matched signal:
|
|
74
|
+
|
|
75
|
+
- **High-risk task** — a plan task carries `Risk: high` or `Confidence: low` (case-insensitive).
|
|
76
|
+
- **Tech outside the components mapper** — a plan task references a library, framework, API, or pattern not present in the project's components / mapper artifacts.
|
|
77
|
+
- **Failed required connection** — `<connections>` reports `unavailable` for a connection that a plan task explicitly depends on.
|
|
78
|
+
- **Experimental language** — a plan task description contains "experimental", "TBD", "unsure", "spike", "prove out", "validate that".
|
|
79
|
+
- **Probe deferred** — a plan task notes "will check at runtime" or similar deferred verification.
|
|
80
|
+
|
|
81
|
+
## Threshold
|
|
82
|
+
|
|
83
|
+
| Score | recommend | confidence |
|
|
84
|
+
|-------|-----------|------------|
|
|
85
|
+
| ≥ 3 | `sketch` (explore) or `spike` (plan) | `0.9` |
|
|
86
|
+
| 1–2 | same as above | `0.5` |
|
|
87
|
+
| 0 | `none` | `0.95` |
|
|
88
|
+
|
|
89
|
+
Confidence is rubric-derived only — do not infer confidence from the size of the inputs or your own uncertainty. The thresholds above are the only valid values.
|
|
90
|
+
|
|
91
|
+
## Output Contract
|
|
92
|
+
|
|
93
|
+
Emit exactly one JSON object on its own line. No prose wrapper, no code fence, no leading or trailing text.
|
|
94
|
+
|
|
95
|
+
```json
|
|
96
|
+
{"recommend": "sketch", "confidence": 0.9, "reasons": ["BRIEF mentions hero", "DESIGN-CONTEXT visual gray area on home"]}
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Schema:
|
|
100
|
+
|
|
101
|
+
- `recommend` — string enum, one of `"sketch" | "spike" | "none"`.
|
|
102
|
+
- `confidence` — number in `[0, 1]`. One of `0.5`, `0.9`, `0.95` per the threshold table; or `1.0` for the cycle-skip short-circuit.
|
|
103
|
+
- `reasons` — array of short strings (≤ 80 chars each). One entry per matched signal, in match order. Empty array allowed when `recommend === "none"` from the threshold (not the skip path).
|
|
104
|
+
|
|
105
|
+
## Constraints
|
|
106
|
+
|
|
107
|
+
- **Do not** propose what to sketch / spike — that's the wrap-up flow's job. Your reasons are evidence, not directives.
|
|
108
|
+
- **Do not** read or write STATE.md outside of the cycle-skip lookup described above.
|
|
109
|
+
- **Do not** consult external services or MCP tools. Signal scoring is purely a function of the supplied inputs.
|
|
110
|
+
- **Do not** exceed `size_budget: S`. If inputs are unexpectedly large, prefer to score signals on the first 8 KB of each file rather than refuse to answer.
|
|
111
|
+
|
|
112
|
+
## Record
|
|
113
|
+
|
|
114
|
+
At run-end, append one JSONL line to `.design/intel/insights.jsonl`:
|
|
115
|
+
|
|
116
|
+
```json
|
|
117
|
+
{"ts":"<ISO-8601>","agent":"<name>","cycle":"<cycle from STATE.md>","stage":"<stage from STATE.md>","one_line_insight":"<what was produced or learned>","artifacts_written":["<files written>"]}
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Schema: `reference/schemas/insight-line.schema.json`. Use an empty `artifacts_written` array for read-only agents.
|
|
121
|
+
|
|
122
|
+
## GATE COMPLETE
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: quality-gate-runner
|
|
3
|
+
description: "Cheap Haiku classifier that ingests {command, exit_code, stderr} tuples from the quality-gate skill's parallel run and emits a JSON verdict — pass/fail plus per-bucket failure groupings (lint / type / test / visual). Read-only. Does not run commands itself."
|
|
4
|
+
tools: Read, Bash, Grep
|
|
5
|
+
color: amber
|
|
6
|
+
model: inherit
|
|
7
|
+
default-tier: haiku
|
|
8
|
+
tier-rationale: "Pattern-match exit codes and bucket stderr into four named categories — no synthesis, no rewrites, no spawning. Belongs on Haiku to keep classification cost trivial relative to the actual command runs."
|
|
9
|
+
size_budget: S
|
|
10
|
+
parallel-safe: always
|
|
11
|
+
typical-duration-seconds: 5
|
|
12
|
+
reads-only: true
|
|
13
|
+
writes: []
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
@reference/shared-preamble.md
|
|
17
|
+
|
|
18
|
+
# quality-gate-runner
|
|
19
|
+
|
|
20
|
+
## Role
|
|
21
|
+
|
|
22
|
+
You answer one question for the `quality-gate` skill (Phase 25 Plan 25-03): *given the outputs of the parallel command run, did the gate pass — and if not, into which buckets do the failures fall?*
|
|
23
|
+
|
|
24
|
+
You are read-only. You do not re-run any commands, do not write STATE.md, do not spawn agents, do not produce fixes. Your only job is to classify the outputs and return JSON.
|
|
25
|
+
|
|
26
|
+
## Input Contract
|
|
27
|
+
|
|
28
|
+
The skill supplies a JSON object on stdin (or as the first line of the prompt context — handle both). Shape:
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"outputs": [
|
|
33
|
+
{"command": "npm run lint", "exit_code": 0, "stderr": ""},
|
|
34
|
+
{"command": "npm run typecheck", "exit_code": 1, "stderr": "<verbatim stderr>"},
|
|
35
|
+
{"command": "npm run test", "exit_code": 0, "stderr": ""},
|
|
36
|
+
{"command": "npm run chromatic", "exit_code": 1, "stderr": "<verbatim stderr>"}
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Schema:
|
|
42
|
+
- `outputs` — array, one entry per command actually executed in Step 2 of the skill. Order is preserved from the skill (matches command-list order from Step 1).
|
|
43
|
+
- `command` — verbatim shell string the skill ran.
|
|
44
|
+
- `exit_code` — integer. `0` = clean; non-zero = failure to be classified.
|
|
45
|
+
- `stderr` — verbatim stderr capture. May be empty even on failure (some tools write to stdout), and may be non-empty on success; `exit_code` alone decides pass/fail, so never infer the outcome from whether `stderr` is empty.
|
|
46
|
+
|
|
47
|
+
You may also receive a `stdout` field per entry (forward-compat — the skill plans to add it). Tolerate its absence.
|
|
48
|
+
|
|
49
|
+
## Bucketing rule
|
|
50
|
+
|
|
51
|
+
Map each command to exactly one of four buckets based on the verbatim command string. Use case-insensitive substring match against the command line:
|
|
52
|
+
|
|
53
|
+
| Substring (case-insensitive) | Bucket |
|
|
54
|
+
|------------------------------|--------|
|
|
55
|
+
| `lint`, `eslint`, `stylelint`, `biome lint` | `lint` |
|
|
56
|
+
| `typecheck`, `tsc`, `tsc --noemit`, `flow check` | `type` |
|
|
57
|
+
| `test` (but NOT one of the visual matches below — visual wins) | `test` |
|
|
58
|
+
| `chromatic`, `test:visual`, `loki test`, `playwright test --grep visual` | `visual` |
|
|
59
|
+
|
|
60
|
+
When a command matches multiple substrings (e.g., `npm run test:visual` matches both `test` and `test:visual`), `visual` wins. If a command matches none, bucket it under `test` (catch-all — most user-supplied custom commands are test-like). Do not invent a fifth bucket.
|
|
61
|
+
|
|
62
|
+
## Pass / fail rule
|
|
63
|
+
|
|
64
|
+
- `status === "pass"` if and only if **every** entry's `exit_code === 0`.
|
|
65
|
+
- `status === "fail"` if **any** entry's `exit_code !== 0`.
|
|
66
|
+
|
|
67
|
+
Empty `outputs` array means `status === "pass"` (no commands ran → nothing failed). The skill is responsible for emitting `quality_gate_skipped` in the no-commands path; you do not.
|
|
68
|
+
|
|
69
|
+
## Failure summarization
|
|
70
|
+
|
|
71
|
+
For each failed entry (exit_code !== 0), produce one short summary string and add it to the bucket the command maps to. Summaries should:
|
|
72
|
+
|
|
73
|
+
- Quote the command name (the basename — e.g., `lint` from `npm run lint`).
|
|
74
|
+
- Include the first non-empty line of `stderr`, if present, truncated so the whole summary string stays within the 120-char limit given in the Output Contract schema.
|
|
75
|
+
- Otherwise include `exit_code=N` so the reader still sees something concrete.
|
|
76
|
+
|
|
77
|
+
Example summary strings:
|
|
78
|
+
- `"lint: 4 problems (3 errors, 1 warning)"` — when stderr's first line is informative.
|
|
79
|
+
- `"typecheck: error TS2304: Cannot find name 'foo' in src/x.ts"` — same.
|
|
80
|
+
- `"test: exit_code=1"` — when stderr is empty.
|
|
81
|
+
|
|
82
|
+
Do NOT inline full stderr — the bucket entries are summaries, not transcripts. The skill keeps the verbatim outputs for the fixer; your output is for routing only.
|
|
83
|
+
|
|
84
|
+
Buckets that have no failures are OMITTED from `classified_failures`. Do not emit empty arrays for unaffected buckets — the consumer relies on key-presence as a signal.
|
|
85
|
+
|
|
86
|
+
## Output Contract
|
|
87
|
+
|
|
88
|
+
Emit exactly one JSON object on its own line. No prose wrapper, no code fence, no leading or trailing text.
|
|
89
|
+
|
|
90
|
+
Pass example:
|
|
91
|
+
|
|
92
|
+
```json
|
|
93
|
+
{"status": "pass", "classified_failures": {}}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Fail example:
|
|
97
|
+
|
|
98
|
+
```json
|
|
99
|
+
{"status": "fail", "classified_failures": {"type": ["typecheck: error TS2304 in src/x.ts"], "visual": ["chromatic: 2 stories changed"]}}
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Schema:
|
|
103
|
+
- `status` — string enum, one of `"pass" | "fail"`. Note: this is NOT the same enum as the skill's STATE-block status (which also has `timeout` and `skipped`); those two cases are decided by the skill, not by you. You only emit `pass | fail`.
|
|
104
|
+
- `classified_failures` — object. Keys are a subset of `lint | type | test | visual`. Values are arrays of short summary strings (≤ 120 chars each). The object is `{}` (empty) when `status === "pass"`.
|
|
105
|
+
|
|
106
|
+
## Constraints
|
|
107
|
+
|
|
108
|
+
- **Do not** read `stderr` content beyond the first non-empty line. The skill keeps the verbatim outputs for the design-fixer; your job is routing, not analysis.
|
|
109
|
+
- **Do not** invent buckets outside the four-name set.
|
|
110
|
+
- **Do not** ever emit `status: "timeout"` or `status: "skipped"` — those are skill-level statuses, not classifier outputs.
|
|
111
|
+
- **Do not** consult external services or MCP tools. Classification is a pure function of the supplied input.
|
|
112
|
+
- **Do not** exceed `size_budget: S`. If `outputs[*].stderr` is unexpectedly large, prefer to summarize from the first 4 KB of each stderr rather than refuse.
|
|
113
|
+
- The output JSON object must be parseable with `JSON.parse` — no trailing comma, no comments, no surrounding markdown.
|
|
114
|
+
|
|
115
|
+
## Record
|
|
116
|
+
|
|
117
|
+
At run-end, append one JSONL line to `.design/intel/insights.jsonl`:
|
|
118
|
+
|
|
119
|
+
```json
|
|
120
|
+
{"ts":"<ISO-8601>","agent":"<name>","cycle":"<cycle from STATE.md>","stage":"<stage from STATE.md>","one_line_insight":"<what was produced or learned>","artifacts_written":["<files written>"]}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
Schema: `reference/schemas/insight-line.schema.json`. Use an empty `artifacts_written` array for read-only agents.
|
|
124
|
+
|
|
125
|
+
## GATE COMPLETE
|
package/hooks/budget-enforcer.ts
CHANGED
|
@@ -80,6 +80,26 @@ const iterationBudget = nodeRequire('../scripts/lib/iteration-budget.cjs') as ty
|
|
|
80
80
|
* for every hook invocation. The tool_input shape is tool-specific;
|
|
81
81
|
* this hook only consumes Agent-shaped tool_input so we narrow here.
|
|
82
82
|
*/
|
|
83
|
+
/** Phase 25 / D-04, D-05: router complexity-class enum. */
|
|
84
|
+
export type ComplexityClass = 'S' | 'M' | 'L' | 'XL';
|
|
85
|
+
|
|
86
|
+
/**
|
|
87
|
+
* Phase 25 / D-05: router decision payload as surfaced on
|
|
88
|
+
* tool_input.context.router_decision. Only the fields this hook reads
|
|
89
|
+
* are typed; the router emits more (model_tier_overrides,
|
|
90
|
+
* estimated_cost_usd, cache_hits) but they are not consumed here.
|
|
91
|
+
*/
|
|
92
|
+
interface RouterDecision {
|
|
93
|
+
path?: 'fast' | 'quick' | 'full';
|
|
94
|
+
complexity_class?: ComplexityClass;
|
|
95
|
+
[key: string]: unknown;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
interface ToolInputContext {
|
|
99
|
+
router_decision?: RouterDecision;
|
|
100
|
+
[key: string]: unknown;
|
|
101
|
+
}
|
|
102
|
+
|
|
83
103
|
interface ToolInput {
|
|
84
104
|
subagent_type?: string;
|
|
85
105
|
agent?: string;
|
|
@@ -91,6 +111,7 @@ interface ToolInput {
|
|
|
91
111
|
_default_tier?: string;
|
|
92
112
|
_tier_downgraded?: boolean;
|
|
93
113
|
lazy_skipped?: boolean;
|
|
114
|
+
context?: ToolInputContext;
|
|
94
115
|
[key: string]: unknown;
|
|
95
116
|
}
|
|
96
117
|
|
|
@@ -199,6 +220,46 @@ const BUDGET_DEFAULTS: Required<
|
|
|
199
220
|
enforcement_mode: 'enforce',
|
|
200
221
|
};
|
|
201
222
|
|
|
223
|
+
/**
|
|
224
|
+
* Phase 25 / D-05: optional per-class cap map on .design/budget.json.
|
|
225
|
+
* Documented in reference/config-schema.md as `class_caps_usd?: { S?: number; M?: number; L?: number; XL?: number }`.
|
|
226
|
+
* Read through the BudgetSchema index signature so we don't have to
|
|
227
|
+
* regenerate the schema for an additive optional field.
|
|
228
|
+
*/
|
|
229
|
+
type ClassCapsUsd = Partial<Record<ComplexityClass, number>>;
|
|
230
|
+
|
|
231
|
+
/**
 * Pull the optional `class_caps_usd` map off a budget object.
 *
 * The field is read through a cast because it is not a declared member of
 * the budget type used here.
 *
 * @param budget - budget object that may carry a `class_caps_usd` property.
 * @returns `undefined` when `class_caps_usd` is absent, `null`, or not an
 *   object; otherwise a map holding only the `S` / `M` / `L` / `XL` entries
 *   whose value is a finite number greater than zero (possibly empty).
 */
function readClassCaps(budget: BudgetSchema): ClassCapsUsd | undefined {
  const { class_caps_usd: candidate } = budget as { class_caps_usd?: unknown };
  if (typeof candidate !== 'object' || candidate === null) {
    return undefined;
  }

  const table = candidate as Record<string, unknown>;
  const classes: readonly ComplexityClass[] = ['S', 'M', 'L', 'XL'];
  return classes.reduce<ClassCapsUsd>((caps, cls) => {
    const amount = table[cls];
    // Non-numeric, non-finite, zero and negative entries are dropped.
    if (typeof amount === 'number' && Number.isFinite(amount) && amount > 0) {
      caps[cls] = amount;
    }
    return caps;
  }, {});
}
|
|
245
|
+
|
|
246
|
+
/**
 * Phase 25 / D-05: pick the cap that applies to a single spawn.
 *
 * @param budget - resolved budget; supplies `per_task_cap_usd` and, via
 *   `readClassCaps`, any per-class caps.
 * @param complexityClass - class taken from the router decision, or
 *   `undefined` when none was supplied.
 * @returns the class-specific cap when a class is given and a cap is
 *   defined for it; otherwise `budget.per_task_cap_usd`.
 */
function resolvePerSpawnCap(
  budget: ResolvedBudget,
  complexityClass: ComplexityClass | undefined,
): number {
  // No class signal: fall straight through to the per-task cap.
  if (complexityClass === undefined) {
    return budget.per_task_cap_usd;
  }
  const classSpecific = readClassCaps(budget)?.[complexityClass];
  return classSpecific ?? budget.per_task_cap_usd;
}
|
|
262
|
+
|
|
202
263
|
/**
|
|
203
264
|
* Concrete budget shape after defaults-merge. Every field becomes
|
|
204
265
|
* non-optional so downstream branches don't have to null-guard. Defined
|
|
@@ -490,6 +551,27 @@ export async function main(): Promise<void> {
|
|
|
490
551
|
const inputHash =
|
|
491
552
|
typeof toolInput._input_hash === 'string' ? toolInput._input_hash : null;
|
|
492
553
|
|
|
554
|
+
// Phase 25 / D-05: extract complexity_class from router decision.
|
|
555
|
+
// Absent payload → legacy per_task_cap behavior (no regression).
|
|
556
|
+
// Present payload with class === 'S' → skip enforcement entirely
|
|
557
|
+
// (defensive: the typical S path is upstream short-circuit where
|
|
558
|
+
// router never ran and this hook still applies legacy caps; an
|
|
559
|
+
// explicit S signal here means a caller bypassed the upstream skip
|
|
560
|
+
// and is asking us to honor the class).
|
|
561
|
+
const routerDecision: RouterDecision | undefined =
|
|
562
|
+
toolInput.context?.router_decision !== undefined &&
|
|
563
|
+
typeof toolInput.context.router_decision === 'object' &&
|
|
564
|
+
toolInput.context.router_decision !== null
|
|
565
|
+
? toolInput.context.router_decision
|
|
566
|
+
: undefined;
|
|
567
|
+
const complexityClass: ComplexityClass | undefined =
|
|
568
|
+
routerDecision?.complexity_class !== undefined &&
|
|
569
|
+
(['S', 'M', 'L', 'XL'] as const).includes(
|
|
570
|
+
routerDecision.complexity_class as ComplexityClass,
|
|
571
|
+
)
|
|
572
|
+
? (routerDecision.complexity_class as ComplexityClass)
|
|
573
|
+
: undefined;
|
|
574
|
+
|
|
493
575
|
const { cycle, phase } = readCycleAndPhase();
|
|
494
576
|
const cyclePhase = { cycle, phase };
|
|
495
577
|
|
|
@@ -513,6 +595,38 @@ export async function main(): Promise<void> {
|
|
|
513
595
|
|
|
514
596
|
const budget = loadBudget();
|
|
515
597
|
|
|
598
|
+
// Phase 25 / D-05: explicit S-class short-circuit. The typical S path
|
|
599
|
+
// skips the router entirely and this hook never runs at all (the
|
|
600
|
+
// command's SKILL.md does the deterministic skip upstream). When we
|
|
601
|
+
// DO see complexity_class === 'S' in the payload it means a caller
|
|
602
|
+
// routed an S-class command through the hook anyway — honor the
|
|
603
|
+
// class by skipping enforcement (no cap check, no downgrade) but
|
|
604
|
+
// still write a zero-cost telemetry row + emit an 'allow' event so
|
|
605
|
+
// observability stays consistent.
|
|
606
|
+
if (complexityClass === 'S') {
|
|
607
|
+
writeTelemetry({
|
|
608
|
+
agent,
|
|
609
|
+
tier:
|
|
610
|
+
toolInput._tier_override ??
|
|
611
|
+
toolInput._default_tier ??
|
|
612
|
+
'haiku',
|
|
613
|
+
tokens_in: Number(toolInput._tokens_in_est ?? 0),
|
|
614
|
+
tokens_out: Number(toolInput._tokens_out_est ?? 0),
|
|
615
|
+
cache_hit: false,
|
|
616
|
+
est_cost_usd: Number(toolInput._est_cost_usd ?? 0),
|
|
617
|
+
enforcement_mode: budget.enforcement_mode,
|
|
618
|
+
_cyclePhase: cyclePhase,
|
|
619
|
+
});
|
|
620
|
+
emitHookFired('allow', cycle);
|
|
621
|
+
const response: ToolOutput = {
|
|
622
|
+
continue: true,
|
|
623
|
+
suppressOutput: true,
|
|
624
|
+
modified_tool_input: toolInput,
|
|
625
|
+
};
|
|
626
|
+
process.stdout.write(JSON.stringify(response));
|
|
627
|
+
return;
|
|
628
|
+
}
|
|
629
|
+
|
|
516
630
|
// Branch B: cache short-circuit (D-05).
|
|
517
631
|
if (inputHash !== null) {
|
|
518
632
|
const cached = cacheLookup(agent, inputHash);
|
|
@@ -589,9 +703,15 @@ export async function main(): Promise<void> {
|
|
|
589
703
|
const estCost = Number(toolInput._est_cost_usd ?? 0);
|
|
590
704
|
const phaseSpend = currentPhaseSpend(phase);
|
|
591
705
|
|
|
706
|
+
// Phase 25 / D-05: per-spawn cap is class-specific when
|
|
707
|
+
// complexity_class is present and class_caps_usd[class] is defined.
|
|
708
|
+
// Falls back to per_task_cap_usd for backwards compatibility — when
|
|
709
|
+
// no router decision is supplied, behavior is identical to pre-25.
|
|
710
|
+
const perSpawnCap = resolvePerSpawnCap(budget, complexityClass);
|
|
711
|
+
|
|
592
712
|
if (budget.enforcement_mode === 'enforce') {
|
|
593
|
-
// Branch C: 100%
|
|
594
|
-
if (estCost >=
|
|
713
|
+
// Branch C: 100% per-spawn cap hard block (class-specific or per_task).
|
|
714
|
+
if (estCost >= perSpawnCap) {
|
|
595
715
|
writeTelemetry({
|
|
596
716
|
agent,
|
|
597
717
|
tier:
|
|
@@ -607,10 +727,14 @@ export async function main(): Promise<void> {
|
|
|
607
727
|
_cyclePhase: cyclePhase,
|
|
608
728
|
});
|
|
609
729
|
emitHookFired('block', cycle);
|
|
730
|
+
const capLabel =
|
|
731
|
+
complexityClass !== undefined && perSpawnCap !== budget.per_task_cap_usd
|
|
732
|
+
? `class_caps_usd.${complexityClass}`
|
|
733
|
+
: 'per-task';
|
|
610
734
|
const response: ToolOutput = {
|
|
611
735
|
continue: false,
|
|
612
736
|
suppressOutput: false,
|
|
613
|
-
message: `Budget cap reached for
|
|
737
|
+
message: `Budget cap reached for ${capLabel}. Estimated: $${estCost.toFixed(4)}, cap: $${perSpawnCap.toFixed(2)}. Raise cap in .design/budget.json or retry after next task.`,
|
|
614
738
|
};
|
|
615
739
|
process.stdout.write(JSON.stringify(response));
|
|
616
740
|
return;
|
|
@@ -640,18 +764,19 @@ export async function main(): Promise<void> {
|
|
|
640
764
|
process.stdout.write(JSON.stringify(response));
|
|
641
765
|
return;
|
|
642
766
|
}
|
|
643
|
-
// 80% soft-threshold downgrade (D-03): task-scoped
|
|
767
|
+
// 80% soft-threshold downgrade (D-03): task-scoped, against the
|
|
768
|
+
// resolved per-spawn cap so class-specific caps participate.
|
|
644
769
|
if (
|
|
645
770
|
budget.auto_downgrade_on_cap &&
|
|
646
|
-
estCost >= 0.8 *
|
|
771
|
+
estCost >= 0.8 * perSpawnCap
|
|
647
772
|
) {
|
|
648
773
|
toolInput._tier_override = 'haiku';
|
|
649
774
|
toolInput._tier_downgraded = true;
|
|
650
775
|
}
|
|
651
776
|
} else if (budget.enforcement_mode === 'warn') {
|
|
652
|
-
if (estCost >=
|
|
777
|
+
if (estCost >= perSpawnCap) {
|
|
653
778
|
process.stderr.write(
|
|
654
|
-
`gdd-budget-enforcer WARN: per-
|
|
779
|
+
`gdd-budget-enforcer WARN: per-spawn cap will be exceeded ($${estCost.toFixed(4)} >= $${perSpawnCap})\n`,
|
|
655
780
|
);
|
|
656
781
|
}
|
|
657
782
|
}
|
|
@@ -23,6 +23,7 @@ const { spawnSync } = require('child_process');
|
|
|
23
23
|
|
|
24
24
|
const MIN_BYTES = 1500;
|
|
25
25
|
const TOP_N = 15;
|
|
26
|
+
const PROTOTYPING_TOP_N = 5;
|
|
26
27
|
const MATCHER_RE = /[\\/](?:\.design|reference|\.planning)[\\/][^\n]*\.md$/;
|
|
27
28
|
|
|
28
29
|
// Phase 19.5: try FTS5 backend first; fall back to grep silently.
|
|
@@ -111,6 +112,174 @@ function sortKeyFor(tag) {
|
|
|
111
112
|
return 0;
|
|
112
113
|
}
|
|
113
114
|
|
|
115
|
+
/**
 * Parse a self-closing-tag attribute string (`a="x" b='y'`) into a kv map.
 * Self-contained: avoids a TS-parser import to keep the hook hot path JS-only.
 *
 * Values may be wrapped in double or single quotes. Unquoted values are
 * ignored. When a name repeats, the last occurrence wins.
 *
 * @param {string} [attrStr] - raw text between the tag name and `/>`.
 * @returns {Object<string, string>} attribute name -> value; `{}` for empty
 *   or missing input.
 */
function parseAttrs(attrStr) {
  const out = {};
  if (!attrStr) return out;
  // Group 2 captures a double-quoted value, group 3 a single-quoted one.
  // Scanning is left-to-right, so an apostrophe inside a double-quoted value
  // (or a double quote inside a single-quoted one) stays part of that value.
  const re = /(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
  let m;
  while ((m = re.exec(attrStr)) !== null) {
    out[m[1]] = m[2] !== undefined ? m[2] : m[3];
  }
  return out;
}
|
|
127
|
+
|
|
128
|
+
/**
 * Read STATE.md once and pull out the two pieces the prototyping recall needs.
 *
 * @param {string} [stateFile] - path to STATE.md.
 * @returns {{prototyping: string, decisionsMap: Object<string, string>}}
 *   `prototyping` is the text between the first `<prototyping>` and
 *   `</prototyping>` (or ''). `decisionsMap` is a prototype-less object
 *   mapping each `D-NN` id found on a `D-NN: text` line inside the first
 *   `<decisions>` block to its text, with a trailing `(locked)` /
 *   `(tentative)` qualifier removed. A missing path, an unreadable file, or
 *   absent blocks yield the empty defaults.
 */
function readStateForPrototyping(stateFile) {
  const result = { prototyping: '', decisionsMap: Object.create(null) };
  if (!stateFile) return result;

  let text;
  try {
    text = fs.readFileSync(stateFile, 'utf8');
  } catch {
    // Best-effort: an unreadable STATE.md simply contributes nothing.
    return result;
  }

  const protoHit = /<prototyping>([\s\S]*?)<\/prototyping>/.exec(text);
  if (protoHit !== null) result.prototyping = protoHit[1];

  const decisionsHit = /<decisions>([\s\S]*?)<\/decisions>/.exec(text);
  if (decisionsHit === null) return result;

  const decisionsBody = decisionsHit[1];
  const lineRe = /^\s*(D-\d+)\s*:\s*(.+?)\s*$/gm;
  for (let hit = lineRe.exec(decisionsBody); hit !== null; hit = lineRe.exec(decisionsBody)) {
    const id = hit[1];
    const rawText = hit[2];
    // Drop a trailing `(locked)` / `(tentative)` qualifier when one is present.
    result.decisionsMap[id] = rawText.replace(/\s*\((?:locked|tentative)\)\s*$/i, '').trim();
  }
  return result;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Parse a `<prototyping>` body into typed entries.
 *
 * Only self-closing `<sketch …/>`, `<spike …/>` and `<skipped …/>` tags are
 * recognized; any other tag is ignored. HTML comments (`<!-- … -->`) are
 * removed before scanning, so a commented-out example tag is not reported
 * as a live entry.
 *
 * @param {string} [body] - inner text of the `<prototyping>` block.
 * @returns {Array<{type: string, attrs: Object<string, string>}>} entries in
 *   document order; `[]` for empty or missing input.
 */
function parsePrototypingEntries(body) {
  const entries = [];
  if (!body) return entries;
  // Strip terminated comments first. An unterminated `<!--` is left alone,
  // so its contents are still scanned, as they were before this change.
  const live = String(body).replace(/<!--[\s\S]*?-->/g, '');
  const re = /<(sketch|spike|skipped)\b([^>]*?)\/>/g;
  let m;
  while ((m = re.exec(live)) !== null) {
    entries.push({ type: m[1], attrs: parseAttrs(m[2]) });
  }
  return entries;
}
|
|
172
|
+
|
|
173
|
+
/**
 * Break a slug / basename / path into lowercase comparison tokens.
 *
 * The input is lowercased and split on hyphens, underscores, dots, forward
 * and back slashes, and whitespace. Pieces of length 0 or 1 are discarded,
 * as are the no-signal words `md`, `txt`, `json`, `ts`, `js`, `plan`,
 * `context` and `state`.
 *
 * @param {*} s - value to tokenize; coerced with `String()` when truthy.
 * @returns {string[]} surviving tokens in input order; `[]` for falsy input.
 */
function tokenize(s) {
  if (!s) return [];
  const noise = new Set(['md', 'txt', 'json', 'ts', 'js', 'plan', 'context', 'state']);
  const kept = [];
  for (const piece of String(s).toLowerCase().split(/[-_./\\\s]+/)) {
    // The length check also removes the empty strings split() produces at the edges.
    if (piece.length > 1 && !noise.has(piece)) kept.push(piece);
  }
  return kept;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Decide whether a prototyping entry is relevant to the opened file.
 *
 * The matcher term is the entry's `slug` (for `sketch` / `spike`) or its
 * `reason` (for `skipped`). The entry matches when any token of the term is
 * also a token of `basename` or `relPath`, or — as a fallback for terms whose
 * tokens are all filtered out by `tokenize` — when the lowercased term is a
 * plain substring of either name.
 *
 * @param {{type: string, attrs: object}} entry - Parsed prototyping entry.
 * @param {string} basename - Basename of the opened file.
 * @param {string} relPath - Path of the opened file.
 * @returns {?string} The matcher term when the entry matches, otherwise `null`
 *   (also `null` when the entry has no term or the file yields no tokens).
 */
function matchPrototypingEntry(entry, basename, relPath) {
  let term;
  if (entry.type === 'sketch' || entry.type === 'spike') {
    term = entry.attrs.slug;
  } else if (entry.type === 'skipped') {
    term = entry.attrs.reason;
  }
  if (!term) return null;

  const fileTokens = new Set([...tokenize(basename), ...tokenize(relPath)]);
  if (fileTokens.size === 0) return null;

  if (tokenize(term).some((t) => fileTokens.has(t))) return term;

  // Fallback: plain substring (helps `reason` strings and slugs containing
  // tokens that don't survive the stop-word filter). `tokenize` accepts a
  // missing basename/relPath, so coerce here as well rather than throwing on
  // `.toLowerCase()` when only one of the two names is available.
  const needle = String(term).toLowerCase();
  const haystacks = [basename, relPath].map((v) => (v == null ? '' : String(v).toLowerCase()));
  return haystacks.some((h) => h.includes(needle)) ? term : null;
}
|
|
216
|
+
|
|
217
|
+
/**
 * Render one prototyping entry as a single line for the additionalContext block.
 *
 * Segments are joined with " — " in this order:
 *   1. `Prototyping outcome (cycle <cycle>): <type>/<slug-or-at>` ("?" for missing values)
 *   2. `<decision>` or `<decision> — <decision text>` when the entry names a decision
 *   3. exactly one of `verdict: …` (spikes with a verdict), `status: …`,
 *      or `reason: …` (skipped entries), whichever applies first
 * Segments 2 and 3 are omitted when the entry lacks the corresponding attributes.
 *
 * @param {{type: string, attrs: object}} entry - Parsed prototyping entry.
 * @param {object} decisionsMap - Decision id → decision text lookup.
 * @returns {string} The formatted line.
 */
function formatPrototypingEntry(entry, decisionsMap) {
  const { type, attrs } = entry;
  const parts = [
    `Prototyping outcome (cycle ${attrs.cycle || '?'}): ${type}/${attrs.slug || attrs.at || '?'}`,
  ];

  if (attrs.decision) {
    const why = decisionsMap[attrs.decision];
    parts.push(why ? `${attrs.decision} — ${why}` : attrs.decision);
  }

  let outcome = null;
  if (type === 'spike' && attrs.verdict) {
    outcome = `verdict: ${attrs.verdict}`;
  } else if (attrs.status) {
    outcome = `status: ${attrs.status}`;
  } else if (type === 'skipped' && attrs.reason) {
    outcome = `reason: ${attrs.reason}`;
  }
  if (outcome !== null) parts.push(outcome);

  return parts.join(' — ');
}
|
|
240
|
+
|
|
241
|
+
/**
 * Assemble the "Prior prototyping outcomes" markdown block for the opened file.
 *
 * Reads the state file, keeps the prototyping entries that match the file
 * (per `matchPrototypingEntry`), orders them by the first number found in
 * their `cycle` attribute (highest first; entries without one sort as 0), and
 * lists at most `PROTOTYPING_TOP_N` of them followed by a "… (N more …)"
 * line when any were cut.
 *
 * @param {string} stateFile - Path to the state file; falsy disables the block.
 * @param {string} basename - Basename of the opened file.
 * @param {string} relPath - Path of the opened file.
 * @returns {?string} The block text (with a leading and a trailing blank line), or
 *   `null` when there is no state file, no prototyping section, no parsable
 *   entry, or no matching entry — so the caller can omit the heading entirely.
 */
function buildPrototypingBlock(stateFile, basename, relPath) {
  if (!stateFile) return null;

  const state = readStateForPrototyping(stateFile);
  if (!state.prototyping) return null;

  const parsed = parsePrototypingEntries(state.prototyping);
  if (!parsed.length) return null;

  const relevant = parsed.filter((entry) => matchPrototypingEntry(entry, basename, relPath));
  if (relevant.length === 0) return null;

  // Recency: cycle is typically `cycle-N` or `N`; coerce to a number for sorting.
  const cycleOrdinal = (entry) => {
    const digits = /(\d+)/.exec(String(entry.attrs.cycle || ''));
    return digits ? Number(digits[1]) : 0;
  };
  relevant.sort((x, y) => cycleOrdinal(y) - cycleOrdinal(x));

  const overflow = relevant.length - PROTOTYPING_TOP_N;
  const out = [
    '',
    '### Prior prototyping outcomes',
    ...relevant
      .slice(0, PROTOTYPING_TOP_N)
      .map((entry) => `> - ${formatPrototypingEntry(entry, state.decisionsMap)}`),
  ];
  if (overflow > 0) {
    out.push(`> … (${overflow} more prototyping entr${overflow === 1 ? 'y' : 'ies'})`);
  }
  out.push('');
  return out.join('\n');
}
|
|
282
|
+
|
|
114
283
|
function buildRecallBlock(matches, basename, backendLabel) {
|
|
115
284
|
if (!matches.length) return null;
|
|
116
285
|
const uniq = [];
|
|
@@ -202,16 +371,27 @@ async function main() {
|
|
|
202
371
|
|
|
203
372
|
const backendLabel = BACKEND || (useRgGlobal ? 'ripgrep' : 'node-grep');
|
|
204
373
|
const block = buildRecallBlock(hits, basename, backendLabel);
|
|
205
|
-
|
|
374
|
+
|
|
375
|
+
// Phase 25 (plan 25-06): surface <prototyping> outcomes when an opened
|
|
376
|
+
// planning/design .md ≥1500 bytes shares a slug/reason token with a
|
|
377
|
+
// resolved sketch/spike/skipped entry. STATE.md is the canonical home for
|
|
378
|
+
// the block (D-01); we read it directly here rather than via the TS parser
|
|
379
|
+
// so the hook stays self-contained JS.
|
|
380
|
+
const stateFile = sources.find((p) => p.endsWith(path.sep + 'STATE.md') || p.endsWith('/STATE.md'));
|
|
381
|
+
const protoBlock = buildPrototypingBlock(stateFile, basename, relPath);
|
|
382
|
+
|
|
383
|
+
if (!block && !protoBlock) {
|
|
206
384
|
try { require('./_hook-emit.js').emitHookFired('gdd-decision-injector', 'no-hits', { backend: backendLabel }); } catch { /* swallow */ }
|
|
207
385
|
process.stdout.write(JSON.stringify({ continue: true }));
|
|
208
386
|
return;
|
|
209
387
|
}
|
|
210
388
|
|
|
211
|
-
|
|
389
|
+
const additionalContext = [block, protoBlock].filter(Boolean).join('\n');
|
|
390
|
+
|
|
391
|
+
try { require('./_hook-emit.js').emitHookFired('gdd-decision-injector', 'inject', { backend: backendLabel, hit_count: hits.length, prototyping: !!protoBlock }); } catch { /* swallow */ }
|
|
212
392
|
process.stdout.write(JSON.stringify({
|
|
213
393
|
continue: true,
|
|
214
|
-
hookSpecificOutput: { hookEventName: 'PreToolUse', additionalContext
|
|
394
|
+
hookSpecificOutput: { hookEventName: 'PreToolUse', additionalContext },
|
|
215
395
|
}));
|
|
216
396
|
}
|
|
217
397
|
|