@hegemonart/get-design-done 1.25.0 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +46 -0
- package/README.md +10 -6
- package/agents/README.md +60 -0
- package/agents/design-reflector.md +43 -0
- package/agents/gdd-intel-updater.md +34 -1
- package/hooks/budget-enforcer.ts +143 -4
- package/package.json +1 -1
- package/reference/model-prices.md +40 -19
- package/reference/prices/antigravity.md +21 -0
- package/reference/prices/augment.md +21 -0
- package/reference/prices/claude.md +42 -0
- package/reference/prices/cline.md +23 -0
- package/reference/prices/codebuddy.md +21 -0
- package/reference/prices/codex.md +25 -0
- package/reference/prices/copilot.md +21 -0
- package/reference/prices/cursor.md +21 -0
- package/reference/prices/gemini.md +25 -0
- package/reference/prices/kilo.md +21 -0
- package/reference/prices/opencode.md +23 -0
- package/reference/prices/qwen.md +25 -0
- package/reference/prices/trae.md +23 -0
- package/reference/prices/windsurf.md +21 -0
- package/reference/registry.json +107 -1
- package/reference/runtime-models.md +446 -0
- package/reference/schemas/runtime-models.schema.json +123 -0
- package/scripts/install.cjs +8 -0
- package/scripts/lib/budget-enforcer.cjs +446 -0
- package/scripts/lib/cost-arbitrage.cjs +294 -0
- package/scripts/lib/install/installer.cjs +188 -11
- package/scripts/lib/install/parse-runtime-models.cjs +267 -0
- package/scripts/lib/install/runtimes.cjs +43 -0
- package/scripts/lib/runtime-detect.cjs +96 -0
- package/scripts/lib/tier-resolver.cjs +311 -0
- package/scripts/validate-frontmatter.ts +138 -1
- package/skills/router/SKILL.md +51 -2
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
// scripts/lib/tier-resolver.cjs
|
|
2
|
+
//
|
|
3
|
+
// Plan 26-02 — tier→model resolver with fallback chain.
|
|
4
|
+
//
|
|
5
|
+
// `resolve(runtime, tier, opts?) → model-string | null`
|
|
6
|
+
//
|
|
7
|
+
// Translates the tier vocabulary frontmatter speaks (`opus`, `sonnet`,
|
|
8
|
+
// `haiku`) into the concrete model name a specific runtime understands
|
|
9
|
+
// (e.g. `gpt-5`, `gemini-2.5-pro`, `qwen3-max`). Source-of-truth for the
|
|
10
|
+
// mapping is `reference/runtime-models.md` (plan 26-01); this module
|
|
11
|
+
// reads the parsed form via 26-01's parser helper at
|
|
12
|
+
// `scripts/lib/install/parse-runtime-models.cjs`.
|
|
13
|
+
//
|
|
14
|
+
// Parsed-models shape (from 26-01):
|
|
15
|
+
// {
|
|
16
|
+
// schema_version: 1,
|
|
17
|
+
// runtimes: [
|
|
18
|
+
// { id: 'claude',
|
|
19
|
+
// tier_to_model: { opus: { model: 'claude-opus-4-7' }, … },
|
|
20
|
+
// reasoning_class_to_model: { high: { model: '…' }, … },
|
|
21
|
+
// provenance: [...]
|
|
22
|
+
// },
|
|
23
|
+
// …
|
|
24
|
+
// ]
|
|
25
|
+
// }
|
|
26
|
+
//
|
|
27
|
+
// Fallback chain (D-04):
|
|
28
|
+
// 1. runtime-specific entry has the tier → use directly (no event).
|
|
29
|
+
// 2. runtime row missing OR tier missing on the row → fall back to the
|
|
30
|
+
// `claude` row (Anthropic-default convention 26-01 baked into every
|
|
31
|
+
// placeholder runtime), emit `tier_resolution_fallback`.
|
|
32
|
+
// 3. neither available (e.g. a parsed map with no claude row, or a
|
|
33
|
+
// claude row missing the requested tier) → return null, emit
|
|
34
|
+
// `tier_resolution_failed`.
|
|
35
|
+
//
|
|
36
|
+
// Never throws. null is a valid output the caller (router, budget-
|
|
37
|
+
// enforcer) must handle gracefully. Garbage input (undefined runtime,
|
|
38
|
+
// bogus tier, malformed models) returns null + failure event.
|
|
39
|
+
//
|
|
40
|
+
// `.cjs` to match Phase 22 primitives and let .ts hooks require it
|
|
41
|
+
// under --experimental-strip-types without ESM-interop friction.
|
|
42
|
+
//
|
|
43
|
+
// Pure module — no top-level side effects beyond reading the parsed
|
|
44
|
+
// runtime-models document on first call. The parsed form is cached per-
|
|
45
|
+
// process; callers that need a fresh read between cycles call `reset()`.
|
|
46
|
+
//
|
|
47
|
+
// Test-injection contract: callers may pass `opts.models` to bypass the
|
|
48
|
+
// on-disk lookup entirely. Used by `tests/tier-resolver.test.cjs` to
|
|
49
|
+
// exercise the fallback branches deterministically.
|
|
50
|
+
|
|
51
|
+
'use strict';
|
|
52
|
+
|
|
53
|
+
const fs = require('node:fs');
|
|
54
|
+
const path = require('node:path');
|
|
55
|
+
|
|
56
|
+
const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Runtime-id whose row supplies the fallback for missing entries.
|
|
60
|
+
* 26-01's runtime-models.md uses Anthropic models as the closest-
|
|
61
|
+
* published-equivalent placeholder for every runtime that lacks a
|
|
62
|
+
* confirmed tier-map; that convention makes `claude` the natural
|
|
63
|
+
* D-04-branch-2 default. If 26-01 ever changes that convention,
|
|
64
|
+
* update this constant in lockstep.
|
|
65
|
+
*/
|
|
66
|
+
const DEFAULT_RUNTIME_ID = 'claude';
|
|
67
|
+
|
|
68
|
+
const DEFAULT_EVENTS_PATH = path.join('.design', 'telemetry', 'events.jsonl');
|
|
69
|
+
|
|
70
|
+
/**
|
|
71
|
+
* Cached parsed-models data. `null` until first lazy load (or after
|
|
72
|
+
* `reset()`).
|
|
73
|
+
*/
|
|
74
|
+
let _cachedModels = null;
|
|
75
|
+
|
|
76
|
+
/**
 * Soft-require the runtime-models parser that ships alongside this
 * module under `install/parse-runtime-models.cjs`.
 *
 * Nothing here is allowed to escape as an exception: a missing file and
 * a `require` that throws both collapse to `null`, which `loadModels`
 * treats as "no on-disk models available".
 *
 * @returns {*} whatever the parser module exports, or `null` when the
 *   file is absent or cannot be loaded.
 */
function loadParser() {
  let parserExports = null;
  try {
    const parserFile = path.join(__dirname, 'install', 'parse-runtime-models.cjs');
    if (fs.existsSync(parserFile)) {
      parserExports = require(parserFile);
    }
  } catch {
    // Unreadable / broken parser module — degrade to "no parser".
    parserExports = null;
  }
  return parserExports;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Return the parsed runtime-models document, loading it on first use
 * and serving the cached copy (`_cachedModels`) afterwards.
 *
 * The parser entry point is taken from `parser.parseRuntimeModels` when
 * that is a function, otherwise from the module export itself when the
 * export is a function. Only a truthy object result is cached; every
 * other outcome (no parser, no callable entry point, non-object result,
 * parser throwing) returns `null` and leaves the cache empty, so the
 * next call tries again.
 *
 * @returns {object | null} parsed models, or `null` when unavailable.
 */
function loadModels() {
  if (_cachedModels !== null) {
    return _cachedModels;
  }

  const parserExports = loadParser();
  if (parserExports === null) {
    return null;
  }

  try {
    let parse = null;
    if (typeof parserExports.parseRuntimeModels === 'function') {
      parse = parserExports.parseRuntimeModels;
    } else if (typeof parserExports === 'function') {
      parse = parserExports;
    }
    if (parse === null) {
      return null;
    }

    const parsed = parse();
    const usable = Boolean(parsed) && typeof parsed === 'object';
    if (!usable) {
      return null;
    }
    _cachedModels = parsed;
    return parsed;
  } catch {
    // A throwing parser (e.g. schema validation failure) is reported to
    // callers as "no usable models" rather than crashing the consumer.
    return null;
  }
}
|
|
118
|
+
|
|
119
|
+
/**
 * Drop the cached parsed-models document so the next `loadModels()`
 * call goes back through the parser. Intended for tests that swap the
 * runtime-models fixture between cases, and for callers that need a
 * fresh read between cycles.
 *
 * @returns {void}
 */
function reset() {
  _cachedModels = null;
}
|
|
126
|
+
|
|
127
|
+
/**
 * Append one telemetry event, as a single JSON line, to the events file.
 *
 * Destination: `GDD_EVENTS_PATH` when that env var is set and non-empty
 * (test isolation), otherwise `DEFAULT_EVENTS_PATH` resolved against the
 * current working directory. The parent directory is created on demand.
 *
 * The line is written directly rather than through the .ts EventWriter
 * so that .cjs consumers don't need --experimental-strip-types; the
 * envelope (`type`, `timestamp`, `sessionId`, `payload`, `_meta`) is kept
 * in the same shape.
 *
 * Best-effort by contract: every step that can throw — serializing the
 * payload, resolving the cwd, creating the directory, appending — runs
 * inside the guard. A failure is reported on stderr and never reaches
 * the caller.
 *
 * @param {string} type  Event type, e.g. `tier_resolution_failed`.
 * @param {*} payload    Event body, stored verbatim under `payload`.
 * @returns {void}
 */
function emitEvent(type, payload) {
  try {
    // Serialize INSIDE the guard: `resolve` forwards caller-supplied
    // garbage verbatim into the payload, and JSON.stringify throws on
    // circular structures and BigInt values.
    const line = JSON.stringify({
      type,
      timestamp: new Date().toISOString(),
      sessionId: process.env.GDD_SESSION_ID || 'tier-resolver',
      payload,
      _meta: {
        pid: process.pid,
        host: 'tier-resolver',
        source: 'tier-resolver',
      },
    });
    const envPath = process.env.GDD_EVENTS_PATH;
    const target = envPath && envPath.length > 0
      ? envPath
      : path.join(process.cwd(), DEFAULT_EVENTS_PATH);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    fs.appendFileSync(target, `${line}\n`, { encoding: 'utf8' });
  } catch (err) {
    // Telemetry failure must not cascade into resolver failure; the
    // resolver's job is to return a model (or null), not to guarantee
    // that the event was recorded.
    try {
      process.stderr.write(
        `[tier-resolver] event emit failed: ${err && err.message ? err.message : String(err)}\n`,
      );
    } catch {
      /* stderr itself is unusable — nothing left to report to */
    }
  }
}
|
|
170
|
+
|
|
171
|
+
/**
 * Locate the runtime row for `id` inside a parsed-models document.
 *
 * Two shapes of `models.runtimes` are understood:
 *   - array:  `[{ id, … }, …]` — the first object entry whose `id`
 *     strictly equals `id` wins; non-object entries are skipped.
 *   - object: `{ [id]: { … } }` — the value stored under `id`, provided
 *     it is an OWN property and is itself an object.
 *
 * The own-property check matters for the object shape: without it an id
 * such as `__proto__` would resolve to `Object.prototype` and be
 * reported as an existing row.
 *
 * @param {*} models  Parsed models document (anything else yields null).
 * @param {*} id      Runtime id to look up.
 * @returns {object | null} the matching row, or null when the document
 *   is malformed or has no such row.
 */
function findRuntimeRow(models, id) {
  if (!models || typeof models !== 'object') return null;

  const runtimes = models.runtimes;

  if (Array.isArray(runtimes)) {
    const hit = runtimes.find(
      (row) => Boolean(row) && typeof row === 'object' && row.id === id,
    );
    return hit === undefined ? null : hit;
  }

  if (runtimes && typeof runtimes === 'object') {
    // Only rows the document actually declares count — never inherited
    // members of Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(runtimes, id)) return null;
    const row = runtimes[id];
    return row && typeof row === 'object' ? row : null;
  }

  return null;
}
|
|
192
|
+
|
|
193
|
+
/**
 * Extract the model name for `tier` from one runtime row.
 *
 * `row.tier_to_model[tier]` may be either the nested form
 * (`{ model: '…' }`) or the flat form (`'…'`). In both cases only a
 * non-empty string counts as a model; a missing row, a missing or
 * non-object `tier_to_model`, an absent tier, or an empty / non-string
 * value all produce `null`.
 *
 * @param {*} row       Runtime row, as returned by `findRuntimeRow`.
 * @param {string} tier Tier key to read.
 * @returns {string | null} model name, or null when absent or malformed.
 */
function lookupTier(row, tier) {
  const tierMap = row && typeof row === 'object' ? row.tier_to_model : null;
  if (!tierMap || typeof tierMap !== 'object') {
    return null;
  }

  const entry = tierMap[tier];
  // Nested entries carry the name under `.model`; flat entries ARE the name.
  const candidate = entry && typeof entry === 'object' ? entry.model : entry;

  return typeof candidate === 'string' && candidate.length > 0 ? candidate : null;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Translate a `(runtime, tier)` pair into the concrete model name.
 *
 * Resolution order:
 *   1. the runtime's own row supplies the tier → return it, no event;
 *   2. otherwise, unless the runtime already IS the default runtime, the
 *      default runtime's row supplies the tier → return it and emit
 *      `tier_resolution_fallback`;
 *   3. otherwise → return null and emit `tier_resolution_failed`.
 *
 * Invalid input (runtime not a non-empty string, tier not in
 * `VALID_TIERS`, models missing or not an object) also returns null
 * with a `tier_resolution_failed` event; the offending values are
 * copied into the payload as-is, with `undefined` recorded as `null`.
 *
 * @param {string | null | undefined} runtime
 *   Runtime id, e.g. 'claude' or 'codex'.
 * @param {string | null | undefined} tier
 *   One of `opus` / `sonnet` / `haiku`.
 * @param {object} [opts]
 * @param {object} [opts.models]
 *   Pre-parsed models document; when truthy it is used instead of the
 *   lazily loaded on-disk document.
 * @param {boolean} [opts.silent]
 *   When truthy, no events are emitted on the fallback / failure paths.
 * @returns {string | null}
 */
function resolve(runtime, tier, opts) {
  const options = opts || {};
  const models = options.models || loadModels();
  const silent = Boolean(options.silent);

  const report = (type, payload) => {
    if (!silent) emitEvent(type, payload);
  };

  // Input checks come before any lookup so the failure payload can carry
  // the rejected values for diagnosis.
  const runtimeOk = typeof runtime === 'string' && runtime.length > 0;
  const tierOk = typeof tier === 'string' && VALID_TIERS.includes(tier);
  const modelsOk = Boolean(models) && typeof models === 'object';

  if (!(runtimeOk && tierOk && modelsOk)) {
    let reason = 'models_unavailable';
    if (!runtimeOk) {
      reason = 'invalid_runtime';
    } else if (!tierOk) {
      reason = 'invalid_tier';
    }
    report('tier_resolution_failed', {
      runtime: runtime === undefined ? null : runtime,
      tier: tier === undefined ? null : tier,
      reason,
    });
    return null;
  }

  // 1. Runtime-specific entry.
  const runtimeRow = findRuntimeRow(models, runtime);
  const ownModel = lookupTier(runtimeRow, tier);
  if (ownModel !== null) {
    return ownModel;
  }

  // 2. Default-runtime entry. Skipped when the requested runtime is the
  //    default itself: its row was just consulted and missed, so asking
  //    it again cannot succeed — that case is a genuine failure.
  const isDefaultRuntime = runtime === DEFAULT_RUNTIME_ID;
  if (!isDefaultRuntime) {
    const borrowedModel = lookupTier(findRuntimeRow(models, DEFAULT_RUNTIME_ID), tier);
    if (borrowedModel !== null) {
      report('tier_resolution_fallback', {
        runtime,
        tier,
        model: borrowedModel,
        reason: runtimeRow === null ? 'runtime_not_in_map' : 'tier_missing_for_runtime',
        fallback_runtime: DEFAULT_RUNTIME_ID,
      });
      return borrowedModel;
    }
  }

  // 3. Nothing usable anywhere.
  let failureReason;
  if (runtimeRow === null) {
    failureReason = 'runtime_not_in_map';
  } else if (isDefaultRuntime) {
    failureReason = 'tier_missing_on_default_runtime';
  } else {
    failureReason = 'tier_missing_no_default';
  }
  report('tier_resolution_failed', { runtime, tier, reason: failureReason });
  return null;
}
|
|
303
|
+
|
|
304
|
+
module.exports = {
|
|
305
|
+
resolve,
|
|
306
|
+
reset,
|
|
307
|
+
VALID_TIERS,
|
|
308
|
+
DEFAULT_RUNTIME_ID,
|
|
309
|
+
// internals surfaced for tests only — stable API = `resolve` + `reset`.
|
|
310
|
+
_internal: { lookupTier, findRuntimeRow, emitEvent, loadParser, loadModels },
|
|
311
|
+
};
|
|
@@ -40,6 +40,7 @@ export interface AgentFrontmatter {
|
|
|
40
40
|
'reads-only': boolean | string;
|
|
41
41
|
writes: string | string[];
|
|
42
42
|
'default-tier'?: 'haiku' | 'sonnet' | 'opus';
|
|
43
|
+
'reasoning-class'?: 'high' | 'medium' | 'low';
|
|
43
44
|
'size_budget'?: 'S' | 'M' | 'L' | 'XL';
|
|
44
45
|
}
|
|
45
46
|
|
|
@@ -54,6 +55,120 @@ const REQUIRED_FIELDS: readonly (keyof AgentFrontmatter)[] = [
|
|
|
54
55
|
'writes',
|
|
55
56
|
];
|
|
56
57
|
|
|
58
|
+
/**
|
|
59
|
+
* Phase 26 (Plan 26-08) — runtime-neutral `reasoning-class` alias for
|
|
60
|
+
* `default-tier`. Equivalence table is locked in CONTEXT D-10 / D-11:
|
|
61
|
+
*
|
|
62
|
+
* high <-> opus
|
|
63
|
+
* medium <-> sonnet
|
|
64
|
+
* low <-> haiku
|
|
65
|
+
*
|
|
66
|
+
* The alias is OPTIONAL (no per-agent retrofit lands in v1.26 — see
|
|
67
|
+
* agents/README.md "Runtime-neutral reasoning class"). When both fields
|
|
68
|
+
* appear together they MUST satisfy the equivalence; mismatched dual
|
|
69
|
+
* annotations are a validation error.
|
|
70
|
+
*/
|
|
71
|
+
export type DefaultTier = 'haiku' | 'sonnet' | 'opus';
|
|
72
|
+
export type ReasoningClass = 'high' | 'medium' | 'low';
|
|
73
|
+
|
|
74
|
+
export const REASONING_CLASS_VALUES: readonly ReasoningClass[] = [
|
|
75
|
+
'high',
|
|
76
|
+
'medium',
|
|
77
|
+
'low',
|
|
78
|
+
];
|
|
79
|
+
|
|
80
|
+
export const DEFAULT_TIER_VALUES: readonly DefaultTier[] = [
|
|
81
|
+
'opus',
|
|
82
|
+
'sonnet',
|
|
83
|
+
'haiku',
|
|
84
|
+
];
|
|
85
|
+
|
|
86
|
+
/** Equivalence map: reasoning-class -> default-tier. */
|
|
87
|
+
export const CLASS_TO_TIER: Readonly<Record<ReasoningClass, DefaultTier>> = {
|
|
88
|
+
high: 'opus',
|
|
89
|
+
medium: 'sonnet',
|
|
90
|
+
low: 'haiku',
|
|
91
|
+
};
|
|
92
|
+
|
|
93
|
+
/** Equivalence map: default-tier -> reasoning-class. */
|
|
94
|
+
export const TIER_TO_CLASS: Readonly<Record<DefaultTier, ReasoningClass>> = {
|
|
95
|
+
opus: 'high',
|
|
96
|
+
sonnet: 'medium',
|
|
97
|
+
haiku: 'low',
|
|
98
|
+
};
|
|
99
|
+
|
|
100
|
+
/**
 * Type guard: narrows `v` to `ReasoningClass` when it is a string that
 * appears in `REASONING_CLASS_VALUES`.
 */
export function isReasoningClass(v: unknown): v is ReasoningClass {
  if (typeof v !== 'string') {
    return false;
  }
  return REASONING_CLASS_VALUES.some((allowed) => allowed === v);
}
|
|
104
|
+
|
|
105
|
+
/**
 * Type guard: narrows `v` to `DefaultTier` when it is a string that
 * appears in `DEFAULT_TIER_VALUES`.
 */
export function isDefaultTier(v: unknown): v is DefaultTier {
  if (typeof v !== 'string') {
    return false;
  }
  return DEFAULT_TIER_VALUES.some((allowed) => allowed === v);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Check an agent's optional `reasoning-class` frontmatter field, and its
 * agreement with `default-tier` when both are set.
 *
 * Outcomes, in the order they are tested:
 *   - `reasoning-class` absent (or judged missing by `isMissing`) → no
 *     violations; agents carrying only `default-tier` pass untouched.
 *   - `reasoning-class` present but not `high|medium|low` → one violation.
 *   - `default-tier` absent → no violations.
 *   - `default-tier` present but not `opus|sonnet|haiku` → one violation
 *     (reported here as well so a dual annotation can be debugged in a
 *     single pass).
 *   - both valid but not paired per `CLASS_TO_TIER` (high+opus,
 *     medium+sonnet, low+haiku) → one mismatch violation.
 *
 * At most one message is returned per call.
 *
 * @param fm         Parsed frontmatter of one agent file.
 * @param agentName  Name quoted in the messages to identify the agent.
 * @returns Violation messages; an empty array means the agent passes.
 */
export function validateReasoningClass(
  fm: Record<string, unknown>,
  agentName: string,
): string[] {
  // NOTE(review): `isMissing` is defined outside this block; its exact
  // notion of "missing" is not visible here.
  const classPresent = 'reasoning-class' in fm && !isMissing(fm['reasoning-class']);
  const tierPresent = 'default-tier' in fm && !isMissing(fm['default-tier']);

  if (!classPresent) {
    return [];
  }

  const declaredClass = fm['reasoning-class'];
  if (!isReasoningClass(declaredClass)) {
    return [
      `reasoning-class: invalid value "${String(declaredClass)}" for agent "${agentName}" — must be one of ${REASONING_CLASS_VALUES.join('|')}`,
    ];
  }

  if (!tierPresent) {
    return [];
  }

  const declaredTier = fm['default-tier'];
  if (!isDefaultTier(declaredTier)) {
    return [
      `default-tier: invalid value "${String(declaredTier)}" for agent "${agentName}" — must be one of ${DEFAULT_TIER_VALUES.join('|')}`,
    ];
  }

  const requiredTier = CLASS_TO_TIER[declaredClass];
  if (declaredTier === requiredTier) {
    return [];
  }

  return [
    `reasoning-class/default-tier: mismatch for agent "${agentName}" — reasoning-class="${declaredClass}" expects default-tier="${requiredTier}", but got default-tier="${declaredTier}". Equivalence table: high<->opus, medium<->sonnet, low<->haiku.`,
  ];
}
|
|
171
|
+
|
|
57
172
|
function walkMd(dir: string): string[] {
|
|
58
173
|
const out: string[] = [];
|
|
59
174
|
for (const entry of readdirSync(dir, { withFileTypes: true })) {
|
|
@@ -105,10 +220,32 @@ function main(): void {
|
|
|
105
220
|
violations++;
|
|
106
221
|
}
|
|
107
222
|
}
|
|
223
|
+
|
|
224
|
+
// Plan 26-08 — runtime-neutral reasoning-class alias validation.
|
|
225
|
+
const agentName: string =
|
|
226
|
+
typeof fm.name === 'string' && fm.name.length > 0
|
|
227
|
+
? fm.name
|
|
228
|
+
: basename(f).replace(/\.md$/, '');
|
|
229
|
+
const classViolations = validateReasoningClass(
|
|
230
|
+
fm as Record<string, unknown>,
|
|
231
|
+
agentName,
|
|
232
|
+
);
|
|
233
|
+
for (const msg of classViolations) {
|
|
234
|
+
console.log(`${f}:${msg}`);
|
|
235
|
+
violations++;
|
|
236
|
+
}
|
|
108
237
|
}
|
|
109
238
|
|
|
110
239
|
console.log(`summary: ${files.length} file(s) checked, ${violations} violation(s)`);
|
|
111
240
|
process.exit(violations === 0 ? 0 : 1);
|
|
112
241
|
}
|
|
113
242
|
|
|
114
|
-
|
|
243
|
+
// Only run as a CLI when invoked directly (Plan 26-08: tests import the
|
|
244
|
+
// helpers above without triggering process.exit). Node's strip-types ESM
|
|
245
|
+
// loader sets `process.argv[1]` to the resolved entry path; a suffix
|
|
246
|
+
// match against this filename catches both direct execution and the
|
|
247
|
+
// `node --experimental-strip-types` wrapper used by `npm run validate:frontmatter`.
|
|
248
|
+
const entry: string = process.argv[1] ?? '';
|
|
249
|
+
if (entry.endsWith('validate-frontmatter.ts') || entry.endsWith('validate-frontmatter.js')) {
|
|
250
|
+
main();
|
|
251
|
+
}
|
package/skills/router/SKILL.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: gdd-router
|
|
3
|
-
description: "Routes a /gdd command to fast|quick|full path + S|M|L|XL complexity_class and returns {path, complexity_class, model_tier_overrides, estimated_cost_usd, cache_hits}. Deterministic — no model call. Invoked once at command entry before any Agent spawn. Read by hooks/budget-enforcer.js."
|
|
3
|
+
description: "Routes a /gdd command to fast|quick|full path + S|M|L|XL complexity_class and returns {path, complexity_class, model_tier_overrides, resolved_models, estimated_cost_usd, cache_hits}. Deterministic — no model call. Invoked once at command entry before any Agent spawn. Read by hooks/budget-enforcer.js."
|
|
4
4
|
argument-hint: "<intent-string> [<target-artifacts-csv>]"
|
|
5
5
|
tools: Read, Bash, Grep
|
|
6
6
|
---
|
|
@@ -20,16 +20,37 @@ You are a deterministic routing skill. You do not spawn agents. You read `.desig
|
|
|
20
20
|
"path": "fast",
|
|
21
21
|
"complexity_class": "M",
|
|
22
22
|
"model_tier_overrides": {"design-verifier": "haiku"},
|
|
23
|
+
"resolved_models": {
|
|
24
|
+
"design-reflector": "gpt-5",
|
|
25
|
+
"design-context-checker": "gpt-5-nano",
|
|
26
|
+
"design-verifier": "gpt-5-nano"
|
|
27
|
+
},
|
|
23
28
|
"estimated_cost_usd": 0.034,
|
|
24
29
|
"cache_hits": ["design-context-builder:abc123"]
|
|
25
30
|
}
|
|
26
31
|
```
|
|
27
32
|
- `path` enum: `fast` (single Haiku + no checkers), `quick` (Sonnet mappers + Haiku verify), `full` (Opus planners + full quality gates). Stays unchanged for back-compat per D-05.
|
|
28
33
|
- `complexity_class` enum: `S | M | L | XL` (Phase 25 / D-04, D-05). Additive to `path` — existing consumers reading only `path` keep working. Mapping is documented in the Path Selection Heuristic table below.
|
|
29
|
-
- `model_tier_overrides` merges agent frontmatter `default-tier` with `.design/budget.json.tier_overrides` — budget.json wins per D-04.
|
|
34
|
+
- `model_tier_overrides` merges agent frontmatter `default-tier` with `.design/budget.json.tier_overrides` — budget.json wins per D-04. Enum stays `opus|sonnet|haiku` for back-compat across all 14 runtimes; consumers that need the **concrete** model name for the active runtime read `resolved_models` instead.
|
|
35
|
+
- `resolved_models` is a per-agent map of concrete model IDs for the runtime in use (Phase 26 / D-07). Keys are agent names; values are runtime-specific model strings (e.g. `"gpt-5"` under codex, `"gemini-2.5-pro"` under gemini, `"claude-opus-4-7"` under claude) or `null` when the resolver can supply no model (missing tier-map row, missing tier on the row). Additive to `model_tier_overrides` — existing consumers reading the tier-name map keep working unchanged; new consumers (budget-enforcer cost computation, cost telemetry, bandit posterior store) read `resolved_models` for runtime-correct cost. See **Runtime-aware model resolution** below for the computation contract.
|
|
30
36
|
- `estimated_cost_usd` is the sum of per-spawn estimates using the D-06 formula and `reference/model-prices.md`.
|
|
31
37
|
- `cache_hits` is a list of `{agent}:{input-hash}` strings that exist in `.design/cache-manifest.json` and are within TTL; emitting a hit lets the hook short-circuit that spawn per D-05.
|
|
32
38
|
|
|
39
|
+
### Output schema versioning
|
|
40
|
+
|
|
41
|
+
The router output contract is additive across phases. The current shape (Phase 26, v1.26.0) carries:
|
|
42
|
+
|
|
43
|
+
| Field | Added in | Status |
|
|
44
|
+
|-------|----------|--------|
|
|
45
|
+
| `path` | v1.10.1 (10.1-01) | stable |
|
|
46
|
+
| `model_tier_overrides` | v1.10.1 (10.1-01) | stable, enum unchanged |
|
|
47
|
+
| `estimated_cost_usd` | v1.10.1 (10.1-01) | stable |
|
|
48
|
+
| `cache_hits` | v1.10.1 (10.1-01) | stable |
|
|
49
|
+
| `complexity_class` | v1.25.0 (25-02) | stable, additive |
|
|
50
|
+
| `resolved_models` | v1.26.0 (26-04) | stable, additive |
|
|
51
|
+
|
|
52
|
+
Existing consumers reading any subset of the older fields keep working unchanged across these bumps — the schema is a strict superset at every phase boundary. New fields are documented inline in this skill rather than in a separate JSON-schema file (the SKILL is the contract — same convention Phase 25 followed for `complexity_class`).
|
|
53
|
+
|
|
33
54
|
## Path Selection Heuristic
|
|
34
55
|
|
|
35
56
|
The router emits both `path` (legacy 3-tier enum) and `complexity_class` (Phase 25 4-tier enum). The canonical mapping is:
|
|
@@ -70,6 +91,34 @@ for each agent in planned spawn graph:
|
|
|
70
91
|
return total
|
|
71
92
|
```
|
|
72
93
|
|
|
94
|
+
## Runtime-aware model resolution
|
|
95
|
+
|
|
96
|
+
The router emits `resolved_models` alongside `model_tier_overrides` so downstream consumers (budget-enforcer cost computation, Phase 22 cost telemetry, Phase 23.5 bandit posterior store) can read the **concrete model ID** for the active runtime without re-deriving it from the tier name. The resolution is per-agent and additive — `model_tier_overrides` keeps its `opus|sonnet|haiku` enum for back-compat across all 14 runtimes, and `resolved_models` runs the runtime-specific translation on top of it.
|
|
97
|
+
|
|
98
|
+
Computation contract (per D-07):
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
runtime = runtimeDetect.detect() ?? 'claude'
|
|
102
|
+
for each agent in planned spawn graph:
|
|
103
|
+
tier = resolve_tier(agent) # same merge as model_tier_overrides
|
|
104
|
+
resolved_models[agent] = tierResolver.resolve(runtime, tier)
|
|
105
|
+
# → concrete model string OR null
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
Implementation surfaces (Phase 26 / Wave A):
|
|
109
|
+
|
|
110
|
+
- `scripts/lib/runtime-detect.cjs` — `detect() → runtime-id | null`. Reads the same `*_CONFIG_DIR` / `*_HOME` env-vars Phase 24's installer uses (single source of truth in `scripts/lib/install/runtimes.cjs`). Returns `null` when no recognized runtime env-var is set; the router falls back to `'claude'` so the resolver always has a runtime ID to work with.
|
|
111
|
+
- `scripts/lib/tier-resolver.cjs` — `resolve(runtime, tier, opts?) → model | null`. Translates `opus|sonnet|haiku` to the concrete model the runtime understands using the `reference/runtime-models.md` mapping (Phase 26 / Wave A). Fallback chain (D-04): runtime-specific entry → `claude` row default with `tier_resolution_fallback` event → `null` with `tier_resolution_failed` event. Never throws; `null` is a valid output the consumer must handle.
|
|
112
|
+
|
|
113
|
+
Per-agent emission rules:
|
|
114
|
+
|
|
115
|
+
- One key per agent in the planned spawn graph (same key set the cost-estimation loop iterates over). Keys MUST match agent names exactly so consumers can join `resolved_models` against `model_tier_overrides` and the spawn graph by name.
|
|
116
|
+
- Value is the concrete model string returned by `tier-resolver.resolve(runtime, tier)`.
|
|
117
|
+
- When the resolver returns `null` (missing tier-map row, missing tier, garbage input), the value is JSON `null` — NOT omitted, NOT the empty string. Consumers (budget-enforcer, telemetry) MUST handle `null`: typically by skipping the cost row for that spawn and emitting their own diagnostic event, never by crashing.
|
|
118
|
+
- When `complexity_class` is `S` and the router itself short-circuits (see **S-class short-circuit** above), no payload is emitted at all and `resolved_models` does not exist for that invocation — the budget-enforcer's "no router decision payload" branch already handles this case.
|
|
119
|
+
|
|
120
|
+
Back-compat assertion: a router invocation in a Claude runtime (or any environment where `runtime-detect.detect()` returns `null` and the router falls back to `'claude'`) produces `resolved_models` values that are the canonical Anthropic model IDs (`claude-opus-4-7`, `claude-sonnet-4-6`, `claude-haiku-4-5`) for the corresponding tiers. Pre-Phase-26 consumers that ignore `resolved_models` see the same `model_tier_overrides` they always saw (Plan 26-09 owns the runtime fixture diff that asserts this).
|
|
121
|
+
|
|
73
122
|
## Cache-Hit Detection
|
|
74
123
|
|
|
75
124
|
Delegate to `skills/cache-manager/SKILL.md` (Plan 10.1-02). The router lists candidate `{agent}:{input-hash}` tuples; the cache-manager confirms freshness against TTL from `budget.json.cache_ttl_seconds`.
|