@hegemonart/get-design-done 1.24.2 → 1.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +87 -0
- package/README.de.md +679 -0
- package/README.fr.md +679 -0
- package/README.it.md +679 -0
- package/README.ja.md +679 -0
- package/README.ko.md +679 -0
- package/README.md +399 -728
- package/README.zh-CN.md +480 -133
- package/SKILL.md +2 -0
- package/agents/README.md +60 -0
- package/agents/design-reflector.md +43 -0
- package/agents/gdd-intel-updater.md +34 -1
- package/agents/prototype-gate.md +122 -0
- package/agents/quality-gate-runner.md +125 -0
- package/hooks/budget-enforcer.ts +275 -11
- package/hooks/gdd-decision-injector.js +183 -3
- package/hooks/gdd-turn-closeout.js +238 -0
- package/hooks/hooks.json +10 -0
- package/package.json +5 -5
- package/reference/STATE-TEMPLATE.md +41 -0
- package/reference/config-schema.md +30 -0
- package/reference/model-prices.md +40 -19
- package/reference/prices/antigravity.md +21 -0
- package/reference/prices/augment.md +21 -0
- package/reference/prices/claude.md +42 -0
- package/reference/prices/cline.md +23 -0
- package/reference/prices/codebuddy.md +21 -0
- package/reference/prices/codex.md +25 -0
- package/reference/prices/copilot.md +21 -0
- package/reference/prices/cursor.md +21 -0
- package/reference/prices/gemini.md +25 -0
- package/reference/prices/kilo.md +21 -0
- package/reference/prices/opencode.md +23 -0
- package/reference/prices/qwen.md +25 -0
- package/reference/prices/trae.md +23 -0
- package/reference/prices/windsurf.md +21 -0
- package/reference/registry.json +107 -1
- package/reference/runtime-models.md +446 -0
- package/reference/schemas/runtime-models.schema.json +123 -0
- package/scripts/install.cjs +8 -0
- package/scripts/lib/budget-enforcer.cjs +446 -0
- package/scripts/lib/cost-arbitrage.cjs +294 -0
- package/scripts/lib/gdd-state/mutator.ts +454 -0
- package/scripts/lib/gdd-state/parser.ts +351 -1
- package/scripts/lib/gdd-state/types.ts +193 -0
- package/scripts/lib/install/installer.cjs +188 -11
- package/scripts/lib/install/parse-runtime-models.cjs +267 -0
- package/scripts/lib/install/runtimes.cjs +43 -0
- package/scripts/lib/quality-gate-detect.cjs +126 -0
- package/scripts/lib/runtime-detect.cjs +96 -0
- package/scripts/lib/tier-resolver.cjs +311 -0
- package/scripts/validate-frontmatter.ts +138 -1
- package/skills/quality-gate/SKILL.md +222 -0
- package/skills/router/SKILL.md +79 -10
- package/skills/sketch-wrap-up/SKILL.md +47 -2
- package/skills/spike-wrap-up/SKILL.md +41 -2
- package/skills/turn-closeout/SKILL.md +115 -0
- package/skills/verify/SKILL.md +22 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
// scripts/lib/cost-arbitrage.cjs
|
|
2
|
+
//
|
|
3
|
+
// Plan 26-06 — cross-runtime cost-arbitrage analysis (D-09).
|
|
4
|
+
//
|
|
5
|
+
// Pure function: given a sequence of cost events (each tagged with
|
|
6
|
+
// runtime, agent, tier, cycle, and cost), surfaces structured arbitrage
|
|
7
|
+
// proposals when one runtime's spend on a given `(agent, tier)` pair
|
|
8
|
+
// significantly exceeds another's over the most recent N cycles.
|
|
9
|
+
//
|
|
10
|
+
// Contract:
|
|
11
|
+
// analyze(events, options?) → proposals[]
|
|
12
|
+
//
|
|
13
|
+
// Inputs:
|
|
14
|
+
// * `events` — array of event envelopes shaped like Phase 22's
|
|
15
|
+
// `cost.update` events:
|
|
16
|
+
// {
|
|
17
|
+
// type: 'cost.update',
|
|
18
|
+
// cycle?: 'cycle-3',
|
|
19
|
+
// payload: {
|
|
20
|
+
// agent: 'design-reflector',
|
|
21
|
+
// tier: 'opus',
|
|
22
|
+
// runtime: 'claude' | 'codex' | …,
|
|
23
|
+
// usd: 0.42,
|
|
24
|
+
// ...
|
|
25
|
+
// }
|
|
26
|
+
// }
|
|
27
|
+
// Non-cost events and malformed entries are skipped silently.
|
|
28
|
+
// * `options.windowCycles` — how many of the most recent cycles to
|
|
29
|
+
// consider. Default 5 (D-09). Cycles are ordered by first-appearance
|
|
30
|
+
// in the events array (events.jsonl is append-only, so insertion
|
|
31
|
+
// order ≡ chronological order).
|
|
32
|
+
// * `options.thresholdPct` — relative-delta threshold above which an
|
|
33
|
+
// arbitrage signal is emitted. Default 0.5 (50%, D-09). Computed as
|
|
34
|
+
// `|maxAvg - minAvg| / minAvg`. The 50% number is a starting
|
|
35
|
+
// heuristic; bandit-style learning over arbitrage outcomes is
|
|
36
|
+
// Phase 23.5+ territory.
|
|
37
|
+
//
|
|
38
|
+
// Output:
|
|
39
|
+
// Array of structured proposals, each shaped like:
|
|
40
|
+
// {
|
|
41
|
+
// type: 'cost_arbitrage',
|
|
42
|
+
// agent: 'design-reflector',
|
|
43
|
+
// tier: 'opus',
|
|
44
|
+
// runtimes: {
|
|
45
|
+
// claude: { avg_cost_per_cycle: 0.42, n_cycles: 5 },
|
|
46
|
+
// codex: { avg_cost_per_cycle: 1.10, n_cycles: 5 }
|
|
47
|
+
// },
|
|
48
|
+
// delta_pct: 1.619, // (1.10 - 0.42) / 0.42, i.e. relative to the cheaper runtime
|
|
49
|
+
// proposal: 'Switch design-reflector tier=opus invocations from codex to claude for ~62% cost saving',
|
|
50
|
+
// evidence_window: 'last_5_cycles'
|
|
51
|
+
// }
|
|
52
|
+
//
|
|
53
|
+
// Design notes:
|
|
54
|
+
// - Per-cycle averaging: events are first summed per
|
|
55
|
+
// (agent, tier, runtime, cycle), then averaged across the cycles
|
|
56
|
+
// where that triple was observed. This prevents per-runtime
|
|
57
|
+
// double-counting when a single cycle had multiple agent spawns
|
|
58
|
+
// in the same runtime (sum first, average next).
|
|
59
|
+
// - Mixed-runtime cycle history: a cycle that ran some spawns in CC
|
|
60
|
+
// and others in Codex is correctly attributed — each spawn's
|
|
61
|
+
// `payload.runtime` tag drives the bucket, never the cycle.
|
|
62
|
+
// - Single-runtime-only history: when only one runtime has events
|
|
63
|
+
// for a given (agent, tier), no arbitrage signal can be computed
|
|
64
|
+
// (need at least two runtimes to compare). The rule is silent — no
|
|
65
|
+
// false-positive proposals.
|
|
66
|
+
// - Pure: no I/O, no global state. Tests inject synthetic event
|
|
67
|
+
// arrays; production callers (the reflector agent) read
|
|
68
|
+
// `.design/telemetry/events.jsonl`, parse line-by-line, and pass
|
|
69
|
+
// the parsed array in.
|
|
70
|
+
|
|
71
|
+
'use strict';
|
|
72
|
+
|
|
73
|
+
// How many of the most recent cycles `analyze` inspects when the caller
// does not supply a usable `options.windowCycles`.
const DEFAULT_WINDOW_CYCLES = 5;
// Relative cost delta (0.5 = 50%) that a runtime pair must strictly exceed
// before `analyze` emits a proposal, when `options.thresholdPct` is not usable.
const DEFAULT_THRESHOLD_PCT = 0.5;

// Envelope `type` that marks an event as a cost row; every other type is ignored.
const COST_EVENT_TYPE = 'cost.update';
|
|
77
|
+
|
|
78
|
+
/**
 * Resolve the runtime tag carried by an event envelope.
 *
 * Three sites are consulted and the first one holding a non-empty string
 * wins, in this order:
 *   1. `event.payload.runtime` (the canonical site)
 *   2. `event.runtime`         (top-level fallback)
 *   3. `event._meta.runtime`   (envelope-metadata fallback)
 *
 * @param {*} event - Candidate event envelope; any value is tolerated.
 * @returns {string|null} The runtime id, or null when `event` is not an
 *   object or none of the three sites holds a non-empty string.
 */
function extractRuntime(event) {
  const isObject = (value) => value !== null && typeof value === 'object';
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  if (!isObject(event)) return null;

  const { payload, _meta: meta } = event;
  // Listed in priority order; payload comes first so it always overrides
  // the two fallback sites.
  const candidates = [
    isObject(payload) ? payload.runtime : undefined,
    event.runtime,
    isObject(meta) ? meta.runtime : undefined,
  ];

  const winner = candidates.find((candidate) => isNonEmptyString(candidate));
  return winner === undefined ? null : winner;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Reduce one event envelope to the flat row used for aggregation.
 *
 * A row is produced only when all of the following hold; otherwise the
 * function returns null without throwing:
 *   - `event` is an object whose `type` equals COST_EVENT_TYPE;
 *   - `event.payload` is an object with non-empty string `agent` and `tier`;
 *   - `extractRuntime(event)` resolves a runtime;
 *   - `payload.usd` is a finite number;
 *   - `event.cycle` is a non-empty string.
 *
 * @param {*} event - Candidate event envelope.
 * @returns {{agent: string, tier: string, runtime: string, cycle: string, usd: number}|null}
 */
function extractCostRow(event) {
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  if (event === null || typeof event !== 'object' || event.type !== COST_EVENT_TYPE) {
    return null;
  }

  const payload = event.payload;
  if (payload === null || typeof payload !== 'object') return null;

  const { agent, tier, usd } = payload;
  if (!isNonEmptyString(agent) || !isNonEmptyString(tier)) return null;

  const runtime = extractRuntime(event);
  if (runtime === null) return null;

  if (typeof usd !== 'number' || !Number.isFinite(usd)) return null;

  // The cycle id is what per-cycle averaging groups on, so a row without
  // one is dropped rather than folded into a catch-all bucket.
  const { cycle } = event;
  if (!isNonEmptyString(cycle)) return null;

  return { agent, tier, runtime, cycle, usd };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Sum cost rows into nested buckets keyed agent → tier → runtime → cycle.
 *
 * Every usable event (per `extractCostRow`) adds its `usd` to the bucket
 * for its (agent, tier, runtime, cycle). Several events in the same bucket
 * therefore collapse into one per-cycle total, so a later average divides
 * by the number of cycles rather than the number of events.
 *
 * @param {Iterable<*>} events - Event envelopes; unusable entries are skipped.
 * @returns {{buckets: Map<string, Map<string, Map<string, Map<string, number>>>>, cycleOrder: string[]}}
 *   `buckets` holds the nested sums; `cycleOrder` lists each cycle id once,
 *   in the order it first appeared among usable events.
 */
function aggregateByCycle(events) {
  const buckets = new Map();
  const cycleOrder = [];
  const seenCycles = new Set();

  // Fetch the child Map stored under `key`, creating and registering it on
  // first use.
  const childMap = (parent, key) => {
    let child = parent.get(key);
    if (child === undefined) {
      child = new Map();
      parent.set(key, child);
    }
    return child;
  };

  for (const ev of events) {
    const row = extractCostRow(ev);
    if (row === null) continue;

    const { agent, tier, runtime, cycle, usd } = row;

    // Cycle ids are recorded by first appearance; they are never parsed or
    // sorted, so any slug format works.
    if (!seenCycles.has(cycle)) {
      seenCycles.add(cycle);
      cycleOrder.push(cycle);
    }

    const perCycle = childMap(childMap(childMap(buckets, agent), tier), runtime);
    const runningTotal = perCycle.has(cycle) ? perCycle.get(cycle) : 0;
    perCycle.set(cycle, runningTotal + usd);
  }

  return { buckets, cycleOrder };
}
|
|
173
|
+
|
|
174
|
+
/**
 * Compute per-runtime average cost per cycle for one (agent, tier) pair,
 * counting only cycles that belong to the window.
 *
 * @param {Map<string, Map<string, number>>} tierBucket - runtime → (cycle → summed usd).
 * @param {Set<string>} cycleWindowSet - Cycle ids that are inside the window.
 * @returns {Object<string, {avg_cost_per_cycle: number, n_cycles: number}>}
 *   Plain object keyed by runtime id. A runtime with no cycle inside the
 *   window is omitted.
 */
function averageWithinWindow(tierBucket, cycleWindowSet) {
  const out = {};
  for (const [runtime, runtimeBucket] of tierBucket.entries()) {
    let sum = 0;
    let n = 0;
    for (const [cycle, cycleSum] of runtimeBucket.entries()) {
      if (!cycleWindowSet.has(cycle)) continue;
      sum += cycleSum;
      n += 1;
    }
    if (n === 0) continue;
    // Runtime ids come from event data. Plain assignment with the key
    // "__proto__" would invoke the Object.prototype setter and swap the
    // prototype instead of adding an entry, silently dropping that runtime.
    // defineProperty always creates an own, enumerable data property.
    Object.defineProperty(out, runtime, {
      value: { avg_cost_per_cycle: sum / n, n_cycles: n },
      enumerable: true,
      writable: true,
      configurable: true,
    });
  }
  return out;
}
|
|
195
|
+
|
|
196
|
+
/**
 * Build the human-readable proposal sentence. The phrasing is fixed so
 * assertions on it stay stable.
 *
 * @param {string} agent - Agent name.
 * @param {string} tier - Tier name.
 * @param {string} cheapRuntime - Runtime with the lower average cost (switch target).
 * @param {string} expensiveRuntime - Runtime with the higher average cost (switch source).
 * @param {number} deltaPct - Relative spread measured against the cheaper
 *   runtime, `(expensive - cheap) / cheap`; e.g. 1.0 means twice as costly.
 * @returns {string} Sentence stating the switch and the approximate saving.
 */
function buildProposalText(agent, tier, cheapRuntime, expensiveRuntime, deltaPct) {
  // `deltaPct` is a markup over the cheap runtime and can exceed 1, whereas
  // a saving is a share of the expensive runtime's spend and cannot.
  // Convert: (e - c) / e === d / (1 + d), where d = (e - c) / c.
  const savingFraction = deltaPct / (1 + deltaPct);
  const pct = Math.round(savingFraction * 100);
  return `Switch ${agent} tier=${tier} invocations from ${expensiveRuntime} to ${cheapRuntime} for ~${pct}% cost saving`;
}
|
|
205
|
+
|
|
206
|
+
/**
 * Main entry point: scan cost events and emit cost-arbitrage proposals.
 *
 * For every (agent, tier) pair seen in at least two runtimes inside the
 * window, the cheapest and most expensive runtimes (by average cost per
 * cycle) are compared. One proposal is emitted when the spread relative to
 * the cheaper runtime strictly exceeds the threshold.
 *
 * @param {Array<*>} events - Event envelopes; a non-array or empty array yields [].
 * @param {{windowCycles?: number, thresholdPct?: number}} [options]
 *   `windowCycles` - number of most recent cycles to consider. It is floored,
 *   and anything that is not a number or floors below 1 falls back to
 *   DEFAULT_WINDOW_CYCLES.
 *   `thresholdPct` - relative spread to exceed; anything that is not a
 *   positive number falls back to DEFAULT_THRESHOLD_PCT.
 * @returns {Array<{type: string, agent: string, tier: string, runtimes: Object,
 *   delta_pct: number, proposal: string, evidence_window: string}>}
 *   Proposals ordered by agent name, then tier name.
 */
function analyze(events, options) {
  const opts = options && typeof options === 'object' ? options : {};

  // Floor first, validate second. A fractional request such as 0.5 floors
  // to 0, and `slice(-0)` equals `slice(0)`, which would select the entire
  // history instead of a bounded window.
  const flooredWindow = typeof opts.windowCycles === 'number'
    ? Math.floor(opts.windowCycles)
    : NaN;
  const windowCycles = flooredWindow >= 1 ? flooredWindow : DEFAULT_WINDOW_CYCLES;

  const thresholdPct = typeof opts.thresholdPct === 'number' && opts.thresholdPct > 0
    ? opts.thresholdPct
    : DEFAULT_THRESHOLD_PCT;

  if (!Array.isArray(events) || events.length === 0) return [];

  const { buckets, cycleOrder } = aggregateByCycle(events);
  if (cycleOrder.length === 0) return [];

  // Window = last N cycles by first-appearance order.
  const recentCycles = cycleOrder.slice(-windowCycles);
  const cycleWindowSet = new Set(recentCycles);
  // The label reports how many cycles were actually available, which can be
  // fewer than the requested window.
  const evidenceWindowLabel = `last_${recentCycles.length}_cycles`;

  const proposals = [];

  // Sorted iteration keeps the output order stable regardless of the order
  // in which agents and tiers first appeared in the event stream.
  const agentNames = Array.from(buckets.keys()).sort();
  for (const agent of agentNames) {
    const agentBucket = buckets.get(agent);
    if (agentBucket === undefined) continue;
    const tierNames = Array.from(agentBucket.keys()).sort();
    for (const tier of tierNames) {
      const tierBucket = agentBucket.get(tier);
      if (tierBucket === undefined) continue;
      const runtimeAverages = averageWithinWindow(tierBucket, cycleWindowSet);
      const runtimeIds = Object.keys(runtimeAverages);
      // With fewer than two runtimes there is nothing to compare against,
      // so no proposal is emitted.
      if (runtimeIds.length < 2) continue;

      // Only the (cheapest, most expensive) pair is reported, giving at
      // most one proposal per (agent, tier).
      let minRuntime = null;
      let maxRuntime = null;
      let minAvg = Infinity;
      let maxAvg = -Infinity;
      for (const r of runtimeIds) {
        const v = runtimeAverages[r];
        if (v === undefined) continue;
        const avg = v.avg_cost_per_cycle;
        if (avg < minAvg) { minAvg = avg; minRuntime = r; }
        if (avg > maxAvg) { maxAvg = avg; maxRuntime = r; }
      }
      if (minRuntime === null || maxRuntime === null) continue;
      // Identical averages across all runtimes leave min and max on the
      // same runtime: no spread to report.
      if (minRuntime === maxRuntime) continue;
      // minAvg is the denominator below; a zero (or negative) value would
      // yield an infinite or meaningless ratio, so the pair is skipped.
      if (minAvg <= 0) continue;

      const deltaPct = (maxAvg - minAvg) / minAvg;
      if (deltaPct <= thresholdPct) continue;

      proposals.push({
        type: 'cost_arbitrage',
        agent,
        tier,
        runtimes: runtimeAverages,
        delta_pct: Number(deltaPct.toFixed(3)),
        proposal: buildProposalText(agent, tier, minRuntime, maxRuntime, deltaPct),
        evidence_window: evidenceWindowLabel,
      });
    }
  }

  return proposals;
}
|
|
286
|
+
|
|
287
|
+
module.exports = {
|
|
288
|
+
analyze,
|
|
289
|
+
// Exposed for test injection / unit-testing the lower layers.
|
|
290
|
+
extractCostRow,
|
|
291
|
+
aggregateByCycle,
|
|
292
|
+
DEFAULT_WINDOW_CYCLES,
|
|
293
|
+
DEFAULT_THRESHOLD_PCT,
|
|
294
|
+
};
|