@hegemonart/get-design-done 1.24.2 → 1.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +87 -0
  4. package/README.de.md +679 -0
  5. package/README.fr.md +679 -0
  6. package/README.it.md +679 -0
  7. package/README.ja.md +679 -0
  8. package/README.ko.md +679 -0
  9. package/README.md +399 -728
  10. package/README.zh-CN.md +480 -133
  11. package/SKILL.md +2 -0
  12. package/agents/README.md +60 -0
  13. package/agents/design-reflector.md +43 -0
  14. package/agents/gdd-intel-updater.md +34 -1
  15. package/agents/prototype-gate.md +122 -0
  16. package/agents/quality-gate-runner.md +125 -0
  17. package/hooks/budget-enforcer.ts +275 -11
  18. package/hooks/gdd-decision-injector.js +183 -3
  19. package/hooks/gdd-turn-closeout.js +238 -0
  20. package/hooks/hooks.json +10 -0
  21. package/package.json +5 -5
  22. package/reference/STATE-TEMPLATE.md +41 -0
  23. package/reference/config-schema.md +30 -0
  24. package/reference/model-prices.md +40 -19
  25. package/reference/prices/antigravity.md +21 -0
  26. package/reference/prices/augment.md +21 -0
  27. package/reference/prices/claude.md +42 -0
  28. package/reference/prices/cline.md +23 -0
  29. package/reference/prices/codebuddy.md +21 -0
  30. package/reference/prices/codex.md +25 -0
  31. package/reference/prices/copilot.md +21 -0
  32. package/reference/prices/cursor.md +21 -0
  33. package/reference/prices/gemini.md +25 -0
  34. package/reference/prices/kilo.md +21 -0
  35. package/reference/prices/opencode.md +23 -0
  36. package/reference/prices/qwen.md +25 -0
  37. package/reference/prices/trae.md +23 -0
  38. package/reference/prices/windsurf.md +21 -0
  39. package/reference/registry.json +107 -1
  40. package/reference/runtime-models.md +446 -0
  41. package/reference/schemas/runtime-models.schema.json +123 -0
  42. package/scripts/install.cjs +8 -0
  43. package/scripts/lib/budget-enforcer.cjs +446 -0
  44. package/scripts/lib/cost-arbitrage.cjs +294 -0
  45. package/scripts/lib/gdd-state/mutator.ts +454 -0
  46. package/scripts/lib/gdd-state/parser.ts +351 -1
  47. package/scripts/lib/gdd-state/types.ts +193 -0
  48. package/scripts/lib/install/installer.cjs +188 -11
  49. package/scripts/lib/install/parse-runtime-models.cjs +267 -0
  50. package/scripts/lib/install/runtimes.cjs +43 -0
  51. package/scripts/lib/quality-gate-detect.cjs +126 -0
  52. package/scripts/lib/runtime-detect.cjs +96 -0
  53. package/scripts/lib/tier-resolver.cjs +311 -0
  54. package/scripts/validate-frontmatter.ts +138 -1
  55. package/skills/quality-gate/SKILL.md +222 -0
  56. package/skills/router/SKILL.md +79 -10
  57. package/skills/sketch-wrap-up/SKILL.md +47 -2
  58. package/skills/spike-wrap-up/SKILL.md +41 -2
  59. package/skills/turn-closeout/SKILL.md +115 -0
  60. package/skills/verify/SKILL.md +22 -0
@@ -0,0 +1,294 @@
1
+ // scripts/lib/cost-arbitrage.cjs
2
+ //
3
+ // Plan 26-06 — cross-runtime cost-arbitrage analysis (D-09).
4
+ //
5
+ // Pure function: given a sequence of cost events (each tagged with
6
+ // runtime, agent, tier, cycle, and cost), surfaces structured arbitrage
7
+ // proposals when one runtime's spend on a given `(agent, tier)` pair
8
+ // significantly exceeds another's over the most recent N cycles.
9
+ //
10
+ // Contract:
11
+ // analyze(events, options?) → proposals[]
12
+ //
13
+ // Inputs:
14
+ // * `events` — array of event envelopes shaped like Phase 22's
15
+ // `cost.update` events:
16
+ // {
17
+ // type: 'cost.update',
18
+ // cycle?: 'cycle-3',
19
+ // payload: {
20
+ // agent: 'design-reflector',
21
+ // tier: 'opus',
22
+ // runtime: 'claude' | 'codex' | …,
23
+ // usd: 0.42,
24
+ // ...
25
+ // }
26
+ // }
27
+ // Non-cost events and malformed entries are skipped silently.
28
+ // * `options.windowCycles` — how many of the most recent cycles to
29
+ // consider. Default 5 (D-09). Cycles are ordered by first-appearance
30
+ // in the events array (events.jsonl is append-only, so insertion
31
+ // order ≡ chronological order).
32
+ // * `options.thresholdPct` — relative-delta threshold above which an
33
+ // arbitrage signal is emitted. Default 0.5 (50%, D-09). Computed as
34
+ // `|maxAvg - minAvg| / minAvg`. The 50% number is a starting
35
+ // heuristic; bandit-style learning over arbitrage outcomes is
36
+ // Phase 23.5+ territory.
37
+ //
38
+ // Output:
39
+ // Array of structured proposals, each shaped like:
40
+ // {
41
+ // type: 'cost_arbitrage',
42
+ // agent: 'design-reflector',
43
+ // tier: 'opus',
44
+ // runtimes: {
45
+ // claude: { avg_cost_per_cycle: 0.42, n_cycles: 5 },
46
+ // codex: { avg_cost_per_cycle: 1.10, n_cycles: 5 }
47
+ // },
48
+ // delta_pct: 1.619,
49
+ // proposal: 'Switch design-reflector tier=opus invocations from codex to claude for ~62% cost saving',
50
+ // evidence_window: 'last_5_cycles'
51
+ // }
52
+ //
53
+ // Design notes:
54
+ // - Per-cycle averaging: events are first summed per
55
+ // (agent, tier, runtime, cycle), then averaged across the cycles
56
+ // where that triple was observed. This prevents per-runtime
57
+ // double-counting when a single cycle had multiple agent spawns
58
+ // in the same runtime (sum first, average next).
59
+ // - Mixed-runtime cycle history: a cycle that ran some spawns in CC
60
+ // and others in Codex is correctly attributed — each spawn's
61
+ // `payload.runtime` tag drives the bucket, never the cycle.
62
+ // - Single-runtime-only history: when only one runtime has events
63
+ // for a given (agent, tier), no arbitrage signal can be computed
64
+ // (need at least two runtimes to compare). The rule is silent — no
65
+ // false-positive proposals.
66
+ // - Pure: no I/O, no global state. Tests inject synthetic event
67
+ // arrays; production callers (the reflector agent) read
68
+ // `.design/telemetry/events.jsonl`, parse line-by-line, and pass
69
+ // the parsed array in.
70
+
71
'use strict';

// Fallback evidence window: analyze() uses this many of the most recent
// cycles when options.windowCycles is not a positive number.
const DEFAULT_WINDOW_CYCLES = 5;
// Fallback relative-spread threshold: analyze() only emits a proposal when
// (maxAvg - minAvg) / minAvg is strictly greater than this value.
const DEFAULT_THRESHOLD_PCT = 0.5;

// Envelope `type` that marks a cost event; extractCostRow() rejects every
// event whose type differs from this string.
const COST_EVENT_TYPE = 'cost.update';
77
+
78
/**
 * Resolve the runtime id carried by an event envelope.
 *
 * Lookup order (first non-empty string wins):
 *   1. `event.payload.runtime` — the canonical site (Phase 26-05 contract);
 *   2. `event.runtime`         — top-level fallback for legacy events;
 *   3. `event._meta.runtime`   — fallback for harnesses that stamp the
 *                                runtime on the envelope metadata.
 *
 * @param {*} event - Candidate event envelope; any value is tolerated.
 * @returns {string|null} The runtime id, or null when `event` is not an
 *   object or none of the three sites holds a non-empty string.
 */
function extractRuntime(event) {
  const isObject = (value) => Boolean(value) && typeof value === 'object';
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  if (!isObject(event)) return null;

  // Payload first, so the 26-05 contract stays authoritative.
  const payload = event.payload;
  if (isObject(payload) && isNonEmptyString(payload.runtime)) {
    return payload.runtime;
  }

  if (isNonEmptyString(event.runtime)) {
    return event.runtime;
  }

  const envelopeMeta = event._meta;
  return isObject(envelopeMeta) && isNonEmptyString(envelopeMeta.runtime)
    ? envelopeMeta.runtime
    : null;
}
101
+
102
/**
 * Flatten one event envelope into an `{ agent, tier, runtime, cycle, usd }`
 * row.
 *
 * An event is rejected (null is returned, nothing is thrown) when any of
 * the following holds:
 *   - it is not an object, or its `type` is not the cost-event type;
 *   - `payload` is missing or not an object;
 *   - `payload.agent` or `payload.tier` is not a non-empty string;
 *   - no runtime can be resolved from the envelope;
 *   - `payload.usd` is not a finite number;
 *   - `event.cycle` is not a non-empty string.
 *
 * @param {*} event - Candidate event envelope.
 * @returns {{agent: string, tier: string, runtime: string, cycle: string, usd: number}|null}
 */
function extractCostRow(event) {
  const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;

  if (!event || typeof event !== 'object') return null;
  if (event.type !== COST_EVENT_TYPE) return null;

  const payload = event.payload;
  if (!payload || typeof payload !== 'object') return null;
  if (!isNonEmptyString(payload.agent)) return null;
  if (!isNonEmptyString(payload.tier)) return null;

  const runtime = extractRuntime(event);
  if (runtime === null) return null;

  // Number.isFinite is false for every non-number, so it covers the
  // type check as well as NaN / ±Infinity.
  if (!Number.isFinite(payload.usd)) return null;

  // The envelope allows events without a cycle, but per-cycle averaging
  // cannot place them: folding them into one shared bucket would collapse
  // all of history together and skew the averages, so they are dropped.
  if (!isNonEmptyString(event.cycle)) return null;

  return {
    agent: payload.agent,
    tier: payload.tier,
    runtime,
    cycle: event.cycle,
    usd: payload.usd,
  };
}
128
+
129
/**
 * Sum cost rows into per-(agent, tier, runtime, cycle) buckets.
 *
 * Summing per cycle is what prevents double-counting: several spawns of
 * the same agent in the same runtime during one cycle become a single
 * cycle total, so later averaging divides by cycles, not by spawns.
 *
 * @param {Iterable<*>} events - Event envelopes; entries that do not yield
 *   a cost row are ignored.
 * @returns {{buckets: Map<string, Map<string, Map<string, Map<string, number>>>>, cycleOrder: string[]}}
 *   `buckets` is nested agent → tier → runtime → cycle → summed usd.
 *   `cycleOrder` lists each cycle id once, in order of first appearance
 *   among the accepted rows.
 */
function aggregateByCycle(events) {
  // Fetch the nested Map stored under `key`, creating it on first use.
  const childMap = (parent, key) => {
    let child = parent.get(key);
    if (child === undefined) {
      child = new Map();
      parent.set(key, child);
    }
    return child;
  };

  const buckets = new Map();
  // A Set iterates in insertion order, so it records first-appearance
  // order and de-duplicates in one structure. Cycle ids are never parsed
  // as sequence numbers — slugs like "cycle-3" and "2026-04-29" are both
  // valid.
  const cyclesInOrder = new Set();

  for (const event of events) {
    const row = extractCostRow(event);
    if (row === null) continue;

    cyclesInOrder.add(row.cycle);

    const perTier = childMap(buckets, row.agent);
    const perRuntime = childMap(perTier, row.tier);
    const perCycle = childMap(perRuntime, row.runtime);

    const runningTotal = perCycle.has(row.cycle) ? perCycle.get(row.cycle) : 0;
    perCycle.set(row.cycle, runningTotal + row.usd);
  }

  return { buckets, cycleOrder: Array.from(cyclesInOrder) };
}
173
+
174
/**
 * Average each runtime's per-cycle totals for one (agent, tier) pair,
 * counting only cycles that fall inside the window.
 *
 * @param {Map<string, Map<string, number>>} tierBucket - runtime →
 *   (cycle → summed usd) for a single (agent, tier) pair.
 * @param {Set<string>} cycleWindowSet - Cycle ids that are in the window.
 * @returns {Object<string, {avg_cost_per_cycle: number, n_cycles: number}>}
 *   One entry per runtime that has at least one in-window cycle; runtimes
 *   with none are omitted.
 */
function averageWithinWindow(tierBucket, cycleWindowSet) {
  const averages = {};

  tierBucket.forEach((perCycleTotals, runtime) => {
    let total = 0;
    let cyclesCounted = 0;

    perCycleTotals.forEach((cycleTotal, cycle) => {
      if (cycleWindowSet.has(cycle)) {
        total += cycleTotal;
        cyclesCounted += 1;
      }
    });

    // A runtime with no in-window cycles contributes nothing.
    if (cyclesCounted > 0) {
      averages[runtime] = {
        avg_cost_per_cycle: total / cyclesCounted,
        n_cycles: cyclesCounted,
      };
    }
  });

  return averages;
}
195
+
196
/**
 * Build the human-readable proposal sentence. The phrasing is fixed so
 * test assertions stay stable across cycle slugs.
 *
 * The quoted percentage is the saving obtained by moving from the
 * expensive runtime to the cheap one, i.e. relative to the EXPENSIVE
 * runtime's average. `deltaPct` arrives as the spread relative to the
 * cheap runtime, `(max - min) / min`, which is unbounded; printing it
 * directly would claim savings above 100% (0.42 vs 1.10 would read
 * "~162% cost saving"). Since `max / min = 1 + deltaPct`, the saving is
 * `(max - min) / max = deltaPct / (1 + deltaPct)`, which stays in [0, 1)
 * for any non-negative spread.
 *
 * @param {string} agent - Agent name.
 * @param {string} tier - Model tier.
 * @param {string} cheapRuntime - Runtime with the lowest average cost.
 * @param {string} expensiveRuntime - Runtime with the highest average cost.
 * @param {number} deltaPct - Relative spread `(max - min) / min`, as a
 *   fraction (0.5 means 50%). Expected to be non-negative.
 * @returns {string} The proposal sentence.
 */
function buildProposalText(agent, tier, cheapRuntime, expensiveRuntime, deltaPct) {
  // An infinite spread is a 100% saving; computing Infinity / Infinity
  // would yield NaN instead.
  const savingFraction = deltaPct === Infinity ? 1 : deltaPct / (1 + deltaPct);
  const pct = Math.round(savingFraction * 100);
  return `Switch ${agent} tier=${tier} invocations from ${expensiveRuntime} to ${cheapRuntime} for ~${pct}% cost saving`;
}
205
+
206
/**
 * Main entry point: surface cross-runtime cost-arbitrage proposals.
 *
 * Events are summed per (agent, tier, runtime, cycle), restricted to the
 * most recent cycles, averaged per runtime, and for every (agent, tier)
 * pair seen in two or more runtimes the cheapest and most expensive
 * runtimes are compared. At most one proposal is emitted per pair.
 *
 * @param {Array<*>} events - Event envelopes; a non-array or an empty
 *   array yields `[]`.
 * @param {{windowCycles?: number, thresholdPct?: number}} [options]
 *   - `windowCycles`: how many of the most recent cycles to consider. It
 *     is floored to an integer; anything that is not a number or floors
 *     below 1 falls back to DEFAULT_WINDOW_CYCLES. The window is global:
 *     it is the last N cycles across ALL events, not per (agent, tier).
 *   - `thresholdPct`: minimum relative spread `(maxAvg - minAvg) / minAvg`
 *     that must be strictly exceeded. Non-numbers and values <= 0 fall
 *     back to DEFAULT_THRESHOLD_PCT.
 * @returns {Array<{type: string, agent: string, tier: string, runtimes: Object, delta_pct: number, proposal: string, evidence_window: string}>}
 *   Proposals ordered by agent name, then tier name.
 */
function analyze(events, options) {
  const opts = options && typeof options === 'object' ? options : {};

  // Floor BEFORE validating. Checking `> 0` first lets a fractional value
  // such as 0.5 through; it then floors to 0, and `slice(-0)` is the same
  // as `slice(0)` — the entire history instead of a window.
  const requestedWindow = typeof opts.windowCycles === 'number'
    ? Math.floor(opts.windowCycles)
    : NaN;
  const windowCycles = requestedWindow >= 1 ? requestedWindow : DEFAULT_WINDOW_CYCLES;

  const thresholdPct = typeof opts.thresholdPct === 'number' && opts.thresholdPct > 0
    ? opts.thresholdPct
    : DEFAULT_THRESHOLD_PCT;

  if (!Array.isArray(events) || events.length === 0) return [];

  const { buckets, cycleOrder } = aggregateByCycle(events);
  if (cycleOrder.length === 0) return [];

  // Window = last N cycles by first-appearance order.
  const recentCycles = cycleOrder.slice(-windowCycles);
  const cycleWindowSet = new Set(recentCycles);
  // The label reports the number of cycles actually in the window, which
  // is smaller than N when the history is shorter than N.
  const evidenceWindowLabel = `last_${recentCycles.length}_cycles`;

  const proposals = [];

  // Visit (agent, tier) pairs in sorted order so the output ordering is
  // stable across runs — useful for snapshot tests and reproducible
  // reflection files.
  const agentNames = Array.from(buckets.keys()).sort();
  for (const agent of agentNames) {
    const agentBucket = buckets.get(agent);
    const tierNames = Array.from(agentBucket.keys()).sort();
    for (const tier of tierNames) {
      const runtimeAverages = averageWithinWindow(agentBucket.get(tier), cycleWindowSet);
      const runtimeIds = Object.keys(runtimeAverages);
      // Fewer than two runtimes in the window → nothing to compare, so
      // stay silent rather than emit a false-positive signal.
      if (runtimeIds.length < 2) continue;

      // Only the widest pair (cheapest vs. most expensive) is reported;
      // one proposal per runtime pair would get noisy quickly.
      let minRuntime = null;
      let maxRuntime = null;
      let minAvg = Infinity;
      let maxAvg = -Infinity;
      for (const runtimeId of runtimeIds) {
        const avg = runtimeAverages[runtimeId].avg_cost_per_cycle;
        if (avg < minAvg) { minAvg = avg; minRuntime = runtimeId; }
        if (avg > maxAvg) { maxAvg = avg; maxRuntime = runtimeId; }
      }
      // Both stay null only if no average compared as a number (NaN).
      if (minRuntime === null || maxRuntime === null) continue;
      // All runtimes tied: there is no cheaper option to switch to.
      if (minRuntime === maxRuntime) continue;
      // A zero (or negative) cheapest average cannot serve as the
      // denominator: it would produce an infinite or sign-flipped spread.
      if (minAvg <= 0) continue;

      const deltaPct = (maxAvg - minAvg) / minAvg;
      if (deltaPct <= thresholdPct) continue;

      proposals.push({
        type: 'cost_arbitrage',
        agent,
        tier,
        runtimes: runtimeAverages,
        delta_pct: Number(deltaPct.toFixed(3)),
        proposal: buildProposalText(agent, tier, minRuntime, maxRuntime, deltaPct),
        evidence_window: evidenceWindowLabel,
      });
    }
  }

  return proposals;
}
286
+
287
// Public surface of the module (CommonJS).
module.exports = {
  // Main entry point: analyze(events, options?) → proposals[].
  analyze,
  // Exposed for test injection / unit-testing the lower layers.
  extractCostRow,
  aggregateByCycle,
  // Fallback values analyze() applies when the matching option is absent
  // or invalid.
  DEFAULT_WINDOW_CYCLES,
  DEFAULT_THRESHOLD_PCT,
};