@hegemonart/get-design-done 1.59.7 → 1.59.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/CHANGELOG.md +59 -0
  4. package/README.md +2 -2
  5. package/SKILL.md +1 -1
  6. package/agents/design-authority-watcher.md +24 -5
  7. package/bin/gdd-graph +4 -1
  8. package/hooks/_hook-emit.js +113 -29
  9. package/hooks/budget-enforcer.ts +104 -5
  10. package/hooks/gdd-mcp-circuit-breaker.js +72 -3
  11. package/hooks/gdd-sessionstart-recap.js +23 -14
  12. package/hooks/hooks.json +2 -2
  13. package/package.json +2 -2
  14. package/reference/bandit-integration.md +13 -2
  15. package/reference/prices/claude.md +11 -0
  16. package/reference/runtime-models.md +9 -9
  17. package/reference/schemas/generated.d.ts +4 -0
  18. package/reference/schemas/runtime-models.schema.json +5 -0
  19. package/scripts/bootstrap.cjs +40 -8
  20. package/scripts/install.cjs +23 -1
  21. package/scripts/lib/bandit-router.cjs +47 -5
  22. package/scripts/lib/budget-enforcer.cjs +34 -5
  23. package/scripts/lib/detect/cli.cjs +13 -3
  24. package/scripts/lib/install/converters/cursor.cjs +11 -19
  25. package/scripts/lib/install/installer.cjs +72 -21
  26. package/scripts/lib/install/merge.cjs +31 -3
  27. package/scripts/lib/install/parse-runtime-models.cjs +9 -1
  28. package/scripts/lib/install/runtime-artifact-layout.cjs +42 -8
  29. package/scripts/lib/manifest/harnesses.json +29 -1
  30. package/scripts/lib/manifest/skills.json +1 -1
  31. package/scripts/lib/model-id.cjs +141 -0
  32. package/scripts/lib/session-runner/index.ts +87 -16
  33. package/scripts/skill-templates/bandit-reset/SKILL.md +2 -0
  34. package/scripts/skill-templates/bandit-status/SKILL.md +4 -1
  35. package/scripts/skill-templates/darkmode/SKILL.md +1 -1
  36. package/scripts/skill-templates/graphify/SKILL.md +6 -6
  37. package/scripts/skill-templates/quick/SKILL.md +3 -1
  38. package/scripts/skill-templates/reflect/SKILL.md +1 -1
  39. package/scripts/skill-templates/router/SKILL.md +4 -2
  40. package/sdk/cli/index.js +132 -55
  41. package/sdk/dashboard/data/source.cjs +50 -4
  42. package/sdk/event-stream/writer.ts +112 -30
  43. package/sdk/mcp/gdd-mcp/server.js +49 -36
  44. package/sdk/mcp/gdd-mcp/tools/shared.ts +20 -2
  45. package/sdk/mcp/gdd-state/server.js +107 -41
  46. package/sdk/primitives/lockfile.cjs +26 -5
  47. package/sdk/state/index.ts +91 -17
  48. package/sdk/state/lockfile.ts +47 -8
  49. package/skills/bandit-reset/SKILL.md +2 -0
  50. package/skills/bandit-status/SKILL.md +4 -1
  51. package/skills/darkmode/SKILL.md +1 -1
  52. package/skills/graphify/SKILL.md +6 -6
  53. package/skills/quick/SKILL.md +3 -1
  54. package/skills/reflect/SKILL.md +1 -1
  55. package/skills/router/SKILL.md +4 -2
@@ -25,6 +25,32 @@ const DEFAULT_FILE = path.join(REPO_ROOT, 'reference', 'mcp-budget.default.json'
25
25
 
26
26
  const TRACKED_TOOL_RE = /^mcp__.*use_(figma|paper|pencil)$/;
27
27
 
28
+ // Bounded fallback window (ms) for counting volume when no session id is
29
+ // available on the payload. Without this, `total_calls` would count every row
30
+ // ever appended to the ledger — so after `max_calls_per_task` cumulative calls
31
+ // across ALL sessions for the lifetime of the file, every mutation is blocked
32
+ // forever (and a BLOCKER is appended to STATE.md each time). The volume gate is
33
+ // meant to be PER-TASK; this window keeps the fallback path per-task-ish so a
34
+ // long-lived user is never permanently locked out.
35
+ const SESSIONLESS_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours
36
+
37
+ /**
38
+ * Resolve the current session id from the hook payload (Claude Code passes
39
+ * `session_id`; tolerate `sessionId`), falling back to GDD_SESSION_ID, else
40
+ * null. A non-null id makes the volume window exact (count only this session's
41
+ * rows); null falls back to the bounded time window.
42
+ *
43
+ * @param {any} payload
44
+ * @returns {string|null}
45
+ */
46
+ function resolveSessionId(payload) {
47
+ const fromPayload = payload && (payload.session_id || payload.sessionId);
48
+ if (typeof fromPayload === 'string' && fromPayload.length > 0) return fromPayload;
49
+ const fromEnv = process.env.GDD_SESSION_ID;
50
+ if (typeof fromEnv === 'string' && fromEnv.length > 0) return fromEnv;
51
+ return null;
52
+ }
53
+
28
54
  function loadBudget(cwd) {
29
55
  let defaults = { max_calls_per_task: 30, max_consecutive_timeouts: 3, reset_on_success: true };
30
56
  try {
@@ -106,7 +132,25 @@ function classifyOutcome(toolResponse) {
106
132
  return 'error';
107
133
  }
108
134
 
109
- function readJsonlTail(filePath) {
135
+ /**
136
+ * Read the ledger and compute the prior volume + consecutive-timeout state
137
+ * for the CURRENT task window only — not the whole-file lifetime.
138
+ *
139
+ * Window membership for a row:
140
+ * - If a current session id is known AND the row carries a `session` field:
141
+ * the row counts iff `row.session === sessionId`.
142
+ * - Otherwise (sessionless harness/tests, or legacy rows without `session`):
143
+ * the row counts iff its timestamp is within SESSIONLESS_WINDOW_MS of now.
144
+ *
145
+ * This bounds the volume count so a long-lived ledger can never permanently
146
+ * trip `volumeBreak`, while keeping rapid same-task calls (the common case and
147
+ * the existing test scenario) counted together.
148
+ *
149
+ * @param {string} filePath
150
+ * @param {string|null} sessionId
151
+ * @param {number} nowMs
152
+ */
153
+ function readJsonlTail(filePath, sessionId, nowMs) {
110
154
  if (!fs.existsSync(filePath)) return { lastRow: null, total_calls: 0, consecutive_timeouts: 0 };
111
155
  let total = 0;
112
156
  let lastTimeoutsChain = 0;
@@ -118,6 +162,25 @@ function readJsonlTail(filePath) {
118
162
  if (!t) continue;
119
163
  let row;
120
164
  try { row = JSON.parse(t); } catch { continue; }
165
+
166
+ // Decide whether this row belongs to the current task window.
167
+ let inWindow;
168
+ if (sessionId !== null && typeof row.session === 'string' && row.session.length > 0) {
169
+ inWindow = row.session === sessionId;
170
+ } else {
171
+ const rowMs = typeof row.ts === 'string' ? Date.parse(row.ts) : NaN;
172
+ // Unparseable timestamps fall back to "in window" so we never
173
+ // under-count; a malformed-ts row is treated as recent.
174
+ inWindow = Number.isNaN(rowMs) ? true : (nowMs - rowMs) <= SESSIONLESS_WINDOW_MS;
175
+ }
176
+
177
+ if (!inWindow) {
178
+ // Out-of-window rows reset the streak — a new task/session must not
179
+ // inherit a stale consecutive-timeout chain.
180
+ lastTimeoutsChain = 0;
181
+ continue;
182
+ }
183
+
121
184
  total++;
122
185
  if (row.outcome === 'timeout') lastTimeoutsChain++;
123
186
  else lastTimeoutsChain = 0;
@@ -158,7 +221,9 @@ async function main() {
158
221
  const budget = loadBudget(cwd);
159
222
  const ledgerPath = path.join(cwd, '.design', 'telemetry', 'mcp-budget.jsonl');
160
223
 
161
- const prior = readJsonlTail(ledgerPath);
224
+ const sessionId = resolveSessionId(payload);
225
+ const nowMs = Date.now();
226
+ const prior = readJsonlTail(ledgerPath, sessionId, nowMs);
162
227
  const outcome = classifyOutcome(payload?.tool_response);
163
228
  const total_calls = prior.total_calls + 1;
164
229
  const consecutive_timeouts = outcome === 'timeout'
@@ -166,12 +231,16 @@ async function main() {
166
231
  : (budget.reset_on_success && outcome === 'success' ? 0 : prior.consecutive_timeouts);
167
232
 
168
233
  const row = {
169
- ts: new Date().toISOString(),
234
+ ts: new Date(nowMs).toISOString(),
170
235
  tool,
171
236
  outcome,
172
237
  consecutive_timeouts,
173
238
  total_calls,
174
239
  };
240
+ // Stamp the session id so future calls can scope the volume window exactly.
241
+ // Omitted when unknown (keeps the row schema stable for the sessionless path,
242
+ // which relies on the time window instead).
243
+ if (sessionId !== null) row.session = sessionId;
175
244
  appendJsonl(ledgerPath, row);
176
245
 
177
246
  const timeoutBreak = consecutive_timeouts >= budget.max_consecutive_timeouts;
@@ -57,17 +57,21 @@ function detectHarness() {
57
57
  }
58
58
 
59
59
  // ---------------------------------------------------------------------------
60
- // Lazy event-stream emit (best-effort)
60
+ // Event emit (best-effort) — delegate to the shared _hook-emit helper, which
61
+ // uses the SDK writer when loadable (modern Node) and an inline JSONL appender
62
+ // otherwise. The previous direct `require('../sdk/event-stream')` resolved to
63
+ // the `.ts` ESM index and threw under plain `node` on Node 22.0–22.17, leaving
64
+ // recap.emitted permanently no-op'd. emitEvent lands the line on every Node.
61
65
  // ---------------------------------------------------------------------------
62
66
 
63
- function getAppendEvent() {
67
+ function getEmitEvent() {
64
68
  try {
65
- const m = require('../sdk/event-stream');
66
- if (m && typeof m.appendEvent === 'function') return m.appendEvent;
69
+ const m = require('./_hook-emit.js');
70
+ if (m && typeof m.emitEvent === 'function') return m.emitEvent;
67
71
  } catch {
68
- /* swallow — event-stream is optional infrastructure */
72
+ /* swallow — telemetry is optional infrastructure */
69
73
  }
70
- return function noopAppend(_ev) {
74
+ return function noopEmit(_ev) {
71
75
  /* no-op */
72
76
  };
73
77
  }
@@ -87,9 +91,12 @@ function readStateMd(paths) {
87
91
  }
88
92
 
89
93
  const frontmatter = {};
90
- const fmMatch = body.match(/^---\n([\s\S]*?)\n---\n/);
94
+ // Tolerate CRLF line endings — the STATE.md mutator preserves CRLF, so a
95
+ // strict `\n`-only anchor fails to match the frontmatter block on Windows
96
+ // checkouts and the recap silently reports an empty cycle/decisions diff.
97
+ const fmMatch = body.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n/);
91
98
  if (fmMatch) {
92
- for (const line of fmMatch[1].split('\n')) {
99
+ for (const line of fmMatch[1].split(/\r?\n/)) {
93
100
  const m = line.match(/^(\w+):\s*(.+)$/);
94
101
  if (m) frontmatter[m[1]] = m[2].trim();
95
102
  }
@@ -273,9 +280,9 @@ async function main() {
273
280
  }
274
281
 
275
282
  // Best-effort event emit.
276
- const appendEvent = getAppendEvent();
283
+ const emitEvent = getEmitEvent();
277
284
  try {
278
- appendEvent({
285
+ emitEvent({
279
286
  type: 'recap.emitted',
280
287
  timestamp: new Date().toISOString(),
281
288
  sessionId: process.env.GDD_SESSION_ID || 'sessionstart-hook',
@@ -300,9 +307,11 @@ async function main() {
300
307
  process.exit(0);
301
308
  }
302
309
 
303
- try {
304
- main();
305
- } catch (err) {
310
+ // `main` is async: a sync try/catch cannot observe a rejected promise, so any
311
+ // throw inside `main` would escape as an unhandled rejection and
312
+ // exit non-zero — violating the silent-exit-0 contract for SessionStart hooks.
313
+ // Attach `.catch` so every failure mode is swallowed and we exit 0.
314
+ main().catch((err) => {
306
315
  try {
307
316
  process.stderr.write(
308
317
  '[gdd-sessionstart-recap] uncaught: ' +
@@ -313,4 +322,4 @@ try {
313
322
  /* swallow */
314
323
  }
315
324
  process.exit(0);
316
- }
325
+ });
package/hooks/hooks.json CHANGED
@@ -45,7 +45,7 @@
45
45
  ],
46
46
  "PreToolUse": [
47
47
  {
48
- "matcher": "Agent",
48
+ "matcher": "Task|Agent",
49
49
  "hooks": [
50
50
  {
51
51
  "type": "command",
@@ -119,7 +119,7 @@
119
119
  ]
120
120
  },
121
121
  {
122
- "matcher": "Agent",
122
+ "matcher": "Task|Agent",
123
123
  "hooks": [
124
124
  {
125
125
  "type": "command",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hegemonart/get-design-done",
3
- "version": "1.59.7",
3
+ "version": "1.59.9",
4
4
  "description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
5
5
  "author": "Hegemon",
6
6
  "homepage": "https://github.com/hegemonart/get-design-done",
@@ -10,7 +10,7 @@
10
10
  },
11
11
  "license": "MIT",
12
12
  "engines": {
13
- "node": ">=22"
13
+ "node": ">=22.6.0"
14
14
  },
15
15
  "files": [
16
16
  ".claude-plugin/",
@@ -10,7 +10,7 @@ description: Bandit posterior + production-integration shim cheat sheet - signat
10
10
 
11
11
  **Phase 27.5 (v1.27.5).** Reference for the bandit production-integration surface. Authoring or modifying a caller of the bandit posterior? Debugging a routing decision at the code level? Start here.
12
12
 
13
- For ops-level guidance (when bandit fires, how to disable, posterior inspection), see `docs/BANDIT-INTEGRATION.md`.
13
+ For ops-level guidance (when bandit fires, how to disable, posterior inspection), use the operator surfaces: `/gdd:bandit-status` (read-only per-arm posterior snapshots) and `/gdd:bandit-reset` (confirm-then-reset). The `adaptive_mode` gate below covers enable/disable.
14
14
 
15
15
  In-scope modules:
16
16
 
@@ -104,6 +104,17 @@ Phase 27.5 passes `wallTimeMs: 0` always (D-08 unchanged from Phase 23.5).
104
104
 
105
105
  ---
106
106
 
107
+ ## Where adaptive routing actually learns
108
+
109
+ This is a deliberate design boundary, not a bug - read it before assuming the bandit "learns" in every runtime.
110
+
111
+ - **The posterior is updated only on the SDK / headless path.** `recordOutcome` (the learning update that moves `alpha`/`beta`) is called from `scripts/lib/session-runner/index.ts` after a session terminates. That path runs in the SDK / headless `session-runner` execution model. It is the only place a reward is folded back into the posterior.
112
+ - **In interactive Claude Code with `adaptive_mode: full`, the bandit samples but does not currently learn from in-session outcomes.** When a plugin/interactive run consults the bandit, `consultBandit` performs a Thompson sample from the *configured priors* (and whatever the SDK path has already written), and `pull()` bumps `last_used` + `count` - but no `recordOutcome` fires from an interactive Claude Code hook, so the success/fail posterior does not move within the interactive session. With an un-seeded posterior, sampling therefore reflects the informed `TIER_PRIOR` (which leans toward the higher tiers, e.g. opus). Wiring `recordOutcome` into an interactive hook is intentionally out of scope for this phase.
113
+ - **`adaptive_mode` defaults to `static` - the feature is opt-in.** Per `scripts/lib/adaptive-mode.cjs`, the default mode is `static`, in which the bandit is fully silent (no reads, no writes) and `default-tier:` is authoritative. Adaptive routing only engages when an operator explicitly sets `adaptive_mode: full` in `.design/budget.json`.
114
+ - **Contextual dimensions are supplied by the caller, not inferred here.** The `bin` (glob-count bucket via `binForGlobCount`) and `delegate` dimensions are passed in at the call site; the router does not derive them from ambient session state.
115
+
116
+ Net: enable `adaptive_mode: full` and run the SDK/headless `session-runner` path to accumulate a posterior that genuinely reflects observed outcomes. In interactive Claude Code, `full` mode gives you prior-driven Thompson sampling, not in-session reinforcement.
117
+
107
118
  ## `adaptive_mode` gate semantics
108
119
 
109
120
  Phase 23.5 ladder (D-07):
@@ -154,7 +165,7 @@ Phase 27.5 wires these consumers:
154
165
 
155
166
  ## Cross-references
156
167
 
157
- - `docs/BANDIT-INTEGRATION.md` - operator guide (when bandit fires, how to disable, troubleshooting).
168
+ - `/gdd:bandit-status` (read-only) + `/gdd:bandit-reset` (confirm-then-reset) - operator surfaces (when bandit fires, posterior inspection, reset). Disable/enable is the `adaptive_mode` gate in `.design/budget.json` (see above).
158
169
  - `reference/peer-protocols.md` - Phase 27 ACP/ASP cheat sheet (peer-CLI delegation transport).
159
170
  - `scripts/lib/bandit-router.cjs` - Phase 23.5 primitives surface.
160
171
  - `scripts/lib/bandit-router/integration.cjs` - Phase 27.5 production shim.
@@ -13,6 +13,17 @@
13
13
  | claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
14
14
  | claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
15
15
  | claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
16
+ | claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
17
+
18
+ > **>200k-input (1M-context) pricing note.** The rates above are the
19
+ > standard (≤200k-input) per-1M-token prices. Anthropic's flagship
20
+ > `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
21
+ > requests above the 200k-input threshold may be billed at a higher
22
+ > tiered rate. This table tracks only the standard tier today; the
23
+ > >200k tiered figure will be added as a separate row/column once
24
+ > Anthropic publishes it. The parser is positional-by-header and
25
+ > tolerates right-edge columns, so a future `>200k_input_per_1m`
26
+ > column can be appended without breaking cost lookups.
16
27
 
17
28
  ## size_budget → conservative token ranges
18
29
 
@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
44
44
  {
45
45
  "id": "claude",
46
46
  "tier_to_model": {
47
- "opus": { "model": "claude-opus-4-7" },
48
- "sonnet": { "model": "claude-sonnet-4-6" },
49
- "haiku": { "model": "claude-haiku-4-5" }
47
+ "opus": { "model": "claude-opus-4-8", "context_window": 1000000 },
48
+ "sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
49
+ "haiku": { "model": "claude-haiku-4-5", "context_window": 200000 }
50
50
  },
51
51
  "reasoning_class_to_model": {
52
- "high": { "model": "claude-opus-4-7" },
53
- "medium": { "model": "claude-sonnet-4-6" },
54
- "low": { "model": "claude-haiku-4-5" }
52
+ "high": { "model": "claude-opus-4-8", "context_window": 1000000 },
53
+ "medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
54
+ "low": { "model": "claude-haiku-4-5", "context_window": 200000 }
55
55
  },
56
56
  "provenance": [
57
57
  {
58
58
  "source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
59
- "retrieved_at": "2026-04-29T00:00:00.000Z",
60
- "last_validated_cycle": "2026-04-29-v1.26",
61
- "note": "Anthropic public model catalog — first-party runtime."
59
+ "retrieved_at": "2026-06-10T00:00:00.000Z",
60
+ "last_validated_cycle": "2026-06-10-v1.59.9",
61
+ "note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
62
62
  }
63
63
  ]
64
64
  }
@@ -1115,6 +1115,10 @@ export interface ModelRow {
1115
1115
  * Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
1116
1116
  */
1117
1117
  provider_model_id?: string;
1118
+ /**
1119
+ * Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
1120
+ */
1121
+ context_window?: number;
1118
1122
  }
1119
1123
 
1120
1124
  export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;
@@ -121,6 +121,11 @@
121
121
  "type": "string",
122
122
  "minLength": 1,
123
123
  "description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
124
+ },
125
+ "context_window": {
126
+ "type": "integer",
127
+ "minimum": 1,
128
+ "description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
124
129
  }
125
130
  }
126
131
  }
@@ -148,6 +148,14 @@ function filesEqual(a, b) {
148
148
  }
149
149
  }
150
150
 
151
+ /**
152
+ * Network timeout (ms) for the git clone/pull. SessionStart hooks must never
153
+ * block the harness: without a timeout, a hung network connection would stall
154
+ * the whole session-start sequence indefinitely. spawnSync kills the child
155
+ * with `killSignal` once this elapses and reports it as a failure.
156
+ */
157
+ const GIT_TIMEOUT_MS = 15000;
158
+
151
159
  /**
152
160
  * Match the .sh `clone_or_update`:
153
161
  * - target/.git exists → `git -C target pull --quiet --ff-only`, log on fail
@@ -157,8 +165,14 @@ function filesEqual(a, b) {
157
165
  * We invoke the `git` CLI directly via spawnSync. spawnSync('git', …) is fine —
158
166
  * the prohibition is on spawnSync('bash', …).
159
167
  *
168
+ * Returns true ONLY when the repo is in a good post-condition (pull/clone
169
+ * succeeded, or a pre-existing non-git dir we intentionally skip). Returns
170
+ * false when a network op failed or timed out, or when the clone args were refused
171
+ * as suspicious — so the caller can withhold the success marker and retry next session instead of recording failure as done.
172
+ *
160
173
  * @param {string} repoUrl
161
174
  * @param {string} target
175
+ * @returns {boolean} success
162
176
  */
163
177
  function cloneOrUpdate(repoUrl, target) {
164
178
  let isGitCheckout = false;
@@ -177,16 +191,22 @@ function cloneOrUpdate(repoUrl, target) {
177
191
  const r = spawnSync('git', ['-C', target, 'pull', '--quiet', '--ff-only'], {
178
192
  stdio: ['ignore', 'ignore', 'ignore'],
179
193
  windowsHide: true,
194
+ timeout: GIT_TIMEOUT_MS,
195
+ killSignal: 'SIGKILL',
180
196
  });
181
197
  if (r.error || r.status !== 0) {
182
- log(`pull failed for ${target} (continuing)`);
198
+ const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
199
+ log(`pull ${why} for ${target} (continuing)`);
200
+ return false;
183
201
  }
184
- return;
202
+ return true;
185
203
  }
186
204
 
187
205
  if (targetExists) {
188
206
  log(`${target} exists and is not a git checkout — skipping`);
189
- return;
207
+ // A pre-existing non-git dir is a stable post-condition, not a failure:
208
+ // re-running won't change it, so don't force a retry every session.
209
+ return true;
190
210
  }
191
211
 
192
212
  // Defense in depth: refuse repoUrl / target arguments that look like git
@@ -196,7 +216,7 @@ function cloneOrUpdate(repoUrl, target) {
196
216
  if (typeof repoUrl !== 'string' || repoUrl.startsWith('-') ||
197
217
  typeof target !== 'string' || target.startsWith('-')) {
198
218
  log(`refusing suspicious clone args for ${repoUrl} -> ${target}`);
199
- return;
219
+ return false;
200
220
  }
201
221
 
202
222
  log(`cloning ${repoUrl} -> ${target}`);
@@ -205,10 +225,15 @@ function cloneOrUpdate(repoUrl, target) {
205
225
  const r = spawnSync('git', ['clone', '--quiet', '--depth', '1', '--', repoUrl, target], {
206
226
  stdio: ['ignore', 'ignore', 'ignore'],
207
227
  windowsHide: true,
228
+ timeout: GIT_TIMEOUT_MS,
229
+ killSignal: 'SIGKILL',
208
230
  });
209
231
  if (r.error || r.status !== 0) {
210
- log(`clone failed for ${repoUrl}`);
232
+ const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
233
+ log(`clone ${why} for ${repoUrl}`);
234
+ return false;
211
235
  }
236
+ return true;
212
237
  }
213
238
 
214
239
  /**
@@ -315,7 +340,7 @@ function run(opts = {}) {
315
340
  }
316
341
 
317
342
  // Required library: VoltAgent/awesome-design-md.
318
- cloneOrUpdate(
343
+ const repoOk = cloneOrUpdate(
319
344
  'https://github.com/VoltAgent/awesome-design-md.git',
320
345
  ctx.awesomeRepoTarget
321
346
  );
@@ -332,8 +357,15 @@ function run(opts = {}) {
332
357
  // Phase 10.1: .design/budget.json + .design/telemetry/ (D-12).
333
358
  ensureDesignDir(cwd);
334
359
 
335
- // Record success so we don't re-run until the bundled manifest changes.
336
- copyManifestToMarker(ctx.manifest, ctx.marker);
360
+ // Record success ONLY when the network provisioning actually succeeded.
361
+ // Writing the marker unconditionally records a failed clone as "done" and
362
+ // never retries — leaving the required library permanently absent. Gating on
363
+ // repoOk means a transient network failure/timeout is retried next session.
364
+ if (repoOk) {
365
+ copyManifestToMarker(ctx.manifest, ctx.marker);
366
+ } else {
367
+ log('skipping success marker — provisioning incomplete, will retry next session');
368
+ }
337
369
 
338
370
  return 0;
339
371
  }
@@ -211,6 +211,28 @@ async function main() {
211
211
  }
212
212
  runtimes = picked.runtimes;
213
213
  if (picked.location) location = picked.location;
214
+ } else if (uninstall) {
215
+ // B4 fix (Phase 59.8): bare `--uninstall` in a non-TTY context must NOT
216
+ // silently default to removing claude. The interactive path is the only
217
+ // safe way to pick what to remove without an explicit flag; in non-TTY
218
+ // we refuse and require an explicit runtime flag so a scripted/CI
219
+ // invocation can never destroy an install the operator didn't name.
220
+ // (See the comment at shouldUseInteractive: bare --uninstall is meant to
221
+ // trigger the interactive select-which-to-remove flow.)
222
+ process.stderr.write(
223
+ [
224
+ 'Refusing to uninstall: no runtime specified and not running in an',
225
+ 'interactive terminal.',
226
+ '',
227
+ 'Re-run with an explicit runtime flag, e.g.:',
228
+ ' npx @hegemonart/get-design-done --uninstall --claude',
229
+ ' npx @hegemonart/get-design-done --uninstall --all',
230
+ '',
231
+ 'Run with --help to list available runtime flags.',
232
+ '',
233
+ ].join('\n'),
234
+ );
235
+ process.exit(2);
214
236
  } else {
215
237
  // Non-TTY zero-flag fallback: back-compat with v1.23.5 behaviour.
216
238
  runtimes = ['claude'];
@@ -359,7 +381,7 @@ async function maybeNudgePeerCli({ flags }) {
359
381
  '✓ Detected peer CLIs: ' + detectedDisplay,
360
382
  '',
361
383
  'gdd v1.27.0 introduced optional peer-CLI delegation. With your',
362
- 'agents\\u2019 frontmatter `delegate_to:` set, gdd can route specific',
384
+ "agents' frontmatter `delegate_to:` set, gdd can route specific",
363
385
  'roles through these peer CLIs (cost or quality wins per Phase 23.5',
364
386
  'bandit). You can change this anytime via .design/config.json.',
365
387
  '',
@@ -38,7 +38,9 @@
38
38
  * - The `prior_class` value is persisted on the arm so subsequent
39
39
  * reads + decay calculations preserve it (forward-compat).
40
40
  *
41
- * Atomic .tmp + rename. Discounted Thompson via per-arm time-decay
41
+ * Atomic per-pid-unique .tmp + rename (Phase 59-8 C2: unique tmp name per
42
+ * process so parallel waves never interleave writes on one scratch file).
43
+ * Discounted Thompson via per-arm time-decay
42
44
  * factor `rho^days_since_last_use` applied at sample time, not stored.
43
45
  *
44
46
  * Reward computation (D-06): two-stage lexicographic — UNCHANGED.
@@ -57,6 +59,17 @@ const path = require('node:path');
57
59
  const DEFAULT_POSTERIOR_PATH = '.design/telemetry/posterior.json';
58
60
  const SCHEMA_VERSION = '1.0.0';
59
61
 
62
+ // C2 fix (Phase 59-8): monotonic per-process counter for tmp-file naming.
63
+ // Combined with process.pid it guarantees that two concurrent writers — even
64
+ // within the same process, even firing in the same millisecond — never target
65
+ // the same `.tmp` path. The old fixed `p + '.tmp'` name let parallel agent
66
+ // waves interleave partial writes on one tmp file, producing truncated JSON
67
+ // that loadPosterior() then silently reset to an empty posterior (losing all
68
+ // learned arms). Unique tmp + atomic rename makes a half-written file
69
+ // invisible to readers: rename is atomic on the same filesystem, so a reader
70
+ // sees either the old complete file or the new complete file, never a partial.
71
+ let _tmpCounter = 0;
72
+
60
73
  // Decay factor — 60-day half-life.
61
74
  const DEFAULT_DECAY = 0.988;
62
75
 
@@ -136,6 +149,12 @@ function loadPosterior(opts = {}) {
136
149
  }
137
150
  return data;
138
151
  } catch {
152
+ // Corrupt-JSON recovery (preserved, Phase 59-8 C2): fall back to an empty
153
+ // posterior. With the per-pid unique-tmp + atomic-rename write discipline
154
+ // (see savePosterior), a reader can no longer observe a half-written file
155
+ // — rename publishes the complete file in one step — so this branch should
156
+ // now only fire on genuine on-disk corruption (e.g. external truncation),
157
+ // not on a write/read race during a parallel agent wave.
139
158
  return { schema_version: SCHEMA_VERSION, generated_at: new Date().toISOString(), arms: [] };
140
159
  }
141
160
  }
@@ -159,9 +178,19 @@ function savePosterior(posterior, opts = {}) {
159
178
  const p = resolvePath(opts);
160
179
  fs.mkdirSync(path.dirname(p), { recursive: true });
161
180
  posterior.generated_at = new Date().toISOString();
162
- const tmp = p + '.tmp';
163
- fs.writeFileSync(tmp, JSON.stringify(posterior, null, 2));
164
- fs.renameSync(tmp, p);
181
+ // C2 fix (Phase 59-8): per-process-unique tmp name (pid + monotonic
182
+ // counter) so concurrent writers never collide on the same scratch file.
183
+ // The atomic rename then publishes the fully-written file in one step.
184
+ const tmp = `${p}.${process.pid}.${_tmpCounter++}.tmp`;
185
+ try {
186
+ fs.writeFileSync(tmp, JSON.stringify(posterior, null, 2));
187
+ fs.renameSync(tmp, p);
188
+ } catch (err) {
189
+ // Best-effort cleanup of the orphaned tmp on failure so a crashed
190
+ // write never leaves stale scratch files behind. ENOENT is fine.
191
+ try { fs.unlinkSync(tmp); } catch { /* already gone */ }
192
+ throw err;
193
+ }
165
194
  return p;
166
195
  }
167
196
 
@@ -347,7 +376,20 @@ function decayArm(arm, opts = {}) {
347
376
  const factor = Math.pow(decay, days);
348
377
  // Decay shrinks both α and β toward the prior. We never go below the
349
378
  // initial prior strength — caller can rebuild a fresh prior via reset().
350
- const { alpha: pa, beta: pb } = priorFor(arm.tier, opts.strength ?? PRIOR_STRENGTH);
379
+ //
380
+ // C1 fix (Phase 59-8): decay MUST target the SAME prior the arm was
381
+ // bootstrapped with. The arm persists `prior_class` (Phase 29 Plan 06 /
382
+ // D-04), so pass it through to priorFor — otherwise a promoted-incubator
383
+ // arm (Beta(2,8)) would drift back toward the informed TIER_PRIOR while
384
+ // idle, undoing the D-04 preferential-selection suppression. Default-class
385
+ // arms have no `prior_class` field, so `arm.prior_class` is undefined and
386
+ // priorFor falls through to the Phase 23.5 informed prior (byte-for-byte
387
+ // unchanged).
388
+ const { alpha: pa, beta: pb } = priorFor(
389
+ arm.tier,
390
+ opts.strength ?? PRIOR_STRENGTH,
391
+ arm.prior_class,
392
+ );
351
393
  return {
352
394
  alpha: pa + factor * Math.max(0, arm.alpha - pa),
353
395
  beta: pb + factor * Math.max(0, arm.beta - pb),
@@ -52,6 +52,8 @@
52
52
  const fs = require('node:fs');
53
53
  const path = require('node:path');
54
54
 
55
+ const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
56
+
55
57
  const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
56
58
  const DEFAULT_RUNTIME_ID = 'claude';
57
59
  const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
326
328
  tokens_out: Number(args.tokens_out || 0),
327
329
  cache_hit: args.cache_hit === true,
328
330
  };
331
+ // Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
332
+ // BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
333
+ // `claude-opus-4-8` row. The variant encodes a context-window SKU; the
334
+ // current price tables are keyed on the base id.
335
+ const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
336
+ ? args.model_id
337
+ : null;
338
+ const normalizedModelId = rawModelId !== null
339
+ ? (normalizeModelId(rawModelId).base || rawModelId)
340
+ : null;
329
341
  const q = {
330
- model_id: typeof args.model_id === 'string' && args.model_id.length > 0
331
- ? args.model_id
332
- : null,
342
+ model_id: normalizedModelId,
333
343
  tier: typeof args.tier === 'string' && args.tier.length > 0
334
344
  ? args.tier
335
345
  : null,
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
365
375
  }
366
376
  }
367
377
 
368
- // Branch 5: nothing matched.
378
+ // Branch 5: nothing matched. Rather than silently returning a null cost
379
+ // (which downstream aggregators treat as $0 — a frontier model billed as
380
+ // free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
381
+ // price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
382
+ // and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
383
+ const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
384
+ const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
385
+ const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
386
+ if (opusRow !== null) {
387
+ return {
388
+ cost_usd: applyFormula(opusRow, tokens),
389
+ model: normalizedModelId,
390
+ tier: 'opus',
391
+ runtime_used: DEFAULT_RUNTIME_ID,
392
+ fallback: true,
393
+ reason,
394
+ cost_estimated: true,
395
+ };
396
+ }
397
+ // Even the opus row is unavailable → genuinely cannot price. Keep null.
369
398
  return {
370
399
  cost_usd: null,
371
400
  model: null,
372
401
  tier: q.tier,
373
402
  runtime_used: null,
374
403
  fallback: false,
375
- reason: rows.length === 0 ? 'runtime_table_missing' : 'model_not_found',
404
+ reason,
376
405
  };
377
406
  }
378
407