npm - @hegemonart/get-design-done - Versions diffs - 1.59.7 → 1.59.9 - Mend

@hegemonart/get-design-done 1.59.7 → 1.59.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55)

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +59 -0
package/README.md +2 -2
package/SKILL.md +1 -1
package/agents/design-authority-watcher.md +24 -5
package/bin/gdd-graph +4 -1
package/hooks/_hook-emit.js +113 -29
package/hooks/budget-enforcer.ts +104 -5
package/hooks/gdd-mcp-circuit-breaker.js +72 -3
package/hooks/gdd-sessionstart-recap.js +23 -14
package/hooks/hooks.json +2 -2
package/package.json +2 -2
package/reference/bandit-integration.md +13 -2
package/reference/prices/claude.md +11 -0
package/reference/runtime-models.md +9 -9
package/reference/schemas/generated.d.ts +4 -0
package/reference/schemas/runtime-models.schema.json +5 -0
package/scripts/bootstrap.cjs +40 -8
package/scripts/install.cjs +23 -1
package/scripts/lib/bandit-router.cjs +47 -5
package/scripts/lib/budget-enforcer.cjs +34 -5
package/scripts/lib/detect/cli.cjs +13 -3
package/scripts/lib/install/converters/cursor.cjs +11 -19
package/scripts/lib/install/installer.cjs +72 -21
package/scripts/lib/install/merge.cjs +31 -3
package/scripts/lib/install/parse-runtime-models.cjs +9 -1
package/scripts/lib/install/runtime-artifact-layout.cjs +42 -8
package/scripts/lib/manifest/harnesses.json +29 -1
package/scripts/lib/manifest/skills.json +1 -1
package/scripts/lib/model-id.cjs +141 -0
package/scripts/lib/session-runner/index.ts +87 -16
package/scripts/skill-templates/bandit-reset/SKILL.md +2 -0
package/scripts/skill-templates/bandit-status/SKILL.md +4 -1
package/scripts/skill-templates/darkmode/SKILL.md +1 -1
package/scripts/skill-templates/graphify/SKILL.md +6 -6
package/scripts/skill-templates/quick/SKILL.md +3 -1
package/scripts/skill-templates/reflect/SKILL.md +1 -1
package/scripts/skill-templates/router/SKILL.md +4 -2
package/sdk/cli/index.js +132 -55
package/sdk/dashboard/data/source.cjs +50 -4
package/sdk/event-stream/writer.ts +112 -30
package/sdk/mcp/gdd-mcp/server.js +49 -36
package/sdk/mcp/gdd-mcp/tools/shared.ts +20 -2
package/sdk/mcp/gdd-state/server.js +107 -41
package/sdk/primitives/lockfile.cjs +26 -5
package/sdk/state/index.ts +91 -17
package/sdk/state/lockfile.ts +47 -8
package/skills/bandit-reset/SKILL.md +2 -0
package/skills/bandit-status/SKILL.md +4 -1
package/skills/darkmode/SKILL.md +1 -1
package/skills/graphify/SKILL.md +6 -6
package/skills/quick/SKILL.md +3 -1
package/skills/reflect/SKILL.md +1 -1
package/skills/router/SKILL.md +4 -2

package/hooks/gdd-mcp-circuit-breaker.js CHANGED Viewed

@@ -25,6 +25,32 @@ const DEFAULT_FILE = path.join(REPO_ROOT, 'reference', 'mcp-budget.default.json'
 const TRACKED_TOOL_RE = /^mcp__.*use_(figma|paper|pencil)$/;
+// Bounded fallback window (ms) for counting volume when no session id is
+// available on the payload. Without this, `total_calls` would count every row
+// ever appended to the ledger — so after `max_calls_per_task` cumulative calls
+// across ALL sessions for the lifetime of the file, every mutation is blocked
+// forever (and a BLOCKER is appended to STATE.md each time). The volume gate is
+// meant to be PER-TASK; this window keeps the fallback path per-task-ish so a
+// long-lived user is never permanently locked out.
+const SESSIONLESS_WINDOW_MS = 6 * 60 * 60 * 1000; // 6 hours
+/**
+ * Resolve the current session id from the hook payload (Claude Code passes
+ * `session_id`; tolerate `sessionId`), falling back to GDD_SESSION_ID, else
+ * null. A non-null id makes the volume window exact (count only this session's
+ * rows); null falls back to the bounded time window.
+ *
+ * @param {any} payload
+ * @returns {string|null}
+ */
+function resolveSessionId(payload) {
+  const fromPayload = payload && (payload.session_id || payload.sessionId);
+  if (typeof fromPayload === 'string' && fromPayload.length > 0) return fromPayload;
+  const fromEnv = process.env.GDD_SESSION_ID;
+  if (typeof fromEnv === 'string' && fromEnv.length > 0) return fromEnv;
+  return null;
+}
 function loadBudget(cwd) {
   let defaults = { max_calls_per_task: 30, max_consecutive_timeouts: 3, reset_on_success: true };
   try {
@@ -106,7 +132,25 @@ function classifyOutcome(toolResponse) {
   return 'error';
 }
-function readJsonlTail(filePath) {
+/**
+ * Read the ledger and compute the prior volume + consecutive-timeout state
+ * for the CURRENT task window only — not the whole-file lifetime.
+ *
+ * Window membership for a row:
+ *   - If a current session id is known AND the row carries a `session` field:
+ *     the row counts iff `row.session === sessionId`.
+ *   - Otherwise (sessionless harness/tests, or legacy rows without `session`):
+ *     the row counts iff its timestamp is within SESSIONLESS_WINDOW_MS of now.
+ *
+ * This bounds the volume count so a long-lived ledger can never permanently
+ * trip `volumeBreak`, while keeping rapid same-task calls (the common case and
+ * the existing test scenario) counted together.
+ *
+ * @param {string} filePath
+ * @param {string|null} sessionId
+ * @param {number} nowMs
+ */
+function readJsonlTail(filePath, sessionId, nowMs) {
   if (!fs.existsSync(filePath)) return { lastRow: null, total_calls: 0, consecutive_timeouts: 0 };
   let total = 0;
   let lastTimeoutsChain = 0;
@@ -118,6 +162,25 @@ function readJsonlTail(filePath) {
       if (!t) continue;
       let row;
       try { row = JSON.parse(t); } catch { continue; }
+      // Decide whether this row belongs to the current task window.
+      let inWindow;
+      if (sessionId !== null && typeof row.session === 'string' && row.session.length > 0) {
+        inWindow = row.session === sessionId;
+      } else {
+        const rowMs = typeof row.ts === 'string' ? Date.parse(row.ts) : NaN;
+        // Unparseable timestamps fall back to "in window" so we never
+        // under-count; a malformed-ts row is treated as recent.
+        inWindow = Number.isNaN(rowMs) ? true : (nowMs - rowMs) <= SESSIONLESS_WINDOW_MS;
+      }
+      if (!inWindow) {
+        // Out-of-window rows reset the streak — a new task/session must not
+        // inherit a stale consecutive-timeout chain.
+        lastTimeoutsChain = 0;
+        continue;
+      }
       total++;
       if (row.outcome === 'timeout') lastTimeoutsChain++;
       else lastTimeoutsChain = 0;
@@ -158,7 +221,9 @@ async function main() {
   const budget = loadBudget(cwd);
   const ledgerPath = path.join(cwd, '.design', 'telemetry', 'mcp-budget.jsonl');
-  const prior = readJsonlTail(ledgerPath);
+  const sessionId = resolveSessionId(payload);
+  const nowMs = Date.now();
+  const prior = readJsonlTail(ledgerPath, sessionId, nowMs);
   const outcome = classifyOutcome(payload?.tool_response);
   const total_calls = prior.total_calls + 1;
   const consecutive_timeouts = outcome === 'timeout'
@@ -166,12 +231,16 @@ async function main() {
     : (budget.reset_on_success && outcome === 'success' ? 0 : prior.consecutive_timeouts);
   const row = {
-    ts: new Date().toISOString(),
+    ts: new Date(nowMs).toISOString(),
     tool,
     outcome,
     consecutive_timeouts,
     total_calls,
   };
+  // Stamp the session id so future calls can scope the volume window exactly.
+  // Omitted when unknown (keeps the row schema stable for the sessionless path,
+  // which relies on the time window instead).
+  if (sessionId !== null) row.session = sessionId;
   appendJsonl(ledgerPath, row);
   const timeoutBreak = consecutive_timeouts >= budget.max_consecutive_timeouts;

package/hooks/gdd-sessionstart-recap.js CHANGED Viewed

@@ -57,17 +57,21 @@ function detectHarness() {
 }
 // ---------------------------------------------------------------------------
-// Lazy event-stream emit (best-effort)
+// Event emit (best-effort) — delegate to the shared _hook-emit helper, which
+// uses the SDK writer when loadable (modern Node) and an inline JSONL appender
+// otherwise. The previous direct `require('../sdk/event-stream')` resolved to
+// the `.ts` ESM index and threw under plain `node` on Node 22.0–22.17, leaving
+// recap.emitted permanently no-op'd. emitEvent lands the line on every Node.
 // ---------------------------------------------------------------------------
-function getAppendEvent() {
+function getEmitEvent() {
   try {
-    const m = require('../sdk/event-stream');
-    if (m && typeof m.appendEvent === 'function') return m.appendEvent;
+    const m = require('./_hook-emit.js');
+    if (m && typeof m.emitEvent === 'function') return m.emitEvent;
   } catch {
-    /* swallow — event-stream is optional infrastructure */
+    /* swallow — telemetry is optional infrastructure */
   }
-  return function noopAppend(_ev) {
+  return function noopEmit(_ev) {
     /* no-op */
   };
 }
@@ -87,9 +91,12 @@ function readStateMd(paths) {
   }
   const frontmatter = {};
-  const fmMatch = body.match(/^---\n([\s\S]*?)\n---\n/);
+  // Tolerate CRLF line endings — the STATE.md mutator preserves CRLF, so a
+  // strict `\n`-only anchor fails to match the frontmatter block on Windows
+  // checkouts and the recap silently reports an empty cycle/decisions diff.
+  const fmMatch = body.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n/);
   if (fmMatch) {
-    for (const line of fmMatch[1].split('\n')) {
+    for (const line of fmMatch[1].split(/\r?\n/)) {
       const m = line.match(/^(\w+):\s*(.+)$/);
       if (m) frontmatter[m[1]] = m[2].trim();
     }
@@ -273,9 +280,9 @@ async function main() {
   }
   // Best-effort event emit.
-  const appendEvent = getAppendEvent();
+  const emitEvent = getEmitEvent();
   try {
-    appendEvent({
+    emitEvent({
       type: 'recap.emitted',
       timestamp: new Date().toISOString(),
       sessionId: process.env.GDD_SESSION_ID || 'sessionstart-hook',
@@ -300,9 +307,11 @@ async function main() {
   process.exit(0);
 }
-try {
-  main();
-} catch (err) {
+// `main` is async: a sync try/catch cannot observe a rejected promise, so a
+// throw inside an `await` boundary would escape as an unhandled rejection and
+// exit non-zero — violating the silent-exit-0 contract for SessionStart hooks.
+// Attach `.catch` so every failure mode is swallowed and we exit 0.
+main().catch((err) => {
   try {
     process.stderr.write(
       '[gdd-sessionstart-recap] uncaught: ' +
@@ -313,4 +322,4 @@ try {
     /* swallow */
   }
   process.exit(0);
-}
+});

package/hooks/hooks.json CHANGED Viewed

@@ -45,7 +45,7 @@
     ],
     "PreToolUse": [
       {
-        "matcher": "Agent",
+        "matcher": "Task|Agent",
         "hooks": [
           {
             "type": "command",
@@ -119,7 +119,7 @@
         ]
       },
       {
-        "matcher": "Agent",
+        "matcher": "Task|Agent",
         "hooks": [
           {
             "type": "command",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hegemonart/get-design-done",
-  "version": "1.59.7",
+  "version": "1.59.9",
   "description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
   "author": "Hegemon",
   "homepage": "https://github.com/hegemonart/get-design-done",
@@ -10,7 +10,7 @@
   },
   "license": "MIT",
   "engines": {
-    "node": ">=22"
+    "node": ">=22.6.0"
   },
   "files": [
     ".claude-plugin/",

package/reference/bandit-integration.md CHANGED Viewed

@@ -10,7 +10,7 @@ description: Bandit posterior + production-integration shim cheat sheet - signat
 **Phase 27.5 (v1.27.5).** Reference for the bandit production-integration surface. Authoring or modifying a caller of the bandit posterior? Debugging a routing decision at the code level? Start here.
-For ops-level guidance (when bandit fires, how to disable, posterior inspection), see `docs/BANDIT-INTEGRATION.md`.
+For ops-level guidance (when bandit fires, how to disable, posterior inspection), use the read-only diagnostic surfaces: `/gdd:bandit-status` (per-arm posterior snapshots) and `/gdd:bandit-reset` (confirm-then-reset). The `adaptive_mode` gate below covers enable/disable.
 In-scope modules:
@@ -104,6 +104,17 @@ Phase 27.5 passes `wallTimeMs: 0` always (D-08 unchanged from Phase 23.5).
 ---
+## Where adaptive routing actually learns
+This is a deliberate design boundary, not a bug - read it before assuming the bandit "learns" in every runtime.
+- **The posterior is updated only on the SDK / headless path.** `recordOutcome` (the learning update that moves `alpha`/`beta`) is called from `scripts/lib/session-runner/index.ts` after a session terminates. That path runs in the SDK / headless `session-runner` execution model. It is the only place a reward is folded back into the posterior.
+- **In interactive Claude Code with `adaptive_mode: full`, the bandit samples but does not currently learn from in-session outcomes.** When a plugin/interactive run consults the bandit, `consultBandit` performs a Thompson sample from the *configured priors* (and whatever the SDK path has already written), and `pull()` bumps `last_used` + `count` - but no `recordOutcome` fires from an interactive Claude Code hook, so the success/fail posterior does not move within the interactive session. With an un-seeded posterior, sampling therefore reflects the informed `TIER_PRIOR` (which leans toward the higher tiers, e.g. opus). Wiring `recordOutcome` into an interactive hook is intentionally out of scope for this phase.
+- **`adaptive_mode` defaults to `static` - the feature is opt-in.** Per `scripts/lib/adaptive-mode.cjs`, the default mode is `static`, in which the bandit is fully silent (no reads, no writes) and `default-tier:` is authoritative. Adaptive routing only engages when an operator explicitly sets `adaptive_mode: full` in `.design/budget.json`.
+- **Contextual dimensions are supplied by the caller, not inferred here.** The `bin` (glob-count bucket via `binForGlobCount`) and `delegate` dimensions are passed in at the call site; the router does not derive them from ambient session state.
+Net: enable `adaptive_mode: full` and run the SDK/headless `session-runner` path to accumulate a posterior that genuinely reflects observed outcomes. In interactive Claude Code, `full` mode gives you prior-driven Thompson sampling, not in-session reinforcement.
 ## `adaptive_mode` gate semantics
 Phase 23.5 ladder (D-07):
@@ -154,7 +165,7 @@ Phase 27.5 wires these consumers:
 ## Cross-references
-- `docs/BANDIT-INTEGRATION.md` - operator guide (when bandit fires, how to disable, troubleshooting).
+- `/gdd:bandit-status` + `/gdd:bandit-reset` - read-only operator surfaces (when bandit fires, posterior inspection, reset). Disable/enable is the `adaptive_mode` gate in `.design/budget.json` (see above).
 - `reference/peer-protocols.md` - Phase 27 ACP/ASP cheat sheet (peer-CLI delegation transport).
 - `scripts/lib/bandit-router.cjs` - Phase 23.5 primitives surface.
 - `scripts/lib/bandit-router/integration.cjs` - Phase 27.5 production shim.

package/reference/prices/claude.md CHANGED Viewed

@@ -13,6 +13,17 @@
 | claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
 | claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
 | claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
+| claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
+> **>200k-input (1M-context) pricing note.** The rates above are the
+> standard (≤200k-input) per-1M-token prices. Anthropic's flagship
+> `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
+> requests above the 200k-input threshold may be billed at a higher
+> tiered rate. This table tracks only the standard tier today; the
+> >200k tiered figure will be added as a separate row/column once
+> Anthropic publishes it. The parser is positional-by-header and
+> tolerates right-edge columns, so a future `>200k_input_per_1m`
+> column can be appended without breaking cost lookups.
 ## size_budget → conservative token ranges

package/reference/runtime-models.md CHANGED Viewed

@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
 {
   "id": "claude",
   "tier_to_model": {
-    "opus":   { "model": "claude-opus-4-7" },
-    "sonnet": { "model": "claude-sonnet-4-6" },
-    "haiku":  { "model": "claude-haiku-4-5" }
+    "opus":   { "model": "claude-opus-4-8", "context_window": 1000000 },
+    "sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
+    "haiku":  { "model": "claude-haiku-4-5", "context_window": 200000 }
   },
   "reasoning_class_to_model": {
-    "high":   { "model": "claude-opus-4-7" },
-    "medium": { "model": "claude-sonnet-4-6" },
-    "low":    { "model": "claude-haiku-4-5" }
+    "high":   { "model": "claude-opus-4-8", "context_window": 1000000 },
+    "medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
+    "low":    { "model": "claude-haiku-4-5", "context_window": 200000 }
   },
   "provenance": [
     {
       "source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
-      "retrieved_at": "2026-04-29T00:00:00.000Z",
-      "last_validated_cycle": "2026-04-29-v1.26",
-      "note": "Anthropic public model catalog — first-party runtime."
+      "retrieved_at": "2026-06-10T00:00:00.000Z",
+      "last_validated_cycle": "2026-06-10-v1.59.9",
+      "note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
     }
   ]
 }

package/reference/schemas/generated.d.ts CHANGED Viewed

@@ -1115,6 +1115,10 @@ export interface ModelRow {
    * Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
    */
   provider_model_id?: string;
+  /**
+   * Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
+   */
+  context_window?: number;
 }
 export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;

package/reference/schemas/runtime-models.schema.json CHANGED Viewed

@@ -121,6 +121,11 @@
           "type": "string",
           "minLength": 1,
           "description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
+        },
+        "context_window": {
+          "type": "integer",
+          "minimum": 1,
+          "description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
         }
       }
     }

package/scripts/bootstrap.cjs CHANGED Viewed

@@ -148,6 +148,14 @@ function filesEqual(a, b) {
   }
 }
+/**
+ * Network timeout (ms) for the git clone/pull. SessionStart hooks must never
+ * block the harness: without a timeout, a hung network connection would stall
+ * the whole session-start sequence indefinitely. spawnSync kills the child
+ * with `killSignal` once this elapses and reports it as a failure.
+ */
+const GIT_TIMEOUT_MS = 15000;
 /**
  * Match the .sh `clone_or_update`:
  *   - target/.git exists  → `git -C target pull --quiet --ff-only`, log on fail
@@ -157,8 +165,14 @@ function filesEqual(a, b) {
  * We invoke the `git` CLI directly via spawnSync. spawnSync('git', …) is fine —
  * the prohibition is on spawnSync('bash', …).
  *
+ * Returns true ONLY when the repo is in a good post-condition (pull/clone
+ * succeeded, or a pre-existing non-git dir we intentionally skip). Returns
+ * false when a network op failed or timed out — so the caller can withhold the
+ * success marker and retry next session instead of recording failure as done.
+ *
  * @param {string} repoUrl
  * @param {string} target
+ * @returns {boolean} success
  */
 function cloneOrUpdate(repoUrl, target) {
   let isGitCheckout = false;
@@ -177,16 +191,22 @@ function cloneOrUpdate(repoUrl, target) {
     const r = spawnSync('git', ['-C', target, 'pull', '--quiet', '--ff-only'], {
       stdio: ['ignore', 'ignore', 'ignore'],
       windowsHide: true,
+      timeout: GIT_TIMEOUT_MS,
+      killSignal: 'SIGKILL',
     });
     if (r.error || r.status !== 0) {
-      log(`pull failed for ${target} (continuing)`);
+      const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
+      log(`pull ${why} for ${target} (continuing)`);
+      return false;
     }
-    return;
+    return true;
   }
   if (targetExists) {
     log(`${target} exists and is not a git checkout — skipping`);
-    return;
+    // A pre-existing non-git dir is a stable post-condition, not a failure:
+    // re-running won't change it, so don't force a retry every session.
+    return true;
   }
   // Defense in depth: refuse repoUrl / target arguments that look like git
@@ -196,7 +216,7 @@ function cloneOrUpdate(repoUrl, target) {
   if (typeof repoUrl !== 'string' || repoUrl.startsWith('-') ||
       typeof target !== 'string' || target.startsWith('-')) {
     log(`refusing suspicious clone args for ${repoUrl} -> ${target}`);
-    return;
+    return false;
   }
   log(`cloning ${repoUrl} -> ${target}`);
@@ -205,10 +225,15 @@ function cloneOrUpdate(repoUrl, target) {
   const r = spawnSync('git', ['clone', '--quiet', '--depth', '1', '--', repoUrl, target], {
     stdio: ['ignore', 'ignore', 'ignore'],
     windowsHide: true,
+    timeout: GIT_TIMEOUT_MS,
+    killSignal: 'SIGKILL',
   });
   if (r.error || r.status !== 0) {
-    log(`clone failed for ${repoUrl}`);
+    const why = r.error && r.error.code === 'ETIMEDOUT' ? 'timed out' : 'failed';
+    log(`clone ${why} for ${repoUrl}`);
+    return false;
   }
+  return true;
 }
 /**
@@ -315,7 +340,7 @@ function run(opts = {}) {
   }
   // Required library: VoltAgent/awesome-design-md.
-  cloneOrUpdate(
+  const repoOk = cloneOrUpdate(
     'https://github.com/VoltAgent/awesome-design-md.git',
     ctx.awesomeRepoTarget
   );
@@ -332,8 +357,15 @@ function run(opts = {}) {
   // Phase 10.1: .design/budget.json + .design/telemetry/ (D-12).
   ensureDesignDir(cwd);
-  // Record success so we don't re-run until the bundled manifest changes.
-  copyManifestToMarker(ctx.manifest, ctx.marker);
+  // Record success ONLY when the network provisioning actually succeeded.
+  // Writing the marker unconditionally records a failed clone as "done" and
+  // never retries — leaving the required library permanently absent. Gating on
+  // repoOk means a transient network failure/timeout is retried next session.
+  if (repoOk) {
+    copyManifestToMarker(ctx.manifest, ctx.marker);
+  } else {
+    log('skipping success marker — provisioning incomplete, will retry next session');
+  }
   return 0;
 }

package/scripts/install.cjs CHANGED Viewed

@@ -211,6 +211,28 @@ async function main() {
       }
       runtimes = picked.runtimes;
       if (picked.location) location = picked.location;
+    } else if (uninstall) {
+      // B4 fix (Phase 59.8): bare `--uninstall` in a non-TTY context must NOT
+      // silently default to removing claude. The interactive path is the only
+      // safe way to pick what to remove without an explicit flag; in non-TTY
+      // we refuse and require an explicit runtime flag so a scripted/CI
+      // invocation can never destroy an install the operator didn't name.
+      // (See the comment at shouldUseInteractive: bare --uninstall is meant to
+      // trigger the interactive select-which-to-remove flow.)
+      process.stderr.write(
+        [
+          'Refusing to uninstall: no runtime specified and not running in an',
+          'interactive terminal.',
+          '',
+          'Re-run with an explicit runtime flag, e.g.:',
+          '  npx @hegemonart/get-design-done --uninstall --claude',
+          '  npx @hegemonart/get-design-done --uninstall --all',
+          '',
+          'Run with --help to list available runtime flags.',
+          '',
+        ].join('\n'),
+      );
+      process.exit(2);
     } else {
       // Non-TTY zero-flag fallback: back-compat with v1.23.5 behaviour.
       runtimes = ['claude'];
@@ -359,7 +381,7 @@ async function maybeNudgePeerCli({ flags }) {
       '✓ Detected peer CLIs: ' + detectedDisplay,
       '',
       'gdd v1.27.0 introduced optional peer-CLI delegation. With your',
-      'agents\\u2019 frontmatter `delegate_to:` set, gdd can route specific',
+      "agents' frontmatter `delegate_to:` set, gdd can route specific",
       'roles through these peer CLIs (cost or quality wins per Phase 23.5',
       'bandit). You can change this anytime via .design/config.json.',
       '',

package/scripts/lib/bandit-router.cjs CHANGED Viewed

@@ -38,7 +38,9 @@
  *   - The `prior_class` value is persisted on the arm so subsequent
  *     reads + decay calculations preserve it (forward-compat).
  *
- * Atomic .tmp + rename. Discounted Thompson via per-arm time-decay
+ * Atomic per-pid-unique .tmp + rename (Phase 59-8 C2: unique tmp name per
+ * process so parallel waves never interleave writes on one scratch file).
+ * Discounted Thompson via per-arm time-decay
  * factor `rho^days_since_last_use` applied at sample time, not stored.
  *
  * Reward computation (D-06): two-stage lexicographic — UNCHANGED.
@@ -57,6 +59,17 @@ const path = require('node:path');
 const DEFAULT_POSTERIOR_PATH = '.design/telemetry/posterior.json';
 const SCHEMA_VERSION = '1.0.0';
+// C2 fix (Phase 59-8): monotonic per-process counter for tmp-file naming.
+// Combined with process.pid it guarantees that two concurrent writers — even
+// within the same process, even firing in the same millisecond — never target
+// the same `.tmp` path. The old fixed `p + '.tmp'` name let parallel agent
+// waves interleave partial writes on one tmp file, producing truncated JSON
+// that loadPosterior() then silently reset to an empty posterior (losing all
+// learned arms). Unique tmp + atomic rename makes a half-written file
+// invisible to readers: rename is atomic on the same filesystem, so a reader
+// sees either the old complete file or the new complete file, never a partial.
+let _tmpCounter = 0;
 // Decay factor — 60-day half-life.
 const DEFAULT_DECAY = 0.988;
@@ -136,6 +149,12 @@ function loadPosterior(opts = {}) {
     }
     return data;
   } catch {
+    // Corrupt-JSON recovery (preserved, Phase 59-8 C2): fall back to an empty
+    // posterior. With the per-pid unique-tmp + atomic-rename write discipline
+    // (see savePosterior), a reader can no longer observe a half-written file
+    // — rename publishes the complete file in one step — so this branch should
+    // now only fire on genuine on-disk corruption (e.g. external truncation),
+    // not on a write/read race during a parallel agent wave.
     return { schema_version: SCHEMA_VERSION, generated_at: new Date().toISOString(), arms: [] };
   }
 }
@@ -159,9 +178,19 @@ function savePosterior(posterior, opts = {}) {
   const p = resolvePath(opts);
   fs.mkdirSync(path.dirname(p), { recursive: true });
   posterior.generated_at = new Date().toISOString();
-  const tmp = p + '.tmp';
-  fs.writeFileSync(tmp, JSON.stringify(posterior, null, 2));
-  fs.renameSync(tmp, p);
+  // C2 fix (Phase 59-8): per-process-unique tmp name (pid + monotonic
+  // counter) so concurrent writers never collide on the same scratch file.
+  // The atomic rename then publishes the fully-written file in one step.
+  const tmp = `${p}.${process.pid}.${_tmpCounter++}.tmp`;
+  try {
+    fs.writeFileSync(tmp, JSON.stringify(posterior, null, 2));
+    fs.renameSync(tmp, p);
+  } catch (err) {
+    // Best-effort cleanup of the orphaned tmp on failure so a crashed
+    // write never leaves stale scratch files behind. ENOENT is fine.
+    try { fs.unlinkSync(tmp); } catch { /* already gone */ }
+    throw err;
+  }
   return p;
 }
@@ -347,7 +376,20 @@ function decayArm(arm, opts = {}) {
   const factor = Math.pow(decay, days);
   // Decay shrinks both α and β toward the prior. We never go below the
   // initial prior strength — caller can rebuild a fresh prior via reset().
-  const { alpha: pa, beta: pb } = priorFor(arm.tier, opts.strength ?? PRIOR_STRENGTH);
+  //
+  // C1 fix (Phase 59-8): decay MUST target the SAME prior the arm was
+  // bootstrapped with. The arm persists `prior_class` (Phase 29 Plan 06 /
+  // D-04), so pass it through to priorFor — otherwise a promoted-incubator
+  // arm (Beta(2,8)) would drift back toward the informed TIER_PRIOR while
+  // idle, undoing the D-04 preferential-selection suppression. Default-class
+  // arms have no `prior_class` field, so `arm.prior_class` is undefined and
+  // priorFor falls through to the Phase 23.5 informed prior (byte-for-byte
+  // unchanged).
+  const { alpha: pa, beta: pb } = priorFor(
+    arm.tier,
+    opts.strength ?? PRIOR_STRENGTH,
+    arm.prior_class,
+  );
   return {
     alpha: pa + factor * Math.max(0, arm.alpha - pa),
     beta: pb + factor * Math.max(0, arm.beta - pb),

package/scripts/lib/budget-enforcer.cjs CHANGED Viewed

@@ -52,6 +52,8 @@
 const fs = require('node:fs');
 const path = require('node:path');
+const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
 const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
 const DEFAULT_RUNTIME_ID = 'claude';
 const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
     tokens_out: Number(args.tokens_out || 0),
     cache_hit: args.cache_hit === true,
   };
+  // Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
+  // BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
+  // `claude-opus-4-8` row. The variant encodes a context-window SKU; the
+  // current price tables are keyed on the base id.
+  const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
+    ? args.model_id
+    : null;
+  const normalizedModelId = rawModelId !== null
+    ? (normalizeModelId(rawModelId).base || rawModelId)
+    : null;
   const q = {
-    model_id: typeof args.model_id === 'string' && args.model_id.length > 0
-      ? args.model_id
-      : null,
+    model_id: normalizedModelId,
     tier: typeof args.tier === 'string' && args.tier.length > 0
       ? args.tier
       : null,
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
     }
   }
-  // Branch 5: nothing matched.
+  // Branch 5: nothing matched. Rather than silently returning a null cost
+  // (which downstream aggregators treat as $0 — a frontier model billed as
+  // free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
+  // price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
+  // and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
+  const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
+  const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
+  const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
+  if (opusRow !== null) {
+    return {
+      cost_usd: applyFormula(opusRow, tokens),
+      model: normalizedModelId,
+      tier: 'opus',
+      runtime_used: DEFAULT_RUNTIME_ID,
+      fallback: true,
+      reason,
+      cost_estimated: true,
+    };
+  }
+  // Even the opus row is unavailable → genuinely cannot price. Keep null.
   return {
     cost_usd: null,
     model: null,
     tier: q.tier,
     runtime_used: null,
     fallback: false,
-    reason: rows.length === 0 ? 'runtime_table_missing' : 'model_not_found',
+    reason,
   };
 }