npm - @hegemonart/get-design-done - Versions diffs - 1.59.8 → 1.59.9 - Mend

@hegemonart/get-design-done 1.59.8 → 1.59.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/CHANGELOG.md +26 -0
package/hooks/budget-enforcer.ts +60 -0
package/package.json +1 -1
package/reference/prices/claude.md +11 -0
package/reference/runtime-models.md +9 -9
package/reference/schemas/generated.d.ts +4 -0
package/reference/schemas/runtime-models.schema.json +5 -0
package/scripts/lib/budget-enforcer.cjs +34 -5
package/scripts/lib/install/parse-runtime-models.cjs +9 -1
package/scripts/lib/model-id.cjs +141 -0
package/scripts/lib/session-runner/index.ts +87 -16
package/sdk/cli/index.js +18 -8

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Get Design Done — 5-stage agent-orchestrated design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 agents, 96 skills, 39 connection integrations, two MCP servers, opt-in SQLite state backbone, bidirectional Figma write-back, and a reflector-driven self-improvement loop. Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, and more.",
-    "version": "1.59.8"
+    "version": "1.59.9"
   },
   "plugins": [
     {
       "name": "get-design-done",
       "source": "./",
       "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (gdd-state for typed STATE mutators, gdd-mcp for 13 read-only project-priming tools), tier-aware routing with cost telemetry, and defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer). Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
-      "version": "1.59.8",
+      "version": "1.59.9",
       "author": {
         "name": "hegemonart"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "get-design-done",
   "short_name": "gdd",
-  "version": "1.59.8",
+  "version": "1.59.9",
   "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store for O(1) design-surface lookups, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (`gdd-state` for typed STATE mutators, `gdd-mcp` for 13 read-only project-priming tools), tier-aware agent routing with cost telemetry, defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer), and a cross-runtime install layer for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
   "author": {
     "name": "hegemonart",

package/CHANGELOG.md CHANGED Viewed

@@ -4,6 +4,32 @@ All notable changes to get-design-done are documented here. Versions follow [sem
 ---
+## [1.59.9] - 2026-06-10
+New-model-family readiness and cost truth (audit `.planning/audits/SELF-AUDIT-v1.59.7.md` §4). A new or unknown Anthropic model previously degraded cost accounting silently - billed at $0 or the sonnet rate and mis-attributed to the sonnet tier. This release makes unknown models loud and conservative, handles the 1M-context `[1m]` variant, and records context-window size in the model registry.
+### Added
+- **`scripts/lib/model-id.cjs`** - shared model-id normalization + tiering. `normalizeModelId` strips a bracketed variant suffix (`claude-opus-4-8[1m]` to base `claude-opus-4-8` + variant `1m`); `tierForModelId` resolves a tier via exact id, family pattern (`claude-(opus|sonnet|haiku)`), or an extensible alias map, returning `null` for an unknown family so callers price it conservatively instead of guessing.
+- **`context_window` in the model registry.** `reference/runtime-models.md` now records each Claude model's context window (the `claude-opus-4-8` `[1m]` variant is `1000000`; the others `200000`), allowed additively through both the JSON schema and the hand-rolled `parse-runtime-models.cjs` allowlist, with regenerated types. The 1M-context variant is now recognised as first-class metadata. (Deriving token budgets from it is deferred - this release records the fact.)
+- **`claude-opus-4-8` price + tier mapping.** Added to `reference/prices/claude.md` and mapped as the `opus`/`high` model in `reference/runtime-models.md`; provenance refreshed. The stale tables previously topped out at `claude-opus-4-7`.
+### Fixed
+- **Unknown / new models are now priced loudly and conservatively.** `budget-enforcer.cjs computeCost` normalizes the model id (so `[1m]` variants match their base row) and, when no price row matches, returns a conservative ceiling computed at the **opus** rate with `cost_estimated: true` and a `cost_lookup_fallback` telemetry event - instead of the old silent `cost_usd: null`. The headless `session-runner` `rateFor` falls back through the resolved tier's rate and uses the opus rate (not sonnet) as its last-resort default, ending the systematic under-billing of frontier models. Tier inference no longer relies on loose substring matching.
+### Deferred
+- Making `reasoning-class` the canonical routing vocabulary (opus/sonnet/haiku as aliases) and deriving SDK token budgets from `context_window` + tiered >200k long-context pricing - tracked for a later phase; not required for new-family readiness.
+### Breaking changes
+None.
+5,096/5,096 tests pass.
+---
 ## [1.59.8] - 2026-06-10
 Production-wiring repair and security hardening from a 4-agent self-audit (`.planning/audits/SELF-AUDIT-v1.59.7.md`). The theme: real, well-tested library code whose production call-sites silently neutered it. This release makes the wiring either true or honest.

package/hooks/budget-enforcer.ts CHANGED Viewed

@@ -91,6 +91,10 @@ interface BudgetEnforcerBackend {
     runtime_used: string | null;
     fallback: boolean;
     reason: string | null;
+    // Phase 59-9: true when the cost is a CONSERVATIVE ESTIMATE (opus ceiling)
+    // for an unknown/new model rather than a table-matched figure. Additive +
+    // optional — absent on the table-matched (branches 1-4) paths.
+    cost_estimated?: boolean;
   };
   modelFromResolved(resolved: unknown, agent: string): string | null;
   // Plan 33.6-03 (SC#6): the canonical cost-row payload builder (the
@@ -946,6 +950,40 @@ function emitCostRecorded(
   }
 }
+/**
+ * Phase 59-9: append a `cost_lookup_fallback` event to the event stream.
+ *
+ * `main` calls this when a cost lookup reports `cost_estimated === true`, or
+ * reports `fallback === true` with reason `model_not_found`, so that an
+ * unrecognized model shows up in telemetry.
+ *
+ * The event carries `type`, an ISO-8601 `timestamp`, the current `sessionId`
+ * (from `getSessionId()`), the caller-supplied `payload` verbatim, and — only
+ * when `cycle` is defined and is not the literal 'unknown' — a `cycle` field.
+ * Anything thrown by `appendEvent` is swallowed: emitting this event must
+ * never fail the caller (fail-open).
+ *
+ * @param payload - runtime, agent, model id, tier, lookup reason, cost and
+ *   the estimated-cost flag; forwarded unchanged as the event payload.
+ * @param cycle - optional cycle id; left off the event when undefined or
+ *   equal to 'unknown'.
+ */
+function emitCostLookupFallback(
+  payload: {
+    runtime: string;
+    agent: string;
+    model_id: string | null;
+    tier: string | null;
+    reason: string | null;
+    cost_usd: number | null;
+    cost_estimated: boolean;
+  },
+  cycle?: string,
+): void {
+  const hasCycle = cycle !== undefined && cycle !== 'unknown';
+  // Keys are inserted in the order type → timestamp → sessionId → cycle →
+  // payload so the serialized shape is the same whether or not cycle is set.
+  const event: Record<string, unknown> = {
+    type: 'cost_lookup_fallback',
+    timestamp: new Date().toISOString(),
+    sessionId: getSessionId(),
+  };
+  if (hasCycle) {
+    event.cycle = cycle;
+  }
+  event.payload = payload;
+  try {
+    appendEvent(event as unknown as HookFiredEvent);
+  } catch {
+    // Fail open.
+  }
+}
 /**
  * Plan 27.5-02 / D-03: emit `bandit.tier_selected` event when the bandit
  * is consulted (regardless of whether it overrode the prior tier). The
@@ -1588,6 +1626,28 @@ export async function main(): Promise<void> {
     cycle,
   );
+  // Phase 59-9: when the cost was a CONSERVATIVE ESTIMATE (unknown/new model
+  // priced at the opus ceiling) rather than a table-matched figure, emit a
+  // distinct telemetry signal so an unrecognized model is OBSERVABLE rather
+  // than silently mis-billed. Best-effort, never throws (fail-open).
+  if (
+    costLookup.cost_estimated === true ||
+    (costLookup.fallback === true && costLookup.reason === 'model_not_found')
+  ) {
+    emitCostLookupFallback(
+      {
+        runtime: runtimeId,
+        agent,
+        model_id: effectiveModelId ?? costLookup.model,
+        tier: costLookup.tier ?? effectiveTier,
+        reason: costLookup.reason,
+        cost_usd: costLookup.cost_usd,
+        cost_estimated: costLookup.cost_estimated === true,
+      },
+      cycle,
+    );
+  }
   // Branch E: standard spawn-allowed (includes tier-downgraded path).
   writeTelemetry({
     agent,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@hegemonart/get-design-done",
-  "version": "1.59.8",
+  "version": "1.59.9",
   "description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
   "author": "Hegemon",
   "homepage": "https://github.com/hegemonart/get-design-done",

package/reference/prices/claude.md CHANGED Viewed

@@ -13,6 +13,17 @@
 | claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
 | claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
 | claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
+| claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
+> **>200k-input (1M-context) pricing note.** The rates above are the
+> standard (≤200k-input) per-1M-token prices. Anthropic's flagship
+> `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
+> requests above the 200k-input threshold may be billed at a higher
+> tiered rate. This table tracks only the standard tier today; the
+> >200k tiered figure will be added as a separate row/column once
+> Anthropic publishes it. The parser is positional-by-header and
+> tolerates right-edge columns, so a future `>200k_input_per_1m`
+> column can be appended without breaking cost lookups.
 ## size_budget → conservative token ranges

package/reference/runtime-models.md CHANGED Viewed

@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
 {
   "id": "claude",
   "tier_to_model": {
-    "opus":   { "model": "claude-opus-4-7" },
-    "sonnet": { "model": "claude-sonnet-4-6" },
-    "haiku":  { "model": "claude-haiku-4-5" }
+    "opus":   { "model": "claude-opus-4-8", "context_window": 1000000 },
+    "sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
+    "haiku":  { "model": "claude-haiku-4-5", "context_window": 200000 }
   },
   "reasoning_class_to_model": {
-    "high":   { "model": "claude-opus-4-7" },
-    "medium": { "model": "claude-sonnet-4-6" },
-    "low":    { "model": "claude-haiku-4-5" }
+    "high":   { "model": "claude-opus-4-8", "context_window": 1000000 },
+    "medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
+    "low":    { "model": "claude-haiku-4-5", "context_window": 200000 }
   },
   "provenance": [
     {
       "source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
-      "retrieved_at": "2026-04-29T00:00:00.000Z",
-      "last_validated_cycle": "2026-04-29-v1.26",
-      "note": "Anthropic public model catalog — first-party runtime."
+      "retrieved_at": "2026-06-10T00:00:00.000Z",
+      "last_validated_cycle": "2026-06-10-v1.59.9",
+      "note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
     }
   ]
 }

package/reference/schemas/generated.d.ts CHANGED Viewed

@@ -1115,6 +1115,10 @@ export interface ModelRow {
    * Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
    */
   provider_model_id?: string;
+  /**
+   * Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
+   */
+  context_window?: number;
 }
 export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;

package/reference/schemas/runtime-models.schema.json CHANGED Viewed

@@ -121,6 +121,11 @@
           "type": "string",
           "minLength": 1,
           "description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
+        },
+        "context_window": {
+          "type": "integer",
+          "minimum": 1,
+          "description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
         }
       }
     }

package/scripts/lib/budget-enforcer.cjs CHANGED Viewed

@@ -52,6 +52,8 @@
 const fs = require('node:fs');
 const path = require('node:path');
+const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
 const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
 const DEFAULT_RUNTIME_ID = 'claude';
 const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
     tokens_out: Number(args.tokens_out || 0),
     cache_hit: args.cache_hit === true,
   };
+  // Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
+  // BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
+  // `claude-opus-4-8` row. The variant encodes a context-window SKU; the
+  // current price tables are keyed on the base id.
+  const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
+    ? args.model_id
+    : null;
+  const normalizedModelId = rawModelId !== null
+    ? (normalizeModelId(rawModelId).base || rawModelId)
+    : null;
   const q = {
-    model_id: typeof args.model_id === 'string' && args.model_id.length > 0
-      ? args.model_id
-      : null,
+    model_id: normalizedModelId,
     tier: typeof args.tier === 'string' && args.tier.length > 0
       ? args.tier
       : null,
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
     }
   }
-  // Branch 5: nothing matched.
+  // Branch 5: nothing matched. Rather than silently returning a null cost
+  // (which downstream aggregators treat as $0 — a frontier model billed as
+  // free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
+  // price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
+  // and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
+  const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
+  const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
+  const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
+  if (opusRow !== null) {
+    return {
+      cost_usd: applyFormula(opusRow, tokens),
+      model: normalizedModelId,
+      tier: 'opus',
+      runtime_used: DEFAULT_RUNTIME_ID,
+      fallback: true,
+      reason,
+      cost_estimated: true,
+    };
+  }
+  // Even the opus row is unavailable → genuinely cannot price. Keep null.
   return {
     cost_usd: null,
     model: null,
     tier: q.tier,
     runtime_used: null,
     fallback: false,
-    reason: rows.length === 0 ? 'runtime_table_missing' : 'model_not_found',
+    reason,
   };
 }

package/scripts/lib/install/parse-runtime-models.cjs CHANGED Viewed

@@ -78,7 +78,7 @@ function validateModelRow(row, where) {
   if (typeof row.model !== 'string' || row.model.length === 0) {
     throw new Error(`${where}: 'model' must be a non-empty string`);
   }
-  const allowedKeys = new Set(['model', 'provider_model_id']);
+  const allowedKeys = new Set(['model', 'provider_model_id', 'context_window']);
   for (const k of Object.keys(row)) {
     if (!allowedKeys.has(k)) {
       throw new Error(`${where}: unknown key '${k}' (allowed: ${[...allowedKeys].join(', ')})`);
@@ -89,6 +89,14 @@ function validateModelRow(row, where) {
       throw new Error(`${where}: 'provider_model_id' must be a non-empty string when present`);
     }
   }
+  // Optional context-window size — mirror the schema (integer >= 1). Recorded as
+  // machine-readable metadata (the 1M-context [1m] opus variant); not yet a
+  // budgeting driver (deferred — no consumer wired this cycle).
+  if (row.context_window !== undefined) {
+    if (typeof row.context_window !== 'number' || !Number.isInteger(row.context_window) || row.context_window < 1) {
+      throw new Error(`${where}: 'context_window' must be a positive integer when present`);
+    }
+  }
 }
 function validateProvenance(arr, where) {

package/scripts/lib/model-id.cjs ADDED Viewed

@@ -0,0 +1,141 @@
+'use strict';
+/*
+ * scripts/lib/model-id.cjs — model-id normalization + tiering (pure, dependency-free).
+ *
+ * WHY THIS EXISTS
+ * ---------------
+ * Two unrelated callers need to reason about model ids in identical ways:
+ *   - scripts/lib/session-runner/index.ts (routing: which tier am I running?)
+ *   - scripts/lib/budget-enforcer.cjs       (pricing: what does this model cost?)
+ * Each previously carried its own ad-hoc parsing, which drifted. This module is
+ * the single source of truth so a new model family is a DATA edit here (or in the
+ * price tables), never a logic change scattered across callers.
+ *
+ * DESIGN PRINCIPLES
+ * -----------------
+ * 1. TIER IS FOR ROUTING. `tierForModelId` answers "opus | sonnet | haiku" so the
+ *    router can pick an agent class. It is NOT a pricing key on its own — pricing
+ *    also depends on the exact id and (later) the context-window variant.
+ *
+ * 2. NULL MEANS UNKNOWN — PRICE CONSERVATIVELY + LOUDLY. We deliberately return
+ *    `null` for ids we cannot confidently classify rather than guessing a tier.
+ *    A wrong tier guess silently mis-routes or mis-prices. Callers MUST treat
+ *    null as "unknown model — assume the most expensive plausible price AND warn",
+ *    never as a tier and never as free. Do NOT add heuristic fallbacks that
+ *    invent a tier for arbitrary strings.
+ *
+ * 3. VARIANT SUFFIX IS FOR CONTEXT-WINDOW-AWARE PRICING (LATER). Ids may carry a
+ *    bracketed variant such as `claude-opus-4-8[1m]` or `...[200k]`. The variant
+ *    encodes a context-window SKU that can have different per-token pricing. We
+ *    split it off cleanly (`{ base, variant }`) so tiering operates on `base`
+ *    while a future price table can key on `(base, variant)`. Date stamps in the
+ *    base (e.g. `claude-opus-4-8-20260101`) are NOT variants and are left intact.
+ *
+ * 4. NEW FAMILIES ARE A DATA EDIT, NOT A CODE CHANGE. To onboard a new model:
+ *      - if its id contains the tier word (opus/sonnet/haiku), the family-pattern
+ *        rule already handles it — optionally pin it in KNOWN_TIER_BY_ID;
+ *      - if its id does NOT contain the tier word (e.g. a hypothetical
+ *        `claude-fable-5`), add one line to ALIAS_MAP (see comment there);
+ *      - pricing specifics go in the caller's price table keyed on the exact id.
+ */
+/**
+ * KNOWN_TIER_BY_ID — explicit, exact-id → tier pins.
+ * Seeded with the currently-shipping ids. Exact matches win over pattern rules,
+ * so this is also the place to OVERRIDE a family-pattern result if a specific
+ * sku is mis-classified by the generic regex. Keys are the normalized `base`
+ * (no bracket variant).
+ */
+const KNOWN_TIER_BY_ID = Object.freeze({
+  'claude-opus-4-8': 'opus',
+  'claude-opus-4-7': 'opus',
+  'claude-sonnet-4-7': 'sonnet',
+  'claude-sonnet-4-6': 'sonnet',
+  'claude-sonnet-4-5': 'sonnet',
+  'claude-haiku-4-5': 'haiku',
+});
+/**
+ * ALIAS_MAP — extension point for families whose id does NOT contain the tier word.
+ *
+ * Currently EMPTY by design. The family-pattern rule (step c in tierForModelId)
+ * already covers any id literally containing `opus`/`sonnet`/`haiku`. Use this map
+ * ONLY for a future lineup whose product name omits the tier word.
+ *
+ * Example — when Anthropic publishes the `claude-fable-5` sku lineup and we learn
+ * it maps to opus-class routing, add (keyed on normalized base):
+ *
+ *     'claude-fable-5': 'opus',
+ *
+ * Until the lineup is public we leave it empty rather than guess — an unknown
+ * `claude-fable-5` correctly resolves to null (conservative pricing + warning).
+ */
+const ALIAS_MAP = Object.freeze({
+  // 'claude-fable-5': 'opus',  // <- add when the fable-5 sku lineup is public
+});
+// VARIANT_RE — one bracketed suffix anchored at the end of the id (trailing
+// whitespace tolerated). Capture group 1 is the text between the brackets,
+// which may be empty.
+const VARIANT_RE = /\[([^\]]*)\]\s*$/; // trailing bracketed suffix, e.g. [1m] / [200k]
+// FAMILY_RE — a tier word (opus|sonnet|haiku) standing as a whole
+// hyphen-delimited token: preceded by start-of-string or '-', and followed by
+// '-' or end-of-string. Capture group 1 is the tier word. The pattern has no
+// `i` flag, so on its own it only matches lower-case tier words.
+const FAMILY_RE = /(?:^|-)(opus|sonnet|haiku)(?:-|$)/;
+/**
+ * normalizeModelId(id) → { base, variant }
+ *
+ * Splits off a single trailing bracketed variant suffix (e.g. `[1m]`, `[200k]`),
+ * returning it lowercased with brackets removed as `variant`, and the remaining
+ * trimmed id as `base`. Date stamps in the base are preserved. Null/empty/
+ * undefined input yields `{ base: '', variant: null }`.
+ *
+ * @param {string|null|undefined} id
+ * @returns {{ base: string, variant: string|null }}
+ */
+function normalizeModelId(id) {
+  if (id == null) return { base: '', variant: null };
+  const s = String(id).trim();
+  if (s === '') return { base: '', variant: null };
+  const m = s.match(VARIANT_RE);
+  if (m) {
+    const variant = m[1].trim().toLowerCase();
+    const base = s.slice(0, m.index).trim();
+    return { base, variant: variant === '' ? null : variant };
+  }
+  return { base: s, variant: null };
+}
+/**
+ * tierForModelId(id) → 'opus' | 'sonnet' | 'haiku' | null
+ *
+ * Resolution order:
+ *   (a) normalize → work on `base`;
+ *   (b) exact match in KNOWN_TIER_BY_ID;
+ *   (c) family-pattern: base contains the tier word as a token;
+ *   (d) ALIAS_MAP (families whose id omits the tier word);
+ *   (e) otherwise null — UNKNOWN. Callers must price conservatively + loudly,
+ *       NOT treat null as a tier.
+ *
+ * @param {string|null|undefined} id
+ * @returns {'opus'|'sonnet'|'haiku'|null}
+ */
+function tierForModelId(id) {
+  const { base } = normalizeModelId(id);
+  if (base === '') return null;
+  // (b) exact known-id pin
+  if (Object.prototype.hasOwnProperty.call(KNOWN_TIER_BY_ID, base)) {
+    return KNOWN_TIER_BY_ID[base];
+  }
+  // (c) family-pattern (tier word appears as a token in the id)
+  const fam = base.match(FAMILY_RE);
+  if (fam) return fam[1];
+  // (d) alias for families whose id omits the tier word
+  if (Object.prototype.hasOwnProperty.call(ALIAS_MAP, base)) {
+    return ALIAS_MAP[base];
+  }
+  // (e) unknown → null (conservative pricing + loud warning is the caller's job)
+  return null;
+}
+module.exports = { normalizeModelId, tierForModelId, KNOWN_TIER_BY_ID, ALIAS_MAP };

package/scripts/lib/session-runner/index.ts CHANGED Viewed

@@ -122,6 +122,21 @@ const adaptiveModeLib = _nodeRequire(
   getMode: (opts?: { baseDir?: string; budgetPath?: string; quiet?: boolean }) => 'static' | 'hedge' | 'full';
 };
+// ── Phase 59-9 — model-id normalization + tiering (single source of truth) ───
+//
+// `scripts/lib/model-id.cjs` is the canonical id parser shared with the
+// budget-enforcer. We route BOTH tier-labeling (`tierFromModel`) and pricing
+// (`rateFor`) through it so a new model family is a DATA edit there / in the
+// price tables, never scattered substring logic here. `tierForModelId` returns
+// `null` for an unknown family — callers MUST treat that as "price
+// conservatively + loudly", never as a tier or as free.
+const modelId = _nodeRequire(
+  _resolve(_REPO_ROOT, 'scripts/lib/model-id.cjs'),
+) as {
+  normalizeModelId: (id: string | null | undefined) => { base: string; variant: string | null };
+  tierForModelId: (id: string | null | undefined) => 'opus' | 'sonnet' | 'haiku' | null;
+};
 /** Rate-guard provider key for the Anthropic Agent SDK. */
 const RATE_GUARD_PROVIDER = 'anthropic';
@@ -144,16 +159,24 @@ const SESSION_RUNNER_DEFAULT_BIN = 'medium';
  *
  * Used at the 4 terminal-emit sites where the final tier isn't already
  * carried on `opts` — we fall back to inspecting `usage.model` (folded
- * during the run loop from SDK chunks). Unknown / empty model names
- * default to 'sonnet' (matches the DEFAULT_MODEL_RATE choice and is
- * the safest middle tier for posterior arms).
+ * during the run loop from SDK chunks). Delegates to the shared
+ * `model-id.cjs` resolver (variant suffix like `[1m]` is stripped, known
+ * ids classified identically to before).
+ *
+ * The shared resolver returns `null` for an UNKNOWN family. For tier
+ * LABELING (telemetry / posterior arms) we map null → 'sonnet' as the
+ * safest middle tier so the bandit arms stay well-defined. This is a
+ * TELEMETRY default only — it does NOT influence PRICING. Pricing of an
+ * unknown family uses the conservative OPUS ceiling, resolved separately in
+ * `rateFor` (see DEFAULT_MODEL_RATE / tier fallback there). Keep the two
+ * concerns distinct: a wrong tier label mis-attributes a posterior arm; a
+ * wrong price under-bills a frontier model.
  */
 function tierFromModel(modelName: string | null | undefined): 'opus' | 'sonnet' | 'haiku' {
-  if (typeof modelName !== 'string' || modelName.length === 0) return 'sonnet';
-  const lower = modelName.toLowerCase();
-  if (lower.includes('opus')) return 'opus';
-  if (lower.includes('haiku')) return 'haiku';
-  return 'sonnet';
+  const tier = modelId.tierForModelId(modelName);
+  // null = unknown family → conservative TELEMETRY default (pricing handled
+  // separately + conservatively in rateFor).
+  return tier ?? 'sonnet';
 }
 /**
@@ -539,29 +562,72 @@ function _logPeerCallComplete(args: {
 const RETRY_BACKOFF = { baseMs: 1000, maxMs: 30_000 } as const;
 /**
- * Per-million-token USD rates. Unknown models default to the Sonnet
- * rate (safer overestimate — we'd rather cap early than under-bill).
+ * Per-million-token USD rates.
+ *
+ * Canonical price source is `reference/prices/claude.md`; this table mirrors
+ * it for the sync headless path — keep in lockstep.
+ *
+ * Unknown FAMILIES default to the OPUS ceiling (see DEFAULT_MODEL_RATE) — a
+ * conservative overestimate. We'd rather cap early than silently under-bill a
+ * frontier model. Known families fall back to their per-tier representative
+ * rate (PER_TIER_RATE) so a dated/variant sku still prices correctly.
  */
 const MODEL_RATES: Readonly<Record<string, { input: number; output: number }>> = Object.freeze({
+  'claude-opus-4-8': { input: 15, output: 75 },
   'claude-opus-4-7': { input: 15, output: 75 },
   'claude-sonnet-4-5': { input: 3, output: 15 },
   'claude-haiku-4-5': { input: 0.8, output: 4 },
 });
-const DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
-/** Resolve a per-M-token rate for a model name, matching prefix when possible. */
+/** Per-tier representative rates (match reference/prices/claude.md). Used as
+ *  the fallback when an exact/prefix MODEL_RATES match is absent but the
+ *  family tier is known. */
+const PER_TIER_RATE: Readonly<Record<'opus' | 'sonnet' | 'haiku', { input: number; output: number }>> =
+  Object.freeze({
+    opus: { input: 15, output: 75 },
+    sonnet: { input: 3, output: 15 },
+    haiku: { input: 1, output: 5 },
+  });
+/**
+ * DEFAULT_MODEL_RATE — conservative ceiling for a GENUINELY UNKNOWN family
+ * (tier resolves to null). Set to the OPUS rate, matching this file's own
+ * "safer overestimate" intent. The previous sonnet default UNDER-billed any
+ * frontier model whose id we did not yet recognize.
+ */
+const DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
+/**
+ * Resolve a per-M-token rate for a model name.
+ *
+ * Resolution order (conservative + robust):
+ *   1. normalize the id (strip `[1m]`/`[200k]` variant suffix) → work on base;
+ *   2. exact match in MODEL_RATES;
+ *   3. prefix match (e.g. "claude-opus-4-7-20250101" → "claude-opus-4-7");
+ *   4. per-tier fallback via `tierForModelId(base)` (opus/sonnet/haiku → that
+ *      tier's representative rate) — keeps dated/variant skus of a known
+ *      family priced correctly;
+ *   5. ONLY if the tier is null (genuinely unknown family) → DEFAULT_MODEL_RATE
+ *      (opus ceiling — price LOUDLY + CONSERVATIVELY, never $0 or sonnet).
+ */
 function rateFor(modelName: string | null): { input: number; output: number } {
   if (modelName === null || modelName === '') return DEFAULT_MODEL_RATE;
-  // Direct match first.
-  const direct = MODEL_RATES[modelName];
+  const { base } = modelId.normalizeModelId(modelName);
+  if (base === '') return DEFAULT_MODEL_RATE;
+  // (2) Direct match first.
+  const direct = MODEL_RATES[base];
   if (direct !== undefined) return direct;
-  // Prefix match (e.g. "claude-opus-4-7-20250101" → "claude-opus-4-7").
+  // (3) Prefix match.
   for (const key of Object.keys(MODEL_RATES)) {
-    if (modelName.startsWith(key)) {
+    if (base.startsWith(key)) {
       const hit = MODEL_RATES[key];
       if (hit !== undefined) return hit;
     }
   }
+  // (4) Per-tier fallback for a known family.
+  const tier = modelId.tierForModelId(base);
+  if (tier !== null) return PER_TIER_RATE[tier];
+  // (5) Unknown family → conservative opus ceiling.
   return DEFAULT_MODEL_RATE;
 }
@@ -1281,3 +1347,8 @@ function buildResult(args: BuildResultArgs): SessionResult {
 // invariant: session-runner consumers can rely on these constants being
 // stable across minor releases.
 export { MODEL_RATES, DEFAULT_MODEL_RATE, RATE_GUARD_PROVIDER };
+// Pricing internals exported for regression tests (Phase 59-9 model-cost-truth):
+// verify unknown families resolve to the conservative opus ceiling while known
+// families price correctly via the per-tier fallback.
+export { rateFor, usdCost, tierFromModel, PER_TIER_RATE };

package/sdk/cli/index.js CHANGED Viewed

@@ -5442,15 +5442,15 @@ var banditIntegration = _nodeRequire(
 var adaptiveModeLib = _nodeRequire(
   (0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/adaptive-mode.cjs")
 );
+var modelId = _nodeRequire(
+  (0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/model-id.cjs")
+);
 var RATE_GUARD_PROVIDER = "anthropic";
 var DEFAULT_MAX_RETRIES = 2;
 var SESSION_RUNNER_DEFAULT_BIN = "medium";
 function tierFromModel(modelName) {
-  if (typeof modelName !== "string" || modelName.length === 0) return "sonnet";
-  const lower = modelName.toLowerCase();
-  if (lower.includes("opus")) return "opus";
-  if (lower.includes("haiku")) return "haiku";
-  return "sonnet";
+  const tier = modelId.tierForModelId(modelName);
+  return tier ?? "sonnet";
 }
 function _recordBanditOutcome(input) {
   try {
@@ -5659,21 +5659,31 @@ function _logPeerCallComplete(args) {
 }
 var RETRY_BACKOFF = { baseMs: 1e3, maxMs: 3e4 };
 var MODEL_RATES = Object.freeze({
+  "claude-opus-4-8": { input: 15, output: 75 },
   "claude-opus-4-7": { input: 15, output: 75 },
   "claude-sonnet-4-5": { input: 3, output: 15 },
   "claude-haiku-4-5": { input: 0.8, output: 4 }
 });
-var DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
+var PER_TIER_RATE = Object.freeze({
+  opus: { input: 15, output: 75 },
+  sonnet: { input: 3, output: 15 },
+  haiku: { input: 1, output: 5 }
+});
+var DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
 function rateFor(modelName) {
   if (modelName === null || modelName === "") return DEFAULT_MODEL_RATE;
-  const direct = MODEL_RATES[modelName];
+  const { base } = modelId.normalizeModelId(modelName);
+  if (base === "") return DEFAULT_MODEL_RATE;
+  const direct = MODEL_RATES[base];
   if (direct !== void 0) return direct;
   for (const key of Object.keys(MODEL_RATES)) {
-    if (modelName.startsWith(key)) {
+    if (base.startsWith(key)) {
       const hit = MODEL_RATES[key];
       if (hit !== void 0) return hit;
     }
   }
+  const tier = modelId.tierForModelId(base);
+  if (tier !== null) return PER_TIER_RATE[tier];
   return DEFAULT_MODEL_RATE;
 }
 function usdCost(inputTokens, outputTokens, modelName) {