@hegemonart/get-design-done 1.59.8 → 1.59.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,14 +5,14 @@
5
5
  },
6
6
  "metadata": {
7
7
  "description": "Get Design Done — 5-stage agent-orchestrated design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 agents, 96 skills, 39 connection integrations, two MCP servers, opt-in SQLite state backbone, bidirectional Figma write-back, and a reflector-driven self-improvement loop. Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, and more.",
8
- "version": "1.59.8"
8
+ "version": "1.59.9"
9
9
  },
10
10
  "plugins": [
11
11
  {
12
12
  "name": "get-design-done",
13
13
  "source": "./",
14
14
  "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (gdd-state for typed STATE mutators, gdd-mcp for 13 read-only project-priming tools), tier-aware routing with cost telemetry, and defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer). Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
15
- "version": "1.59.8",
15
+ "version": "1.59.9",
16
16
  "author": {
17
17
  "name": "hegemonart"
18
18
  },
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "get-design-done",
3
3
  "short_name": "gdd",
4
- "version": "1.59.8",
4
+ "version": "1.59.9",
5
5
  "description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store for O(1) design-surface lookups, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (`gdd-state` for typed STATE mutators, `gdd-mcp` for 13 read-only project-priming tools), tier-aware agent routing with cost telemetry, defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer), and a cross-runtime install layer for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
6
6
  "author": {
7
7
  "name": "hegemonart",
package/CHANGELOG.md CHANGED
@@ -4,6 +4,32 @@ All notable changes to get-design-done are documented here. Versions follow [sem
4
4
 
5
5
  ---
6
6
 
7
+ ## [1.59.9] - 2026-06-10
8
+
9
+ New-model-family readiness and cost truth (audit `.planning/audits/SELF-AUDIT-v1.59.7.md` §4). A new or unknown Anthropic model previously degraded cost accounting silently - billed at $0 or the sonnet rate and mis-attributed to the sonnet tier. This release makes unknown models loud and conservative, handles the 1M-context `[1m]` variant, and records context-window size in the model registry.
10
+
11
+ ### Added
12
+
13
+ - **`scripts/lib/model-id.cjs`** - shared model-id normalization + tiering. `normalizeModelId` strips a bracketed variant suffix (`claude-opus-4-8[1m]` to base `claude-opus-4-8` + variant `1m`); `tierForModelId` resolves a tier via exact id, family pattern (the tier word `opus`/`sonnet`/`haiku` appearing as a hyphen-delimited token, e.g. `claude-opus-…`), or an extensible alias map, returning `null` for an unknown family so callers price it conservatively instead of guessing.
14
+ - **`context_window` in the model registry.** `reference/runtime-models.md` now records each Claude model's context window (the `claude-opus-4-8` `[1m]` variant is `1000000`; the others `200000`), allowed additively through both the JSON schema and the hand-rolled `parse-runtime-models.cjs` allowlist, with regenerated types. The 1M-context variant is now recognised as first-class metadata. (Deriving token budgets from it is deferred - this release records the fact.)
15
+ - **`claude-opus-4-8` price + tier mapping.** Added to `reference/prices/claude.md` and mapped as the `opus`/`high` model in `reference/runtime-models.md`; provenance refreshed. The stale tables previously topped out at `claude-opus-4-7`.
16
+
17
+ ### Fixed
18
+
19
+ - **Unknown / new models are now priced loudly and conservatively.** `budget-enforcer.cjs computeCost` normalizes the model id (so `[1m]` variants match their base row) and, when no price row matches, returns a conservative ceiling computed at the **opus** rate with `cost_estimated: true` and a `cost_lookup_fallback` telemetry event - instead of the old silent `cost_usd: null`. The headless `session-runner` `rateFor` falls back through the resolved tier's rate and uses the opus rate (not sonnet) as its last-resort default, ending the systematic under-billing of frontier models. Tier inference no longer relies on loose substring matching.
20
+
21
+ ### Deferred
22
+
23
+ - Making `reasoning-class` the canonical routing vocabulary (opus/sonnet/haiku as aliases) and deriving SDK token budgets from `context_window` + tiered >200k long-context pricing - tracked for a later phase; not required for new-family readiness.
24
+
25
+ ### Breaking changes
26
+
27
+ None.
28
+
29
+ 5,096/5,096 tests pass.
30
+
31
+ ---
32
+
7
33
  ## [1.59.8] - 2026-06-10
8
34
 
9
35
  Production-wiring repair and security hardening from a 4-agent self-audit (`.planning/audits/SELF-AUDIT-v1.59.7.md`). The theme: real, well-tested library code whose production call-sites silently neutered it. This release makes the wiring either true or honest.
@@ -91,6 +91,10 @@ interface BudgetEnforcerBackend {
91
91
  runtime_used: string | null;
92
92
  fallback: boolean;
93
93
  reason: string | null;
94
+ // Phase 59-9: true when the cost is a CONSERVATIVE ESTIMATE (opus ceiling)
95
+ // for an unknown/new model rather than a table-matched figure. Additive +
96
+ // optional — absent on the table-matched (branches 1-4) paths.
97
+ cost_estimated?: boolean;
94
98
  };
95
99
  modelFromResolved(resolved: unknown, agent: string): string | null;
96
100
  // Plan 33.6-03 (SC#6): the canonical cost-row payload builder (the
@@ -946,6 +950,40 @@ function emitCostRecorded(
946
950
  }
947
951
  }
948
952
 
953
+ /**
954
+ * Phase 59-9: emit a `cost_lookup_fallback` event when the cost backend
955
+ * could not table-match a model and fell back to the CONSERVATIVE OPUS
956
+ * CEILING (or, more rarely, returned an unpriced fallback). Makes an
957
+ * unknown/new model OBSERVABLE in telemetry instead of silently mis-billed
958
+ * (or billed as $0). Reuses the same BaseEvent envelope + appendEvent path
959
+ * as every other emit. Fail-open — never throws, never blocks the spawn.
960
+ */
961
+ function emitCostLookupFallback(
962
+ payload: {
963
+ runtime: string;
964
+ agent: string;
965
+ model_id: string | null;
966
+ tier: string | null;
967
+ reason: string | null;
968
+ cost_usd: number | null;
969
+ cost_estimated: boolean;
970
+ },
971
+ cycle?: string,
972
+ ): void {
973
+ const ev = {
974
+ type: 'cost_lookup_fallback',
975
+ timestamp: new Date().toISOString(),
976
+ sessionId: getSessionId(),
977
+ ...(cycle !== undefined && cycle !== 'unknown' ? { cycle } : {}),
978
+ payload,
979
+ };
980
+ try {
981
+ appendEvent(ev as unknown as HookFiredEvent);
982
+ } catch {
983
+ // Fail open.
984
+ }
985
+ }
986
+
949
987
  /**
950
988
  * Plan 27.5-02 / D-03: emit `bandit.tier_selected` event when the bandit
951
989
  * is consulted (regardless of whether it overrode the prior tier). The
@@ -1588,6 +1626,28 @@ export async function main(): Promise<void> {
1588
1626
  cycle,
1589
1627
  );
1590
1628
 
1629
+ // Phase 59-9: when the cost was a CONSERVATIVE ESTIMATE (unknown/new model
1630
+ // priced at the opus ceiling) rather than a table-matched figure, emit a
1631
+ // distinct telemetry signal so an unrecognized model is OBSERVABLE rather
1632
+ // than silently mis-billed. Best-effort, never throws (fail-open).
1633
+ if (
1634
+ costLookup.cost_estimated === true ||
1635
+ (costLookup.fallback === true && costLookup.reason === 'model_not_found')
1636
+ ) {
1637
+ emitCostLookupFallback(
1638
+ {
1639
+ runtime: runtimeId,
1640
+ agent,
1641
+ model_id: effectiveModelId ?? costLookup.model,
1642
+ tier: costLookup.tier ?? effectiveTier,
1643
+ reason: costLookup.reason,
1644
+ cost_usd: costLookup.cost_usd,
1645
+ cost_estimated: costLookup.cost_estimated === true,
1646
+ },
1647
+ cycle,
1648
+ );
1649
+ }
1650
+
1591
1651
  // Branch E: standard spawn-allowed (includes tier-downgraded path).
1592
1652
  writeTelemetry({
1593
1653
  agent,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hegemonart/get-design-done",
3
- "version": "1.59.8",
3
+ "version": "1.59.9",
4
4
  "description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
5
5
  "author": "Hegemon",
6
6
  "homepage": "https://github.com/hegemonart/get-design-done",
@@ -13,6 +13,17 @@
13
13
  | claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
14
14
  | claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
15
15
  | claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
16
+ | claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
17
+
18
+ > **>200k-input (1M-context) pricing note.** The rates above are the
19
+ > standard (≤200k-input) per-1M-token prices. Anthropic's flagship
20
+ > `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
21
+ > requests above the 200k-input threshold may be billed at a higher
22
+ > tiered rate. This table tracks only the standard tier today; the
23
+ > >200k tiered figure will be added as a separate row/column once
24
+ > Anthropic publishes it. The parser is positional-by-header and
25
+ > tolerates right-edge columns, so a future `>200k_input_per_1m`
26
+ > column can be appended without breaking cost lookups.
16
27
 
17
28
  ## size_budget → conservative token ranges
18
29
 
@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
44
44
  {
45
45
  "id": "claude",
46
46
  "tier_to_model": {
47
- "opus": { "model": "claude-opus-4-7" },
48
- "sonnet": { "model": "claude-sonnet-4-6" },
49
- "haiku": { "model": "claude-haiku-4-5" }
47
+ "opus": { "model": "claude-opus-4-8", "context_window": 1000000 },
48
+ "sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
49
+ "haiku": { "model": "claude-haiku-4-5", "context_window": 200000 }
50
50
  },
51
51
  "reasoning_class_to_model": {
52
- "high": { "model": "claude-opus-4-7" },
53
- "medium": { "model": "claude-sonnet-4-6" },
54
- "low": { "model": "claude-haiku-4-5" }
52
+ "high": { "model": "claude-opus-4-8", "context_window": 1000000 },
53
+ "medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
54
+ "low": { "model": "claude-haiku-4-5", "context_window": 200000 }
55
55
  },
56
56
  "provenance": [
57
57
  {
58
58
  "source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
59
- "retrieved_at": "2026-04-29T00:00:00.000Z",
60
- "last_validated_cycle": "2026-04-29-v1.26",
61
- "note": "Anthropic public model catalog — first-party runtime."
59
+ "retrieved_at": "2026-06-10T00:00:00.000Z",
60
+ "last_validated_cycle": "2026-06-10-v1.59.9",
61
+ "note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
62
62
  }
63
63
  ]
64
64
  }
@@ -1115,6 +1115,10 @@ export interface ModelRow {
1115
1115
  * Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
1116
1116
  */
1117
1117
  provider_model_id?: string;
1118
+ /**
1119
+ * Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
1120
+ */
1121
+ context_window?: number;
1118
1122
  }
1119
1123
 
1120
1124
  export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;
@@ -121,6 +121,11 @@
121
121
  "type": "string",
122
122
  "minLength": 1,
123
123
  "description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
124
+ },
125
+ "context_window": {
126
+ "type": "integer",
127
+ "minimum": 1,
128
+ "description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
124
129
  }
125
130
  }
126
131
  }
@@ -52,6 +52,8 @@
52
52
  const fs = require('node:fs');
53
53
  const path = require('node:path');
54
54
 
55
+ const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
56
+
55
57
  const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
56
58
  const DEFAULT_RUNTIME_ID = 'claude';
57
59
  const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
326
328
  tokens_out: Number(args.tokens_out || 0),
327
329
  cache_hit: args.cache_hit === true,
328
330
  };
331
+ // Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
332
+ // BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
333
+ // `claude-opus-4-8` row. The variant encodes a context-window SKU; the
334
+ // current price tables are keyed on the base id.
335
+ const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
336
+ ? args.model_id
337
+ : null;
338
+ const normalizedModelId = rawModelId !== null
339
+ ? (normalizeModelId(rawModelId).base || rawModelId)
340
+ : null;
329
341
  const q = {
330
- model_id: typeof args.model_id === 'string' && args.model_id.length > 0
331
- ? args.model_id
332
- : null,
342
+ model_id: normalizedModelId,
333
343
  tier: typeof args.tier === 'string' && args.tier.length > 0
334
344
  ? args.tier
335
345
  : null,
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
365
375
  }
366
376
  }
367
377
 
368
- // Branch 5: nothing matched.
378
+ // Branch 5: nothing matched. Rather than silently returning a null cost
379
+ // (which downstream aggregators treat as $0 — a frontier model billed as
380
+ // free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
381
+ // price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
382
+ // and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
383
+ const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
384
+ const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
385
+ const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
386
+ if (opusRow !== null) {
387
+ return {
388
+ cost_usd: applyFormula(opusRow, tokens),
389
+ model: normalizedModelId,
390
+ tier: 'opus',
391
+ runtime_used: DEFAULT_RUNTIME_ID,
392
+ fallback: true,
393
+ reason,
394
+ cost_estimated: true,
395
+ };
396
+ }
397
+ // Even the opus row is unavailable → genuinely cannot price. Keep null.
369
398
  return {
370
399
  cost_usd: null,
371
400
  model: null,
372
401
  tier: q.tier,
373
402
  runtime_used: null,
374
403
  fallback: false,
375
- reason: rows.length === 0 ? 'runtime_table_missing' : 'model_not_found',
404
+ reason,
376
405
  };
377
406
  }
378
407
 
@@ -78,7 +78,7 @@ function validateModelRow(row, where) {
78
78
  if (typeof row.model !== 'string' || row.model.length === 0) {
79
79
  throw new Error(`${where}: 'model' must be a non-empty string`);
80
80
  }
81
- const allowedKeys = new Set(['model', 'provider_model_id']);
81
+ const allowedKeys = new Set(['model', 'provider_model_id', 'context_window']);
82
82
  for (const k of Object.keys(row)) {
83
83
  if (!allowedKeys.has(k)) {
84
84
  throw new Error(`${where}: unknown key '${k}' (allowed: ${[...allowedKeys].join(', ')})`);
@@ -89,6 +89,14 @@ function validateModelRow(row, where) {
89
89
  throw new Error(`${where}: 'provider_model_id' must be a non-empty string when present`);
90
90
  }
91
91
  }
92
+ // Optional context-window size — mirror the schema (integer >= 1). Recorded as
93
+ // machine-readable metadata (the 1M-context [1m] opus variant); not yet a
94
+ // budgeting driver (deferred — no consumer wired this cycle).
95
+ if (row.context_window !== undefined) {
96
+ if (typeof row.context_window !== 'number' || !Number.isInteger(row.context_window) || row.context_window < 1) {
97
+ throw new Error(`${where}: 'context_window' must be a positive integer when present`);
98
+ }
99
+ }
92
100
  }
93
101
 
94
102
  function validateProvenance(arr, where) {
@@ -0,0 +1,141 @@
1
+ 'use strict';
2
+ /*
3
+ * scripts/lib/model-id.cjs — model-id normalization + tiering (pure, dependency-free).
4
+ *
5
+ * WHY THIS EXISTS
6
+ * ---------------
7
+ * Two unrelated callers need to reason about model ids in identical ways:
8
+ * - scripts/lib/session-runner/index.ts (routing: which tier am I running?)
9
+ * - scripts/lib/budget-enforcer.cjs (pricing: what does this model cost?)
10
+ * Each previously carried its own ad-hoc parsing, which drifted. This module is
11
+ * the single source of truth so a new model family is a DATA edit here (or in the
12
+ * price tables), never a logic change scattered across callers.
13
+ *
14
+ * DESIGN PRINCIPLES
15
+ * -----------------
16
+ * 1. TIER IS FOR ROUTING. `tierForModelId` answers "opus | sonnet | haiku" so the
17
+ * router can pick an agent class. It is NOT a pricing key on its own — pricing
18
+ * also depends on the exact id and (later) the context-window variant.
19
+ *
20
+ * 2. NULL MEANS UNKNOWN — PRICE CONSERVATIVELY + LOUDLY. We deliberately return
21
+ * `null` for ids we cannot confidently classify rather than guessing a tier.
22
+ * A wrong tier guess silently mis-routes or mis-prices. Callers MUST treat
23
+ * null as "unknown model — assume the most expensive plausible price AND warn",
24
+ * never as a tier and never as free. Do NOT add heuristic fallbacks that
25
+ * invent a tier for arbitrary strings.
26
+ *
27
+ * 3. VARIANT SUFFIX IS FOR CONTEXT-WINDOW-AWARE PRICING (LATER). Ids may carry a
28
+ * bracketed variant such as `claude-opus-4-8[1m]` or `...[200k]`. The variant
29
+ * encodes a context-window SKU that can have different per-token pricing. We
30
+ * split it off cleanly (`{ base, variant }`) so tiering operates on `base`
31
+ * while a future price table can key on `(base, variant)`. Date stamps in the
32
+ * base (e.g. `claude-opus-4-8-20260101`) are NOT variants and are left intact.
33
+ *
34
+ * 4. NEW FAMILIES ARE A DATA EDIT, NOT A CODE CHANGE. To onboard a new model:
35
+ * - if its id contains the tier word (opus/sonnet/haiku), the family-pattern
36
+ * rule already handles it — optionally pin it in KNOWN_TIER_BY_ID;
37
+ * - if its id does NOT contain the tier word (e.g. a hypothetical
38
+ * `claude-fable-5`), add one line to ALIAS_MAP (see comment there);
39
+ * - pricing specifics go in the caller's price table keyed on the exact id.
40
+ */
41
+
42
/**
 * KNOWN_TIER_BY_ID — explicit, exact-id → tier pins.
 *
 * Seeded with the currently-shipping ids. An exact match is consulted before
 * the family-pattern rule, so this is also the place to OVERRIDE a pattern
 * result for one specific sku. Keys are the normalized `base` (no bracket
 * variant) and MUST be lowercase: `tierForModelId` lowercases the id before
 * looking it up here.
 */
const KNOWN_TIER_BY_ID = Object.freeze({
  'claude-opus-4-8': 'opus',
  'claude-opus-4-7': 'opus',
  'claude-sonnet-4-7': 'sonnet',
  'claude-sonnet-4-6': 'sonnet',
  'claude-sonnet-4-5': 'sonnet',
  'claude-haiku-4-5': 'haiku',
});

/**
 * ALIAS_MAP — extension point for families whose id does NOT contain the tier word.
 *
 * Currently EMPTY by design. The family-pattern rule (step c in tierForModelId)
 * already covers any id containing `opus`/`sonnet`/`haiku` as a hyphen-delimited
 * token. Use this map ONLY for a future lineup whose product name omits the
 * tier word, keyed on the lowercase normalized base, e.g.:
 *
 *   'claude-fable-5': 'opus',
 *
 * Until such a lineup is public the map stays empty rather than guessing — an
 * unknown `claude-fable-5` resolves to null (conservative pricing + warning).
 */
const ALIAS_MAP = Object.freeze({
  // 'claude-fable-5': 'opus', // <- add when the fable-5 sku lineup is public
});

// Trailing bracketed suffix, e.g. `[1m]` / `[200k]` (optional trailing whitespace).
const VARIANT_RE = /\[([^\]]*)\]\s*$/;
// Tier word as a hyphen-delimited token. Applied to a LOWERCASED id, so the
// pattern itself stays lowercase-only and the captured group is always one of
// the three canonical tier strings.
const FAMILY_RE = /(?:^|-)(opus|sonnet|haiku)(?:-|$)/;

/**
 * normalizeModelId(id) → { base, variant }
 *
 * Splits off a single trailing bracketed variant suffix (e.g. `[1m]`, `[200k]`).
 * `variant` is the bracket content, trimmed and lowercased (null when there is
 * no suffix or the brackets are empty). `base` is the remaining id, trimmed but
 * otherwise returned verbatim — its case and any date stamp are preserved so
 * callers that key a lookup on the exact id see the id they passed in.
 * Null/undefined/blank input yields `{ base: '', variant: null }`.
 *
 * @param {string|null|undefined} id
 * @returns {{ base: string, variant: string|null }}
 */
function normalizeModelId(id) {
  if (id == null) return { base: '', variant: null };
  const s = String(id).trim();
  if (s === '') return { base: '', variant: null };

  const m = s.match(VARIANT_RE);
  if (m) {
    const variant = m[1].trim().toLowerCase();
    const base = s.slice(0, m.index).trim();
    return { base, variant: variant === '' ? null : variant };
  }
  return { base: s, variant: null };
}

/**
 * tierForModelId(id) → 'opus' | 'sonnet' | 'haiku' | null
 *
 * Resolution order:
 *   (a) normalize → take `base`, lowercased for matching only;
 *   (b) exact match in KNOWN_TIER_BY_ID;
 *   (c) family-pattern: the id contains the tier word as a hyphen-delimited token;
 *   (d) ALIAS_MAP (families whose id omits the tier word);
 *   (e) otherwise null — UNKNOWN. Callers must price conservatively + loudly,
 *       NOT treat null as a tier.
 *
 * Matching is case-insensitive: an id that differs from a known id only by
 * letter case (e.g. `Claude-Opus-4-8`) resolves to the same tier instead of
 * falling through to null. The token rule is still strict — a tier word buried
 * inside another word (e.g. `octopus-1`) does not match.
 *
 * @param {string|null|undefined} id
 * @returns {'opus'|'sonnet'|'haiku'|null}
 */
function tierForModelId(id) {
  const { base } = normalizeModelId(id);
  if (base === '') return null;

  // Match on a lowercase key; the maps and FAMILY_RE are lowercase-only.
  const key = base.toLowerCase();

  // (b) exact known-id pin
  if (Object.prototype.hasOwnProperty.call(KNOWN_TIER_BY_ID, key)) {
    return KNOWN_TIER_BY_ID[key];
  }

  // (c) family-pattern (tier word appears as a token in the id)
  const fam = key.match(FAMILY_RE);
  if (fam) return fam[1];

  // (d) alias for families whose id omits the tier word
  if (Object.prototype.hasOwnProperty.call(ALIAS_MAP, key)) {
    return ALIAS_MAP[key];
  }

  // (e) unknown → null (conservative pricing + loud warning is the caller's job)
  return null;
}
140
+
141
+ module.exports = { normalizeModelId, tierForModelId, KNOWN_TIER_BY_ID, ALIAS_MAP };
@@ -122,6 +122,21 @@ const adaptiveModeLib = _nodeRequire(
122
122
  getMode: (opts?: { baseDir?: string; budgetPath?: string; quiet?: boolean }) => 'static' | 'hedge' | 'full';
123
123
  };
124
124
 
125
+ // ── Phase 59-9 — model-id normalization + tiering (single source of truth) ───
126
+ //
127
+ // `scripts/lib/model-id.cjs` is the canonical id parser shared with the
128
+ // budget-enforcer. We route BOTH tier-labeling (`tierFromModel`) and pricing
129
+ // (`rateFor`) through it so a new model family is a DATA edit there / in the
130
+ // price tables, never scattered substring logic here. `tierForModelId` returns
131
+ // `null` for an unknown family — callers MUST treat that as "price
132
+ // conservatively + loudly", never as a tier or as free.
133
+ const modelId = _nodeRequire(
134
+ _resolve(_REPO_ROOT, 'scripts/lib/model-id.cjs'),
135
+ ) as {
136
+ normalizeModelId: (id: string | null | undefined) => { base: string; variant: string | null };
137
+ tierForModelId: (id: string | null | undefined) => 'opus' | 'sonnet' | 'haiku' | null;
138
+ };
139
+
125
140
  /** Rate-guard provider key for the Anthropic Agent SDK. */
126
141
  const RATE_GUARD_PROVIDER = 'anthropic';
127
142
 
@@ -144,16 +159,24 @@ const SESSION_RUNNER_DEFAULT_BIN = 'medium';
144
159
  *
145
160
  * Used at the 4 terminal-emit sites where the final tier isn't already
146
161
  * carried on `opts` — we fall back to inspecting `usage.model` (folded
147
- * during the run loop from SDK chunks). Unknown / empty model names
148
- * default to 'sonnet' (matches the DEFAULT_MODEL_RATE choice and is
149
- * the safest middle tier for posterior arms).
162
+ * during the run loop from SDK chunks). Delegates to the shared
163
+ * `model-id.cjs` resolver (variant suffix like `[1m]` is stripped, known
164
+ * ids classified identically to before).
165
+ *
166
+ * The shared resolver returns `null` for an UNKNOWN family. For tier
167
+ * LABELING (telemetry / posterior arms) we map null → 'sonnet' as the
168
+ * safest middle tier so the bandit arms stay well-defined. This is a
169
+ * TELEMETRY default only — it does NOT influence PRICING. Pricing of an
170
+ * unknown family uses the conservative OPUS ceiling, resolved separately in
171
+ * `rateFor` (see DEFAULT_MODEL_RATE / tier fallback there). Keep the two
172
+ * concerns distinct: a wrong tier label mis-attributes a posterior arm; a
173
+ * wrong price under-bills a frontier model.
150
174
  */
151
175
  function tierFromModel(modelName: string | null | undefined): 'opus' | 'sonnet' | 'haiku' {
152
- if (typeof modelName !== 'string' || modelName.length === 0) return 'sonnet';
153
- const lower = modelName.toLowerCase();
154
- if (lower.includes('opus')) return 'opus';
155
- if (lower.includes('haiku')) return 'haiku';
156
- return 'sonnet';
176
+ const tier = modelId.tierForModelId(modelName);
177
+ // null = unknown family → neutral 'sonnet' TELEMETRY label only (pricing is
178
+ // handled separately, at the conservative opus ceiling, in rateFor).
179
+ return tier ?? 'sonnet';
157
180
  }
158
181
 
159
182
  /**
@@ -539,29 +562,72 @@ function _logPeerCallComplete(args: {
539
562
  const RETRY_BACKOFF = { baseMs: 1000, maxMs: 30_000 } as const;
540
563
 
541
564
  /**
542
- * Per-million-token USD rates. Unknown models default to the Sonnet
543
- * rate (safer overestimate — we'd rather cap early than under-bill).
565
+ * Per-million-token USD rates.
566
+ *
567
+ * Canonical price source is `reference/prices/claude.md`; this table mirrors
568
+ * it for the sync headless path — keep in lockstep.
569
+ *
570
+ * Unknown FAMILIES default to the OPUS ceiling (see DEFAULT_MODEL_RATE) — a
571
+ * conservative overestimate. We'd rather cap early than silently under-bill a
572
+ * frontier model. Known families fall back to their per-tier representative
573
+ * rate (PER_TIER_RATE) so a dated/variant sku still prices correctly.
544
574
  */
545
575
  const MODEL_RATES: Readonly<Record<string, { input: number; output: number }>> = Object.freeze({
576
+ 'claude-opus-4-8': { input: 15, output: 75 },
546
577
  'claude-opus-4-7': { input: 15, output: 75 },
547
578
  'claude-sonnet-4-5': { input: 3, output: 15 },
548
579
  'claude-haiku-4-5': { input: 0.8, output: 4 },
549
580
  });
550
- const DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
551
581
 
552
- /** Resolve a per-M-token rate for a model name, matching prefix when possible. */
582
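+ /** Per-tier representative rates (intended to match reference/prices/claude.md).
583
+ * Used as the fallback when an exact/prefix MODEL_RATES match is absent but the
584
+ * family tier is known. NOTE(review): haiku here (1/5) differs from the MODEL_RATES 'claude-haiku-4-5' row (0.8/4) — confirm which figure is current. */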
585
+ const PER_TIER_RATE: Readonly<Record<'opus' | 'sonnet' | 'haiku', { input: number; output: number }>> =
586
+ Object.freeze({
587
+ opus: { input: 15, output: 75 },
588
+ sonnet: { input: 3, output: 15 },
589
+ haiku: { input: 1, output: 5 },
590
+ });
591
+
592
+ /**
593
+ * DEFAULT_MODEL_RATE — conservative ceiling for a GENUINELY UNKNOWN family
594
+ * (tier resolves to null). Set to the OPUS rate, matching this file's own
595
+ * "safer overestimate" intent. The previous sonnet default UNDER-billed any
596
+ * frontier model whose id we did not yet recognize.
597
+ */
598
+ const DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
599
+
600
+ /**
601
+ * Resolve a per-M-token rate for a model name.
602
+ *
603
+ * Resolution order (conservative + robust):
604
+ * 1. normalize the id (strip `[1m]`/`[200k]` variant suffix) → work on base;
605
+ * 2. exact match in MODEL_RATES;
606
+ * 3. prefix match (e.g. "claude-opus-4-7-20250101" → "claude-opus-4-7");
607
+ * 4. per-tier fallback via `tierForModelId(base)` (opus/sonnet/haiku → that
608
+ * tier's representative rate) — keeps dated/variant skus of a known
609
+ * family priced correctly;
610
+ * 5. ONLY if the tier is null (genuinely unknown family) → DEFAULT_MODEL_RATE
611
+ * (opus ceiling — price LOUDLY + CONSERVATIVELY, never $0 or sonnet).
612
+ */
553
613
  function rateFor(modelName: string | null): { input: number; output: number } {
554
614
  if (modelName === null || modelName === '') return DEFAULT_MODEL_RATE;
555
- // Direct match first.
556
- const direct = MODEL_RATES[modelName];
615
+ const { base } = modelId.normalizeModelId(modelName);
616
+ if (base === '') return DEFAULT_MODEL_RATE;
617
+ // (2) Direct match first.
618
+ const direct = MODEL_RATES[base];
557
619
  if (direct !== undefined) return direct;
558
- // Prefix match (e.g. "claude-opus-4-7-20250101" → "claude-opus-4-7").
620
+ // (3) Prefix match.
559
621
  for (const key of Object.keys(MODEL_RATES)) {
560
- if (modelName.startsWith(key)) {
622
+ if (base.startsWith(key)) {
561
623
  const hit = MODEL_RATES[key];
562
624
  if (hit !== undefined) return hit;
563
625
  }
564
626
  }
627
+ // (4) Per-tier fallback for a known family.
628
+ const tier = modelId.tierForModelId(base);
629
+ if (tier !== null) return PER_TIER_RATE[tier];
630
+ // (5) Unknown family → conservative opus ceiling.
565
631
  return DEFAULT_MODEL_RATE;
566
632
  }
567
633
 
@@ -1281,3 +1347,8 @@ function buildResult(args: BuildResultArgs): SessionResult {
1281
1347
  // invariant: session-runner consumers can rely on these constants being
1282
1348
  // stable across minor releases.
1283
1349
  export { MODEL_RATES, DEFAULT_MODEL_RATE, RATE_GUARD_PROVIDER };
1350
+
1351
+ // Pricing internals exported for regression tests (Phase 59-9 model-cost-truth):
1352
+ // verify unknown families resolve to the conservative opus ceiling while known
1353
+ // families price correctly via the per-tier fallback.
1354
+ export { rateFor, usdCost, tierFromModel, PER_TIER_RATE };
package/sdk/cli/index.js CHANGED
@@ -5442,15 +5442,15 @@ var banditIntegration = _nodeRequire(
5442
5442
  var adaptiveModeLib = _nodeRequire(
5443
5443
  (0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/adaptive-mode.cjs")
5444
5444
  );
5445
+ var modelId = _nodeRequire(
5446
+ (0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/model-id.cjs")
5447
+ );
5445
5448
  var RATE_GUARD_PROVIDER = "anthropic";
5446
5449
  var DEFAULT_MAX_RETRIES = 2;
5447
5450
  var SESSION_RUNNER_DEFAULT_BIN = "medium";
5448
5451
  function tierFromModel(modelName) {
5449
- if (typeof modelName !== "string" || modelName.length === 0) return "sonnet";
5450
- const lower = modelName.toLowerCase();
5451
- if (lower.includes("opus")) return "opus";
5452
- if (lower.includes("haiku")) return "haiku";
5453
- return "sonnet";
5452
+ const tier = modelId.tierForModelId(modelName);
5453
+ return tier ?? "sonnet";
5454
5454
  }
5455
5455
  function _recordBanditOutcome(input) {
5456
5456
  try {
@@ -5659,21 +5659,31 @@ function _logPeerCallComplete(args) {
5659
5659
  }
5660
5660
  var RETRY_BACKOFF = { baseMs: 1e3, maxMs: 3e4 };
5661
5661
  var MODEL_RATES = Object.freeze({
5662
+ "claude-opus-4-8": { input: 15, output: 75 },
5662
5663
  "claude-opus-4-7": { input: 15, output: 75 },
5663
5664
  "claude-sonnet-4-5": { input: 3, output: 15 },
5664
5665
  "claude-haiku-4-5": { input: 0.8, output: 4 }
5665
5666
  });
5666
- var DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
5667
+ var PER_TIER_RATE = Object.freeze({
5668
+ opus: { input: 15, output: 75 },
5669
+ sonnet: { input: 3, output: 15 },
5670
+ haiku: { input: 1, output: 5 }
5671
+ });
5672
+ var DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
5667
5673
  function rateFor(modelName) {
5668
5674
  if (modelName === null || modelName === "") return DEFAULT_MODEL_RATE;
5669
- const direct = MODEL_RATES[modelName];
5675
+ const { base } = modelId.normalizeModelId(modelName);
5676
+ if (base === "") return DEFAULT_MODEL_RATE;
5677
+ const direct = MODEL_RATES[base];
5670
5678
  if (direct !== void 0) return direct;
5671
5679
  for (const key of Object.keys(MODEL_RATES)) {
5672
- if (modelName.startsWith(key)) {
5680
+ if (base.startsWith(key)) {
5673
5681
  const hit = MODEL_RATES[key];
5674
5682
  if (hit !== void 0) return hit;
5675
5683
  }
5676
5684
  }
5685
+ const tier = modelId.tierForModelId(base);
5686
+ if (tier !== null) return PER_TIER_RATE[tier];
5677
5687
  return DEFAULT_MODEL_RATE;
5678
5688
  }
5679
5689
  function usdCost(inputTokens, outputTokens, modelName) {