@hegemonart/get-design-done 1.59.8 → 1.59.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/CHANGELOG.md +26 -0
- package/hooks/budget-enforcer.ts +60 -0
- package/package.json +1 -1
- package/reference/prices/claude.md +11 -0
- package/reference/runtime-models.md +9 -9
- package/reference/schemas/generated.d.ts +4 -0
- package/reference/schemas/runtime-models.schema.json +5 -0
- package/scripts/lib/budget-enforcer.cjs +34 -5
- package/scripts/lib/install/parse-runtime-models.cjs +9 -1
- package/scripts/lib/model-id.cjs +141 -0
- package/scripts/lib/session-runner/index.ts +87 -16
- package/sdk/cli/index.js +18 -8
|
@@ -5,14 +5,14 @@
|
|
|
5
5
|
},
|
|
6
6
|
"metadata": {
|
|
7
7
|
"description": "Get Design Done — 5-stage agent-orchestrated design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 agents, 96 skills, 39 connection integrations, two MCP servers, opt-in SQLite state backbone, bidirectional Figma write-back, and a reflector-driven self-improvement loop. Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, and more.",
|
|
8
|
-
"version": "1.59.8"
|
|
8
|
+
"version": "1.59.9"
|
|
9
9
|
},
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "get-design-done",
|
|
13
13
|
"source": "./",
|
|
14
14
|
"description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (gdd-state for typed STATE mutators, gdd-mcp for 13 read-only project-priming tools), tier-aware routing with cost telemetry, and defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer). Cross-runtime install for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
|
|
15
|
-
"version": "1.59.8",
|
|
15
|
+
"version": "1.59.9",
|
|
16
16
|
"author": {
|
|
17
17
|
"name": "hegemonart"
|
|
18
18
|
},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "get-design-done",
|
|
3
3
|
"short_name": "gdd",
|
|
4
|
-
"version": "1.59.8",
|
|
4
|
+
"version": "1.59.9",
|
|
5
5
|
"description": "Agent-orchestrated 5-stage design pipeline (Brief → Explore → Plan → Design → Verify) for AI coding agents. 64 specialized agents, 96 skills, 39 connection integrations (Figma, Refero, Preview, Storybook, Chromatic, Graphify, Linear, Jira, Notion, …), bidirectional Figma write-back, queryable intel store for O(1) design-surface lookups, opt-in SQLite state backbone, and a reflector-driven self-improvement loop. Two MCP servers (`gdd-state` for typed STATE mutators, `gdd-mcp` for 13 read-only project-priming tools), tier-aware agent routing with cost telemetry, defense-in-depth hooks (protected paths, MCP circuit breaker, injection scanner, budget enforcer), and a cross-runtime install layer for Claude Code, Codex, Cursor, OpenCode, Gemini, Copilot, and more.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "hegemonart",
|
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,32 @@ All notable changes to get-design-done are documented here. Versions follow [sem
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
+
## [1.59.9] - 2026-06-10
|
|
8
|
+
|
|
9
|
+
New-model-family readiness and cost truth (audit `.planning/audits/SELF-AUDIT-v1.59.7.md` §4). A new or unknown Anthropic model previously degraded cost accounting silently - billed at $0 or the sonnet rate and mis-attributed to the sonnet tier. This release makes unknown models loud and conservative, handles the 1M-context `[1m]` variant, and records context-window size in the model registry.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **`scripts/lib/model-id.cjs`** - shared model-id normalization + tiering. `normalizeModelId` strips a bracketed variant suffix (`claude-opus-4-8[1m]` to base `claude-opus-4-8` + variant `1m`); `tierForModelId` resolves a tier via exact id, family pattern (`claude-(opus|sonnet|haiku)`), or an extensible alias map, returning `null` for an unknown family so callers price it conservatively instead of guessing.
|
|
14
|
+
- **`context_window` in the model registry.** `reference/runtime-models.md` now records each Claude model's context window (the `claude-opus-4-8` `[1m]` variant is `1000000`; the others `200000`), allowed additively through both the JSON schema and the hand-rolled `parse-runtime-models.cjs` allowlist, with regenerated types. The 1M-context variant is now recognised as first-class metadata. (Deriving token budgets from it is deferred - this release records the fact.)
|
|
15
|
+
- **`claude-opus-4-8` price + tier mapping.** Added to `reference/prices/claude.md` and mapped as the `opus`/`high` model in `reference/runtime-models.md`; provenance refreshed. The stale tables previously topped out at `claude-opus-4-7`.
|
|
16
|
+
|
|
17
|
+
### Fixed
|
|
18
|
+
|
|
19
|
+
- **Unknown / new models are now priced loudly and conservatively.** `budget-enforcer.cjs computeCost` normalizes the model id (so `[1m]` variants match their base row) and, when no price row matches, returns a conservative ceiling computed at the **opus** rate with `cost_estimated: true` and a `cost_lookup_fallback` telemetry event - instead of the old silent `cost_usd: null`. The headless `session-runner` `rateFor` falls back through the resolved tier's rate and uses the opus rate (not sonnet) as its last-resort default, ending the systematic under-billing of frontier models. Tier inference no longer relies on loose substring matching.
|
|
20
|
+
|
|
21
|
+
### Deferred
|
|
22
|
+
|
|
23
|
+
- Making `reasoning-class` the canonical routing vocabulary (opus/sonnet/haiku as aliases) and deriving SDK token budgets from `context_window` + tiered >200k long-context pricing - tracked for a later phase; not required for new-family readiness.
|
|
24
|
+
|
|
25
|
+
### Breaking changes
|
|
26
|
+
|
|
27
|
+
None.
|
|
28
|
+
|
|
29
|
+
5,096/5,096 tests pass.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
7
33
|
## [1.59.8] - 2026-06-10
|
|
8
34
|
|
|
9
35
|
Production-wiring repair and security hardening from a 4-agent self-audit (`.planning/audits/SELF-AUDIT-v1.59.7.md`). The theme: real, well-tested library code whose production call-sites silently neutered it. This release makes the wiring either true or honest.
|
package/hooks/budget-enforcer.ts
CHANGED
|
@@ -91,6 +91,10 @@ interface BudgetEnforcerBackend {
|
|
|
91
91
|
runtime_used: string | null;
|
|
92
92
|
fallback: boolean;
|
|
93
93
|
reason: string | null;
|
|
94
|
+
// Phase 59-9: true when the cost is a CONSERVATIVE ESTIMATE (opus ceiling)
|
|
95
|
+
// for an unknown/new model rather than a table-matched figure. Additive +
|
|
96
|
+
// optional — absent on the table-matched (branches 1-4) paths.
|
|
97
|
+
cost_estimated?: boolean;
|
|
94
98
|
};
|
|
95
99
|
modelFromResolved(resolved: unknown, agent: string): string | null;
|
|
96
100
|
// Plan 33.6-03 (SC#6): the canonical cost-row payload builder (the
|
|
@@ -946,6 +950,40 @@ function emitCostRecorded(
|
|
|
946
950
|
}
|
|
947
951
|
}
|
|
948
952
|
|
|
953
|
+
/**
|
|
954
|
+
* Phase 59-9: emit a `cost_lookup_fallback` event when the cost backend
|
|
955
|
+
* could not table-match a model and fell back to the CONSERVATIVE OPUS
|
|
956
|
+
* CEILING (or, more rarely, returned an unpriced fallback). Makes an
|
|
957
|
+
* unknown/new model OBSERVABLE in telemetry instead of silently mis-billed
|
|
958
|
+
* (or billed as $0). Reuses the same BaseEvent envelope + appendEvent path
|
|
959
|
+
* as every other emit. Fail-open — never throws, never blocks the spawn.
|
|
960
|
+
*/
|
|
961
|
+
function emitCostLookupFallback(
|
|
962
|
+
payload: {
|
|
963
|
+
runtime: string;
|
|
964
|
+
agent: string;
|
|
965
|
+
model_id: string | null;
|
|
966
|
+
tier: string | null;
|
|
967
|
+
reason: string | null;
|
|
968
|
+
cost_usd: number | null;
|
|
969
|
+
cost_estimated: boolean;
|
|
970
|
+
},
|
|
971
|
+
cycle?: string,
|
|
972
|
+
): void {
|
|
973
|
+
const ev = {
|
|
974
|
+
type: 'cost_lookup_fallback',
|
|
975
|
+
timestamp: new Date().toISOString(),
|
|
976
|
+
sessionId: getSessionId(),
|
|
977
|
+
...(cycle !== undefined && cycle !== 'unknown' ? { cycle } : {}),
|
|
978
|
+
payload,
|
|
979
|
+
};
|
|
980
|
+
try {
|
|
981
|
+
appendEvent(ev as unknown as HookFiredEvent);
|
|
982
|
+
} catch {
|
|
983
|
+
// Fail open.
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
|
|
949
987
|
/**
|
|
950
988
|
* Plan 27.5-02 / D-03: emit `bandit.tier_selected` event when the bandit
|
|
951
989
|
* is consulted (regardless of whether it overrode the prior tier). The
|
|
@@ -1588,6 +1626,28 @@ export async function main(): Promise<void> {
|
|
|
1588
1626
|
cycle,
|
|
1589
1627
|
);
|
|
1590
1628
|
|
|
1629
|
+
// Phase 59-9: when the cost was a CONSERVATIVE ESTIMATE (unknown/new model
|
|
1630
|
+
// priced at the opus ceiling) rather than a table-matched figure, emit a
|
|
1631
|
+
// distinct telemetry signal so an unrecognized model is OBSERVABLE rather
|
|
1632
|
+
// than silently mis-billed. Best-effort, never throws (fail-open).
|
|
1633
|
+
if (
|
|
1634
|
+
costLookup.cost_estimated === true ||
|
|
1635
|
+
(costLookup.fallback === true && costLookup.reason === 'model_not_found')
|
|
1636
|
+
) {
|
|
1637
|
+
emitCostLookupFallback(
|
|
1638
|
+
{
|
|
1639
|
+
runtime: runtimeId,
|
|
1640
|
+
agent,
|
|
1641
|
+
model_id: effectiveModelId ?? costLookup.model,
|
|
1642
|
+
tier: costLookup.tier ?? effectiveTier,
|
|
1643
|
+
reason: costLookup.reason,
|
|
1644
|
+
cost_usd: costLookup.cost_usd,
|
|
1645
|
+
cost_estimated: costLookup.cost_estimated === true,
|
|
1646
|
+
},
|
|
1647
|
+
cycle,
|
|
1648
|
+
);
|
|
1649
|
+
}
|
|
1650
|
+
|
|
1591
1651
|
// Branch E: standard spawn-allowed (includes tier-downgraded path).
|
|
1592
1652
|
writeTelemetry({
|
|
1593
1653
|
agent,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hegemonart/get-design-done",
|
|
3
|
-
"version": "1.59.8",
|
|
3
|
+
"version": "1.59.9",
|
|
4
4
|
"description": "A design-quality pipeline for AI coding agents: brief, explore, plan, design, and verify UI work against your design system.",
|
|
5
5
|
"author": "Hegemon",
|
|
6
6
|
"homepage": "https://github.com/hegemonart/get-design-done",
|
|
@@ -13,6 +13,17 @@
|
|
|
13
13
|
| claude-sonnet-4-7 | sonnet | 3.00 | 15.00 | 0.30 |
|
|
14
14
|
| claude-sonnet-4-6 | sonnet | 3.00 | 15.00 | 0.30 |
|
|
15
15
|
| claude-opus-4-7 | opus | 15.00 | 75.00 | 1.50 |
|
|
16
|
+
| claude-opus-4-8 | opus | 15.00 | 75.00 | 1.50 |
|
|
17
|
+
|
|
18
|
+
> **>200k-input (1M-context) pricing note.** The rates above are the
|
|
19
|
+
> standard (≤200k-input) per-1M-token prices. Anthropic's flagship
|
|
20
|
+
> `claude-opus-4-8` ships a 1M-context (`[1m]`) variant; long-context
|
|
21
|
+
> requests above the 200k-input threshold may be billed at a higher
|
|
22
|
+
> tiered rate. This table tracks only the standard tier today; the
|
|
23
|
+
> >200k tiered figure will be added as a separate row/column once
|
|
24
|
+
> Anthropic publishes it. The parser is positional-by-header and
|
|
25
|
+
> tolerates right-edge columns, so a future `>200k_input_per_1m`
|
|
26
|
+
> column can be appended without breaking cost lookups.
|
|
16
27
|
|
|
17
28
|
## size_budget → conservative token ranges
|
|
18
29
|
|
|
@@ -44,21 +44,21 @@ Anthropic's first-party runtime. Public tier docs at https://docs.anthropic.com/
|
|
|
44
44
|
{
|
|
45
45
|
"id": "claude",
|
|
46
46
|
"tier_to_model": {
|
|
47
|
-
"opus": { "model": "claude-opus-4-7" },
|
|
48
|
-
"sonnet": { "model": "claude-sonnet-4-6" },
|
|
49
|
-
"haiku": { "model": "claude-haiku-4-5" }
|
|
47
|
+
"opus": { "model": "claude-opus-4-8", "context_window": 1000000 },
|
|
48
|
+
"sonnet": { "model": "claude-sonnet-4-6", "context_window": 200000 },
|
|
49
|
+
"haiku": { "model": "claude-haiku-4-5", "context_window": 200000 }
|
|
50
50
|
},
|
|
51
51
|
"reasoning_class_to_model": {
|
|
52
|
-
"high": { "model": "claude-opus-4-7" },
|
|
53
|
-
"medium": { "model": "claude-sonnet-4-6" },
|
|
54
|
-
"low": { "model": "claude-haiku-4-5" }
|
|
52
|
+
"high": { "model": "claude-opus-4-8", "context_window": 1000000 },
|
|
53
|
+
"medium": { "model": "claude-sonnet-4-6", "context_window": 200000 },
|
|
54
|
+
"low": { "model": "claude-haiku-4-5", "context_window": 200000 }
|
|
55
55
|
},
|
|
56
56
|
"provenance": [
|
|
57
57
|
{
|
|
58
58
|
"source_url": "https://docs.anthropic.com/en/docs/about-claude/models",
|
|
59
|
-
"retrieved_at": "2026-
|
|
60
|
-
"last_validated_cycle": "2026-
|
|
61
|
-
"note": "Anthropic public model catalog — first-party runtime."
|
|
59
|
+
"retrieved_at": "2026-06-10T00:00:00.000Z",
|
|
60
|
+
"last_validated_cycle": "2026-06-10-v1.59.9",
|
|
61
|
+
"note": "Anthropic public model catalog — first-party runtime. Opus tier moved to claude-opus-4-8 (1M-context [1m] variant) this cycle."
|
|
62
62
|
}
|
|
63
63
|
]
|
|
64
64
|
}
|
|
@@ -1115,6 +1115,10 @@ export interface ModelRow {
|
|
|
1115
1115
|
* Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03).
|
|
1116
1116
|
*/
|
|
1117
1117
|
provider_model_id?: string;
|
|
1118
|
+
/**
|
|
1119
|
+
* Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired).
|
|
1120
|
+
*/
|
|
1121
|
+
context_window?: number;
|
|
1118
1122
|
}
|
|
1119
1123
|
|
|
1120
1124
|
export type RuntimeModelsSchema = RuntimeModelsTierToModelMap;
|
|
@@ -121,6 +121,11 @@
|
|
|
121
121
|
"type": "string",
|
|
122
122
|
"minLength": 1,
|
|
123
123
|
"description": "Optional internal/provider model ID for runtimes whose API identifiers differ from the public name (D-03)."
|
|
124
|
+
},
|
|
125
|
+
"context_window": {
|
|
126
|
+
"type": "integer",
|
|
127
|
+
"minimum": 1,
|
|
128
|
+
"description": "Optional context-window size (max tokens) for this model, recorded as machine-readable metadata. The Anthropic opus tier ships the 1M-context [1m] variant (1000000). Recorded as fact this cycle; not yet a budgeting driver (deferred — no consumer is wired)."
|
|
124
129
|
}
|
|
125
130
|
}
|
|
126
131
|
}
|
|
@@ -52,6 +52,8 @@
|
|
|
52
52
|
const fs = require('node:fs');
|
|
53
53
|
const path = require('node:path');
|
|
54
54
|
|
|
55
|
+
const { normalizeModelId, tierForModelId } = require('./model-id.cjs');
|
|
56
|
+
|
|
55
57
|
const REPO_ROOT_GUESS = path.resolve(__dirname, '..', '..');
|
|
56
58
|
const DEFAULT_RUNTIME_ID = 'claude';
|
|
57
59
|
const VALID_TIERS = Object.freeze(['opus', 'sonnet', 'haiku']);
|
|
@@ -326,10 +328,18 @@ function computeCost(args, opts) {
|
|
|
326
328
|
tokens_out: Number(args.tokens_out || 0),
|
|
327
329
|
cache_hit: args.cache_hit === true,
|
|
328
330
|
};
|
|
331
|
+
// Normalize the model id (strip a trailing `[1m]`/`[200k]` variant suffix)
|
|
332
|
+
// BEFORE table lookup so e.g. `claude-opus-4-8[1m]` matches the
|
|
333
|
+
// `claude-opus-4-8` row. The variant encodes a context-window SKU; the
|
|
334
|
+
// current price tables are keyed on the base id.
|
|
335
|
+
const rawModelId = typeof args.model_id === 'string' && args.model_id.length > 0
|
|
336
|
+
? args.model_id
|
|
337
|
+
: null;
|
|
338
|
+
const normalizedModelId = rawModelId !== null
|
|
339
|
+
? (normalizeModelId(rawModelId).base || rawModelId)
|
|
340
|
+
: null;
|
|
329
341
|
const q = {
|
|
330
|
-
model_id: typeof args.model_id === 'string' && args.model_id.length > 0
|
|
331
|
-
? args.model_id
|
|
332
|
-
: null,
|
|
342
|
+
model_id: normalizedModelId,
|
|
333
343
|
tier: typeof args.tier === 'string' && args.tier.length > 0
|
|
334
344
|
? args.tier
|
|
335
345
|
: null,
|
|
@@ -365,14 +375,33 @@ function computeCost(args, opts) {
|
|
|
365
375
|
}
|
|
366
376
|
}
|
|
367
377
|
|
|
368
|
-
// Branch 5: nothing matched.
|
|
378
|
+
// Branch 5: nothing matched. Rather than silently returning a null cost
|
|
379
|
+
// (which downstream aggregators treat as $0 — a frontier model billed as
|
|
380
|
+
// free), compute a CONSERVATIVE CEILING at the OPUS rate from the claude
|
|
381
|
+
// price table. An unknown/new model is thus priced LOUDLY (cost_estimated)
|
|
382
|
+
// and CONSERVATIVELY (opus ceiling), never $0 and never the sonnet rate.
|
|
383
|
+
const reason = rows.length === 0 ? 'runtime_table_missing' : 'model_not_found';
|
|
384
|
+
const claudeRows = loadPriceTable(DEFAULT_RUNTIME_ID, opts);
|
|
385
|
+
const opusRow = findPriceRow(claudeRows, { tier: 'opus' });
|
|
386
|
+
if (opusRow !== null) {
|
|
387
|
+
return {
|
|
388
|
+
cost_usd: applyFormula(opusRow, tokens),
|
|
389
|
+
model: normalizedModelId,
|
|
390
|
+
tier: 'opus',
|
|
391
|
+
runtime_used: DEFAULT_RUNTIME_ID,
|
|
392
|
+
fallback: true,
|
|
393
|
+
reason,
|
|
394
|
+
cost_estimated: true,
|
|
395
|
+
};
|
|
396
|
+
}
|
|
397
|
+
// Even the opus row is unavailable → genuinely cannot price. Keep null.
|
|
369
398
|
return {
|
|
370
399
|
cost_usd: null,
|
|
371
400
|
model: null,
|
|
372
401
|
tier: q.tier,
|
|
373
402
|
runtime_used: null,
|
|
374
403
|
fallback: false,
|
|
375
|
-
reason: rows.length === 0 ? 'runtime_table_missing' : 'model_not_found',
|
|
404
|
+
reason,
|
|
376
405
|
};
|
|
377
406
|
}
|
|
378
407
|
|
|
@@ -78,7 +78,7 @@ function validateModelRow(row, where) {
|
|
|
78
78
|
if (typeof row.model !== 'string' || row.model.length === 0) {
|
|
79
79
|
throw new Error(`${where}: 'model' must be a non-empty string`);
|
|
80
80
|
}
|
|
81
|
-
const allowedKeys = new Set(['model', 'provider_model_id']);
|
|
81
|
+
const allowedKeys = new Set(['model', 'provider_model_id', 'context_window']);
|
|
82
82
|
for (const k of Object.keys(row)) {
|
|
83
83
|
if (!allowedKeys.has(k)) {
|
|
84
84
|
throw new Error(`${where}: unknown key '${k}' (allowed: ${[...allowedKeys].join(', ')})`);
|
|
@@ -89,6 +89,14 @@ function validateModelRow(row, where) {
|
|
|
89
89
|
throw new Error(`${where}: 'provider_model_id' must be a non-empty string when present`);
|
|
90
90
|
}
|
|
91
91
|
}
|
|
92
|
+
// Optional context-window size — mirror the schema (integer >= 1). Recorded as
|
|
93
|
+
// machine-readable metadata (the 1M-context [1m] opus variant); not yet a
|
|
94
|
+
// budgeting driver (deferred — no consumer wired this cycle).
|
|
95
|
+
if (row.context_window !== undefined) {
|
|
96
|
+
if (typeof row.context_window !== 'number' || !Number.isInteger(row.context_window) || row.context_window < 1) {
|
|
97
|
+
throw new Error(`${where}: 'context_window' must be a positive integer when present`);
|
|
98
|
+
}
|
|
99
|
+
}
|
|
92
100
|
}
|
|
93
101
|
|
|
94
102
|
function validateProvenance(arr, where) {
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/*
|
|
3
|
+
* scripts/lib/model-id.cjs — model-id normalization + tiering (pure, dependency-free).
|
|
4
|
+
*
|
|
5
|
+
* WHY THIS EXISTS
|
|
6
|
+
* ---------------
|
|
7
|
+
* Two unrelated callers need to reason about model ids in identical ways:
|
|
8
|
+
* - scripts/lib/session-runner/index.ts (routing: which tier am I running?)
|
|
9
|
+
* - scripts/lib/budget-enforcer.cjs (pricing: what does this model cost?)
|
|
10
|
+
* Each previously carried its own ad-hoc parsing, which drifted. This module is
|
|
11
|
+
* the single source of truth so a new model family is a DATA edit here (or in the
|
|
12
|
+
* price tables), never a logic change scattered across callers.
|
|
13
|
+
*
|
|
14
|
+
* DESIGN PRINCIPLES
|
|
15
|
+
* -----------------
|
|
16
|
+
* 1. TIER IS FOR ROUTING. `tierForModelId` answers "opus | sonnet | haiku" so the
|
|
17
|
+
* router can pick an agent class. It is NOT a pricing key on its own — pricing
|
|
18
|
+
* also depends on the exact id and (later) the context-window variant.
|
|
19
|
+
*
|
|
20
|
+
* 2. NULL MEANS UNKNOWN — PRICE CONSERVATIVELY + LOUDLY. We deliberately return
|
|
21
|
+
* `null` for ids we cannot confidently classify rather than guessing a tier.
|
|
22
|
+
* A wrong tier guess silently mis-routes or mis-prices. Callers MUST treat
|
|
23
|
+
* null as "unknown model — assume the most expensive plausible price AND warn",
|
|
24
|
+
* never as a tier and never as free. Do NOT add heuristic fallbacks that
|
|
25
|
+
* invent a tier for arbitrary strings.
|
|
26
|
+
*
|
|
27
|
+
* 3. VARIANT SUFFIX IS FOR CONTEXT-WINDOW-AWARE PRICING (LATER). Ids may carry a
|
|
28
|
+
* bracketed variant such as `claude-opus-4-8[1m]` or `...[200k]`. The variant
|
|
29
|
+
* encodes a context-window SKU that can have different per-token pricing. We
|
|
30
|
+
* split it off cleanly (`{ base, variant }`) so tiering operates on `base`
|
|
31
|
+
* while a future price table can key on `(base, variant)`. Date stamps in the
|
|
32
|
+
* base (e.g. `claude-opus-4-8-20260101`) are NOT variants and are left intact.
|
|
33
|
+
*
|
|
34
|
+
* 4. NEW FAMILIES ARE A DATA EDIT, NOT A CODE CHANGE. To onboard a new model:
|
|
35
|
+
* - if its id contains the tier word (opus/sonnet/haiku), the family-pattern
|
|
36
|
+
* rule already handles it — optionally pin it in KNOWN_TIER_BY_ID;
|
|
37
|
+
* - if its id does NOT contain the tier word (e.g. a hypothetical
|
|
38
|
+
* `claude-fable-5`), add one line to ALIAS_MAP (see comment there);
|
|
39
|
+
* - pricing specifics go in the caller's price table keyed on the exact id.
|
|
40
|
+
*/
|
|
41
|
+
|
|
42
|
+
/**
|
|
43
|
+
* KNOWN_TIER_BY_ID — explicit, exact-id → tier pins.
|
|
44
|
+
* Seeded with the currently-shipping ids. Exact matches win over pattern rules,
|
|
45
|
+
* so this is also the place to OVERRIDE a family-pattern result if a specific
|
|
46
|
+
* sku is mis-classified by the generic regex. Keys are the normalized `base`
|
|
47
|
+
* (no bracket variant).
|
|
48
|
+
*/
|
|
49
|
+
const KNOWN_TIER_BY_ID = Object.freeze({
|
|
50
|
+
'claude-opus-4-8': 'opus',
|
|
51
|
+
'claude-opus-4-7': 'opus',
|
|
52
|
+
'claude-sonnet-4-7': 'sonnet',
|
|
53
|
+
'claude-sonnet-4-6': 'sonnet',
|
|
54
|
+
'claude-sonnet-4-5': 'sonnet',
|
|
55
|
+
'claude-haiku-4-5': 'haiku',
|
|
56
|
+
});
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* ALIAS_MAP — extension point for families whose id does NOT contain the tier word.
|
|
60
|
+
*
|
|
61
|
+
* Currently EMPTY by design. The family-pattern rule (step c in tierForModelId)
|
|
62
|
+
* already covers any id literally containing `opus`/`sonnet`/`haiku`. Use this map
|
|
63
|
+
* ONLY for a future lineup whose product name omits the tier word.
|
|
64
|
+
*
|
|
65
|
+
* Example — when Anthropic publishes the `claude-fable-5` sku lineup and we learn
|
|
66
|
+
* it maps to opus-class routing, add (keyed on normalized base):
|
|
67
|
+
*
|
|
68
|
+
* 'claude-fable-5': 'opus',
|
|
69
|
+
*
|
|
70
|
+
* Until the lineup is public we leave it empty rather than guess — an unknown
|
|
71
|
+
* `claude-fable-5` correctly resolves to null (conservative pricing + warning).
|
|
72
|
+
*/
|
|
73
|
+
const ALIAS_MAP = Object.freeze({
|
|
74
|
+
// 'claude-fable-5': 'opus', // <- add when the fable-5 sku lineup is public
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
const VARIANT_RE = /\[([^\]]*)\]\s*$/; // trailing bracketed suffix, e.g. [1m] / [200k]
|
|
78
|
+
const FAMILY_RE = /(?:^|-)(opus|sonnet|haiku)(?:-|$)/;
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* normalizeModelId(id) → { base, variant }
|
|
82
|
+
*
|
|
83
|
+
* Splits off a single trailing bracketed variant suffix (e.g. `[1m]`, `[200k]`),
|
|
84
|
+
* returning it lowercased with brackets removed as `variant`, and the remaining
|
|
85
|
+
* trimmed id as `base`. Date stamps in the base are preserved. Null/empty/
|
|
86
|
+
* undefined input yields `{ base: '', variant: null }`.
|
|
87
|
+
*
|
|
88
|
+
* @param {string|null|undefined} id
|
|
89
|
+
* @returns {{ base: string, variant: string|null }}
|
|
90
|
+
*/
|
|
91
|
+
function normalizeModelId(id) {
|
|
92
|
+
if (id == null) return { base: '', variant: null };
|
|
93
|
+
const s = String(id).trim();
|
|
94
|
+
if (s === '') return { base: '', variant: null };
|
|
95
|
+
|
|
96
|
+
const m = s.match(VARIANT_RE);
|
|
97
|
+
if (m) {
|
|
98
|
+
const variant = m[1].trim().toLowerCase();
|
|
99
|
+
const base = s.slice(0, m.index).trim();
|
|
100
|
+
return { base, variant: variant === '' ? null : variant };
|
|
101
|
+
}
|
|
102
|
+
return { base: s, variant: null };
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* tierForModelId(id) → 'opus' | 'sonnet' | 'haiku' | null
|
|
107
|
+
*
|
|
108
|
+
* Resolution order:
|
|
109
|
+
* (a) normalize → work on `base`;
|
|
110
|
+
* (b) exact match in KNOWN_TIER_BY_ID;
|
|
111
|
+
* (c) family-pattern: base contains the tier word as a token;
|
|
112
|
+
* (d) ALIAS_MAP (families whose id omits the tier word);
|
|
113
|
+
* (e) otherwise null — UNKNOWN. Callers must price conservatively + loudly,
|
|
114
|
+
* NOT treat null as a tier.
|
|
115
|
+
*
|
|
116
|
+
* @param {string|null|undefined} id
|
|
117
|
+
* @returns {'opus'|'sonnet'|'haiku'|null}
|
|
118
|
+
*/
|
|
119
|
+
function tierForModelId(id) {
|
|
120
|
+
const { base } = normalizeModelId(id);
|
|
121
|
+
if (base === '') return null;
|
|
122
|
+
|
|
123
|
+
// (b) exact known-id pin
|
|
124
|
+
if (Object.prototype.hasOwnProperty.call(KNOWN_TIER_BY_ID, base)) {
|
|
125
|
+
return KNOWN_TIER_BY_ID[base];
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// (c) family-pattern (tier word appears as a token in the id)
|
|
129
|
+
const fam = base.match(FAMILY_RE);
|
|
130
|
+
if (fam) return fam[1];
|
|
131
|
+
|
|
132
|
+
// (d) alias for families whose id omits the tier word
|
|
133
|
+
if (Object.prototype.hasOwnProperty.call(ALIAS_MAP, base)) {
|
|
134
|
+
return ALIAS_MAP[base];
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// (e) unknown → null (conservative pricing + loud warning is the caller's job)
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
module.exports = { normalizeModelId, tierForModelId, KNOWN_TIER_BY_ID, ALIAS_MAP };
|
|
@@ -122,6 +122,21 @@ const adaptiveModeLib = _nodeRequire(
|
|
|
122
122
|
getMode: (opts?: { baseDir?: string; budgetPath?: string; quiet?: boolean }) => 'static' | 'hedge' | 'full';
|
|
123
123
|
};
|
|
124
124
|
|
|
125
|
+
// ── Phase 59-9 — model-id normalization + tiering (single source of truth) ───
|
|
126
|
+
//
|
|
127
|
+
// `scripts/lib/model-id.cjs` is the canonical id parser shared with the
|
|
128
|
+
// budget-enforcer. We route BOTH tier-labeling (`tierFromModel`) and pricing
|
|
129
|
+
// (`rateFor`) through it so a new model family is a DATA edit there / in the
|
|
130
|
+
// price tables, never scattered substring logic here. `tierForModelId` returns
|
|
131
|
+
// `null` for an unknown family — callers MUST treat that as "price
|
|
132
|
+
// conservatively + loudly", never as a tier or as free.
|
|
133
|
+
const modelId = _nodeRequire(
|
|
134
|
+
_resolve(_REPO_ROOT, 'scripts/lib/model-id.cjs'),
|
|
135
|
+
) as {
|
|
136
|
+
normalizeModelId: (id: string | null | undefined) => { base: string; variant: string | null };
|
|
137
|
+
tierForModelId: (id: string | null | undefined) => 'opus' | 'sonnet' | 'haiku' | null;
|
|
138
|
+
};
|
|
139
|
+
|
|
125
140
|
/** Rate-guard provider key for the Anthropic Agent SDK. */
|
|
126
141
|
const RATE_GUARD_PROVIDER = 'anthropic';
|
|
127
142
|
|
|
@@ -144,16 +159,24 @@ const SESSION_RUNNER_DEFAULT_BIN = 'medium';
|
|
|
144
159
|
*
|
|
145
160
|
* Used at the 4 terminal-emit sites where the final tier isn't already
|
|
146
161
|
* carried on `opts` — we fall back to inspecting `usage.model` (folded
|
|
147
|
-
* during the run loop from SDK chunks).
|
|
148
|
-
*
|
|
149
|
-
*
|
|
162
|
+
* during the run loop from SDK chunks). Delegates to the shared
|
|
163
|
+
* `model-id.cjs` resolver (variant suffix like `[1m]` is stripped, known
|
|
164
|
+
* ids classified identically to before).
|
|
165
|
+
*
|
|
166
|
+
* The shared resolver returns `null` for an UNKNOWN family. For tier
|
|
167
|
+
* LABELING (telemetry / posterior arms) we map null → 'sonnet' as the
|
|
168
|
+
* safest middle tier so the bandit arms stay well-defined. This is a
|
|
169
|
+
* TELEMETRY default only — it does NOT influence PRICING. Pricing of an
|
|
170
|
+
* unknown family uses the conservative OPUS ceiling, resolved separately in
|
|
171
|
+
* `rateFor` (see DEFAULT_MODEL_RATE / tier fallback there). Keep the two
|
|
172
|
+
* concerns distinct: a wrong tier label mis-attributes a posterior arm; a
|
|
173
|
+
* wrong price under-bills a frontier model.
|
|
150
174
|
*/
|
|
151
175
|
function tierFromModel(modelName: string | null | undefined): 'opus' | 'sonnet' | 'haiku' {
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
return 'sonnet';
|
|
176
|
+
const tier = modelId.tierForModelId(modelName);
|
|
177
|
+
// null = unknown family → conservative TELEMETRY default (pricing handled
|
|
178
|
+
// separately + conservatively in rateFor).
|
|
179
|
+
return tier ?? 'sonnet';
|
|
157
180
|
}
|
|
158
181
|
|
|
159
182
|
/**
|
|
@@ -539,29 +562,72 @@ function _logPeerCallComplete(args: {
|
|
|
539
562
|
const RETRY_BACKOFF = { baseMs: 1000, maxMs: 30_000 } as const;
|
|
540
563
|
|
|
541
564
|
/**
|
|
542
|
-
* Per-million-token USD rates.
|
|
543
|
-
*
|
|
565
|
+
* Per-million-token USD rates.
|
|
566
|
+
*
|
|
567
|
+
* Canonical price source is `reference/prices/claude.md`; this table mirrors
|
|
568
|
+
* it for the sync headless path — keep in lockstep.
|
|
569
|
+
*
|
|
570
|
+
* Unknown FAMILIES default to the OPUS ceiling (see DEFAULT_MODEL_RATE) — a
|
|
571
|
+
* conservative overestimate. We'd rather cap early than silently under-bill a
|
|
572
|
+
* frontier model. Known families fall back to their per-tier representative
|
|
573
|
+
* rate (PER_TIER_RATE) so a dated/variant sku still prices correctly.
|
|
544
574
|
*/
|
|
545
575
|
const MODEL_RATES: Readonly<Record<string, { input: number; output: number }>> = Object.freeze({
|
|
576
|
+
'claude-opus-4-8': { input: 15, output: 75 },
|
|
546
577
|
'claude-opus-4-7': { input: 15, output: 75 },
|
|
547
578
|
'claude-sonnet-4-5': { input: 3, output: 15 },
|
|
548
579
|
'claude-haiku-4-5': { input: 0.8, output: 4 },
|
|
549
580
|
});
|
|
550
|
-
const DEFAULT_MODEL_RATE = Object.freeze({ input: 3, output: 15 });
|
|
551
581
|
|
|
552
|
-
/**
|
|
582
|
+
/** Per-tier representative rates (match reference/prices/claude.md). Used as
|
|
583
|
+
* the fallback when an exact/prefix MODEL_RATES match is absent but the
|
|
584
|
+
* family tier is known. */
|
|
585
|
+
const PER_TIER_RATE: Readonly<Record<'opus' | 'sonnet' | 'haiku', { input: number; output: number }>> =
|
|
586
|
+
Object.freeze({
|
|
587
|
+
opus: { input: 15, output: 75 },
|
|
588
|
+
sonnet: { input: 3, output: 15 },
|
|
589
|
+
haiku: { input: 1, output: 5 },
|
|
590
|
+
});
|
|
591
|
+
|
|
592
|
+
/**
|
|
593
|
+
* DEFAULT_MODEL_RATE — conservative ceiling for a GENUINELY UNKNOWN family
|
|
594
|
+
* (tier resolves to null). Set to the OPUS rate, matching this file's own
|
|
595
|
+
* "safer overestimate" intent. The previous sonnet default UNDER-billed any
|
|
596
|
+
* frontier model whose id we did not yet recognize.
|
|
597
|
+
*/
|
|
598
|
+
const DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
|
|
599
|
+
|
|
600
|
+
/**
 * Resolve a per-M-token rate for a model name.
 *
 * Resolution order (conservative + robust):
 *   1. a missing, non-string, or empty name → DEFAULT_MODEL_RATE;
 *   2. normalize the id via `modelId.normalizeModelId` and work on `base`
 *      (an empty base → DEFAULT_MODEL_RATE);
 *   3. exact match on an OWN key of MODEL_RATES;
 *   4. prefix match, preferring the LONGEST matching key
 *      (e.g. "claude-opus-4-7-20250101" → "claude-opus-4-7");
 *   5. per-tier fallback via `modelId.tierForModelId(base)` → PER_TIER_RATE;
 *   6. only if the tier is null → DEFAULT_MODEL_RATE.
 *
 * @param modelName Model id as reported by the runtime, or null when unknown.
 * @returns The `{ input, output }` rate pair used for cost estimation.
 */
function rateFor(modelName: string | null): { input: number; output: number } {
  // typeof guard also covers `undefined` / non-string values from untyped JS
  // callers, so they are priced at the ceiling instead of reaching the
  // normalizer.
  if (typeof modelName !== 'string' || modelName === '') return DEFAULT_MODEL_RATE;
  const { base } = modelId.normalizeModelId(modelName);
  if (base === '') return DEFAULT_MODEL_RATE;
  // (3) Direct match. Own-property check so ids such as "constructor" or
  // "toString" cannot resolve to inherited Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(MODEL_RATES, base)) {
    const direct = MODEL_RATES[base];
    if (direct !== undefined) return direct;
  }
  // (4) Prefix match. Keep the longest matching key so the result does not
  // depend on the insertion order of MODEL_RATES.
  let longest: { input: number; output: number } | undefined;
  let longestLen = -1;
  for (const key of Object.keys(MODEL_RATES)) {
    if (key.length > longestLen && base.startsWith(key)) {
      const hit = MODEL_RATES[key];
      if (hit !== undefined) {
        longest = hit;
        longestLen = key.length;
      }
    }
  }
  if (longest !== undefined) return longest;
  // (5) Per-tier fallback for a known family.
  const tier = modelId.tierForModelId(base);
  if (tier !== null) return PER_TIER_RATE[tier];
  // (6) Unknown family → conservative opus ceiling.
  return DEFAULT_MODEL_RATE;
}
|
|
567
633
|
|
|
@@ -1281,3 +1347,8 @@ function buildResult(args: BuildResultArgs): SessionResult {
|
|
|
1281
1347
|
// invariant: session-runner consumers can rely on these constants being
|
|
1282
1348
|
// stable across minor releases.
|
|
1283
1349
|
export { MODEL_RATES, DEFAULT_MODEL_RATE, RATE_GUARD_PROVIDER };
|
|
1350
|
+
|
|
1351
|
+
// Pricing internals exported for regression tests (Phase 59-9 model-cost-truth):
|
|
1352
|
+
// verify unknown families resolve to the conservative opus ceiling while known
|
|
1353
|
+
// families price correctly via the per-tier fallback.
|
|
1354
|
+
export { rateFor, usdCost, tierFromModel, PER_TIER_RATE };
|
package/sdk/cli/index.js
CHANGED
|
@@ -5442,15 +5442,15 @@ var banditIntegration = _nodeRequire(
|
|
|
5442
5442
|
var adaptiveModeLib = _nodeRequire(
|
|
5443
5443
|
(0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/adaptive-mode.cjs")
|
|
5444
5444
|
);
|
|
5445
|
+
var modelId = _nodeRequire(
|
|
5446
|
+
(0, import_node_path9.resolve)(_REPO_ROOT, "scripts/lib/model-id.cjs")
|
|
5447
|
+
);
|
|
5445
5448
|
// Provider identifier string.
// NOTE(review): presumably the key used for rate-guard lookups — its use is
// not visible here; confirm against the call sites.
var RATE_GUARD_PROVIDER = "anthropic";
|
|
5446
5449
|
// NOTE(review): presumably the default retry count for a session run — confirm.
var DEFAULT_MAX_RETRIES = 2;
|
|
5447
5450
|
// NOTE(review): presumably the default bin label used by the session runner
// when none is supplied — confirm against the call sites.
var SESSION_RUNNER_DEFAULT_BIN = "medium";
|
|
5448
5451
|
/**
 * Map a model name to its family tier.
 *
 * Delegates classification to `modelId.tierForModelId`; when that helper
 * yields null or undefined, the tier falls back to "sonnet".
 *
 * @param {string | null} modelName Model id to classify.
 * @returns {string} The resolved tier, or "sonnet" when none was resolved.
 */
function tierFromModel(modelName) {
  return modelId.tierForModelId(modelName) ?? "sonnet";
}
|
|
5455
5455
|
function _recordBanditOutcome(input) {
|
|
5456
5456
|
try {
|
|
@@ -5659,21 +5659,31 @@ function _logPeerCallComplete(args) {
|
|
|
5659
5659
|
}
|
|
5660
5660
|
// Backoff bounds in milliseconds: base 1e3 (1000 ms), cap 3e4 (30000 ms).
// NOTE(review): the growth curve between base and cap is not visible here.
var RETRY_BACKOFF = { baseMs: 1e3, maxMs: 3e4 };
|
|
5661
5661
|
// Exact per-model rates as { input, output } pairs. rateFor consults this
// table first (exact key, then key-as-prefix) before falling back to the
// per-tier table.
// "claude-haiku-4-5" is priced at 1 / 5 so that an exact match agrees with
// PER_TIER_RATE.haiku; the previous 0.8 / 4 entry billed the exact id below
// the rate used for every other haiku-family id.
var MODEL_RATES = Object.freeze({
  "claude-opus-4-8": { input: 15, output: 75 },
  "claude-opus-4-7": { input: 15, output: 75 },
  "claude-sonnet-4-5": { input: 3, output: 15 },
  "claude-haiku-4-5": { input: 1, output: 5 }
});
|
|
5666
|
-
var
|
|
5667
|
+
// Representative { input, output } rate per family tier (opus / sonnet /
// haiku). rateFor indexes this table with the tier returned by
// modelId.tierForModelId when no exact or prefix MODEL_RATES entry matches.
var PER_TIER_RATE = Object.freeze({
|
|
5668
|
+
opus: { input: 15, output: 75 },
|
|
5669
|
+
sonnet: { input: 3, output: 15 },
|
|
5670
|
+
haiku: { input: 1, output: 5 }
|
|
5671
|
+
});
|
|
5672
|
+
// Fallback rate returned by rateFor when the model name is null/empty, the
// normalized base id is empty, or no MODEL_RATES entry or tier matches. The
// values equal the opus entry of PER_TIER_RATE, so unknown models are priced
// at the highest rate in that table.
var DEFAULT_MODEL_RATE = Object.freeze({ input: 15, output: 75 });
|
|
5667
5673
|
/**
 * Resolve the { input, output } rate pair for a model name.
 *
 * Resolution order:
 *   1. a missing, non-string, or empty name → DEFAULT_MODEL_RATE;
 *   2. normalize the id via `modelId.normalizeModelId` and work on `base`
 *      (an empty base → DEFAULT_MODEL_RATE);
 *   3. exact match on an OWN key of MODEL_RATES;
 *   4. prefix match, preferring the LONGEST matching key;
 *   5. per-tier fallback via `modelId.tierForModelId(base)` → PER_TIER_RATE;
 *   6. otherwise DEFAULT_MODEL_RATE.
 *
 * @param {string | null} modelName Model id, or null when unknown.
 * @returns {{ input: number, output: number }} Rate pair for cost estimation.
 */
function rateFor(modelName) {
  // typeof guard also covers `undefined` and other non-string values, which
  // are priced at the ceiling instead of being handed to the normalizer.
  if (typeof modelName !== "string" || modelName === "") return DEFAULT_MODEL_RATE;
  const { base } = modelId.normalizeModelId(modelName);
  if (base === "") return DEFAULT_MODEL_RATE;
  // Own-property check so ids such as "constructor" or "toString" cannot
  // resolve to inherited Object.prototype members.
  if (Object.prototype.hasOwnProperty.call(MODEL_RATES, base)) {
    return MODEL_RATES[base];
  }
  // Keep the longest matching key so the result does not depend on the
  // insertion order of MODEL_RATES.
  let longest;
  let longestLen = -1;
  for (const [key, rate] of Object.entries(MODEL_RATES)) {
    if (key.length > longestLen && base.startsWith(key)) {
      longest = rate;
      longestLen = key.length;
    }
  }
  if (longest !== void 0) return longest;
  const tier = modelId.tierForModelId(base);
  if (tier !== null) return PER_TIER_RATE[tier];
  return DEFAULT_MODEL_RATE;
}
|
|
5679
5689
|
function usdCost(inputTokens, outputTokens, modelName) {
|