language-models 2.1.3 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +28 -0
- package/README.md +2 -0
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -1
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +8 -10
- package/dist/models.js.map +1 -1
- package/dist/policy.d.ts +127 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +246 -0
- package/dist/policy.js.map +1 -0
- package/dist/pricing/index.d.ts +19 -0
- package/dist/pricing/index.d.ts.map +1 -0
- package/dist/pricing/index.js +18 -0
- package/dist/pricing/index.js.map +1 -0
- package/dist/pricing/lookup.d.ts +46 -0
- package/dist/pricing/lookup.d.ts.map +1 -0
- package/dist/pricing/lookup.js +94 -0
- package/dist/pricing/lookup.js.map +1 -0
- package/dist/pricing/table.d.ts +46 -0
- package/dist/pricing/table.d.ts.map +1 -0
- package/dist/pricing/table.js +214 -0
- package/dist/pricing/table.js.map +1 -0
- package/dist/pricing/types.d.ts +84 -0
- package/dist/pricing/types.d.ts.map +1 -0
- package/dist/pricing/types.js +32 -0
- package/dist/pricing/types.js.map +1 -0
- package/package.json +16 -12
- package/src/index.ts +42 -1
- package/src/models.ts +8 -12
- package/src/policy.ts +343 -0
- package/src/pricing/index.ts +29 -0
- package/src/pricing/lookup.ts +124 -0
- package/src/pricing/table.ts +235 -0
- package/src/pricing/types.ts +90 -0
- package/{src → test}/aliases.test.ts +20 -22
- package/{src → test}/index.test.ts +9 -9
- package/{src → test}/models.test.ts +8 -6
- package/test/policy.test.ts +203 -0
- package/test/pricing.test.ts +279 -0
- package/vitest.config.ts +21 -1
- package/.turbo/turbo-test.log +0 -18
- package/LICENSE +0 -21
- package/src/aliases.js +0 -40
- package/src/aliases.test.js +0 -264
- package/src/index.js +0 -9
- package/src/index.test.js +0 -320
- package/src/models.js +0 -108
- package/src/models.test.js +0 -335
- package/vitest.config.js +0 -10
package/src/policy.ts
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ModelPolicy - per-model resilience and tier policy data
|
|
3
|
+
*
|
|
4
|
+
* `language-models` owns model identity (alias resolution, capability lookup).
|
|
5
|
+
* Resilience policy (retry, circuit breaker, fallback chain, batch tier) is
|
|
6
|
+
* a per-model concern that belongs alongside the catalog data — but the
|
|
7
|
+
* runtime *machinery* that applies the policy lives in `ai-functions`.
|
|
8
|
+
*
|
|
9
|
+
* This module provides:
|
|
10
|
+
* - `ModelPolicy` MDXLD type (`$type: 'ModelPolicy'`)
|
|
11
|
+
* - `policyFor(alias)` - resolve an alias and return its derived policy
|
|
12
|
+
* - `derivePolicy(model, alias?)` - inference layer that turns OpenRouter raw
|
|
13
|
+
* data into a policy by applying heuristics (newest frontier model is best,
|
|
14
|
+
 *     a higher price within a family usually indicates a more capable model, etc.)
|
|
15
|
+
*
|
|
16
|
+
* Source of truth: OpenRouter raw catalog (data/models.json) + heuristics.
|
|
17
|
+
* Strategy: runtime derivation, cached. A static snapshot generator could be
|
|
18
|
+
* added later (see `derivePolicy` — it's pure, so you can pre-compute it).
|
|
19
|
+
*
|
|
20
|
+
* @packageDocumentation
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { resolve, get, list, type ModelInfo } from './models.js'
|
|
24
|
+
import { ALIASES } from './aliases.js'
|
|
25
|
+
|
|
26
|
+
// ============================================================================
|
|
27
|
+
// MDXLD types
|
|
28
|
+
// ============================================================================
|
|
29
|
+
|
|
30
|
+
/**
 * Names of the error categories a policy can refer to.
 *
 * These are plain string literals rather than an imported enum: per the
 * original author, they mirror the values of `ai-functions`'s `ErrorCategory`
 * enum, which cannot be imported here without creating a circular dependency.
 */
export type ErrorCategoryName =
  | 'network'
  | 'rate_limit'
  | 'invalid_input'
  | 'authentication'
  | 'server'
  | 'context_length'
  | 'unknown'
|
|
42
|
+
|
|
43
|
+
/**
 * Retry settings for a model: how many times to retry, the backoff
 * parameters, and which error categories are worth retrying at all.
 *
 * This is data only; the code that applies it lives outside this module.
 */
export interface RetryPolicyData {
  /** Maximum number of retries. */
  maxRetries: number
  /** Initial backoff delay (presumably milliseconds — confirm against ai-functions). */
  baseDelay: number
  /** Upper bound on the backoff delay (presumably milliseconds — confirm against ai-functions). */
  maxDelay: number
  /** Backoff growth factor applied between attempts. */
  multiplier: number
  /** Jitter amount (presumably a 0–1 fraction of the delay — confirm against ai-functions). */
  jitter: number
  /** Error categories that should trigger a retry. */
  retryableCategories: ErrorCategoryName[]
}
|
|
56
|
+
|
|
57
|
+
/**
 * Circuit-breaker settings for a model. Per-model breaker state is keyed
 * elsewhere (by alias, according to the original author); this type carries
 * only the thresholds.
 */
export interface CircuitBreakerPolicyData {
  /** Number of failures before the breaker opens. */
  failureThreshold: number
  /** Time before an open breaker is retried (presumably milliseconds — confirm). */
  resetTimeout: number
  /** Number of successes required to close the breaker again. */
  successThreshold: number
}
|
|
65
|
+
|
|
66
|
+
/**
 * Processing tiers a model can be eligible for.
 *
 * - `immediate`: synchronous online inference (always available)
 * - `flex`: faster-than-batch processing (~minutes, ~50% discount);
 *   currently OpenAI/Bedrock only
 * - `batch`: batch API processing (~hours, ~50% discount);
 *   OpenAI/Anthropic/Google/Bedrock/Cloudflare
 */
export type BatchTier = 'immediate' | 'flex' | 'batch'
|
|
76
|
+
|
|
77
|
+
/**
 * Per-provider overrides mapping an HTTP status code to an error category.
 *
 * Policies produced by this module leave the mapping empty; according to the
 * original author the common cases are classified by
 * `ai-functions/retry.ts#classifyError`, so an entry is only needed when a
 * provider uses unusual status codes.
 */
export type ErrorMapping = Record<number, ErrorCategoryName>
|
|
83
|
+
|
|
84
|
+
/**
 * MDXLD-shaped resilience and tier policy for a single model.
 *
 * `$type` is always `'ModelPolicy'`; `$id` is the resolved model id
 * (e.g. `'anthropic/claude-opus-4.5'`).
 */
export interface ModelPolicy {
  /** MDXLD type tag. */
  $type: 'ModelPolicy'
  /** Resolved model id this policy applies to. */
  $id: string
  /** Provider slug (e.g. 'anthropic'). */
  provider: string
  /** Retry/backoff settings. */
  retry: RetryPolicyData
  /** Circuit-breaker thresholds. */
  circuitBreaker: CircuitBreakerPolicyData
  /** Ordered list of model ids to try after this one fails. */
  fallbackChain: string[]
  /** Tiers this model is eligible for. */
  batchTier: BatchTier[]
  /** Provider-specific HTTP code → category overrides. */
  errorMapping: ErrorMapping
}
|
|
104
|
+
|
|
105
|
+
// ============================================================================
|
|
106
|
+
// Defaults & heuristics
|
|
107
|
+
// ============================================================================
|
|
108
|
+
|
|
109
|
+
/**
 * Provider slugs treated as frontier labs. Membership selects the more
 * generous retry and circuit-breaker settings derived below.
 */
const FRONTIER_PROVIDERS = new Set<string>(['anthropic', 'openai', 'google'])
|
|
111
|
+
|
|
112
|
+
/** Provider slugs whose models are eligible for the `batch` tier. */
const BATCH_PROVIDERS = new Set<string>([
  'anthropic',
  'openai',
  'google',
  'amazon-bedrock',
  'cloudflare',
])
|
|
114
|
+
|
|
115
|
+
/** Provider slugs whose models are eligible for the `flex` (faster-than-batch) tier. */
const FLEX_PROVIDERS = new Set<string>(['openai', 'amazon-bedrock'])
|
|
117
|
+
|
|
118
|
+
/**
 * Curated fallback seeds from the frontier families, appended (in this order)
 * after any same-family siblings when a fallback chain is derived.
 *
 * Note: two Anthropic models are listed, so a non-Claude model can receive
 * both of them in its chain.
 */
const FRONTIER_FALLBACK: readonly string[] = [
  'anthropic/claude-sonnet-4.5',
  'anthropic/claude-opus-4.5',
  'openai/gpt-4o',
  'google/gemini-2.5-pro',
]
|
|
125
|
+
|
|
126
|
+
/**
 * Baseline retry settings, used as-is for non-frontier providers and as the
 * starting point for frontier overrides. Intended by the original author to
 * match the `ai-functions` `RetryPolicy` defaults.
 *
 * Treat this object as read-only: it is exported and shared module-wide.
 */
export const DEFAULT_RETRY: RetryPolicyData = {
  maxRetries: 3,
  baseDelay: 1000,
  maxDelay: 30000,
  multiplier: 2,
  jitter: 0,
  // Input, authentication and context-length errors are not listed: they are never retried.
  retryableCategories: ['network', 'rate_limit', 'server', 'unknown'],
}
|
|
135
|
+
|
|
136
|
+
/**
 * Baseline circuit-breaker settings for non-frontier providers. Intended by
 * the original author to match the `ai-functions` defaults.
 *
 * Treat this object as read-only: it is exported and shared module-wide.
 */
export const DEFAULT_CIRCUIT_BREAKER: CircuitBreakerPolicyData = {
  failureThreshold: 5,
  resetTimeout: 30000,
  successThreshold: 1,
}
|
|
142
|
+
|
|
143
|
+
/**
 * Build the policy used for a model that has no catalog entry.
 *
 * The provider is the text before the first `/` of `modelId`; ids with no
 * slash, or with an empty provider segment (leading slash), get the provider
 * `'unknown'` — the same `slash > 0` rule `derivePolicy` applies.
 *
 * Every call returns fresh objects and arrays, so callers may adjust the
 * result without affecting `DEFAULT_RETRY` / `DEFAULT_CIRCUIT_BREAKER`.
 *
 * @param modelId - Model id (ideally `provider/name`).
 * @returns A policy with default retry and circuit-breaker settings, no
 *   fallback chain, and only the `immediate` tier.
 */
export function defaultPolicy(modelId: string): ModelPolicy {
  const slash = modelId.indexOf('/')
  const provider = slash > 0 ? modelId.substring(0, slash) : 'unknown'
  return {
    $type: 'ModelPolicy',
    $id: modelId,
    provider,
    // Spread alone is a shallow copy; the category array must be copied too,
    // otherwise every policy would share (and could mutate) the default's array.
    retry: { ...DEFAULT_RETRY, retryableCategories: [...DEFAULT_RETRY.retryableCategories] },
    circuitBreaker: { ...DEFAULT_CIRCUIT_BREAKER },
    fallbackChain: [],
    batchTier: ['immediate'],
    errorMapping: {},
  }
}
|
|
159
|
+
|
|
160
|
+
// ============================================================================
|
|
161
|
+
// Derivation layer
|
|
162
|
+
// ============================================================================
|
|
163
|
+
|
|
164
|
+
/**
 * Convert a price-per-token string (e.g. "0.000003", as delivered by
 * OpenRouter) into a number.
 *
 * @param p - Price string; may be missing or empty.
 * @returns The numeric price, or 0 when the input is absent, empty, or does
 *   not parse to a finite number.
 */
function parsePrice(p?: string): number {
  const value = p ? Number(p) : 0
  return Number.isFinite(value) ? value : 0
}
|
|
173
|
+
|
|
174
|
+
/**
 * Matches a tier/size qualifier that forms a whole `-`/`_`-delimited segment
 * of a model name, together with everything after it. Anchoring on segment
 * boundaries prevents matches inside longer words ("mini" inside "gemini",
 * "pro" inside "prover").
 */
const FAMILY_QUALIFIER_TAIL =
  /(?:^|[-_])(?:opus|sonnet|haiku|mini|pro|flash|lite|maverick|instruct)(?:[-_].*)?$/

/**
 * Extract a "family" key from a model id, used to group siblings when
 * building fallback chains.
 *
 *   'anthropic/claude-opus-4.5' → 'anthropic/claude'
 *   'openai/gpt-4o-mini'        → 'openai/gpt'
 *   'google/gemini-2.5-pro'     → 'google/gemini'
 *
 * The name part is lower-cased, cut at the first digit (with one preceding
 * `-`/`_`), then cut at the first qualifier segment. If nothing is left, the
 * first `-`-separated segment of the name is used instead.
 *
 * @param id - Model id, normally `provider/name`.
 * @returns `provider/family`, or `id` unchanged when it contains no slash.
 */
function familyKey(id: string): string {
  const slash = id.indexOf('/')
  if (slash < 0) return id
  const provider = id.substring(0, slash)
  const rest = id.substring(slash + 1).toLowerCase()
  const family = rest
    .replace(/[-_]?\d.*$/, '') // drop -4.5, -2.0, etc
    .replace(FAMILY_QUALIFIER_TAIL, '') // drop -opus, -mini-latest, etc
  return `${provider}/${family || rest.split('-')[0]}`
}
|
|
191
|
+
|
|
192
|
+
/**
 * Build the ordered list of models to try when `model` fails.
 *
 * 1. Same-family siblings (same `familyKey`, excluding the model itself) are
 *    ranked by `created` descending, ties broken by completion price
 *    descending; at most the top two are taken.
 * 2. Frontier seeds from `FRONTIER_FALLBACK` are then appended in list order,
 *    skipping ids already present and any seed in the model's own family.
 * 3. Seeds stop being added once the chain holds 4 entries.
 *
 * @param model - The model the chain is for.
 * @param allModels - Catalog to search for siblings.
 * @returns Model ids in fallback order; never contains `model.id`.
 */
function deriveFallbackChain(model: ModelInfo, allModels: ModelInfo[]): string[] {
  const ownFamily = familyKey(model.id)
  const visited = new Set<string>([model.id])
  const result: string[] = []
  const include = (id: string): void => {
    result.push(id)
    visited.add(id)
  }

  // Rank the siblings: newest first, then most expensive first.
  const ranked = allModels
    .filter((candidate) => candidate.id !== model.id && familyKey(candidate.id) === ownFamily)
    .map((candidate) => ({
      id: candidate.id,
      // `created` is not declared on ModelInfo here; missing values rank as oldest.
      created: (candidate as ModelInfo & { created?: number }).created ?? 0,
      price: parsePrice(candidate.pricing?.completion),
    }))
    .sort((left, right) => {
      if (right.created !== left.created) return right.created - left.created
      return right.price - left.price
    })

  for (const { id } of ranked.slice(0, 2)) {
    if (visited.has(id)) continue
    include(id)
  }

  // Top up with cross-family frontier seeds.
  for (const seed of FRONTIER_FALLBACK) {
    if (result.length >= 4) break
    if (visited.has(seed) || familyKey(seed) === ownFamily) continue
    include(seed)
  }

  return result
}
|
|
239
|
+
|
|
240
|
+
/**
 * Work out which tiers a provider's models are eligible for.
 *
 * @param provider - Provider slug.
 * @returns Always starts with `'immediate'`, followed by `'flex'` and/or
 *   `'batch'` (in that order) when the provider is in the matching set.
 */
function deriveBatchTiers(provider: string): BatchTier[] {
  const optionalTiers: ReadonlyArray<readonly [BatchTier, Set<string>]> = [
    ['flex', FLEX_PROVIDERS],
    ['batch', BATCH_PROVIDERS],
  ]
  const result: BatchTier[] = ['immediate']
  for (const [tier, providers] of optionalTiers) {
    if (providers.has(provider)) result.push(tier)
  }
  return result
}
|
|
249
|
+
|
|
250
|
+
/**
 * Derive the retry policy for a provider.
 *
 * Frontier providers get one extra attempt (4 instead of the default 3) and
 * 0.2 jitter; the original author's rationale is that their rate limits tend
 * to be more transient than those of long-tail providers. Everyone else gets
 * a copy of `DEFAULT_RETRY`.
 *
 * The returned object is fully independent of `DEFAULT_RETRY`, including its
 * `retryableCategories` array.
 *
 * @param provider - Provider slug.
 * @returns A fresh retry policy object.
 */
function deriveRetry(provider: string): RetryPolicyData {
  // Spread alone is a shallow copy; copy the array as well so a caller
  // mutating one policy cannot alter the shared default (or other policies).
  const base: RetryPolicyData = {
    ...DEFAULT_RETRY,
    retryableCategories: [...DEFAULT_RETRY.retryableCategories],
  }
  if (FRONTIER_PROVIDERS.has(provider)) {
    return { ...base, maxRetries: 4, jitter: 0.2 }
  }
  return base
}
|
|
260
|
+
|
|
261
|
+
/**
 * Derive the circuit-breaker policy for a provider.
 *
 * Frontier providers tolerate more failures before opening (8) and reset
 * sooner (20000); all other providers get a copy of
 * `DEFAULT_CIRCUIT_BREAKER`.
 *
 * @param provider - Provider slug.
 * @returns A fresh circuit-breaker policy object.
 */
function deriveCircuitBreaker(provider: string): CircuitBreakerPolicyData {
  return FRONTIER_PROVIDERS.has(provider)
    ? { failureThreshold: 8, resetTimeout: 20000, successThreshold: 1 }
    : { ...DEFAULT_CIRCUIT_BREAKER }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Derivation layer: turn a catalog entry into a `ModelPolicy` by applying
 * the provider-based heuristics in this module.
 *
 * The provider is the text before the first `/` of `model.id`; when the id
 * has no usable prefix, `model.provider` is used, and failing that
 * `'unknown'`.
 *
 * The result depends only on its arguments when `allModels` is supplied;
 * otherwise the catalog is read via `list()`.
 *
 * @param model - The catalog entry for the model.
 * @param allModels - The full catalog, used for sibling lookup. Optional;
 *   defaults to `list()`.
 * @returns The derived policy; `errorMapping` is always empty.
 */
export function derivePolicy(model: ModelInfo, allModels?: ModelInfo[]): ModelPolicy {
  const catalog = allModels ?? list()
  const separator = model.id.indexOf('/')
  const provider =
    separator > 0 ? model.id.substring(0, separator) : (model.provider ?? 'unknown')

  const retry = deriveRetry(provider)
  const circuitBreaker = deriveCircuitBreaker(provider)
  const fallbackChain = deriveFallbackChain(model, catalog)
  const batchTier = deriveBatchTiers(provider)

  return {
    $type: 'ModelPolicy',
    $id: model.id,
    provider,
    retry,
    circuitBreaker,
    fallbackChain,
    batchTier,
    errorMapping: {},
  }
}
|
|
298
|
+
|
|
299
|
+
// ============================================================================
|
|
300
|
+
// Public API
|
|
301
|
+
// ============================================================================
|
|
302
|
+
|
|
303
|
+
/**
 * Module-level memo of policies already computed by `policyFor`, keyed by
 * the resolved model id. Emptied by `resetPolicyCache`.
 */
const policyCache: Map<string, ModelPolicy> = new Map()
|
|
305
|
+
|
|
306
|
+
/**
 * Look up the policy for an alias or full model id.
 *
 * The input is resolved to a model id first. Catalog models get a derived
 * policy; anything not in the catalog gets `defaultPolicy(id)`, so callers
 * always receive a usable policy. Results are cached per resolved id, and
 * repeated calls return the same object instance.
 *
 * @param input - Alias or model id.
 * @returns The (possibly cached) policy for the resolved model.
 *
 * @example
 * ```ts
 * const p = policyFor('sonnet')
 * // p.fallbackChain → ['anthropic/claude-opus-4.5', 'openai/gpt-4o', ...]
 * // p.batchTier     → ['immediate', 'batch']
 * ```
 */
export function policyFor(input: string): ModelPolicy {
  const resolvedId = resolve(input)

  let policy = policyCache.get(resolvedId)
  if (!policy) {
    const entry = get(resolvedId)
    policy = entry ? derivePolicy(entry) : defaultPolicy(resolvedId)
    policyCache.set(resolvedId, policy)
  }
  return policy
}
|
|
329
|
+
|
|
330
|
+
/**
 * Drop every cached policy so the next `policyFor` call recomputes it.
 * Intended for tests and for use after the catalog has been reloaded.
 */
export function resetPolicyCache(): void {
  const cache = policyCache
  cache.clear()
}
|
|
336
|
+
|
|
337
|
+
/**
 * Enumerate every alias name defined in `ALIASES`. Handy for tooling that
 * wants to walk all derived policies (e.g. a static snapshot generator).
 *
 * @returns A new array of alias names.
 */
export function listAliases(): string[] {
  const aliasNames = Object.keys(ALIASES)
  return aliasNames
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* language-models / pricing — canonical LLM model pricing table.
|
|
3
|
+
*
|
|
4
|
+
* Consume via the subpath export:
|
|
5
|
+
*
|
|
6
|
+
* import { PRICING_TABLE, priceFor, type ModelPricing } from 'language-models/pricing'
|
|
7
|
+
*
|
|
8
|
+
* (Equivalent symbols are also re-exported from the package root —
|
|
9
|
+
* `import { priceFor } from 'language-models'` works too.)
|
|
10
|
+
*
|
|
11
|
+
* Rates are sourced from public Vertex / Bedrock / AI Studio list prices.
|
|
12
|
+
* `priceFor()` throws on unknown slug/tier rather than returning a silent
|
|
13
|
+
* zero (per BYOK_GATEWAY_LIES discipline: loud failure beats silent
|
|
14
|
+
* downgrade).
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
export type {
|
|
18
|
+
HasPricingArgs,
|
|
19
|
+
ModelPricing,
|
|
20
|
+
PriceForArgs,
|
|
21
|
+
PriceForResult,
|
|
22
|
+
PricingTier,
|
|
23
|
+
Provider,
|
|
24
|
+
RateBlock,
|
|
25
|
+
} from './types.js'
|
|
26
|
+
|
|
27
|
+
export { PRICING_TABLE } from './table.js'
|
|
28
|
+
|
|
29
|
+
export { priceFor, listSlugs, hasPricing, rowsForSlug } from './lookup.js'
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* language-models / pricing — lookup helpers.
|
|
3
|
+
*
|
|
4
|
+
* The single source of truth for cost computation across the Phase-2
|
|
5
|
+
* three-repo cascade. Throws on unknown slug/tier (per BYOK_GATEWAY_LIES
|
|
6
|
+
* memory: silent zero is the lying-gateway pattern; loud failure beats
|
|
7
|
+
* silent downgrade).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { PRICING_TABLE } from './table.js'
|
|
11
|
+
import type {
|
|
12
|
+
HasPricingArgs,
|
|
13
|
+
ModelPricing,
|
|
14
|
+
PriceForArgs,
|
|
15
|
+
PriceForResult,
|
|
16
|
+
PricingTier,
|
|
17
|
+
} from './types.js'
|
|
18
|
+
|
|
19
|
+
/**
 * Compute the USD cost of a generation.
 *
 * Context tiers: when the row defines both `contextTierBreakpoint` and
 * `contextTierAbove`, and `inputTokens >= contextTierBreakpoint`, the whole
 * request (input, cached input and output) is billed at the
 * `contextTierAbove` rates.
 * NOTE(review): the comparison is inclusive (`>=`); confirm that the
 * breakpoint values in the pricing table are defined to match.
 *
 * Cached input: `inputTokens` is the TOTAL input including cached tokens.
 * The cached portion (capped at `inputTokens`) is billed at
 * `cachedInputPer1M` when the rate block defines it, otherwise at the
 * regular input rate; the remainder is billed at the regular input rate.
 *
 * @param args - Slug, tier and token counts of the generation.
 * @returns Input, output and total cost in USD.
 * @throws RangeError if any token count is negative, NaN or infinite —
 *   such values would otherwise yield a silently wrong (NaN/Infinity) cost.
 * @throws Error if the slug is unknown, or has no row for the requested tier.
 */
export function priceFor(args: PriceForArgs): PriceForResult {
  const { slug, tier, inputTokens, outputTokens, cachedInputTokens } = args
  const cachedRequested = cachedInputTokens ?? 0

  // `NaN < 0` is false, so a plain sign check would let NaN through; require
  // finite values explicitly to keep the fail-loud contract.
  const isValidCount = (count: number): boolean => Number.isFinite(count) && count >= 0
  if (
    !isValidCount(inputTokens) ||
    !isValidCount(outputTokens) ||
    !isValidCount(cachedRequested)
  ) {
    throw new RangeError(
      `priceFor() requires non-negative token counts; got input=${inputTokens}, output=${outputTokens}, cachedInput=${cachedRequested}`
    )
  }

  const row = findRow(slug, tier)

  // Pick the right rate block based on the context-tier breakpoint.
  const above = row.contextTierAbove
  const block =
    typeof row.contextTierBreakpoint === 'number' &&
    above !== undefined &&
    inputTokens >= row.contextTierBreakpoint
      ? above
      : {
          inputPer1M: row.inputPer1M,
          outputPer1M: row.outputPer1M,
          cachedInputPer1M: row.cachedInputPer1M,
        }

  // Cached tokens can never exceed the total input they are part of.
  const cachedTok = Math.min(cachedRequested, inputTokens)
  const nonCachedInputTok = inputTokens - cachedTok
  const cachedRate = block.cachedInputPer1M ?? block.inputPer1M

  const inputUsd =
    (nonCachedInputTok / 1_000_000) * block.inputPer1M + (cachedTok / 1_000_000) * cachedRate
  const outputUsd = (outputTokens / 1_000_000) * block.outputPer1M
  const totalUsd = inputUsd + outputUsd

  return { inputUsd, outputUsd, totalUsd }
}
|
|
72
|
+
|
|
73
|
+
/**
 * List each distinct slug in the pricing table once, in order of first
 * appearance. Useful for adapters that want to validate caller-supplied
 * model ids before dispatching.
 *
 * @returns A new array of unique slugs.
 */
export function listSlugs(): readonly string[] {
  const unique = new Set<string>(PRICING_TABLE.map((row) => row.slug))
  return Array.from(unique)
}
|
|
82
|
+
|
|
83
|
+
/**
 * Non-throwing existence check for a (slug, tier) pair.
 *
 * Use it before `priceFor()` when an unknown model should not raise — e.g.
 * a metering middleware that prefers to record "unknown cost" telemetry for
 * a model that is not registered yet.
 *
 * @param args - The slug and tier to look for.
 * @returns `true` when the table has a row for exactly that slug and tier.
 */
export function hasPricing(args: HasPricingArgs): boolean {
  const { slug, tier } = args
  for (const row of PRICING_TABLE) {
    if (row.slug === slug && row.tier === tier) return true
  }
  return false
}
|
|
93
|
+
|
|
94
|
+
/**
 * Collect every pricing row for a slug, across all of its tiers, in table
 * order. Useful for tooling that wants to show e.g. "this model has
 * standard + batch tiers" in a UI.
 *
 * @param slug - Model slug to look up.
 * @returns A new array of matching rows; empty when the slug is unknown.
 */
export function rowsForSlug(slug: string): readonly ModelPricing[] {
  const matches: ModelPricing[] = []
  for (const row of PRICING_TABLE) {
    if (row.slug === slug) matches.push(row)
  }
  return matches
}
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Internal
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
|
|
107
|
+
/**
 * Find the pricing row for an exact (slug, tier) pair, failing loudly when
 * there is none.
 *
 * @param slug - Model slug.
 * @param tier - Pricing tier.
 * @returns The matching row.
 * @throws Error naming up to 8 known slugs when the slug has no rows at all.
 * @throws Error naming the slug's available tiers when only the tier is missing.
 */
function findRow(slug: string, tier: PricingTier): ModelPricing {
  const slugRows = PRICING_TABLE.filter((row) => row.slug === slug)
  if (slugRows.length === 0) {
    // Build the slug list once instead of once per use in the message.
    const known = listSlugs()
    const preview = known.slice(0, 8).join(', ')
    const overflow = known.length > 8 ? `, ...(${known.length - 8} more)` : ''
    throw new Error(`Unknown model slug: '${slug}'. Known slugs: ${preview}${overflow}`)
  }
  const tierRow = slugRows.find((row) => row.tier === tier)
  if (tierRow === undefined) {
    const availableTiers = slugRows.map((row) => row.tier)
    throw new Error(
      `No '${tier}' pricing for slug '${slug}'. Available tiers: ${availableTiers.join(', ')}`
    )
  }
  return tierRow
}
|