language-models 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/.turbo/turbo-build.log +1 -1
  2. package/CHANGELOG.md +36 -0
  3. package/README.md +106 -43
  4. package/dist/index.d.ts +3 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +13 -1
  7. package/dist/index.js.map +1 -1
  8. package/dist/models.d.ts +1 -1
  9. package/dist/models.d.ts.map +1 -1
  10. package/dist/models.js +8 -10
  11. package/dist/models.js.map +1 -1
  12. package/dist/policy.d.ts +127 -0
  13. package/dist/policy.d.ts.map +1 -0
  14. package/dist/policy.js +246 -0
  15. package/dist/policy.js.map +1 -0
  16. package/dist/pricing/index.d.ts +19 -0
  17. package/dist/pricing/index.d.ts.map +1 -0
  18. package/dist/pricing/index.js +18 -0
  19. package/dist/pricing/index.js.map +1 -0
  20. package/dist/pricing/lookup.d.ts +46 -0
  21. package/dist/pricing/lookup.d.ts.map +1 -0
  22. package/dist/pricing/lookup.js +94 -0
  23. package/dist/pricing/lookup.js.map +1 -0
  24. package/dist/pricing/table.d.ts +46 -0
  25. package/dist/pricing/table.d.ts.map +1 -0
  26. package/dist/pricing/table.js +214 -0
  27. package/dist/pricing/table.js.map +1 -0
  28. package/dist/pricing/types.d.ts +84 -0
  29. package/dist/pricing/types.d.ts.map +1 -0
  30. package/dist/pricing/types.js +32 -0
  31. package/dist/pricing/types.js.map +1 -0
  32. package/package.json +6 -2
  33. package/src/index.ts +42 -1
  34. package/src/models.ts +8 -12
  35. package/src/policy.ts +343 -0
  36. package/src/pricing/index.ts +29 -0
  37. package/src/pricing/lookup.ts +124 -0
  38. package/src/pricing/table.ts +235 -0
  39. package/src/pricing/types.ts +90 -0
  40. package/{src → test}/aliases.test.ts +20 -22
  41. package/{src → test}/index.test.ts +9 -9
  42. package/{src → test}/models.test.ts +8 -6
  43. package/test/policy.test.ts +203 -0
  44. package/test/pricing.test.ts +279 -0
  45. package/vitest.config.ts +21 -1
  46. package/.turbo/turbo-test.log +0 -7
  47. package/src/aliases.js +0 -40
  48. package/src/aliases.test.js +0 -264
  49. package/src/index.js +0 -9
  50. package/src/index.test.js +0 -320
  51. package/src/models.js +0 -108
  52. package/src/models.test.js +0 -335
  53. package/vitest.config.js +0 -10
package/src/policy.ts ADDED
@@ -0,0 +1,343 @@
1
+ /**
2
+ * ModelPolicy - per-model resilience and tier policy data
3
+ *
4
+ * `language-models` owns model identity (alias resolution, capability lookup).
5
+ * Resilience policy (retry, circuit breaker, fallback chain, batch tier) is
6
+ * a per-model concern that belongs alongside the catalog data — but the
7
+ * runtime *machinery* that applies the policy lives in `ai-functions`.
8
+ *
9
+ * This module provides:
10
+ * - `ModelPolicy` MDXLD type (`$type: 'ModelPolicy'`)
11
+ * - `policyFor(alias)` - resolve an alias and return its derived policy
12
+ * - `derivePolicy(model, allModels?)` - inference layer that turns OpenRouter raw
13
+ * data into a policy by applying heuristics (newest frontier model is best,
14
+ * price within a family is positively correlated with capability, etc.)
15
+ *
16
+ * Source of truth: OpenRouter raw catalog (data/models.json) + heuristics.
17
+ * Strategy: runtime derivation, cached. A static snapshot generator could be
18
+ * added later (see `derivePolicy` — it's pure, so you can pre-compute it).
19
+ *
20
+ * @packageDocumentation
21
+ */
22
+
23
+ import { resolve, get, list, type ModelInfo } from './models.js'
24
+ import { ALIASES } from './aliases.js'
25
+
26
+ // ============================================================================
27
+ // MDXLD types
28
+ // ============================================================================
29
+
30
+ /**
31
+ * Error category taxonomy. Mirrors `ai-functions`'s `ErrorCategory` enum by
32
+ * string value — we don't import it (circular), but the strings line up.
33
+ */
34
+ export type ErrorCategoryName =
35
+ | 'network'
36
+ | 'rate_limit'
37
+ | 'invalid_input'
38
+ | 'authentication'
39
+ | 'server'
40
+ | 'context_length'
41
+ | 'unknown'
42
+
43
+ /**
44
+ * Retry classification: which error categories are retryable for this model,
45
+ * plus backoff parameters.
46
+ */
47
+ export interface RetryPolicyData {
48
+ maxRetries: number
49
+ baseDelay: number
50
+ maxDelay: number
51
+ multiplier: number
52
+ jitter: number
53
+ /** Categories that should trigger a retry */
54
+ retryableCategories: ErrorCategoryName[]
55
+ }
56
+
57
+ /**
58
+ * Circuit-breaker policy data (per-model state keys come from the alias).
59
+ */
60
+ export interface CircuitBreakerPolicyData {
61
+ failureThreshold: number
62
+ resetTimeout: number
63
+ successThreshold: number
64
+ }
65
+
66
+ /**
67
+ * Tiers a model is eligible for.
68
+ *
69
+ * - `immediate`: synchronous online inference (always available)
70
+ * - `flex`: faster-than-batch processing (~minutes, ~50% discount)
71
+ * — only OpenAI/Bedrock currently
72
+ * - `batch`: batch API processing (~hours, ~50% discount)
73
+ * — OpenAI/Anthropic/Google/Bedrock/Cloudflare
74
+ */
75
+ export type BatchTier = 'immediate' | 'flex' | 'batch'
76
+
77
+ /**
78
+ * Provider-specific HTTP status code → ErrorCategory mapping.
79
+ * Empty here by default — `ai-functions/retry.ts#classifyError` handles the
80
+ * common cases. Override per-model if a provider has unusual error codes.
81
+ */
82
+ export type ErrorMapping = Record<number, ErrorCategoryName>
83
+
84
+ /**
85
+ * MDXLD-shaped per-model resilience and tier policy.
86
+ *
87
+ * `$type: 'ModelPolicy'`, `$id` is the resolved model id (e.g.
88
+ * `'anthropic/claude-opus-4.5'`).
89
+ */
90
+ export interface ModelPolicy {
91
+ $type: 'ModelPolicy'
92
+ $id: string
93
+ /** Provider slug (e.g. 'anthropic') */
94
+ provider: string
95
+ retry: RetryPolicyData
96
+ circuitBreaker: CircuitBreakerPolicyData
97
+ /** Ordered list of model ids to try after this one fails */
98
+ fallbackChain: string[]
99
+ /** Tiers this model is eligible for */
100
+ batchTier: BatchTier[]
101
+ /** Provider-specific HTTP code → category overrides */
102
+ errorMapping: ErrorMapping
103
+ }
104
+
105
+ // ============================================================================
106
+ // Defaults & heuristics
107
+ // ============================================================================
108
+
109
+ /** Frontier labs — newer releases tend to be more capable. */
110
+ const FRONTIER_PROVIDERS = new Set(['anthropic', 'openai', 'google'])
111
+
112
+ /** Providers with batch APIs supported by ai-functions. */
113
+ const BATCH_PROVIDERS = new Set(['anthropic', 'openai', 'google', 'amazon-bedrock', 'cloudflare'])
114
+
115
+ /** Providers with flex (faster-than-batch) APIs. */
116
+ const FLEX_PROVIDERS = new Set(['openai', 'amazon-bedrock'])
117
+
118
+ /** Curated fallback seeds — picked one per frontier family. */
119
+ const FRONTIER_FALLBACK: readonly string[] = [
120
+ 'anthropic/claude-sonnet-4.5',
121
+ 'anthropic/claude-opus-4.5',
122
+ 'openai/gpt-4o',
123
+ 'google/gemini-2.5-pro',
124
+ ]
125
+
126
+ /** Default retry policy — matches `ai-functions` `RetryPolicy` defaults. */
127
+ export const DEFAULT_RETRY: RetryPolicyData = {
128
+ maxRetries: 3,
129
+ baseDelay: 1000,
130
+ maxDelay: 30000,
131
+ multiplier: 2,
132
+ jitter: 0,
133
+ retryableCategories: ['network', 'rate_limit', 'server', 'unknown'],
134
+ }
135
+
136
+ /** Default circuit breaker — matches `ai-functions` defaults. */
137
+ export const DEFAULT_CIRCUIT_BREAKER: CircuitBreakerPolicyData = {
138
+ failureThreshold: 5,
139
+ resetTimeout: 30000,
140
+ successThreshold: 1,
141
+ }
142
+
/**
 * Build the policy used for a model id that has no catalog entry.
 *
 * The provider is the text before the first `/` in `modelId`. Ids without a
 * slash, or with a leading slash (empty provider segment), get the provider
 * `'unknown'` — the same `slash > 0` rule `derivePolicy` applies.
 *
 * The retry and circuit-breaker settings are independent copies of the
 * defaults, including a fresh `retryableCategories` array, so mutating the
 * returned policy cannot leak into `DEFAULT_RETRY` or into other policies.
 *
 * @param modelId - Model id, e.g. `'anthropic/claude-opus-4.5'`.
 * @returns A policy with default retry / circuit-breaker values, an empty
 *   fallback chain, the `immediate` tier only, and no error-code overrides.
 */
export function defaultPolicy(modelId: string): ModelPolicy {
  const slash = modelId.indexOf('/')
  const provider = slash > 0 ? modelId.slice(0, slash) : 'unknown'
  return {
    $type: 'ModelPolicy',
    $id: modelId,
    provider,
    // Spread alone is a shallow copy; clone the nested array explicitly.
    retry: { ...DEFAULT_RETRY, retryableCategories: [...DEFAULT_RETRY.retryableCategories] },
    circuitBreaker: { ...DEFAULT_CIRCUIT_BREAKER },
    fallbackChain: [],
    batchTier: ['immediate'],
    errorMapping: {},
  }
}
159
+
160
+ // ============================================================================
161
+ // Derivation layer
162
+ // ============================================================================
163
+
/**
 * Convert a price-per-token string (e.g. "0.000003") into a number.
 *
 * A missing or empty string, and anything that does not convert to a finite
 * number, yields 0.
 */
function parsePrice(p?: string): number {
  const value = p ? Number(p) : 0
  if (!Number.isFinite(value)) return 0
  return value
}
173
+
/**
 * Reduce a model id to a "provider/family" key used to group siblings.
 *
 *   'anthropic/claude-opus-4.5' → 'anthropic/claude'
 *   'openai/gpt-4o-mini'        → 'openai/gpt'
 *
 * Ids without a '/' are returned unchanged. The provider part keeps its
 * original casing; the model part is lower-cased before stripping.
 *
 * Note: the qualifier patterns are not anchored to word boundaries, so they
 * also match inside longer words — e.g. the 'mini' in 'gemini' — which makes
 * 'google/gemini-2.5-pro' map to 'google/ge'. The key is still the same for
 * every id that reduces to the same stem, so grouping is unaffected.
 */
function familyKey(id: string): string {
  const sep = id.indexOf('/')
  if (sep === -1) return id

  const vendor = id.slice(0, sep)
  const name = id.slice(sep + 1).toLowerCase()

  // 1. Cut everything from the first digit onward (with an optional leading - or _).
  const unversioned = name.replace(/[-_]?\d.*$/, '')
  // 2. Truncate right after the first tier/size qualifier found...
  const truncated = unversioned.replace(
    /(opus|sonnet|haiku|mini|pro|flash|lite|maverick|instruct).*$/,
    '$1'
  )
  // 3. ...then drop that trailing qualifier (and its leading dash) entirely.
  const stem = truncated.replace(
    /-?(opus|sonnet|haiku|mini|pro|flash|lite|maverick|instruct)$/,
    ''
  )

  // If stripping left nothing, fall back to the first dash-separated token.
  const family = stem !== '' ? stem : name.split('-')[0]
  return `${vendor}/${family}`
}
191
+
/**
 * Build the ordered list of model ids to try after `model` fails.
 *
 * 1. Same-family siblings from `allModels` (same `familyKey`, different id),
 *    ranked by `created` descending (missing `created` counts as 0) and then
 *    by completion price descending. At most the top two are taken.
 * 2. Seeds from `FRONTIER_FALLBACK`, in order, skipping any seed already in
 *    the chain, equal to the model itself, or belonging to the model's own
 *    family.
 * 3. Seeds stop being added once the chain holds 4 entries.
 */
function deriveFallbackChain(model: ModelInfo, allModels: ModelInfo[]): string[] {
  const ownFamily = familyKey(model.id)
  const taken = new Set<string>([model.id])
  const result: string[] = []

  // Rank same-family siblings: newest first, ties broken by higher price.
  const ranked = allModels
    .filter((candidate) => candidate.id !== model.id && familyKey(candidate.id) === ownFamily)
    .map((candidate) => ({
      id: candidate.id,
      created: (candidate as ModelInfo & { created?: number }).created ?? 0,
      price: parsePrice(candidate.pricing?.completion),
    }))
    .sort((left, right) =>
      left.created === right.created ? right.price - left.price : right.created - left.created
    )

  for (const { id } of ranked.slice(0, 2)) {
    if (taken.has(id)) continue
    taken.add(id)
    result.push(id)
  }

  // Top up with curated frontier seeds from other families.
  for (const seed of FRONTIER_FALLBACK) {
    if (result.length >= 4) break
    if (taken.has(seed) || familyKey(seed) === ownFamily) continue
    taken.add(seed)
    result.push(seed)
  }

  return result
}
239
+
/**
 * List the tiers a provider's models are eligible for.
 *
 * `immediate` is always present; `flex` is added for providers in
 * `FLEX_PROVIDERS` and `batch` for providers in `BATCH_PROVIDERS`, in that
 * order.
 */
function deriveBatchTiers(provider: string): BatchTier[] {
  const optionalTiers: Array<[BatchTier, Set<string>]> = [
    ['flex', FLEX_PROVIDERS],
    ['batch', BATCH_PROVIDERS],
  ]
  const eligible: BatchTier[] = ['immediate']
  for (const [tier, providers] of optionalTiers) {
    if (providers.has(provider)) eligible.push(tier)
  }
  return eligible
}
249
+
/**
 * Derive the retry policy for a provider.
 *
 * Providers in `FRONTIER_PROVIDERS` get `maxRetries: 4` and `jitter: 0.2`
 * on top of the defaults; every other provider gets the defaults unchanged.
 *
 * The result never shares state with `DEFAULT_RETRY`: the nested
 * `retryableCategories` array is cloned, because an object spread on its own
 * only copies the array reference.
 */
function deriveRetry(provider: string): RetryPolicyData {
  const base: RetryPolicyData = {
    ...DEFAULT_RETRY,
    retryableCategories: [...DEFAULT_RETRY.retryableCategories],
  }
  return FRONTIER_PROVIDERS.has(provider) ? { ...base, maxRetries: 4, jitter: 0.2 } : base
}
260
+
/**
 * Derive the circuit-breaker policy for a provider.
 *
 * Providers in `FRONTIER_PROVIDERS` get a failure threshold of 8, a reset
 * timeout of 20000 and a success threshold of 1; all others get a copy of
 * `DEFAULT_CIRCUIT_BREAKER`.
 */
function deriveCircuitBreaker(provider: string): CircuitBreakerPolicyData {
  return FRONTIER_PROVIDERS.has(provider)
    ? { failureThreshold: 8, resetTimeout: 20000, successThreshold: 1 }
    : { ...DEFAULT_CIRCUIT_BREAKER }
}
271
+
/**
 * Turn a catalog entry into a `ModelPolicy` by applying the provider and
 * family heuristics defined in this module.
 *
 * The provider is the text before the first `/` of `model.id`; when the id
 * has no slash (or starts with one) it falls back to `model.provider`, then
 * to `'unknown'`. This function does not read or write the policy cache.
 *
 * @param model - The catalog entry for the model.
 * @param allModels - Catalog used to find same-family siblings for the
 *   fallback chain. Defaults to `list()` when omitted.
 * @returns A freshly built policy whose `$id` is `model.id`.
 */
export function derivePolicy(model: ModelInfo, allModels?: ModelInfo[]): ModelPolicy {
  const catalog = allModels ?? list()
  const sep = model.id.indexOf('/')
  const provider = sep <= 0 ? (model.provider ?? 'unknown') : model.id.slice(0, sep)

  const retry = deriveRetry(provider)
  const circuitBreaker = deriveCircuitBreaker(provider)
  const fallbackChain = deriveFallbackChain(model, catalog)
  const batchTier = deriveBatchTiers(provider)

  return {
    $type: 'ModelPolicy',
    $id: model.id,
    provider,
    retry,
    circuitBreaker,
    fallbackChain,
    batchTier,
    errorMapping: {},
  }
}
298
+
299
+ // ============================================================================
300
+ // Public API
301
+ // ============================================================================
302
+
303
+ /** Per-process cache of derived policies, keyed by resolved model id. */
304
+ const policyCache = new Map<string, ModelPolicy>()
305
+
/**
 * Resolve an alias (or full model id) and return the policy for it.
 *
 * Policies are cached per process under the resolved id, so repeated calls
 * return the same object. When the resolved id has no catalog entry the
 * result is `defaultPolicy(id)` — callers always receive a usable policy.
 *
 * @example
 * ```ts
 * const p = policyFor('sonnet')
 * // p.$id           → the resolved model id
 * // p.fallbackChain → same-family siblings first, then frontier seeds
 * // p.batchTier     → always starts with 'immediate'
 * ```
 */
export function policyFor(input: string): ModelPolicy {
  const modelId = resolve(input)

  let policy = policyCache.get(modelId)
  if (!policy) {
    const entry = get(modelId)
    policy = entry ? derivePolicy(entry) : defaultPolicy(modelId)
    policyCache.set(modelId, policy)
  }
  return policy
}
329
+
/**
 * Drop every cached policy so the next `policyFor()` call derives it again.
 * Intended for tests and for callers that have reloaded the catalog.
 */
export function resetPolicyCache(): void {
  policyCache.clear()
}
336
+
/**
 * Return the names of all known aliases (the keys of `ALIASES`).
 * Handy for tooling that enumerates derived policies, such as a static
 * snapshot generator.
 */
export function listAliases(): string[] {
  const aliasNames = Object.keys(ALIASES)
  return aliasNames
}
@@ -0,0 +1,29 @@
1
+ /**
2
+ * language-models / pricing — canonical LLM model pricing table.
3
+ *
4
+ * Consume via the subpath export:
5
+ *
6
+ * import { PRICING_TABLE, priceFor, type ModelPricing } from 'language-models/pricing'
7
+ *
8
+ * (Equivalent symbols are also re-exported from the package root —
9
+ * `import { priceFor } from 'language-models'` works too.)
10
+ *
11
+ * Rates are sourced from public Vertex / Bedrock / AI Studio list prices.
12
+ * `priceFor()` throws on unknown slug/tier rather than returning a silent
13
+ * zero (per BYOK_GATEWAY_LIES discipline: loud failure beats silent
14
+ * downgrade).
15
+ */
16
+
17
+ export type {
18
+ HasPricingArgs,
19
+ ModelPricing,
20
+ PriceForArgs,
21
+ PriceForResult,
22
+ PricingTier,
23
+ Provider,
24
+ RateBlock,
25
+ } from './types.js'
26
+
27
+ export { PRICING_TABLE } from './table.js'
28
+
29
+ export { priceFor, listSlugs, hasPricing, rowsForSlug } from './lookup.js'
@@ -0,0 +1,124 @@
1
+ /**
2
+ * language-models / pricing — lookup helpers.
3
+ *
4
+ * The single source of truth for cost computation across the Phase-2
5
+ * three-repo cascade. Throws on unknown slug/tier (per BYOK_GATEWAY_LIES
6
+ * memory: silent zero is the lying-gateway pattern; loud failure beats
7
+ * silent downgrade).
8
+ */
9
+
10
+ import { PRICING_TABLE } from './table.js'
11
+ import type {
12
+ HasPricingArgs,
13
+ ModelPricing,
14
+ PriceForArgs,
15
+ PriceForResult,
16
+ PricingTier,
17
+ } from './types.js'
18
+
/**
 * Compute the USD cost of a single generation.
 *
 * `inputTokens` is the TOTAL input count, including any cached tokens. The
 * cached portion (clamped to `inputTokens`) is billed at `cachedInputPer1M`
 * when the selected rate block defines it, and at the regular input rate
 * otherwise; the remainder is billed at the regular input rate.
 *
 * When the row defines both `contextTierBreakpoint` and `contextTierAbove`
 * and `inputTokens >= contextTierBreakpoint`, the entire request (input,
 * cached input and output) is priced from the `contextTierAbove` block.
 *
 * @throws RangeError if any token count is negative or is not a finite
 *   number (NaN / Infinity) — a silent NaN cost would defeat the
 *   fail-loud contract of this module.
 * @throws Error if the slug is unknown or has no row for the requested tier
 *   (raised by `findRow`).
 */
export function priceFor(args: PriceForArgs): PriceForResult {
  const { slug, tier, inputTokens, outputTokens, cachedInputTokens } = args
  const cachedInput = cachedInputTokens ?? 0

  // `NaN < 0` is false, so a plain sign check would let NaN through.
  const isValidCount = (count: number): boolean => Number.isFinite(count) && count >= 0
  if (![inputTokens, outputTokens, cachedInput].every(isValidCount)) {
    throw new RangeError(
      `priceFor() requires non-negative token counts; got input=${inputTokens}, output=${outputTokens}, cachedInput=${cachedInput}`
    )
  }

  const row = findRow(slug, tier)

  // Pick the rate block based on the context-tier breakpoint, if any.
  const above = row.contextTierAbove
  const block =
    above !== undefined &&
    typeof row.contextTierBreakpoint === 'number' &&
    inputTokens >= row.contextTierBreakpoint
      ? above
      : {
          inputPer1M: row.inputPer1M,
          outputPer1M: row.outputPer1M,
          cachedInputPer1M: row.cachedInputPer1M,
        }

  // Cached tokens can never exceed the total input they are part of.
  const cachedTok = Math.min(cachedInput, inputTokens)
  const nonCachedInputTok = inputTokens - cachedTok
  const cachedRate = block.cachedInputPer1M ?? block.inputPer1M

  const inputUsd =
    (nonCachedInputTok / 1_000_000) * block.inputPer1M + (cachedTok / 1_000_000) * cachedRate
  const outputUsd = (outputTokens / 1_000_000) * block.outputPer1M
  const totalUsd = inputUsd + outputUsd

  return { inputUsd, outputUsd, totalUsd }
}
72
+
/**
 * Return each distinct slug in the pricing table once, in first-seen order.
 * Useful for adapters that validate caller-supplied model ids before
 * dispatching.
 */
export function listSlugs(): readonly string[] {
  return Array.from(new Set(PRICING_TABLE.map((row) => row.slug)))
}
82
+
/**
 * Non-throwing existence check: is there a pricing row for this exact
 * (slug, tier) pair? Use it before `priceFor()` when an unknown model should
 * be handled gracefully (e.g. metering middleware that records "unknown
 * cost" instead of throwing).
 */
export function hasPricing(args: HasPricingArgs): boolean {
  const { slug, tier } = args
  for (const row of PRICING_TABLE) {
    if (row.slug === slug && row.tier === tier) return true
  }
  return false
}
93
+
/**
 * Collect every pricing row for a slug, across all tiers, in table order.
 * Returns an empty array when the slug is not in the table. Useful for
 * tooling that displays which tiers a model offers.
 */
export function rowsForSlug(slug: string): readonly ModelPricing[] {
  const matches: ModelPricing[] = []
  for (const row of PRICING_TABLE) {
    if (row.slug === slug) matches.push(row)
  }
  return matches
}
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // Internal
105
+ // ---------------------------------------------------------------------------
106
+
/**
 * Locate the pricing row for a (slug, tier) pair, failing loudly when it
 * does not exist.
 *
 * @throws Error when no row exists for the slug at all; the message lists up
 *   to eight known slugs plus a count of the remainder.
 * @throws Error when the slug exists but has no row for `tier`; the message
 *   lists the tiers that are available for that slug.
 */
function findRow(slug: string, tier: PricingTier): ModelPricing {
  const slugRows = PRICING_TABLE.filter((row) => row.slug === slug)
  if (slugRows.length === 0) {
    // Build the slug list once instead of re-scanning the table per use.
    const known = listSlugs()
    const preview = known.slice(0, 8).join(', ')
    const overflow = known.length > 8 ? `, ...(${known.length - 8} more)` : ''
    throw new Error(`Unknown model slug: '${slug}'. Known slugs: ${preview}${overflow}`)
  }

  const tierRow = slugRows.find((row) => row.tier === tier)
  if (tierRow === undefined) {
    const availableTiers = slugRows.map((row) => row.tier)
    throw new Error(
      `No '${tier}' pricing for slug '${slug}'. Available tiers: ${availableTiers.join(', ')}`
    )
  }
  return tierRow
}