language-models 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +1 -1
- package/CHANGELOG.md +36 -0
- package/README.md +106 -43
- package/dist/index.d.ts +3 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +13 -1
- package/dist/index.js.map +1 -1
- package/dist/models.d.ts +1 -1
- package/dist/models.d.ts.map +1 -1
- package/dist/models.js +8 -10
- package/dist/models.js.map +1 -1
- package/dist/policy.d.ts +127 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +246 -0
- package/dist/policy.js.map +1 -0
- package/dist/pricing/index.d.ts +19 -0
- package/dist/pricing/index.d.ts.map +1 -0
- package/dist/pricing/index.js +18 -0
- package/dist/pricing/index.js.map +1 -0
- package/dist/pricing/lookup.d.ts +46 -0
- package/dist/pricing/lookup.d.ts.map +1 -0
- package/dist/pricing/lookup.js +94 -0
- package/dist/pricing/lookup.js.map +1 -0
- package/dist/pricing/table.d.ts +46 -0
- package/dist/pricing/table.d.ts.map +1 -0
- package/dist/pricing/table.js +214 -0
- package/dist/pricing/table.js.map +1 -0
- package/dist/pricing/types.d.ts +84 -0
- package/dist/pricing/types.d.ts.map +1 -0
- package/dist/pricing/types.js +32 -0
- package/dist/pricing/types.js.map +1 -0
- package/package.json +6 -2
- package/src/index.ts +42 -1
- package/src/models.ts +8 -12
- package/src/policy.ts +343 -0
- package/src/pricing/index.ts +29 -0
- package/src/pricing/lookup.ts +124 -0
- package/src/pricing/table.ts +235 -0
- package/src/pricing/types.ts +90 -0
- package/{src → test}/aliases.test.ts +20 -22
- package/{src → test}/index.test.ts +9 -9
- package/{src → test}/models.test.ts +8 -6
- package/test/policy.test.ts +203 -0
- package/test/pricing.test.ts +279 -0
- package/vitest.config.ts +21 -1
- package/.turbo/turbo-test.log +0 -7
- package/src/aliases.js +0 -40
- package/src/aliases.test.js +0 -264
- package/src/index.js +0 -9
- package/src/index.test.js +0 -320
- package/src/models.js +0 -108
- package/src/models.test.js +0 -335
- package/vitest.config.js +0 -10
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tests for ModelPolicy derivation layer
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { describe, it, expect, beforeEach } from 'vitest'
|
|
6
|
+
import {
|
|
7
|
+
policyFor,
|
|
8
|
+
derivePolicy,
|
|
9
|
+
defaultPolicy,
|
|
10
|
+
resetPolicyCache,
|
|
11
|
+
DEFAULT_RETRY,
|
|
12
|
+
DEFAULT_CIRCUIT_BREAKER,
|
|
13
|
+
type ModelInfo,
|
|
14
|
+
type ModelPolicy,
|
|
15
|
+
} from '../src/index.js'
|
|
16
|
+
|
|
17
|
+
beforeEach(() => {
|
|
18
|
+
resetPolicyCache()
|
|
19
|
+
})
|
|
20
|
+
|
|
21
|
+
describe('ModelPolicy MDXLD shape', () => {
|
|
22
|
+
it('has $type ModelPolicy', () => {
|
|
23
|
+
const p = policyFor('sonnet')
|
|
24
|
+
expect(p.$type).toBe('ModelPolicy')
|
|
25
|
+
})
|
|
26
|
+
|
|
27
|
+
it('uses resolved model id as $id', () => {
|
|
28
|
+
const p = policyFor('sonnet')
|
|
29
|
+
expect(p.$id).toBe('anthropic/claude-sonnet-4.5')
|
|
30
|
+
})
|
|
31
|
+
})
|
|
32
|
+
|
|
33
|
+
describe('defaultPolicy', () => {
|
|
34
|
+
it('produces a usable default for unknown models', () => {
|
|
35
|
+
const p = defaultPolicy('foo/bar')
|
|
36
|
+
expect(p.$type).toBe('ModelPolicy')
|
|
37
|
+
expect(p.$id).toBe('foo/bar')
|
|
38
|
+
expect(p.provider).toBe('foo')
|
|
39
|
+
expect(p.retry).toEqual(DEFAULT_RETRY)
|
|
40
|
+
expect(p.circuitBreaker).toEqual(DEFAULT_CIRCUIT_BREAKER)
|
|
41
|
+
expect(p.batchTier).toEqual(['immediate'])
|
|
42
|
+
expect(p.fallbackChain).toEqual([])
|
|
43
|
+
})
|
|
44
|
+
|
|
45
|
+
it('handles ids without a provider prefix', () => {
|
|
46
|
+
const p = defaultPolicy('orphan-model')
|
|
47
|
+
expect(p.provider).toBe('unknown')
|
|
48
|
+
})
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
describe('derivePolicy heuristics', () => {
|
|
52
|
+
const sonnet: ModelInfo = {
|
|
53
|
+
id: 'anthropic/claude-sonnet-4.5',
|
|
54
|
+
name: 'Claude Sonnet 4.5',
|
|
55
|
+
context_length: 200000,
|
|
56
|
+
pricing: { prompt: '0.000003', completion: '0.000015' },
|
|
57
|
+
provider: 'anthropic',
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
it('frontier providers get extra retries and jitter', () => {
|
|
61
|
+
const p = derivePolicy(sonnet, [sonnet])
|
|
62
|
+
expect(p.retry.maxRetries).toBe(4)
|
|
63
|
+
expect(p.retry.jitter).toBeGreaterThan(0)
|
|
64
|
+
})
|
|
65
|
+
|
|
66
|
+
it('non-frontier providers get default retries', () => {
|
|
67
|
+
const longTail: ModelInfo = {
|
|
68
|
+
id: 'mistralai/mistral-large-2411',
|
|
69
|
+
name: 'Mistral Large',
|
|
70
|
+
context_length: 128000,
|
|
71
|
+
pricing: { prompt: '0.000002', completion: '0.000006' },
|
|
72
|
+
provider: 'mistralai',
|
|
73
|
+
}
|
|
74
|
+
const p = derivePolicy(longTail, [longTail])
|
|
75
|
+
expect(p.retry).toEqual(DEFAULT_RETRY)
|
|
76
|
+
})
|
|
77
|
+
|
|
78
|
+
it('OpenAI gets immediate, flex, and batch tiers', () => {
|
|
79
|
+
const gpt4o: ModelInfo = {
|
|
80
|
+
id: 'openai/gpt-4o',
|
|
81
|
+
name: 'GPT-4o',
|
|
82
|
+
context_length: 128000,
|
|
83
|
+
pricing: { prompt: '0.0000025', completion: '0.00001' },
|
|
84
|
+
provider: 'openai',
|
|
85
|
+
}
|
|
86
|
+
const p = derivePolicy(gpt4o, [gpt4o])
|
|
87
|
+
expect(p.batchTier).toContain('immediate')
|
|
88
|
+
expect(p.batchTier).toContain('flex')
|
|
89
|
+
expect(p.batchTier).toContain('batch')
|
|
90
|
+
})
|
|
91
|
+
|
|
92
|
+
it('Anthropic gets immediate and batch (no flex)', () => {
|
|
93
|
+
const p = derivePolicy(sonnet, [sonnet])
|
|
94
|
+
expect(p.batchTier).toContain('immediate')
|
|
95
|
+
expect(p.batchTier).toContain('batch')
|
|
96
|
+
expect(p.batchTier).not.toContain('flex')
|
|
97
|
+
})
|
|
98
|
+
|
|
99
|
+
it('frontier circuit breaker has higher threshold', () => {
|
|
100
|
+
const p = derivePolicy(sonnet, [sonnet])
|
|
101
|
+
expect(p.circuitBreaker.failureThreshold).toBeGreaterThan(
|
|
102
|
+
DEFAULT_CIRCUIT_BREAKER.failureThreshold
|
|
103
|
+
)
|
|
104
|
+
})
|
|
105
|
+
|
|
106
|
+
it('retryable categories include network, rate_limit, server', () => {
|
|
107
|
+
const p = derivePolicy(sonnet, [sonnet])
|
|
108
|
+
expect(p.retry.retryableCategories).toContain('network')
|
|
109
|
+
expect(p.retry.retryableCategories).toContain('rate_limit')
|
|
110
|
+
expect(p.retry.retryableCategories).toContain('server')
|
|
111
|
+
})
|
|
112
|
+
})
|
|
113
|
+
|
|
114
|
+
describe('fallback chain derivation', () => {
|
|
115
|
+
const sonnet: ModelInfo = {
|
|
116
|
+
id: 'anthropic/claude-sonnet-4.5',
|
|
117
|
+
name: 'Claude Sonnet 4.5',
|
|
118
|
+
context_length: 200000,
|
|
119
|
+
pricing: { prompt: '0.000003', completion: '0.000015' },
|
|
120
|
+
provider: 'anthropic',
|
|
121
|
+
}
|
|
122
|
+
const opus: ModelInfo = {
|
|
123
|
+
id: 'anthropic/claude-opus-4.5',
|
|
124
|
+
name: 'Claude Opus 4.5',
|
|
125
|
+
context_length: 200000,
|
|
126
|
+
pricing: { prompt: '0.000015', completion: '0.000075' },
|
|
127
|
+
provider: 'anthropic',
|
|
128
|
+
}
|
|
129
|
+
const haiku: ModelInfo = {
|
|
130
|
+
id: 'anthropic/claude-haiku-4.5',
|
|
131
|
+
name: 'Claude Haiku 4.5',
|
|
132
|
+
context_length: 200000,
|
|
133
|
+
pricing: { prompt: '0.000001', completion: '0.000005' },
|
|
134
|
+
provider: 'anthropic',
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
it('prefers same-family siblings before frontier seeds', () => {
|
|
138
|
+
const all = [sonnet, opus, haiku]
|
|
139
|
+
const p = derivePolicy(sonnet, all)
|
|
140
|
+
// First entry should be a Claude sibling
|
|
141
|
+
expect(p.fallbackChain[0]?.startsWith('anthropic/claude-')).toBe(true)
|
|
142
|
+
})
|
|
143
|
+
|
|
144
|
+
it('does not include the model itself', () => {
|
|
145
|
+
const all = [sonnet, opus, haiku]
|
|
146
|
+
const p = derivePolicy(sonnet, all)
|
|
147
|
+
expect(p.fallbackChain).not.toContain(sonnet.id)
|
|
148
|
+
})
|
|
149
|
+
|
|
150
|
+
it('caps fallback chain length', () => {
|
|
151
|
+
const p = policyFor('sonnet')
|
|
152
|
+
expect(p.fallbackChain.length).toBeLessThanOrEqual(4)
|
|
153
|
+
})
|
|
154
|
+
})
|
|
155
|
+
|
|
156
|
+
describe('policyFor caching', () => {
|
|
157
|
+
it('returns the same policy instance for the same alias', () => {
|
|
158
|
+
const a = policyFor('sonnet')
|
|
159
|
+
const b = policyFor('sonnet')
|
|
160
|
+
expect(a).toBe(b)
|
|
161
|
+
})
|
|
162
|
+
|
|
163
|
+
it('resolves aliases to canonical ids', () => {
|
|
164
|
+
const a = policyFor('sonnet')
|
|
165
|
+
const b = policyFor('anthropic/claude-sonnet-4.5')
|
|
166
|
+
// Cache hit means same instance
|
|
167
|
+
expect(a).toBe(b)
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
it('resetPolicyCache invalidates cache', () => {
|
|
171
|
+
const a = policyFor('sonnet')
|
|
172
|
+
resetPolicyCache()
|
|
173
|
+
const b = policyFor('sonnet')
|
|
174
|
+
expect(a).not.toBe(b)
|
|
175
|
+
expect(a).toEqual(b)
|
|
176
|
+
})
|
|
177
|
+
})
|
|
178
|
+
|
|
179
|
+
describe('policyFor for unknown models', () => {
|
|
180
|
+
it('returns defaultPolicy when alias is unresolvable', () => {
|
|
181
|
+
const p = policyFor('definitely-not-a-real-alias-xyz')
|
|
182
|
+
expect(p.$type).toBe('ModelPolicy')
|
|
183
|
+
// Falls back to defaultPolicy when model is not in the catalog
|
|
184
|
+
expect(p.fallbackChain).toEqual([])
|
|
185
|
+
expect(p.batchTier).toEqual(['immediate'])
|
|
186
|
+
})
|
|
187
|
+
})
|
|
188
|
+
|
|
189
|
+
describe('integration with full catalog', () => {
|
|
190
|
+
it('derives a complete policy for sonnet', () => {
|
|
191
|
+
const p: ModelPolicy = policyFor('sonnet')
|
|
192
|
+
expect(p.provider).toBe('anthropic')
|
|
193
|
+
expect(p.retry.maxRetries).toBeGreaterThanOrEqual(3)
|
|
194
|
+
expect(Array.isArray(p.fallbackChain)).toBe(true)
|
|
195
|
+
expect(p.batchTier.length).toBeGreaterThan(0)
|
|
196
|
+
})
|
|
197
|
+
|
|
198
|
+
it('derives a complete policy for gpt-4o', () => {
|
|
199
|
+
const p = policyFor('gpt-4o')
|
|
200
|
+
expect(p.provider).toBe('openai')
|
|
201
|
+
expect(p.batchTier).toContain('flex')
|
|
202
|
+
})
|
|
203
|
+
})
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* language-models / pricing — canonical pricing table tests
|
|
3
|
+
*
|
|
4
|
+
* Regression anchors come from real production runs in startup-builder:
|
|
5
|
+
*
|
|
6
|
+
* 1. **BMC corpus** (sb-srnl 2026-05-07): 5602 records via vertex-batch on
|
|
7
|
+
* `vertex/gemini-3.1-pro` flex/batch tier; ~4500 input tokens avg,
|
|
8
|
+
* ~1140 output tokens avg → total ~$63.53 (≈ $0.01134/record). At
|
|
9
|
+
* flex/batch ≤200K rates ($1/M in, $6/M out): input cost = 5602 ×
|
|
10
|
+
* 4500 × 1e-6 = $25.21; output cost = 5602 × 1140 × 6e-6 = $38.32;
|
|
11
|
+
* total = $63.53.
|
|
12
|
+
*
|
|
13
|
+
* 2. **Synthetic anchors** for known token counts on each tier — exercise
|
|
14
|
+
* the linear-rate math directly to catch off-by-1000 errors.
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import { describe, it, expect } from 'vitest'
|
|
18
|
+
import { priceFor, PRICING_TABLE, listSlugs, hasPricing } from '../src/pricing/index.js'
|
|
19
|
+
|
|
20
|
+
describe('language-models/pricing — table integrity', () => {
|
|
21
|
+
it('exports a non-empty PRICING_TABLE', () => {
|
|
22
|
+
expect(PRICING_TABLE.length).toBeGreaterThan(10)
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
it('every row has provider, slug, tier, inputPer1M, outputPer1M', () => {
|
|
26
|
+
for (const row of PRICING_TABLE) {
|
|
27
|
+
expect(row.provider).toMatch(/^(vertex|bedrock|openai|anthropic|google-ai-studio)$/)
|
|
28
|
+
expect(typeof row.slug).toBe('string')
|
|
29
|
+
expect(row.slug.length).toBeGreaterThan(0)
|
|
30
|
+
expect(row.tier).toMatch(/^(standard|batch|flex|provisioned)$/)
|
|
31
|
+
expect(typeof row.inputPer1M).toBe('number')
|
|
32
|
+
expect(typeof row.outputPer1M).toBe('number')
|
|
33
|
+
expect(row.inputPer1M).toBeGreaterThanOrEqual(0)
|
|
34
|
+
expect(row.outputPer1M).toBeGreaterThanOrEqual(0)
|
|
35
|
+
}
|
|
36
|
+
})
|
|
37
|
+
|
|
38
|
+
it('slug+tier is unique across the table', () => {
|
|
39
|
+
const seen = new Set<string>()
|
|
40
|
+
for (const row of PRICING_TABLE) {
|
|
41
|
+
const k = `${row.slug}|${row.tier}`
|
|
42
|
+
expect(seen.has(k), `duplicate row for ${k}`).toBe(false)
|
|
43
|
+
seen.add(k)
|
|
44
|
+
}
|
|
45
|
+
})
|
|
46
|
+
|
|
47
|
+
it('listSlugs() returns the unique set of slugs', () => {
|
|
48
|
+
const slugs = listSlugs()
|
|
49
|
+
expect(slugs).toContain('vertex/gemini-3.1-pro')
|
|
50
|
+
expect(slugs).toContain('bedrock/claude-opus-4-7')
|
|
51
|
+
expect(slugs).toContain('aistudio/gemini-embedding-2')
|
|
52
|
+
// No duplicates.
|
|
53
|
+
expect(new Set(slugs).size).toBe(slugs.length)
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
it('hasPricing() returns true for known slug+tier and false for unknown', () => {
|
|
57
|
+
expect(hasPricing({ slug: 'vertex/gemini-3.1-pro', tier: 'batch' })).toBe(true)
|
|
58
|
+
expect(hasPricing({ slug: 'bedrock/claude-opus-4-7', tier: 'standard' })).toBe(true)
|
|
59
|
+
expect(hasPricing({ slug: 'vertex/gemini-3.1-pro', tier: 'provisioned' })).toBe(false)
|
|
60
|
+
expect(hasPricing({ slug: 'made-up/model', tier: 'standard' })).toBe(false)
|
|
61
|
+
})
|
|
62
|
+
})
|
|
63
|
+
|
|
64
|
+
describe('language-models/pricing — required slugs present', () => {
|
|
65
|
+
// Every slug below must have at least one row. These are the canonical
|
|
66
|
+
// slugs used across startup-builder + icps + services-builder today.
|
|
67
|
+
const REQUIRED_SLUGS = [
|
|
68
|
+
// Vertex Gemini family
|
|
69
|
+
'vertex/gemini-3.1-pro',
|
|
70
|
+
'vertex/gemini-3.1-flash-lite',
|
|
71
|
+
'vertex/gemini-2.5-pro',
|
|
72
|
+
'vertex/gemini-2.5-flash',
|
|
73
|
+
// Bedrock Anthropic family
|
|
74
|
+
'bedrock/claude-opus-4-7',
|
|
75
|
+
'bedrock/claude-opus-4-6',
|
|
76
|
+
'bedrock/claude-sonnet-4-7',
|
|
77
|
+
'bedrock/claude-sonnet-4-6',
|
|
78
|
+
'bedrock/claude-haiku-4-5',
|
|
79
|
+
// Google AI Studio embedding (the embedding exception — not on Bedrock)
|
|
80
|
+
'aistudio/gemini-embedding-2',
|
|
81
|
+
]
|
|
82
|
+
|
|
83
|
+
for (const slug of REQUIRED_SLUGS) {
|
|
84
|
+
it(`has at least one row for ${slug}`, () => {
|
|
85
|
+
const matches = PRICING_TABLE.filter((row) => row.slug === slug)
|
|
86
|
+
expect(matches.length).toBeGreaterThanOrEqual(1)
|
|
87
|
+
})
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
it('vertex/gemini-3.1-pro has both standard and batch tiers', () => {
|
|
91
|
+
const slug = 'vertex/gemini-3.1-pro'
|
|
92
|
+
const tiers = PRICING_TABLE.filter((row) => row.slug === slug).map((r) => r.tier)
|
|
93
|
+
expect(tiers).toContain('standard')
|
|
94
|
+
expect(tiers).toContain('batch')
|
|
95
|
+
})
|
|
96
|
+
})
|
|
97
|
+
|
|
98
|
+
describe('language-models/pricing — synthetic rate anchors', () => {
|
|
99
|
+
// priceFor() is per-call: the 200K breakpoint applies to a single call's
|
|
100
|
+
// inputTokens. For aggregate cost rollups across many calls, callers
|
|
101
|
+
// accumulate per-call results. The synthetic anchors below all use
|
|
102
|
+
// <200K input tokens to exercise the base tier; the explicit breakpoint
|
|
103
|
+
// anchor below uses ≥200K to exercise the high-context tier.
|
|
104
|
+
|
|
105
|
+
it('vertex/gemini-3.1-pro batch ≤200K: 150K in + 80K out = $0.150 + $0.480 = $0.630', () => {
|
|
106
|
+
const result = priceFor({
|
|
107
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
108
|
+
tier: 'batch',
|
|
109
|
+
inputTokens: 150_000,
|
|
110
|
+
outputTokens: 80_000,
|
|
111
|
+
})
|
|
112
|
+
expect(result.inputUsd).toBeCloseTo(0.15, 6)
|
|
113
|
+
expect(result.outputUsd).toBeCloseTo(0.48, 6)
|
|
114
|
+
expect(result.totalUsd).toBeCloseTo(0.63, 6)
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
it('vertex/gemini-3.1-pro standard ≤200K: 100K in + 100K out = $0.20 + $1.20 = $1.40', () => {
|
|
118
|
+
const result = priceFor({
|
|
119
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
120
|
+
tier: 'standard',
|
|
121
|
+
inputTokens: 100_000,
|
|
122
|
+
outputTokens: 100_000,
|
|
123
|
+
})
|
|
124
|
+
expect(result.inputUsd).toBeCloseTo(0.2, 6)
|
|
125
|
+
expect(result.outputUsd).toBeCloseTo(1.2, 6)
|
|
126
|
+
expect(result.totalUsd).toBeCloseTo(1.4, 6)
|
|
127
|
+
})
|
|
128
|
+
|
|
129
|
+
it('vertex/gemini-3.1-pro standard ≥200K applies the high-context rate', () => {
|
|
130
|
+
// 250K input tokens crosses the 200K breakpoint → apply contextTierAbove
|
|
131
|
+
// ($4/M in, $18/M out) instead of base ($2/M in, $12/M out).
|
|
132
|
+
const result = priceFor({
|
|
133
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
134
|
+
tier: 'standard',
|
|
135
|
+
inputTokens: 250_000,
|
|
136
|
+
outputTokens: 50_000,
|
|
137
|
+
})
|
|
138
|
+
// 250K × $4/M = $1.00 ; 50K × $18/M = $0.90 ; total = $1.90
|
|
139
|
+
expect(result.inputUsd).toBeCloseTo(1.0, 6)
|
|
140
|
+
expect(result.outputUsd).toBeCloseTo(0.9, 6)
|
|
141
|
+
expect(result.totalUsd).toBeCloseTo(1.9, 6)
|
|
142
|
+
})
|
|
143
|
+
|
|
144
|
+
it('vertex/gemini-3.1-pro batch ≥200K applies the high-context rate', () => {
|
|
145
|
+
// 250K input tokens batch → $2/M in (vs $1/M base), $9/M out (vs $6/M)
|
|
146
|
+
const result = priceFor({
|
|
147
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
148
|
+
tier: 'batch',
|
|
149
|
+
inputTokens: 250_000,
|
|
150
|
+
outputTokens: 50_000,
|
|
151
|
+
})
|
|
152
|
+
// 250K × $2/M = $0.50 ; 50K × $9/M = $0.45 ; total = $0.95
|
|
153
|
+
expect(result.inputUsd).toBeCloseTo(0.5, 6)
|
|
154
|
+
expect(result.outputUsd).toBeCloseTo(0.45, 6)
|
|
155
|
+
expect(result.totalUsd).toBeCloseTo(0.95, 6)
|
|
156
|
+
})
|
|
157
|
+
|
|
158
|
+
it('bedrock/claude-opus-4-7 standard: 1M in + 1M out = $15 + $75 = $90 (flat — no breakpoint)', () => {
|
|
159
|
+
const result = priceFor({
|
|
160
|
+
slug: 'bedrock/claude-opus-4-7',
|
|
161
|
+
tier: 'standard',
|
|
162
|
+
inputTokens: 1_000_000,
|
|
163
|
+
outputTokens: 1_000_000,
|
|
164
|
+
})
|
|
165
|
+
expect(result.inputUsd).toBeCloseTo(15, 6)
|
|
166
|
+
expect(result.outputUsd).toBeCloseTo(75, 6)
|
|
167
|
+
expect(result.totalUsd).toBeCloseTo(90, 6)
|
|
168
|
+
})
|
|
169
|
+
|
|
170
|
+
it('bedrock/claude-haiku-4-5 standard: 1M in + 1M out = $1 + $5 = $6', () => {
|
|
171
|
+
const result = priceFor({
|
|
172
|
+
slug: 'bedrock/claude-haiku-4-5',
|
|
173
|
+
tier: 'standard',
|
|
174
|
+
inputTokens: 1_000_000,
|
|
175
|
+
outputTokens: 1_000_000,
|
|
176
|
+
})
|
|
177
|
+
expect(result.totalUsd).toBeCloseTo(6, 6)
|
|
178
|
+
})
|
|
179
|
+
|
|
180
|
+
it('aistudio/gemini-embedding-2 standard: 1M in + 0 out = $0.15', () => {
|
|
181
|
+
const result = priceFor({
|
|
182
|
+
slug: 'aistudio/gemini-embedding-2',
|
|
183
|
+
tier: 'standard',
|
|
184
|
+
inputTokens: 1_000_000,
|
|
185
|
+
outputTokens: 0,
|
|
186
|
+
})
|
|
187
|
+
expect(result.inputUsd).toBeCloseTo(0.15, 6)
|
|
188
|
+
expect(result.outputUsd).toBeCloseTo(0, 6)
|
|
189
|
+
expect(result.totalUsd).toBeCloseTo(0.15, 6)
|
|
190
|
+
})
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
describe('language-models/pricing — production regression anchor (BMC corpus)', () => {
|
|
194
|
+
// sb-srnl 2026-05-07: 5602 records via vertex-batch on
|
|
195
|
+
// vertex/gemini-3.1-pro flex/batch tier (≤200K input per record):
|
|
196
|
+
// - input avg 4500 tok per record (well under 200K breakpoint)
|
|
197
|
+
// - output avg 1140 tok per record
|
|
198
|
+
// Per-record at $1/M in + $6/M out:
|
|
199
|
+
// input cost = 4500 × 1e-6 × $1 = $0.0045
|
|
200
|
+
// output cost = 1140 × 1e-6 × $6 = $0.00684
|
|
201
|
+
// total per record = $0.01134
|
|
202
|
+
// 5602 records × $0.01134 = $63.5267
|
|
203
|
+
//
|
|
204
|
+
// priceFor() is per-call: callers accumulate the rollup.
|
|
205
|
+
it('5602 records × ~4500 in × ~1140 out @ batch ≈ $63.53', () => {
|
|
206
|
+
const records = 5602
|
|
207
|
+
const inputAvg = 4500
|
|
208
|
+
const outputAvg = 1140
|
|
209
|
+
let total = 0
|
|
210
|
+
for (let i = 0; i < records; i++) {
|
|
211
|
+
const r = priceFor({
|
|
212
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
213
|
+
tier: 'batch',
|
|
214
|
+
inputTokens: inputAvg,
|
|
215
|
+
outputTokens: outputAvg,
|
|
216
|
+
})
|
|
217
|
+
total += r.totalUsd
|
|
218
|
+
}
|
|
219
|
+
expect(total).toBeGreaterThan(63)
|
|
220
|
+
expect(total).toBeLessThan(64)
|
|
221
|
+
expect(total).toBeCloseTo(63.527, 2)
|
|
222
|
+
})
|
|
223
|
+
|
|
224
|
+
// For aggregate token counts where every call is known to be under the
|
|
225
|
+
// breakpoint, callers can pass the aggregate directly — but they MUST
|
|
226
|
+
// ensure no individual call crossed 200K. priceFor() does not have a
|
|
227
|
+
// way to express "aggregate of small calls"; that's by design.
|
|
228
|
+
it('aggregate-equivalent: priceFor with sub-breakpoint aggregates matches per-call sum (when no call crossed 200K)', () => {
|
|
229
|
+
// 50 calls × 4000 input + 1000 output @ batch.
|
|
230
|
+
// Per-call: 4000×1e-6×$1 + 1000×1e-6×$6 = $0.004 + $0.006 = $0.010
|
|
231
|
+
// 50 × $0.010 = $0.50
|
|
232
|
+
let perCall = 0
|
|
233
|
+
for (let i = 0; i < 50; i++) {
|
|
234
|
+
const r = priceFor({
|
|
235
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
236
|
+
tier: 'batch',
|
|
237
|
+
inputTokens: 4000,
|
|
238
|
+
outputTokens: 1000,
|
|
239
|
+
})
|
|
240
|
+
perCall += r.totalUsd
|
|
241
|
+
}
|
|
242
|
+
expect(perCall).toBeCloseTo(0.5, 6)
|
|
243
|
+
})
|
|
244
|
+
})
|
|
245
|
+
|
|
246
|
+
describe('language-models/pricing — error paths', () => {
|
|
247
|
+
it('throws on unknown slug (not silent zero)', () => {
|
|
248
|
+
expect(() =>
|
|
249
|
+
priceFor({
|
|
250
|
+
slug: 'made-up/nonexistent-model',
|
|
251
|
+
tier: 'standard',
|
|
252
|
+
inputTokens: 1000,
|
|
253
|
+
outputTokens: 1000,
|
|
254
|
+
})
|
|
255
|
+
).toThrow(/Unknown model slug/)
|
|
256
|
+
})
|
|
257
|
+
|
|
258
|
+
it('throws on unknown tier (not silent zero)', () => {
|
|
259
|
+
expect(() =>
|
|
260
|
+
priceFor({
|
|
261
|
+
slug: 'vertex/gemini-3.1-pro',
|
|
262
|
+
tier: 'provisioned',
|
|
263
|
+
inputTokens: 1000,
|
|
264
|
+
outputTokens: 1000,
|
|
265
|
+
})
|
|
266
|
+
).toThrow(/No .* pricing/i)
|
|
267
|
+
})
|
|
268
|
+
|
|
269
|
+
it('rejects negative token counts', () => {
|
|
270
|
+
expect(() =>
|
|
271
|
+
priceFor({
|
|
272
|
+
slug: 'bedrock/claude-opus-4-7',
|
|
273
|
+
tier: 'standard',
|
|
274
|
+
inputTokens: -1,
|
|
275
|
+
outputTokens: 0,
|
|
276
|
+
})
|
|
277
|
+
).toThrow(/non-negative/i)
|
|
278
|
+
})
|
|
279
|
+
})
|
package/vitest.config.ts
CHANGED
|
@@ -2,10 +2,30 @@ import { defineConfig } from 'vitest/config'
|
|
|
2
2
|
|
|
3
3
|
export default defineConfig({
|
|
4
4
|
test: {
|
|
5
|
+
// CRITICAL: Limit concurrency to prevent resource exhaustion
|
|
6
|
+
maxConcurrency: 1,
|
|
7
|
+
maxWorkers: 1,
|
|
8
|
+
minWorkers: 1,
|
|
9
|
+
fileParallelism: false,
|
|
10
|
+
|
|
5
11
|
globals: false,
|
|
6
12
|
environment: 'node',
|
|
7
|
-
include: ['src/**/*.test.ts'],
|
|
13
|
+
include: ['src/**/*.test.ts', 'test/**/*.test.ts'],
|
|
8
14
|
testTimeout: 10000,
|
|
9
15
|
hookTimeout: 10000,
|
|
16
|
+
|
|
17
|
+
// Coverage configuration
|
|
18
|
+
coverage: {
|
|
19
|
+
provider: 'v8',
|
|
20
|
+
reporter: ['text', 'json', 'html'],
|
|
21
|
+
include: ['src/**/*.ts'],
|
|
22
|
+
exclude: ['**/*.test.ts', '**/__tests__/**', '**/node_modules/**'],
|
|
23
|
+
thresholds: {
|
|
24
|
+
statements: 65,
|
|
25
|
+
branches: 60,
|
|
26
|
+
functions: 60,
|
|
27
|
+
lines: 65,
|
|
28
|
+
},
|
|
29
|
+
},
|
|
10
30
|
},
|
|
11
31
|
})
|
package/.turbo/turbo-test.log
DELETED
package/src/aliases.js
DELETED
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Model aliases - map simple names to full model IDs
|
|
3
|
-
*/
|
|
4
|
-
export const ALIASES = {
|
|
5
|
-
// Claude (Anthropic)
|
|
6
|
-
'opus': 'anthropic/claude-opus-4.5',
|
|
7
|
-
'sonnet': 'anthropic/claude-sonnet-4.5',
|
|
8
|
-
'haiku': 'anthropic/claude-haiku-4.5',
|
|
9
|
-
'claude': 'anthropic/claude-sonnet-4.5',
|
|
10
|
-
// GPT (OpenAI)
|
|
11
|
-
'gpt': 'openai/gpt-4o',
|
|
12
|
-
'gpt-4o': 'openai/gpt-4o',
|
|
13
|
-
'gpt-4o-mini': 'openai/gpt-4o-mini',
|
|
14
|
-
'4o': 'openai/gpt-4o',
|
|
15
|
-
'o1': 'openai/o1',
|
|
16
|
-
'o3': 'openai/o3',
|
|
17
|
-
'o3-mini': 'openai/o3-mini',
|
|
18
|
-
'o4-mini': 'openai/o4-mini',
|
|
19
|
-
// Gemini (Google)
|
|
20
|
-
'gemini': 'google/gemini-2.5-flash',
|
|
21
|
-
'flash': 'google/gemini-2.5-flash',
|
|
22
|
-
'gemini-flash': 'google/gemini-2.5-flash',
|
|
23
|
-
'gemini-pro': 'google/gemini-2.5-pro',
|
|
24
|
-
// Llama (Meta)
|
|
25
|
-
'llama': 'meta-llama/llama-4-maverick',
|
|
26
|
-
'llama-4': 'meta-llama/llama-4-maverick',
|
|
27
|
-
'llama-70b': 'meta-llama/llama-3.3-70b-instruct',
|
|
28
|
-
// DeepSeek
|
|
29
|
-
'deepseek': 'deepseek/deepseek-chat',
|
|
30
|
-
'r1': 'deepseek/deepseek-r1',
|
|
31
|
-
// Mistral
|
|
32
|
-
'mistral': 'mistralai/mistral-large-2411',
|
|
33
|
-
'codestral': 'mistralai/codestral-2501',
|
|
34
|
-
// Qwen
|
|
35
|
-
'qwen': 'qwen/qwen3-235b-a22b',
|
|
36
|
-
// Grok
|
|
37
|
-
'grok': 'x-ai/grok-3',
|
|
38
|
-
// Perplexity
|
|
39
|
-
'sonar': 'perplexity/sonar-pro',
|
|
40
|
-
};
|