@parallel-cli/parallel 0.4.1 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/pricing.js CHANGED
@@ -5,60 +5,162 @@
5
5
  * Prices drift over time; overrides always win.
6
6
  */
7
7
  const BUILTIN = {
8
- // DeepSeek
9
- 'deepseek-v4-flash': { input: 0.27, output: 1.1 },
10
- 'deepseek-v4-pro': { input: 0.55, output: 2.19 },
11
- 'deepseek-chat': { input: 0.27, output: 1.1 },
12
- 'deepseek-reasoner': { input: 0.55, output: 2.19 },
8
+ // === Western ===
13
9
  // OpenAI
14
- 'gpt-4o': { input: 2.5, output: 10 },
15
- 'gpt-4o-mini': { input: 0.15, output: 0.6 },
16
- 'gpt-4.1': { input: 2, output: 8 },
17
- 'gpt-4.1-mini': { input: 0.4, output: 1.6 },
18
- 'gpt-4.1-nano': { input: 0.1, output: 0.4 },
19
- 'o3-mini': { input: 1.1, output: 4.4 },
20
- 'o4-mini': { input: 1.1, output: 4.4 },
10
+ 'gpt-5.5': { input: 5.00, output: 30.00, cacheHit: 0.50 },
11
+ 'gpt-5.5-pro': { input: 30.00, output: 180.00, cacheHit: 3.00 },
12
+ 'gpt-5.4': { input: 1.25, output: 10.00, cacheHit: 0.125 }, // approx
13
+ 'gpt-5.3-codex': { input: 1.25, output: 10.00, cacheHit: 0.125 }, // approx
14
+ 'gpt-4o': { input: 2.50, output: 10.00 },
15
+ 'gpt-4o-mini': { input: 0.15, output: 0.60 },
16
+ 'o4-mini': { input: 1.10, output: 4.40 },
17
+ 'o3': { input: 5.00, output: 20.00 },
18
+ 'o3-mini': { input: 0.55, output: 2.20 },
19
+ 'o1': { input: 15.00, output: 60.00 },
20
+ 'o1-mini': { input: 1.10, output: 4.40 },
21
21
  // Anthropic
22
- 'claude-opus-4': { input: 15, output: 75 },
23
- 'claude-sonnet-4': { input: 3, output: 15 },
24
- 'claude-haiku-4': { input: 1, output: 5 },
25
- 'claude-3-5-haiku': { input: 0.8, output: 4 },
22
+ 'claude-opus-4-8': { input: 5.00, output: 25.00, cacheHit: 0.50 },
23
+ 'claude-opus-4-7': { input: 5.00, output: 25.00, cacheHit: 0.50 },
24
+ 'claude-sonnet-4-6': { input: 3.00, output: 15.00, cacheHit: 0.30 },
25
+ 'claude-haiku-4-5': { input: 1.00, output: 5.00, cacheHit: 0.10 },
26
+ // Google Gemini
27
+ 'gemini-3.1-pro': { input: 2.00, output: 12.00, cacheHit: 0.20 }, // tiered: >200K = 2×/1.5×
28
+ 'gemini-3.5-flash': { input: 1.50, output: 9.00, cacheHit: 0.15 },
29
+ 'gemini-3-flash': { input: 0.60, output: 3.00, cacheHit: 0.06 }, // approx
30
+ 'gemini-3.1-flash-lite': { input: 0.10, output: 0.40, cacheHit: 0.01 },
31
+ // xAI Grok
32
+ 'grok-4': { input: 3.00, output: 15.00 }, // approx
33
+ 'grok-4-fast-reasoning': { input: 0.20, output: 0.50 }, // approx
34
+ 'grok-3': { input: 3.00, output: 15.00 },
35
+ 'grok-code-fast-1': { input: 0.20, output: 1.50 }, // approx
26
36
  // Mistral
27
- 'mistral-large': { input: 2, output: 6 },
28
- 'codestral': { input: 0.3, output: 0.9 },
29
- 'devstral': { input: 0.1, output: 0.3 },
30
- // Alibaba
31
- 'qwen2.5-coder': { input: 0.09, output: 0.09 },
32
- 'qwen-max': { input: 1.6, output: 6.4 },
33
- // xAI (Grok)
34
- 'grok-4': { input: 4.0, output: 16.0 },
35
- 'grok-3-beta': { input: 3.0, output: 12.0 },
36
- 'grok-3-mini': { input: 0.55, output: 2.2 },
37
- // Perplexity
38
- 'sonar-pro': { input: 3.0, output: 15.0 },
39
- 'sonar': { input: 1.0, output: 1.0 },
40
- 'sonar-reasoning': { input: 2.0, output: 16.0 },
37
+ 'mistral-large-2': { input: 2.00, output: 6.00 },
38
+ 'magistral-medium': { input: 2.00, output: 5.00 },
39
+ 'codestral-latest': { input: 0.30, output: 0.90 },
40
+ 'mistral-small-latest': { input: 0.20, output: 0.60 },
41
41
  // Cohere
42
- 'command-a': { input: 2.5, output: 10.0 },
43
- 'command-r-plus': { input: 2.5, output: 10.0 },
44
- 'command-r': { input: 0.5, output: 1.5 },
45
- // DeepInfra
46
- 'llama-4-maverick': { input: 0.2, output: 0.6 }, // approximate
47
- 'wizardlm-2-8x22b': { input: 0.5, output: 0.5 }, // approximate
48
- // Fireworks
49
- 'llama-4-scout': { input: 0.1, output: 0.3 }, // approximate
50
- 'mixtral-8x22b': { input: 0.9, output: 0.9 }, // approximate
42
+ 'command-a': { input: 2.50, output: 10.00 },
43
+ 'command-r-plus': { input: 2.50, output: 10.00 },
44
+ // Perplexity
45
+ 'sonar-pro': { input: 3.00, output: 15.00 },
46
+ 'sonar-deep-research': { input: 2.00, output: 8.00 },
47
+ // === Chinese ===
48
+ // DeepSeek
49
+ 'deepseek-v4-pro': { input: 0.435, output: 0.87, cacheHit: 0.0036 },
50
+ 'deepseek-v4-flash': { input: 0.14, output: 0.28, cacheHit: 0.003 },
51
+ 'deepseek-chat': { input: 0.27, output: 1.10, cacheHit: 0.027 },
52
+ 'deepseek-reasoner': { input: 0.55, output: 2.19, cacheHit: 0.14 },
53
+ // MiniMax — list pricing
54
+ 'MiniMax-M3': { input: 0.60, output: 2.40, cacheHit: 0.12 },
55
+ 'MiniMax-M2.7': { input: 0.30, output: 1.20, cacheHit: 0.06 },
56
+ 'MiniMax-M2.7-highspeed': { input: 0.60, output: 2.40, cacheHit: 0.06 },
57
+ // Z.ai / GLM
58
+ 'glm-5.2': { input: 1.00, output: 3.20 },
59
+ 'glm-5.1': { input: 1.00, output: 3.20 },
60
+ 'glm-4.7': { input: 0.60, output: 2.20 },
61
+ 'glm-4.7-flash': { input: 0.00, output: 0.00 }, // free
62
+ 'glm-5v-turbo': { input: 0.30, output: 0.90 },
63
+ // Alibaba / Qwen
64
+ 'qwen3.7-max': { input: 1.25, output: 3.75 }, // promo price, cache: yes
65
+ 'qwen3.6-max-preview': { input: 1.04, output: 6.24 }, // tiered, lower bound
66
+ 'qwen3.6-plus': { input: 0.40, output: 1.20 },
67
+ 'qwen3.5-coder': { input: 0.20, output: 0.60 },
68
+ // Moonshot / Kimi
69
+ 'kimi-k2.6': { input: 0.95, output: 4.00, cacheHit: 0.16 },
70
+ 'kimi-k2.7-code': { input: 0.95, output: 4.00, cacheHit: 0.19 },
71
+ 'kimi-k2.5': { input: 0.60, output: 3.00, cacheHit: 0.10 },
72
+ 'moonshot-v1-128k': { input: 0.85, output: 1.70 },
73
+ // Xiaomi / MiMo
74
+ 'mimo-v2-pro': { input: 0.30, output: 1.00 }, // approx
75
+ 'mimo-v2-omni': { input: 0.40, output: 1.50 }, // approx
76
+ // StepFun
77
+ 'step-2-16k': { input: 0.50, output: 1.50 }, // approx
78
+ // === Gateways ===
79
+ // OpenRouter
80
+ 'openai/gpt-5.5': { input: 5.00, output: 30.00 },
81
+ 'anthropic/claude-sonnet-4-6': { input: 3.00, output: 15.00 },
82
+ 'google/gemini-3.5-flash': { input: 1.50, output: 9.00 },
83
+ 'deepseek/deepseek-v4-pro': { input: 0.435, output: 0.87 },
84
+ 'meta-llama/llama-4-maverick': { input: 0.50, output: 1.60 },
85
+ 'mistralai/mistral-large-2': { input: 2.00, output: 6.00 },
86
+ // SiliconFlow
87
+ 'deepseek-ai/DeepSeek-V4-Pro': { input: 0.435, output: 0.87 },
88
+ 'deepseek-ai/DeepSeek-R1': { input: 0.55, output: 2.19 },
89
+ 'Qwen/Qwen3-Coder-480B': { input: 0.20, output: 0.60 },
90
+ 'glm-4/GLM-5.2': { input: 1.00, output: 3.20 },
91
+ 'moonshotai/Kimi-K2.6': { input: 0.95, output: 4.00 },
92
+ // Atlas Cloud
93
+ 'deepseek-v4-pro @atlas': { input: 0.435, output: 0.87 },
94
+ 'deepseek-r1 @atlas': { input: 0.55, output: 2.19 },
95
+ 'qwen3.7-max @atlas': { input: 1.25, output: 3.75 },
96
+ 'glm-5.2 @atlas': { input: 1.00, output: 3.20 },
97
+ 'kimi-k2.6 @atlas': { input: 0.95, output: 4.00 },
98
+ 'llama-4-maverick @atlas': { input: 0.50, output: 1.60 },
99
+ // Requesty — 0% markup, same as direct
100
+ 'gpt-5.5 @requesty': { input: 5.00, output: 30.00 },
101
+ 'claude-sonnet-4-6 @requesty': { input: 3.00, output: 15.00 },
102
+ 'gemini-3.5-flash @requesty': { input: 1.50, output: 9.00 },
103
+ 'deepseek-v4-pro @requesty': { input: 0.435, output: 0.87 },
104
+ 'llama-4-maverick @requesty': { input: 0.50, output: 1.60 },
105
+ 'mistral-large-2 @requesty': { input: 2.00, output: 6.00 },
106
+ // Vercel AI Gateway — list price, no markup
107
+ 'gpt-5.5 @vercel': { input: 5.00, output: 30.00 },
108
+ 'claude-sonnet-4-6 @vercel': { input: 3.00, output: 15.00 },
109
+ 'gemini-3.5-flash @vercel': { input: 1.50, output: 9.00 },
110
+ 'deepseek-v4-pro @vercel': { input: 0.435, output: 0.87 },
111
+ 'llama-4-maverick @vercel': { input: 0.50, output: 1.60 },
112
+ // === Inference hosts ===
113
+ // Groq
114
+ 'qwen-2.5-coder-32b': { input: 0.30, output: 0.50 },
115
+ 'deepseek-r1-distill-llama-70b': { input: 0.30, output: 2.00 },
116
+ 'kimi-k2.6 @groq': { input: 1.00, output: 3.00 },
117
+ 'llama-3.3-70b-versatile': { input: 0.15, output: 0.30 },
51
118
  // Cerebras
52
- 'llama-3.3-70b @cerebras': { input: 0.5, output: 1.5 }, // approximate
53
- 'llama-3.1-8b': { input: 0.05, output: 0.1 }, // approximate
119
+ 'llama-4-maverick-17b-128e-instruct': { input: 0.65, output: 0.85 },
120
+ 'qwen3-coder-480b @cerebras': { input: 2.00, output: 2.00 },
121
+ 'kimi-k2.6 @cerebras': { input: 1.65, output: 6.50 },
122
+ 'llama-3.3-70b @cerebras': { input: 0.10, output: 0.20 },
123
+ // Together AI
124
+ 'meta-llama/Llama-4-Maverick-17B-128E-Instruct': { input: 0.27, output: 0.85 },
125
+ 'deepseek-ai/DeepSeek-V3 @together': { input: 0.30, output: 0.30 },
126
+ 'Qwen/Qwen3-Coder-480B @together': { input: 0.40, output: 1.20 },
127
+ 'moonshotai/Kimi-K2.6 @together': { input: 0.60, output: 2.50 },
128
+ // Fireworks
129
+ 'accounts/fireworks/models/llama4-maverick-17b': { input: 0.22, output: 0.88 },
130
+ 'accounts/fireworks/models/deepseek-v3': { input: 0.90, output: 0.90 },
131
+ 'accounts/fireworks/models/qwen3-coder-480b': { input: 0.45, output: 1.80 },
132
+ 'accounts/fireworks/models/kimi-k2.6': { input: 0.60, output: 2.50 },
133
+ // DeepInfra
134
+ 'meta-llama/Llama-4-Maverick-17B-128E': { input: 0.20, output: 0.60 },
135
+ 'deepseek-ai/DeepSeek-V3 @deepinfra': { input: 0.26, output: 0.38 },
136
+ 'Qwen/Qwen3-Coder-480B @deepinfra': { input: 0.30, output: 1.00 },
137
+ 'moonshotai/Kimi-K2.6 @deepinfra': { input: 0.75, output: 3.50, cacheHit: 0.15 },
54
138
  // Novita
55
- 'deepseek-r1': { input: 2.0, output: 8.0 },
56
- 'deepseek-v3': { input: 1.25, output: 5.0 },
57
- 'llama-3.1-70b': { input: 0.35, output: 0.4 }, // approximate
139
+ 'meta-llama/llama-4-maverick-17b-128e': { input: 0.20, output: 0.60 },
140
+ 'deepseek/deepseek-v3': { input: 0.10, output: 0.28 },
141
+ 'qwen/qwen3-coder-480b': { input: 0.30, output: 1.20 },
142
+ 'moonshotai/kimi-k2.6': { input: 0.57, output: 2.30 },
58
143
  // Hyperbolic
59
- 'qwen3-235b': { input: 0.5, output: 1.5 }, // approximate
60
- // Local endpoints are free
61
- 'ollama': { input: 0, output: 0 },
144
+ 'meta-llama/Llama-4-Maverick-17B-128E @hyperbolic': { input: 0.20, output: 0.60 },
145
+ 'deepseek-ai/DeepSeek-V3 @hyperbolic': { input: 0.25, output: 0.85 },
146
+ 'Qwen/Qwen3-Coder-480B @hyperbolic': { input: 0.30, output: 1.20 },
147
+ 'moonshotai/Kimi-K2.6 @hyperbolic': { input: 0.80, output: 3.00 },
148
+ // SambaNova
149
+ 'Meta-Llama-4-Maverick-17B-128E-Instruct': { input: 0.20, output: 0.30 },
150
+ 'DeepSeek-V3 @sambanova': { input: 1.00, output: 1.50 },
151
+ 'Llama-3.3-70B-Instruct': { input: 0.10, output: 0.20 },
152
+ // === Local (free) ===
153
+ // Ollama
154
+ 'qwen3-coder:480b': { input: 0.00, output: 0.00 },
155
+ 'glm-4.7 @ollama': { input: 0.00, output: 0.00 },
156
+ 'deepseek-v3 @ollama': { input: 0.00, output: 0.00 },
157
+ 'kimi-k2 @ollama': { input: 0.00, output: 0.00 },
158
+ 'llama3.2': { input: 0.00, output: 0.00 },
159
+ 'mistral @ollama': { input: 0.00, output: 0.00 },
160
+ 'codellama': { input: 0.00, output: 0.00 },
161
+ 'gemma3': { input: 0.00, output: 0.00 },
162
+ // vLLM / SGLang
163
+ 'your-model-here': { input: 0.00, output: 0.00 },
62
164
  };
63
165
  /**
64
166
  * Resolve the price of a model: provider override first, then built-in table
@@ -66,22 +168,28 @@ const BUILTIN = {
66
168
  * "claude-sonnet-4-20250514" or "openai/gpt-4o-mini"). null = unknown.
67
169
  */
68
170
  export function priceFor(provider, model) {
69
- const override = provider?.prices?.[model];
171
+ const overrideKey = provider?.prices
172
+ ? Object.keys(provider.prices).find((key) => key.toLowerCase() === model.toLowerCase())
173
+ : undefined;
174
+ const override = overrideKey ? provider?.prices?.[overrideKey] : undefined;
70
175
  if (override)
71
176
  return override;
72
177
  const m = model.toLowerCase();
73
178
  // strip an optional "vendor/" prefix (OpenRouter-style ids)
74
179
  const bare = m.includes('/') ? m.slice(m.lastIndexOf('/') + 1) : m;
75
- if (BUILTIN[bare])
76
- return BUILTIN[bare];
180
+ const builtins = Object.entries(BUILTIN).map(([key, price]) => [key.toLowerCase(), price]);
181
+ const exact = builtins.find(([key]) => key === bare);
182
+ if (exact)
183
+ return exact[1];
77
184
  // longest matching key wins (a key matches if it appears anywhere in the id), e.g. "deepseek-chat-v2" resolves to "deepseek-chat"
78
185
  let best = null;
79
- for (const key of Object.keys(BUILTIN)) {
80
- if ((bare.startsWith(key) || bare.includes(key)) && (!best || key.length > best.length))
81
- best = key;
186
+ for (const entry of builtins) {
187
+ const [key] = entry;
188
+ if ((bare.startsWith(key) || bare.includes(key)) && (!best || key.length > best[0].length))
189
+ best = entry;
82
190
  }
83
191
  if (best)
84
- return BUILTIN[best];
192
+ return best[1];
85
193
  // local endpoints (ollama, llama.cpp, vLLM on localhost) → free
86
194
  if (provider && /localhost|127\.0\.0\.1|0\.0\.0\.0/.test(provider.baseUrl))
87
195
  return { input: 0, output: 0 };