@aliou/pi-synthetic 0.17.2 → 0.17.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aliou/pi-synthetic",
3
- "version": "0.17.2",
3
+ "version": "0.17.4",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "private": false,
@@ -15,6 +15,8 @@
15
15
  * 1. "Error from inference backend: 400 The input (N tokens) is longer
16
16
  * than the model's context length (M tokens)."
17
17
  * 2. "Context limit exceeded"
18
+ * 3. "Error from inference backend: 400 status code (no body)"
19
+ * — some backends return a bare 400 when the context is too long.
18
20
  */
19
21
  export const SYNTHETIC_OVERFLOW_PATTERN =
20
- /input \(\d+ tokens\) is longer than the model's context length|Context limit exceeded/i;
22
+ /input \(\d+ tokens\) is longer than the model's context length|Context limit exceeded|400 status code \(no body\)/i;
@@ -91,38 +91,6 @@ export const SYNTHETIC_MODELS: SyntheticModelConfig[] = [
91
91
  contextWindow: 196608,
92
92
  maxTokens: 65536,
93
93
  },
94
- // models.dev: synthetic/hf:meta-llama/Llama-3.3-70B-Instruct → ctx=128000, out=32768
95
- {
96
- id: "hf:meta-llama/Llama-3.3-70B-Instruct",
97
- name: "meta-llama/Llama-3.3-70B-Instruct",
98
- provider: "together",
99
- reasoning: false,
100
- input: ["text"],
101
- cost: {
102
- input: 0.88,
103
- output: 0.88,
104
- cacheRead: 0.88,
105
- cacheWrite: 0,
106
- },
107
- contextWindow: 131072,
108
- maxTokens: 32768,
109
- },
110
- // models.dev: synthetic/hf:deepseek-ai/DeepSeek-R1-0528 → ctx=128000, out=128000
111
- {
112
- id: "hf:deepseek-ai/DeepSeek-R1-0528",
113
- name: "deepseek-ai/DeepSeek-R1-0528",
114
- provider: "together",
115
- reasoning: true,
116
- input: ["text"],
117
- cost: {
118
- input: 3,
119
- output: 8,
120
- cacheRead: 3,
121
- cacheWrite: 0,
122
- },
123
- contextWindow: 131072,
124
- maxTokens: 128000,
125
- },
126
94
  // models.dev: synthetic/hf:deepseek-ai/DeepSeek-V3.2 → ctx=162816, out=8000
127
95
  {
128
96
  id: "hf:deepseek-ai/DeepSeek-V3.2",
@@ -191,70 +159,6 @@ export const SYNTHETIC_MODELS: SyntheticModelConfig[] = [
191
159
  contextWindow: 262144,
192
160
  maxTokens: 65536,
193
161
  },
194
- // API: hf:moonshotai/Kimi-K2.5 → ctx=262144, out=65536
195
- {
196
- id: "hf:moonshotai/Kimi-K2.5",
197
- name: "moonshotai/Kimi-K2.5",
198
- provider: "together",
199
- reasoning: true,
200
- input: ["text", "image"],
201
- cost: {
202
- input: 0.5,
203
- output: 2.8,
204
- cacheRead: 0.5,
205
- cacheWrite: 0,
206
- },
207
- contextWindow: 262144,
208
- maxTokens: 65536,
209
- },
210
- // API: hf:nvidia/Kimi-K2.5-NVFP4 → ctx=262144; models.dev: out=65536 (NVFP4 quantized)
211
- {
212
- id: "hf:nvidia/Kimi-K2.5-NVFP4",
213
- name: "nvidia/Kimi-K2.5-NVFP4",
214
- provider: "together",
215
- reasoning: true,
216
- input: ["text", "image"],
217
- cost: {
218
- input: 0.5,
219
- output: 2.8,
220
- cacheRead: 0.5,
221
- cacheWrite: 0,
222
- },
223
- contextWindow: 262144,
224
- maxTokens: 65536,
225
- },
226
- // models.dev: synthetic/hf:deepseek-ai/DeepSeek-V3 → ctx=128000, out=128000
227
- {
228
- id: "hf:deepseek-ai/DeepSeek-V3",
229
- name: "deepseek-ai/DeepSeek-V3",
230
- provider: "together",
231
- reasoning: true,
232
- input: ["text"],
233
- cost: {
234
- input: 1.25,
235
- output: 1.25,
236
- cacheRead: 1.25,
237
- cacheWrite: 0,
238
- },
239
- contextWindow: 131072,
240
- maxTokens: 128000,
241
- },
242
- // models.dev: synthetic/hf:Qwen/Qwen3-235B-A22B-Thinking-2507 → ctx=256000, out=32000
243
- {
244
- id: "hf:Qwen/Qwen3-235B-A22B-Thinking-2507",
245
- name: "Qwen/Qwen3-235B-A22B-Thinking-2507",
246
- provider: "together",
247
- reasoning: true,
248
- input: ["text"],
249
- cost: {
250
- input: 0.65,
251
- output: 3,
252
- cacheRead: 0.65,
253
- cacheWrite: 0,
254
- },
255
- contextWindow: 262144,
256
- maxTokens: 32000,
257
- },
258
162
  // API: hf:Qwen/Qwen3.5-397B-A17B → ctx=262144, out=65536
259
163
  {
260
164
  id: "hf:Qwen/Qwen3.5-397B-A17B",