model-library 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. model_library/base/base.py +141 -62
  2. model_library/base/delegate_only.py +77 -10
  3. model_library/base/output.py +43 -0
  4. model_library/base/utils.py +35 -0
  5. model_library/config/alibaba_models.yaml +49 -57
  6. model_library/config/all_models.json +353 -120
  7. model_library/config/anthropic_models.yaml +2 -1
  8. model_library/config/kimi_models.yaml +30 -3
  9. model_library/config/mistral_models.yaml +2 -0
  10. model_library/config/openai_models.yaml +15 -23
  11. model_library/config/together_models.yaml +2 -0
  12. model_library/config/xiaomi_models.yaml +43 -0
  13. model_library/config/zai_models.yaml +27 -3
  14. model_library/exceptions.py +3 -77
  15. model_library/providers/ai21labs.py +12 -8
  16. model_library/providers/alibaba.py +17 -8
  17. model_library/providers/amazon.py +49 -16
  18. model_library/providers/anthropic.py +128 -48
  19. model_library/providers/azure.py +22 -10
  20. model_library/providers/cohere.py +7 -7
  21. model_library/providers/deepseek.py +8 -8
  22. model_library/providers/fireworks.py +7 -8
  23. model_library/providers/google/batch.py +14 -10
  24. model_library/providers/google/google.py +57 -30
  25. model_library/providers/inception.py +7 -7
  26. model_library/providers/kimi.py +18 -8
  27. model_library/providers/minimax.py +15 -17
  28. model_library/providers/mistral.py +20 -8
  29. model_library/providers/openai.py +99 -22
  30. model_library/providers/openrouter.py +34 -0
  31. model_library/providers/perplexity.py +7 -7
  32. model_library/providers/together.py +7 -8
  33. model_library/providers/vals.py +12 -6
  34. model_library/providers/vercel.py +34 -0
  35. model_library/providers/xai.py +47 -42
  36. model_library/providers/xiaomi.py +34 -0
  37. model_library/providers/zai.py +38 -8
  38. model_library/register_models.py +5 -0
  39. model_library/registry_utils.py +48 -17
  40. model_library/retriers/__init__.py +0 -0
  41. model_library/retriers/backoff.py +73 -0
  42. model_library/retriers/base.py +225 -0
  43. model_library/retriers/token.py +427 -0
  44. model_library/retriers/utils.py +11 -0
  45. model_library/settings.py +1 -1
  46. model_library/utils.py +17 -7
  47. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/METADATA +2 -1
  48. model_library-0.1.9.dist-info/RECORD +73 -0
  49. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/WHEEL +1 -1
  50. model_library-0.1.7.dist-info/RECORD +0 -64
  51. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE +0 -0
  52. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,187 @@
1
1
  {
2
+ "kimi/kimi-k2.5-thinking": {
3
+ "company": "Moonshot AI",
4
+ "label": "Kimi K2.5",
5
+ "description": null,
6
+ "release_date": "2026-01-26",
7
+ "open_source": true,
8
+ "documentation_url": "https://platform.moonshot.ai/docs",
9
+ "properties": {
10
+ "context_window": 262144,
11
+ "max_tokens": 128000,
12
+ "training_cutoff": null,
13
+ "reasoning_model": true
14
+ },
15
+ "supports": {
16
+ "images": true,
17
+ "files": false,
18
+ "temperature": false,
19
+ "tools": true
20
+ },
21
+ "metadata": {
22
+ "deprecated": false,
23
+ "available_for_everyone": true,
24
+ "available_as_evaluator": false,
25
+ "ignored_for_cost": false
26
+ },
27
+ "provider_properties": {},
28
+ "costs_per_million_token": {
29
+ "input": 0.6,
30
+ "output": 3.0,
31
+ "cache": {
32
+ "read": 0.1,
33
+ "write_markup": 1.0
34
+ }
35
+ },
36
+ "alternative_keys": [],
37
+ "default_parameters": {
38
+ "temperature": 1.0,
39
+ "top_p": 0.95
40
+ },
41
+ "provider_endpoint": "kimi-k2.5",
42
+ "provider_name": "kimi",
43
+ "full_key": "kimi/kimi-k2.5-thinking",
44
+ "slug": "kimi_kimi-k2.5-thinking"
45
+ },
46
+ "kimi/kimi-k2.5": {
47
+ "company": "Moonshot AI",
48
+ "label": "Kimi K2.5",
49
+ "description": null,
50
+ "release_date": "2026-01-26",
51
+ "open_source": true,
52
+ "documentation_url": "https://platform.moonshot.ai/docs",
53
+ "properties": {
54
+ "context_window": 262144,
55
+ "max_tokens": 128000,
56
+ "training_cutoff": null,
57
+ "reasoning_model": false
58
+ },
59
+ "supports": {
60
+ "images": true,
61
+ "files": false,
62
+ "temperature": false,
63
+ "tools": true
64
+ },
65
+ "metadata": {
66
+ "deprecated": false,
67
+ "available_for_everyone": true,
68
+ "available_as_evaluator": false,
69
+ "ignored_for_cost": false
70
+ },
71
+ "provider_properties": {},
72
+ "costs_per_million_token": {
73
+ "input": 0.6,
74
+ "output": 3.0,
75
+ "cache": {
76
+ "read": 0.1,
77
+ "write_markup": 1.0
78
+ }
79
+ },
80
+ "alternative_keys": [
81
+ {
82
+ "kimi/kimi-k2.5-thinking": {
83
+ "properties": {
84
+ "reasoning_model": true
85
+ },
86
+ "default_parameters": {
87
+ "temperature": 1.0
88
+ }
89
+ }
90
+ }
91
+ ],
92
+ "default_parameters": {
93
+ "temperature": 0.6,
94
+ "top_p": 0.95
95
+ },
96
+ "provider_endpoint": "kimi-k2.5",
97
+ "provider_name": "kimi",
98
+ "full_key": "kimi/kimi-k2.5",
99
+ "slug": "kimi_kimi-k2.5"
100
+ },
101
+ "alibaba/qwen3-max-2026-01-23": {
102
+ "company": "Alibaba",
103
+ "label": "Qwen 3 Max Thinking",
104
+ "description": "Qwen 3 Max with enhanced reasoning capabilities",
105
+ "release_date": "2026-01-23",
106
+ "open_source": false,
107
+ "properties": {
108
+ "context_window": 256000,
109
+ "max_tokens": 32000,
110
+ "training_cutoff": null,
111
+ "reasoning_model": true
112
+ },
113
+ "supports": {
114
+ "images": false,
115
+ "temperature": true,
116
+ "tools": true
117
+ },
118
+ "metadata": {
119
+ "deprecated": false,
120
+ "available_for_everyone": false,
121
+ "available_as_evaluator": false,
122
+ "ignored_for_cost": false
123
+ },
124
+ "provider_properties": {},
125
+ "costs_per_million_token": {
126
+ "input": 1.2,
127
+ "output": 6.0
128
+ },
129
+ "alternative_keys": [],
130
+ "default_parameters": {
131
+ "temperature": 0.7
132
+ },
133
+ "provider_endpoint": "qwen3-max-2026-01-23",
134
+ "provider_name": "alibaba",
135
+ "full_key": "alibaba/qwen3-max-2026-01-23",
136
+ "slug": "alibaba_qwen3-max-2026-01-23"
137
+ },
138
+ "zai/glm-4.7-flashx": {
139
+ "company": "zAI",
140
+ "label": "GLM 4.7 Flash",
141
+ "description": "z.AI lightweight fast model",
142
+ "release_date": "2026-01-19",
143
+ "open_source": true,
144
+ "documentation_url": "https://docs.z.ai/",
145
+ "properties": {
146
+ "context_window": 200000,
147
+ "max_tokens": 128000,
148
+ "training_cutoff": null,
149
+ "reasoning_model": true
150
+ },
151
+ "supports": {
152
+ "images": false,
153
+ "files": false,
154
+ "temperature": true,
155
+ "tools": true
156
+ },
157
+ "metadata": {
158
+ "deprecated": false,
159
+ "available_for_everyone": true,
160
+ "available_as_evaluator": false,
161
+ "ignored_for_cost": false
162
+ },
163
+ "provider_properties": {
164
+ "clear_thinking": false
165
+ },
166
+ "costs_per_million_token": {
167
+ "input": 0.07,
168
+ "output": 0.4,
169
+ "cache": {
170
+ "read": 0.01,
171
+ "read_discount": 1.0,
172
+ "write_markup": 1.0
173
+ }
174
+ },
175
+ "alternative_keys": [],
176
+ "default_parameters": {
177
+ "temperature": 1.0,
178
+ "top_p": 0.95
179
+ },
180
+ "provider_endpoint": "glm-4.7-flashx",
181
+ "provider_name": "zai",
182
+ "full_key": "zai/glm-4.7-flashx",
183
+ "slug": "zai_glm-4.7-flashx"
184
+ },
2
185
  "minimax/MiniMax-M2.1": {
3
186
  "company": "MiniMax",
4
187
  "label": "MiniMax-M2.1",
@@ -79,7 +262,9 @@
79
262
  "write_markup": 1.0
80
263
  }
81
264
  },
82
- "alternative_keys": [],
265
+ "alternative_keys": [
266
+ "vercel/zai/glm-4.7"
267
+ ],
83
268
  "default_parameters": {
84
269
  "temperature": 1.0,
85
270
  "top_p": 1.0
@@ -89,6 +274,94 @@
89
274
  "full_key": "zai/glm-4.7",
90
275
  "slug": "zai_glm-4.7"
91
276
  },
277
+ "vercel/zai/glm-4.7": {
278
+ "company": "zAI",
279
+ "label": "GLM 4.7",
280
+ "description": "Latest model from ZAI",
281
+ "release_date": "2025-12-22",
282
+ "open_source": true,
283
+ "documentation_url": "https://docs.z.ai/",
284
+ "properties": {
285
+ "context_window": 200000,
286
+ "max_tokens": 128000,
287
+ "training_cutoff": null,
288
+ "reasoning_model": true
289
+ },
290
+ "supports": {
291
+ "images": false,
292
+ "files": false,
293
+ "temperature": true,
294
+ "tools": true
295
+ },
296
+ "metadata": {
297
+ "deprecated": false,
298
+ "available_for_everyone": true,
299
+ "available_as_evaluator": false,
300
+ "ignored_for_cost": false
301
+ },
302
+ "provider_properties": {},
303
+ "costs_per_million_token": {
304
+ "input": 0.6,
305
+ "output": 2.2,
306
+ "cache": {
307
+ "read": 0.11,
308
+ "read_discount": 1.0,
309
+ "write_markup": 1.0
310
+ }
311
+ },
312
+ "alternative_keys": [],
313
+ "default_parameters": {
314
+ "temperature": 1.0,
315
+ "top_p": 1.0
316
+ },
317
+ "provider_endpoint": "zai/glm-4.7",
318
+ "provider_name": "vercel",
319
+ "full_key": "vercel/zai/glm-4.7",
320
+ "slug": "vercel_zai_glm-4.7"
321
+ },
322
+ "xiaomi/mimo-v2-flash": {
323
+ "company": "Xiaomi",
324
+ "label": "MiMo V2 Flash",
325
+ "description": "MiMo V2 Flash is Xiaomi's Mixture-of-Experts (MoE) language model with 309B total parameters and 15B active parameters. Designed for high-speed reasoning and agentic workflows, it utilizes a novel hybrid attention architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art performance while significantly reducing inference costs.",
326
+ "release_date": "2025-12-17",
327
+ "open_source": true,
328
+ "documentation_url": "https://platform.xiaomimimo.com/#/docs/",
329
+ "properties": {
330
+ "context_window": 256000,
331
+ "max_tokens": 64000,
332
+ "training_cutoff": "December 2024"
333
+ },
334
+ "supports": {
335
+ "images": false,
336
+ "files": false,
337
+ "temperature": true,
338
+ "tools": true
339
+ },
340
+ "metadata": {
341
+ "deprecated": false,
342
+ "available_for_everyone": true,
343
+ "available_as_evaluator": false,
344
+ "ignored_for_cost": false
345
+ },
346
+ "provider_properties": {},
347
+ "costs_per_million_token": {
348
+ "input": 0.1,
349
+ "output": 0.3,
350
+ "cache": {
351
+ "read": 0.01,
352
+ "write_markup": 1.0
353
+ }
354
+ },
355
+ "alternative_keys": [],
356
+ "default_parameters": {
357
+ "temperature": 0.3,
358
+ "top_p": 0.95
359
+ },
360
+ "provider_endpoint": "mimo-v2-flash",
361
+ "provider_name": "xiaomi",
362
+ "full_key": "xiaomi/mimo-v2-flash",
363
+ "slug": "xiaomi_mimo-v2-flash"
364
+ },
92
365
  "google/gemini-3-flash-preview": {
93
366
  "company": "Google",
94
367
  "label": "Gemini 3 Flash (12/25)",
@@ -229,6 +502,54 @@
229
502
  "full_key": "openai/gpt-5.2-pro",
230
503
  "slug": "openai_gpt-5.2-pro"
231
504
  },
505
+ "openai/gpt-5.2-codex": {
506
+ "company": "OpenAI",
507
+ "label": "GPT 5.2 Codex",
508
+ "description": "GPT 5.2 optimized for code",
509
+ "release_date": "2025-12-11",
510
+ "open_source": false,
511
+ "documentation_url": "https://platform.openai.com/docs/models/gpt-5.2-codex",
512
+ "properties": {
513
+ "context_window": 400000,
514
+ "max_tokens": 128000,
515
+ "training_cutoff": "2024-09",
516
+ "reasoning_model": true
517
+ },
518
+ "supports": {
519
+ "images": true,
520
+ "files": true,
521
+ "batch": true,
522
+ "temperature": false,
523
+ "tools": true
524
+ },
525
+ "metadata": {
526
+ "deprecated": false,
527
+ "available_for_everyone": false,
528
+ "available_as_evaluator": false,
529
+ "ignored_for_cost": false
530
+ },
531
+ "provider_properties": {},
532
+ "costs_per_million_token": {
533
+ "input": 1.75,
534
+ "output": 14.0,
535
+ "cache": {
536
+ "read": 0.175,
537
+ "write_markup": 1.0
538
+ },
539
+ "batch": {
540
+ "input_discount": 0.5,
541
+ "output_discount": 0.5
542
+ }
543
+ },
544
+ "alternative_keys": [],
545
+ "default_parameters": {
546
+ "reasoning_effort": "high"
547
+ },
548
+ "provider_endpoint": "gpt-5.2-codex",
549
+ "provider_name": "openai",
550
+ "full_key": "openai/gpt-5.2-codex",
551
+ "slug": "openai_gpt-5.2-codex"
552
+ },
232
553
  "openai/gpt-5.2-2025-12-11": {
233
554
  "company": "OpenAI",
234
555
  "label": "GPT 5.2",
@@ -408,7 +729,7 @@
408
729
  "openai/gpt-5.1-codex-max": {
409
730
  "company": "OpenAI",
410
731
  "label": "GPT 5.1 Codex Max",
411
- "description": "OpenAI's frontier agentic coding model. Good at long-running coding tasks.",
732
+ "description": "GPT 5.1 optimized for code",
412
733
  "release_date": "2025-12-04",
413
734
  "open_source": false,
414
735
  "documentation_url": "",
@@ -628,7 +949,9 @@
628
949
  "available_as_evaluator": false,
629
950
  "ignored_for_cost": false
630
951
  },
631
- "provider_properties": {},
952
+ "provider_properties": {
953
+ "supports_compute_effort": true
954
+ },
632
955
  "costs_per_million_token": {
633
956
  "input": 5.0,
634
957
  "output": 25.0,
@@ -678,7 +1001,9 @@
678
1001
  "available_as_evaluator": false,
679
1002
  "ignored_for_cost": false
680
1003
  },
681
- "provider_properties": {},
1004
+ "provider_properties": {
1005
+ "supports_compute_effort": true
1006
+ },
682
1007
  "costs_per_million_token": {
683
1008
  "input": 5.0,
684
1009
  "output": 25.0,
@@ -921,7 +1246,7 @@
921
1246
  "openai/gpt-5.1-codex": {
922
1247
  "company": "OpenAI",
923
1248
  "label": "GPT 5.1 Codex",
924
- "description": "OpenAI's latest coding model",
1249
+ "description": "GPT 5.1 optimized for code",
925
1250
  "release_date": "2025-11-13",
926
1251
  "open_source": false,
927
1252
  "documentation_url": "https://platform.openai.com/docs/models/gpt-5.1-codex",
@@ -1015,7 +1340,7 @@
1015
1340
  "slug": "openai_gpt-5.1-2025-11-13"
1016
1341
  },
1017
1342
  "kimi/kimi-k2-thinking": {
1018
- "company": "Kimi",
1343
+ "company": "Moonshot AI",
1019
1344
  "label": "Kimi K2 Thinking",
1020
1345
  "description": null,
1021
1346
  "release_date": "2025-11-06",
@@ -1025,7 +1350,7 @@
1025
1350
  "context_window": 256000,
1026
1351
  "max_tokens": 32000,
1027
1352
  "training_cutoff": null,
1028
- "reasoning_model": true
1353
+ "reasoning_model": false
1029
1354
  },
1030
1355
  "supports": {
1031
1356
  "images": false,
@@ -1058,7 +1383,7 @@
1058
1383
  }
1059
1384
  ],
1060
1385
  "default_parameters": {
1061
- "temperature": 1.0
1386
+ "temperature": 0.6
1062
1387
  },
1063
1388
  "provider_endpoint": "kimi-k2-thinking",
1064
1389
  "provider_name": "kimi",
@@ -1102,7 +1427,7 @@
1102
1427
  "slug": "inception_mercury"
1103
1428
  },
1104
1429
  "fireworks/kimi-k2-thinking": {
1105
- "company": "Kimi",
1430
+ "company": "Moonshot AI",
1106
1431
  "label": "Kimi K2 Thinking",
1107
1432
  "description": null,
1108
1433
  "release_date": "2025-11-06",
@@ -1112,7 +1437,7 @@
1112
1437
  "context_window": 256000,
1113
1438
  "max_tokens": 32000,
1114
1439
  "training_cutoff": null,
1115
- "reasoning_model": true
1440
+ "reasoning_model": false
1116
1441
  },
1117
1442
  "supports": {
1118
1443
  "images": false,
@@ -1136,7 +1461,7 @@
1136
1461
  },
1137
1462
  "alternative_keys": [],
1138
1463
  "default_parameters": {
1139
- "temperature": 1.0
1464
+ "temperature": 0.6
1140
1465
  },
1141
1466
  "provider_endpoint": "kimi-k2-thinking",
1142
1467
  "provider_name": "fireworks",
@@ -1939,7 +2264,7 @@
1939
2264
  "temperature": true
1940
2265
  },
1941
2266
  "metadata": {
1942
- "deprecated": false,
2267
+ "deprecated": true,
1943
2268
  "available_for_everyone": false,
1944
2269
  "available_as_evaluator": false,
1945
2270
  "ignored_for_cost": false
@@ -2021,16 +2346,7 @@
2021
2346
  "provider_properties": {},
2022
2347
  "costs_per_million_token": {
2023
2348
  "input": 1.2,
2024
- "output": 6.0,
2025
- "cache": {
2026
- "read_discount": 0.8,
2027
- "write_markup": 1.0
2028
- },
2029
- "context": {
2030
- "threshold": 32000.0,
2031
- "input": 2.4,
2032
- "output": 12.0
2033
- }
2349
+ "output": 6.0
2034
2350
  },
2035
2351
  "alternative_keys": [],
2036
2352
  "default_parameters": {
@@ -2383,8 +2699,8 @@
2383
2699
  "properties": {
2384
2700
  "context_window": 262144,
2385
2701
  "max_tokens": 65536,
2386
- "training_cutoff": "",
2387
- "reasoning_model": false
2702
+ "training_cutoff": null,
2703
+ "reasoning_model": true
2388
2704
  },
2389
2705
  "supports": {
2390
2706
  "images": false,
@@ -3305,9 +3621,14 @@
3305
3621
  },
3306
3622
  "alternative_keys": [
3307
3623
  {
3308
- "costs_per_million_token": {
3309
- "input": 0.55,
3310
- "output": 2.19
3624
+ "fireworks/glm-4p5": {
3625
+ "metadata": {
3626
+ "deprecated": true
3627
+ },
3628
+ "costs_per_million_token": {
3629
+ "input": 0.55,
3630
+ "output": 2.19
3631
+ }
3311
3632
  }
3312
3633
  }
3313
3634
  ],
@@ -3385,15 +3706,15 @@
3385
3706
  "tools": true
3386
3707
  },
3387
3708
  "metadata": {
3388
- "deprecated": false,
3709
+ "deprecated": true,
3389
3710
  "available_for_everyone": true,
3390
3711
  "available_as_evaluator": false,
3391
3712
  "ignored_for_cost": false
3392
3713
  },
3393
3714
  "provider_properties": {},
3394
3715
  "costs_per_million_token": {
3395
- "input": 0.6,
3396
- "output": 2.2,
3716
+ "input": 0.55,
3717
+ "output": 2.19,
3397
3718
  "cache": {
3398
3719
  "read": 0.11,
3399
3720
  "read_discount": 1.0,
@@ -3705,7 +4026,7 @@
3705
4026
  "tools": true
3706
4027
  },
3707
4028
  "metadata": {
3708
- "deprecated": false,
4029
+ "deprecated": true,
3709
4030
  "available_for_everyone": true,
3710
4031
  "available_as_evaluator": false,
3711
4032
  "ignored_for_cost": false
@@ -4335,7 +4656,7 @@
4335
4656
  "tools": true
4336
4657
  },
4337
4658
  "metadata": {
4338
- "deprecated": false,
4659
+ "deprecated": true,
4339
4660
  "available_for_everyone": true,
4340
4661
  "available_as_evaluator": false,
4341
4662
  "ignored_for_cost": false
@@ -12286,94 +12607,6 @@
12286
12607
  "full_key": "azure/gpt-4-turbo-2024-04-09",
12287
12608
  "slug": "azure_gpt-4-turbo-2024-04-09"
12288
12609
  },
12289
- "databricks/dbrx-instruct": {
12290
- "company": "Databricks",
12291
- "label": "DBRX Instruct",
12292
- "description": "Databricks Instruct model.",
12293
- "release_date": "2024-03-27",
12294
- "open_source": false,
12295
- "properties": {
12296
- "context_window": 32768,
12297
- "max_tokens": 4096,
12298
- "training_cutoff": "2023-12",
12299
- "reasoning_model": false
12300
- },
12301
- "supports": {
12302
- "images": false,
12303
- "files": false,
12304
- "batch": true,
12305
- "temperature": true,
12306
- "tools": true
12307
- },
12308
- "metadata": {
12309
- "deprecated": true,
12310
- "available_for_everyone": false,
12311
- "available_as_evaluator": false,
12312
- "ignored_for_cost": false
12313
- },
12314
- "provider_properties": {},
12315
- "costs_per_million_token": {
12316
- "input": 2.25,
12317
- "output": 6.75,
12318
- "batch": {
12319
- "input_discount": 0.5,
12320
- "output_discount": 0.5
12321
- }
12322
- },
12323
- "alternative_keys": [],
12324
- "default_parameters": {
12325
- "reasoning_effort": "high"
12326
- },
12327
- "provider_endpoint": "databricks-dbrx-instruct",
12328
- "provider_name": "databricks",
12329
- "full_key": "databricks/dbrx-instruct",
12330
- "slug": "databricks_dbrx-instruct"
12331
- },
12332
- "databricks/databricks-dbrx-instruct": {
12333
- "company": "Databricks",
12334
- "label": "DBRX Instruct",
12335
- "description": "Databricks Instruct model.",
12336
- "release_date": "2024-03-27",
12337
- "open_source": false,
12338
- "properties": {
12339
- "context_window": 32768,
12340
- "max_tokens": 4096,
12341
- "training_cutoff": "2023-12",
12342
- "reasoning_model": false
12343
- },
12344
- "supports": {
12345
- "images": false,
12346
- "files": false,
12347
- "batch": true,
12348
- "temperature": true,
12349
- "tools": true
12350
- },
12351
- "metadata": {
12352
- "deprecated": true,
12353
- "available_for_everyone": false,
12354
- "available_as_evaluator": false,
12355
- "ignored_for_cost": false
12356
- },
12357
- "provider_properties": {},
12358
- "costs_per_million_token": {
12359
- "input": 2.25,
12360
- "output": 6.75,
12361
- "batch": {
12362
- "input_discount": 0.5,
12363
- "output_discount": 0.5
12364
- }
12365
- },
12366
- "alternative_keys": [
12367
- "databricks/dbrx-instruct"
12368
- ],
12369
- "default_parameters": {
12370
- "reasoning_effort": "high"
12371
- },
12372
- "provider_endpoint": "databricks-dbrx-instruct",
12373
- "provider_name": "databricks",
12374
- "full_key": "databricks/databricks-dbrx-instruct",
12375
- "slug": "databricks_databricks-dbrx-instruct"
12376
- },
12377
12610
  "cohere/command-r-03-2024": {
12378
12611
  "company": "Cohere",
12379
12612
  "label": "Command R (03/2024)",
@@ -22,7 +22,6 @@ base-config:
22
22
  documentation_url: https://docs.claude.com/en/docs/about-claude/models/overview
23
23
  default_parameters:
24
24
  temperature: 1
25
-
26
25
  claude-4-models:
27
26
  base-config:
28
27
  supports:
@@ -40,6 +39,8 @@ claude-4-models:
40
39
  cache:
41
40
  read: 0.5
42
41
  write: 6.25
42
+ provider_properties:
43
+ supports_compute_effort: true
43
44
  alternative_keys:
44
45
  - anthropic/claude-opus-4-5-20251101-thinking:
45
46
  properties:
@@ -1,5 +1,5 @@
1
1
  base-config:
2
- company: Kimi
2
+ company: Moonshot AI
3
3
  documentation_url: https://platform.moonshot.ai/docs
4
4
  open_source: true
5
5
  supports:
@@ -11,16 +11,43 @@ base-config:
11
11
  available_for_everyone: true
12
12
  ignored_for_cost: false
13
13
 
14
+ kimi-k2.5-models:
15
+
16
+ kimi/kimi-k2.5:
17
+ label: Kimi K2.5
18
+ release_date: 2026-01-26
19
+ supports:
20
+ images: true
21
+ temperature: false
22
+ properties:
23
+ reasoning_model: false
24
+ context_window: 262_144
25
+ max_tokens: 128_000
26
+ default_parameters:
27
+ temperature: 0.6
28
+ top_p: 0.95
29
+ costs_per_million_token:
30
+ input: 0.6
31
+ output: 3.0
32
+ cache:
33
+ read: 0.1
34
+ alternative_keys:
35
+ - kimi/kimi-k2.5-thinking:
36
+ properties:
37
+ reasoning_model: true
38
+ default_parameters:
39
+ temperature: 1.0
40
+
14
41
  kimi-k2-models:
15
42
  kimi/kimi-k2-thinking:
16
43
  label: Kimi K2 Thinking
17
44
  release_date: 2025-11-06
18
45
  properties:
19
- reasoning_model: true
46
+ reasoning_model: false
20
47
  context_window: 256_000
21
48
  max_tokens: 32_000
22
49
  default_parameters:
23
- temperature: 1.0
50
+ temperature: 0.6
24
51
  costs_per_million_token:
25
52
  input: 0.6
26
53
  output: 2.5