model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. model_library/base/base.py +13 -6
  2. model_library/base/output.py +55 -0
  3. model_library/base/utils.py +3 -2
  4. model_library/config/README.md +169 -0
  5. model_library/config/ai21labs_models.yaml +11 -10
  6. model_library/config/alibaba_models.yaml +21 -22
  7. model_library/config/all_models.json +4708 -2471
  8. model_library/config/amazon_models.yaml +100 -102
  9. model_library/config/anthropic_models.yaml +59 -45
  10. model_library/config/cohere_models.yaml +25 -24
  11. model_library/config/deepseek_models.yaml +28 -25
  12. model_library/config/dummy_model.yaml +9 -7
  13. model_library/config/fireworks_models.yaml +86 -56
  14. model_library/config/google_models.yaml +156 -102
  15. model_library/config/inception_models.yaml +6 -6
  16. model_library/config/kimi_models.yaml +13 -14
  17. model_library/config/minimax_models.yaml +37 -0
  18. model_library/config/mistral_models.yaml +85 -29
  19. model_library/config/openai_models.yaml +192 -159
  20. model_library/config/perplexity_models.yaml +8 -23
  21. model_library/config/together_models.yaml +115 -103
  22. model_library/config/xai_models.yaml +85 -57
  23. model_library/config/zai_models.yaml +23 -15
  24. model_library/exceptions.py +12 -17
  25. model_library/file_utils.py +1 -1
  26. model_library/providers/amazon.py +32 -17
  27. model_library/providers/anthropic.py +2 -6
  28. model_library/providers/google/google.py +35 -29
  29. model_library/providers/minimax.py +33 -0
  30. model_library/providers/mistral.py +10 -1
  31. model_library/providers/openai.py +10 -8
  32. model_library/providers/together.py +18 -211
  33. model_library/register_models.py +36 -38
  34. model_library/registry_utils.py +18 -16
  35. model_library/utils.py +2 -2
  36. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
  37. model_library-0.1.4.dist-info/RECORD +64 -0
  38. model_library-0.1.2.dist-info/RECORD +0 -61
  39. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
  40. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
  41. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,14 @@ base-config:
2
2
  company: Google
3
3
  documentation_url: https://ai.google.dev/gemini-api/docs/models
4
4
  open_source: false
5
- class_properties:
5
+ supports:
6
+ images: false
7
+ videos: false
8
+ files: false
9
+ batch: false
10
+ temperature: false
11
+ tools: false
12
+ metadata:
6
13
  available_for_everyone: true
7
14
  available_as_evaluator: false
8
15
  ignored_for_cost: false
@@ -21,29 +28,66 @@ gemma-models:
21
28
  use such as phones and tablets, capable of handling text, audio, and vision
22
29
  inputs with efficient parameter caching techniques.
23
30
  release_date: 2025-03-12
24
- class_properties:
31
+ metadata:
25
32
  deprecated: true
26
33
  properties:
27
34
  context_window: 131_072
28
- max_token_output: 8_192
35
+ max_tokens: 8_192
29
36
  training_cutoff: "2023-09"
30
37
  costs_per_million_token:
31
38
  input: 0.00
32
39
  output: 0.00
33
40
 
41
+ gemini-3-models:
42
+ base-config:
43
+ supports:
44
+ images: true
45
+ files: true
46
+ videos: true
47
+ tools: true
48
+ batch: true
49
+ temperature: true
50
+ costs_per_million_token:
51
+ cache:
52
+ read_discount: 0.1
53
+ default_parameters:
54
+ temperature: 1
55
+ reasoning_effort: "high"
56
+
57
+ google/gemini-3-pro-preview:
58
+ label: Gemini 3 Pro (11/25)
59
+ description: Gemini 3 Pro, Google's most powerful model.
60
+ release_date: 2025-11-18
61
+ properties:
62
+ context_window: 1048576
63
+ max_tokens: 65536
64
+ training_cutoff: "2025-01"
65
+ reasoning_model: true
66
+ costs_per_million_token:
67
+ input: 2.0
68
+ output: 12.0
69
+ cache:
70
+ read: 0.20
71
+ context:
72
+ threshold: 200_000
73
+ input: 4.0
74
+ output: 18.0
75
+ cache:
76
+ read: 0.40
77
+
34
78
  gemini-2.5-models:
35
79
  base-config:
80
+ supports:
81
+ images: true
82
+ files: true
83
+ videos: true
84
+ tools: true
85
+ batch: true
86
+ temperature: true
36
87
  properties:
37
88
  context_window: 1_048_576
38
- max_token_output: 8_192
89
+ max_tokens: 8_192
39
90
  training_cutoff: "2024-05"
40
- class_properties:
41
- supports_images: true
42
- supports_files: true
43
- supports_videos: true
44
- supports_tools: true
45
- supports_batch_requests: true
46
- supports_temperature: true
47
91
  costs_per_million_token:
48
92
  cache:
49
93
  read_discount: 0.1
@@ -58,12 +102,12 @@ gemini-2.5-models:
58
102
  using multimodal inputs including audio, images, video, and PDFs with a context
59
103
  window exceeding 1 million tokens.
60
104
  release_date: 2025-07-17
105
+ metadata:
106
+ available_as_evaluator: true
61
107
  properties:
62
108
  context_window: 1_048_576
63
- max_token_output: 65_536
109
+ max_tokens: 65_536
64
110
  reasoning_model: true
65
- class_properties:
66
- available_as_evaluator: true
67
111
  costs_per_million_token:
68
112
  input: 1.25
69
113
  output: 10.0
@@ -83,12 +127,12 @@ gemini-2.5-models:
83
127
  large scale processing with support for multiple modalities, a 1 million token
84
128
  context window, and strong reasoning capabilities.
85
129
  release_date: 2025-07-17
130
+ metadata:
131
+ available_as_evaluator: true
86
132
  properties:
87
133
  context_window: 1_048_576
88
- max_token_output: 65_536
134
+ max_tokens: 65_536
89
135
  reasoning_model: false
90
- class_properties:
91
- available_as_evaluator: true
92
136
  costs_per_million_token:
93
137
  input: 0.30
94
138
  output: 2.50
@@ -107,7 +151,7 @@ gemini-2.5-models:
107
151
  release_date: 2025-09-25
108
152
  properties:
109
153
  context_window: 1_048_576
110
- max_token_output: 65_535
154
+ max_tokens: 65_535
111
155
  reasoning_model: false
112
156
  costs_per_million_token:
113
157
  input: 0.30
@@ -128,7 +172,7 @@ gemini-2.5-models:
128
172
  release_date: 2025-07-22
129
173
  properties:
130
174
  context_window: 1_048_576
131
- max_token_output: 65_535
175
+ max_tokens: 65_535
132
176
  reasoning_model: false
133
177
  costs_per_million_token:
134
178
  input: 0.10
@@ -149,7 +193,7 @@ gemini-2.5-models:
149
193
  release_date: 2025-09-25
150
194
  properties:
151
195
  context_window: 1_048_576
152
- max_token_output: 65_535
196
+ max_tokens: 65_535
153
197
  reasoning_model: false
154
198
  costs_per_million_token:
155
199
  input: 0.10
@@ -168,12 +212,12 @@ gemini-2.5-models:
168
212
  at scale, with support for text, images, video, and audio input modalities and
169
213
  large context length.
170
214
  release_date: 2025-06-17
215
+ metadata:
216
+ deprecated: true
171
217
  properties:
172
218
  context_window: 1_048_576
173
- max_token_output: 65_535
219
+ max_tokens: 65_535
174
220
  reasoning_model: false
175
- class_properties:
176
- deprecated: true
177
221
  costs_per_million_token:
178
222
  input: 0.10
179
223
  output: 0.40
@@ -191,13 +235,13 @@ gemini-2.5-models:
191
235
  large scale processing with support for multiple modalities, a 1 million token
192
236
  context window, and strong reasoning capabilities.
193
237
  release_date: 2025-05-20
238
+ metadata:
239
+ deprecated: true
194
240
  properties:
195
241
  context_window: 1_048_576
196
- max_token_output: 65_536
242
+ max_tokens: 65_536
197
243
  training_cutoff: "2025-01"
198
244
  reasoning_model: false
199
- class_properties:
200
- deprecated: true
201
245
  costs_per_million_token:
202
246
  input: 0.15
203
247
  output: 0.60
@@ -216,14 +260,15 @@ gemini-2.5-models:
216
260
  using multimodal inputs including audio, images, video, and PDFs with a context
217
261
  window exceeding 1 million tokens.
218
262
  release_date: 2025-06-05
263
+ supports:
264
+ batch: false
265
+ metadata:
266
+ deprecated: true
219
267
  properties:
220
268
  context_window: 1_048_576
221
- max_token_output: 65_536
269
+ max_tokens: 65_536
222
270
  training_cutoff: "2025-01"
223
271
  reasoning_model: true
224
- class_properties:
225
- supports_batch_requests: false
226
- deprecated: true
227
272
  costs_per_million_token:
228
273
  input: 1.25
229
274
  output: 10.0
@@ -238,14 +283,14 @@ gemini-2.5-models:
238
283
  using multimodal inputs including audio, images, video, and PDFs with a context
239
284
  window exceeding 1 million tokens.
240
285
  release_date: 2025-05-06
286
+ metadata:
287
+ available_for_everyone: false
288
+ deprecated: true
241
289
  properties:
242
290
  context_window: 1_048_576
243
- max_token_output: 65_536
291
+ max_tokens: 65_536
244
292
  training_cutoff: "2025-01"
245
- available_for_everyone: false
246
293
  reasoning_model: true
247
- class_properties:
248
- deprecated: true
249
294
  costs_per_million_token:
250
295
  input: 1.25
251
296
  output: 10.0
@@ -259,13 +304,13 @@ gemini-2.5-models:
259
304
  large scale processing with support for multiple modalities, a 1 million token
260
305
  context window, and strong reasoning capabilities.
261
306
  release_date: 2025-04-17
307
+ metadata:
308
+ deprecated: true
262
309
  properties:
263
310
  context_window: 1_048_576
264
- max_token_output: 65_536
311
+ max_tokens: 65_536
265
312
  training_cutoff: "2025-01"
266
313
  reasoning_model: false
267
- class_properties:
268
- deprecated: true
269
314
  costs_per_million_token:
270
315
  input: 0.3
271
316
  output: 2.5
@@ -284,14 +329,14 @@ gemini-2.5-models:
284
329
  using multimodal inputs including audio, images, video, and PDFs with a context
285
330
  window exceeding 1 million tokens.
286
331
  release_date: 2025-03-25
332
+ metadata:
333
+ available_for_everyone: false
334
+ deprecated: true
287
335
  properties:
288
336
  context_window: 1_048_576
289
- max_token_output: 65_536
337
+ max_tokens: 65_536
290
338
  training_cutoff: "2025-01"
291
- available_for_everyone: false
292
339
  reasoning_model: true
293
- class_properties:
294
- deprecated: true
295
340
  costs_per_million_token:
296
341
  input: 1.25
297
342
  output: 10.0
@@ -306,14 +351,14 @@ gemini-2.5-models:
306
351
  using multimodal inputs including audio, images, video, and PDFs with a context
307
352
  window exceeding 1 million tokens.
308
353
  release_date: 2025-04-05
354
+ metadata:
355
+ available_for_everyone: false
356
+ deprecated: true
309
357
  properties:
310
358
  context_window: 1_048_576
311
- max_token_output: 65_536
359
+ max_tokens: 65_536
312
360
  training_cutoff: "2025-01"
313
- available_for_everyone: false
314
361
  reasoning_model: true
315
- class_properties:
316
- deprecated: true
317
362
  costs_per_million_token:
318
363
  input: 1.25
319
364
  output: 10.0
@@ -322,12 +367,12 @@ gemini-2.5-models:
322
367
 
323
368
  old-gemini-models:
324
369
  base-config:
370
+ metadata:
371
+ deprecated: true
325
372
  properties:
326
373
  context_window: 2_097_152
327
- max_token_output: 8_192
374
+ max_tokens: 8_192
328
375
  training_cutoff: "2024-05"
329
- class_properties:
330
- deprecated: true
331
376
  costs_per_million_token:
332
377
  cache:
333
378
  read_discount: 0.25
@@ -339,17 +384,18 @@ old-gemini-models:
339
384
  large context processing and multimodal inputs, with strengths in speed, native
340
385
  tool use, and dataset comprehension.
341
386
  release_date: 2025-02-05
387
+ supports:
388
+ images: true
389
+ files: true
390
+ videos: true
391
+ metadata:
392
+ available_for_everyone: false
393
+ deprecated: true
342
394
  properties:
343
395
  context_window: 2_097_152
344
- max_token_output: 8_192
396
+ max_tokens: 8_192
345
397
  training_cutoff: "2024-06"
346
398
  reasoning_model: true
347
- available_for_everyone: false
348
- class_properties:
349
- deprecated: true
350
- supports_images: true
351
- supports_files: true
352
- supports_videos: true
353
399
  costs_per_million_token:
354
400
  input: 1.25
355
401
  output: 5.00
@@ -361,17 +407,18 @@ old-gemini-models:
361
407
  reasoning and longer contextual memory in multimodal scenarios including text,
362
408
  images, video, and audio.
363
409
  release_date: 2025-01-21
410
+ supports:
411
+ images: true
412
+ files: true
413
+ videos: true
414
+ metadata:
415
+ available_for_everyone: false
416
+ deprecated: true
364
417
  properties:
365
418
  context_window: 1_048_576
366
- max_token_output: 65_536
419
+ max_tokens: 65_536
367
420
  training_cutoff: "2024-05"
368
421
  reasoning_model: true
369
- available_for_everyone: false
370
- class_properties:
371
- deprecated: true
372
- supports_images: true
373
- supports_files: true
374
- supports_videos: true
375
422
  costs_per_million_token:
376
423
  input: 0.10
377
424
  output: 0.70
@@ -383,18 +430,19 @@ old-gemini-models:
383
430
  with a 1 million token context window and support for text, image, video, and
384
431
  audio inputs.
385
432
  release_date: 2025-02-05
433
+ supports:
434
+ images: true
435
+ files: true
436
+ videos: true
386
437
  properties:
387
- properties: null
388
438
  context_window: 1_048_576
389
- max_token_output: 8_192
439
+ max_tokens: 8_192
390
440
  training_cutoff: "2024-08"
391
441
  costs_per_million_token:
392
442
  input: 0.10
393
443
  output: 0.40
394
- class_properties:
395
- supports_images: true
396
- supports_files: true
397
- supports_videos: true
444
+ cache:
445
+ read: 0.025
398
446
 
399
447
  google/gemini-2.0-flash-exp:
400
448
  label: Gemini 2.0 Flash Exp
@@ -402,15 +450,16 @@ old-gemini-models:
402
450
  Gemini 2.0 Flash Experimental is a developer preview for advanced
403
451
  use cases requiring extended context and multimodal reasoning capabilities.
404
452
  release_date: 2024-12-11
453
+ supports:
454
+ images: true
455
+ files: true
456
+ videos: true
457
+ metadata:
458
+ deprecated: true
405
459
  properties:
406
460
  context_window: 1_048_576
407
- max_token_output: 8_192
461
+ max_tokens: 8_192
408
462
  training_cutoff: "2024-05"
409
- class_properties:
410
- deprecated: true
411
- supports_images: true
412
- supports_files: true
413
- supports_videos: true
414
463
  costs_per_million_token:
415
464
  input: 0.075
416
465
  output: 0.30
@@ -421,15 +470,16 @@ old-gemini-models:
421
470
  Gemini 1.5 Flash is an efficient and speedy version of Gemini 1.5
422
471
  with support for multiple data types and large contexts.
423
472
  release_date: 2024-09-24
473
+ supports:
474
+ images: true
475
+ files: true
476
+ videos: true
477
+ metadata:
478
+ deprecated: true
424
479
  properties:
425
480
  context_window: 1_048_576
426
- max_token_output: 8_192
481
+ max_tokens: 8_192
427
482
  training_cutoff: "2024-05"
428
- class_properties:
429
- deprecated: true
430
- supports_images: true
431
- supports_files: true
432
- supports_videos: true
433
483
  costs_per_million_token:
434
484
  input: 0.075
435
485
  output: 0.30
@@ -440,15 +490,16 @@ old-gemini-models:
440
490
  Gemini 1.5 Flash (early release) provides fast processing with vision
441
491
  and audio capabilities along with text understanding.
442
492
  release_date: 2024-05-24
493
+ supports:
494
+ images: true
495
+ files: true
496
+ videos: true
497
+ metadata:
498
+ deprecated: true
443
499
  properties:
444
500
  context_window: 1_048_576
445
- max_token_output: 8_192
501
+ max_tokens: 8_192
446
502
  training_cutoff: "2023-11"
447
- class_properties:
448
- deprecated: true
449
- supports_images: true
450
- supports_files: true
451
- supports_videos: true
452
503
  costs_per_million_token:
453
504
  input: 0.075
454
505
  output: 0.30
@@ -459,15 +510,16 @@ old-gemini-models:
459
510
  Gemini 1.5 Pro is a high-performance model with multimodal capabilities
460
511
  useful for complex reasoning tasks and dataset analysis.
461
512
  release_date: 2024-09-24
513
+ supports:
514
+ images: true
515
+ files: true
516
+ videos: true
517
+ metadata:
518
+ deprecated: true
462
519
  properties:
463
520
  context_window: 2_097_152
464
- max_token_output: 8_192
521
+ max_tokens: 8_192
465
522
  training_cutoff: "2024-05"
466
- class_properties:
467
- deprecated: true
468
- supports_images: true
469
- supports_files: true
470
- supports_videos: true
471
523
  costs_per_million_token:
472
524
  input: 1.25
473
525
  output: 5.0
@@ -481,15 +533,16 @@ old-gemini-models:
481
533
  Gemini 1.5 Pro (early release) supports multimodal inputs and advanced
482
534
  reasoning on moderate context lengths.
483
535
  release_date: 2024-05-24
536
+ supports:
537
+ images: true
538
+ files: true
539
+ videos: true
540
+ metadata:
541
+ deprecated: true
484
542
  properties:
485
543
  context_window: 1_048_576
486
- max_token_output: 8_192
544
+ max_tokens: 8_192
487
545
  training_cutoff: "2023-11"
488
- class_properties:
489
- deprecated: true
490
- supports_images: true
491
- supports_files: true
492
- supports_videos: true
493
546
  costs_per_million_token:
494
547
  input: 1.25
495
548
  output: 5.0
@@ -500,13 +553,14 @@ old-gemini-models:
500
553
  Gemini 1.0 Pro is an earlier generation multimodal model supporting
501
554
  text, image, and audio inputs with foundational reasoning abilities.
502
555
  release_date: 2024-02-14
556
+ supports:
557
+ images: false
558
+ metadata:
559
+ deprecated: true
503
560
  properties:
504
561
  context_window: 32_760
505
- max_token_output: 8_192
562
+ max_tokens: 8_192
506
563
  training_cutoff: "2023-02"
507
- class_properties:
508
- supports_images: false
509
- deprecated: true
510
564
  costs_per_million_token:
511
565
  input: 0.50
512
566
  output: 1.50
@@ -2,11 +2,12 @@ base-config:
2
2
  company: Inception
3
3
  documentation_url: https://docs.inceptionlabs.ai/get-started/models
4
4
  open_source: false
5
- class_properties:
6
- supports_images: false
7
- supports_tools: true
5
+ supports:
6
+ images: false
7
+ tools: true
8
+ files: false
9
+ metadata:
8
10
  available_as_evaluator: false
9
- supports_files: false
10
11
  available_for_everyone: true
11
12
  ignored_for_cost: false
12
13
 
@@ -17,8 +18,7 @@ inception-models:
17
18
  properties:
18
19
  reasoning_model: false
19
20
  context_window: 128_000
20
- default_parameters:
21
- max_output_tokens: 16_400 # number lifted from https://openrouter.ai/inception/mercury
21
+ max_tokens: 16_384 # rounded down
22
22
  costs_per_million_token:
23
23
  input: 0.25
24
24
  output: 1
@@ -2,33 +2,32 @@ base-config:
2
2
  company: Kimi
3
3
  documentation_url: https://platform.moonshot.ai/docs
4
4
  open_source: true
5
- class_properties:
6
- supports_images: false
7
- supports_tools: true
5
+ supports:
6
+ images: false
7
+ tools: true
8
+ files: false
9
+ metadata:
8
10
  available_as_evaluator: false
9
- supports_files: false
10
11
  available_for_everyone: true
11
12
  ignored_for_cost: false
12
13
 
13
14
  kimi-k2-models:
14
-
15
15
  kimi/kimi-k2-thinking:
16
16
  label: Kimi K2 Thinking
17
17
  release_date: 2025-11-06
18
18
  properties:
19
19
  reasoning_model: true
20
- context_window: 128_000
21
- max_token_output: 128_000
20
+ context_window: 256_000
21
+ max_tokens: 32_000
22
22
  default_parameters:
23
23
  temperature: 1.0
24
- max_output_tokens: 128_000
25
24
  costs_per_million_token:
26
- input: 0.60
27
- output: 2.50
25
+ input: 0.6
26
+ output: 2.5
27
+ cache:
28
+ read: 0.15
28
29
  alternative_keys:
29
30
  - fireworks/kimi-k2-thinking:
30
31
  costs_per_million_token:
31
- input: 0.50
32
- output: 0.50
33
- cache:
34
- read_discount: 1
32
+ input: 0.6
33
+ output: 2.5
@@ -0,0 +1,37 @@
1
+ base-config:
2
+ company: MiniMax
3
+ documentation_url: https://platform.minimax.io/docs
4
+ open_source: true
5
+ supports:
6
+ images: false
7
+ files: false
8
+ tools: true
9
+ temperature: true
10
+ metadata:
11
+ available_as_evaluator: false
12
+ available_for_everyone: true
13
+ ignored_for_cost: false
14
+ properties:
15
+ reasoning_model: false
16
+
17
+ minimax-m2-models:
18
+
19
+ minimax/MiniMax-M2:
20
+ label: MiniMax-M2
21
+ description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.
22
+ release_date: 2025-10-26
23
+ properties:
24
+ context_window: 204_800
25
+ max_tokens: 131_000
26
+ reasoning_model: true
27
+ training_cutoff: null
28
+ default_parameters:
29
+ temperature: 1.0
30
+ top_p: 0.95
31
+ costs_per_million_token:
32
+ input: 0.30
33
+ output: 1.20
34
+ cache:
35
+ read: 0.03
36
+ write: 0.375
37
+