model-library 0.1.8__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {model_library-0.1.8 → model_library-0.1.9}/PKG-INFO +1 -1
  2. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/base.py +2 -0
  3. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/alibaba_models.yaml +5 -0
  4. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/all_models.json +115 -9
  5. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/anthropic_models.yaml +2 -1
  6. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/mistral_models.yaml +2 -0
  7. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/together_models.yaml +2 -0
  8. model_library-0.1.9/model_library/config/xiaomi_models.yaml +43 -0
  9. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/zai_models.yaml +3 -0
  10. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/anthropic.py +35 -8
  11. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/google/google.py +9 -1
  12. model_library-0.1.9/model_library/providers/vercel.py +34 -0
  13. model_library-0.1.9/model_library/providers/xiaomi.py +34 -0
  14. {model_library-0.1.8 → model_library-0.1.9}/model_library/register_models.py +5 -0
  15. {model_library-0.1.8 → model_library-0.1.9}/model_library/registry_utils.py +11 -4
  16. {model_library-0.1.8 → model_library-0.1.9}/model_library/utils.py +4 -7
  17. {model_library-0.1.8 → model_library-0.1.9}/model_library.egg-info/PKG-INFO +1 -1
  18. {model_library-0.1.8 → model_library-0.1.9}/model_library.egg-info/SOURCES.txt +5 -1
  19. model_library-0.1.9/tests/unit/test_utils.py +15 -0
  20. {model_library-0.1.8 → model_library-0.1.9}/uv.lock +3 -3
  21. {model_library-0.1.8 → model_library-0.1.9}/.gitattributes +0 -0
  22. {model_library-0.1.8 → model_library-0.1.9}/.github/workflows/publish.yml +0 -0
  23. {model_library-0.1.8 → model_library-0.1.9}/.github/workflows/style.yaml +0 -0
  24. {model_library-0.1.8 → model_library-0.1.9}/.github/workflows/test.yaml +0 -0
  25. {model_library-0.1.8 → model_library-0.1.9}/.github/workflows/typecheck.yml +0 -0
  26. {model_library-0.1.8 → model_library-0.1.9}/.gitignore +0 -0
  27. {model_library-0.1.8 → model_library-0.1.9}/LICENSE +0 -0
  28. {model_library-0.1.8 → model_library-0.1.9}/Makefile +0 -0
  29. {model_library-0.1.8 → model_library-0.1.9}/README.md +0 -0
  30. {model_library-0.1.8 → model_library-0.1.9}/examples/README.md +0 -0
  31. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/batch.py +0 -0
  32. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/custom_retrier.py +0 -0
  33. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/deep_research.py +0 -0
  34. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/stress.py +0 -0
  35. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/structured_output.py +0 -0
  36. {model_library-0.1.8 → model_library-0.1.9}/examples/advanced/web_search.py +0 -0
  37. {model_library-0.1.8 → model_library-0.1.9}/examples/basics.py +0 -0
  38. {model_library-0.1.8 → model_library-0.1.9}/examples/count_tokens.py +0 -0
  39. {model_library-0.1.8 → model_library-0.1.9}/examples/data/files.py +0 -0
  40. {model_library-0.1.8 → model_library-0.1.9}/examples/data/images.py +0 -0
  41. {model_library-0.1.8 → model_library-0.1.9}/examples/embeddings.py +0 -0
  42. {model_library-0.1.8 → model_library-0.1.9}/examples/files.py +0 -0
  43. {model_library-0.1.8 → model_library-0.1.9}/examples/images.py +0 -0
  44. {model_library-0.1.8 → model_library-0.1.9}/examples/prompt_caching.py +0 -0
  45. {model_library-0.1.8 → model_library-0.1.9}/examples/setup.py +0 -0
  46. {model_library-0.1.8 → model_library-0.1.9}/examples/token_retry.py +0 -0
  47. {model_library-0.1.8 → model_library-0.1.9}/examples/tool_calls.py +0 -0
  48. {model_library-0.1.8 → model_library-0.1.9}/model_library/__init__.py +0 -0
  49. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/__init__.py +0 -0
  50. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/batch.py +0 -0
  51. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/delegate_only.py +0 -0
  52. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/input.py +0 -0
  53. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/output.py +0 -0
  54. {model_library-0.1.8 → model_library-0.1.9}/model_library/base/utils.py +0 -0
  55. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/README.md +0 -0
  56. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/ai21labs_models.yaml +0 -0
  57. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/amazon_models.yaml +0 -0
  58. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/cohere_models.yaml +0 -0
  59. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/deepseek_models.yaml +0 -0
  60. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/dummy_model.yaml +0 -0
  61. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/fireworks_models.yaml +0 -0
  62. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/google_models.yaml +0 -0
  63. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/inception_models.yaml +0 -0
  64. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/kimi_models.yaml +0 -0
  65. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/minimax_models.yaml +0 -0
  66. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/openai_models.yaml +0 -0
  67. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/perplexity_models.yaml +0 -0
  68. {model_library-0.1.8 → model_library-0.1.9}/model_library/config/xai_models.yaml +0 -0
  69. {model_library-0.1.8 → model_library-0.1.9}/model_library/exceptions.py +0 -0
  70. {model_library-0.1.8 → model_library-0.1.9}/model_library/file_utils.py +0 -0
  71. {model_library-0.1.8 → model_library-0.1.9}/model_library/logging.py +0 -0
  72. {model_library-0.1.8 → model_library-0.1.9}/model_library/model_utils.py +0 -0
  73. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/__init__.py +0 -0
  74. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/ai21labs.py +0 -0
  75. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/alibaba.py +0 -0
  76. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/amazon.py +0 -0
  77. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/azure.py +0 -0
  78. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/cohere.py +0 -0
  79. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/deepseek.py +0 -0
  80. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/fireworks.py +0 -0
  81. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/google/__init__.py +0 -0
  82. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/google/batch.py +0 -0
  83. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/inception.py +0 -0
  84. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/kimi.py +0 -0
  85. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/minimax.py +0 -0
  86. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/mistral.py +0 -0
  87. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/openai.py +0 -0
  88. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/openrouter.py +0 -0
  89. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/perplexity.py +0 -0
  90. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/together.py +0 -0
  91. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/vals.py +0 -0
  92. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/xai.py +0 -0
  93. {model_library-0.1.8 → model_library-0.1.9}/model_library/providers/zai.py +0 -0
  94. {model_library-0.1.8 → model_library-0.1.9}/model_library/py.typed +0 -0
  95. {model_library-0.1.8 → model_library-0.1.9}/model_library/retriers/__init__.py +0 -0
  96. {model_library-0.1.8 → model_library-0.1.9}/model_library/retriers/backoff.py +0 -0
  97. {model_library-0.1.8 → model_library-0.1.9}/model_library/retriers/base.py +0 -0
  98. {model_library-0.1.8 → model_library-0.1.9}/model_library/retriers/token.py +0 -0
  99. {model_library-0.1.8 → model_library-0.1.9}/model_library/retriers/utils.py +0 -0
  100. {model_library-0.1.8 → model_library-0.1.9}/model_library/settings.py +0 -0
  101. {model_library-0.1.8 → model_library-0.1.9}/model_library.egg-info/dependency_links.txt +0 -0
  102. {model_library-0.1.8 → model_library-0.1.9}/model_library.egg-info/requires.txt +0 -0
  103. {model_library-0.1.8 → model_library-0.1.9}/model_library.egg-info/top_level.txt +0 -0
  104. {model_library-0.1.8 → model_library-0.1.9}/pyproject.toml +0 -0
  105. {model_library-0.1.8 → model_library-0.1.9}/scripts/browse_models.py +0 -0
  106. {model_library-0.1.8 → model_library-0.1.9}/scripts/config.py +0 -0
  107. {model_library-0.1.8 → model_library-0.1.9}/scripts/publish.py +0 -0
  108. {model_library-0.1.8 → model_library-0.1.9}/scripts/run_models.py +0 -0
  109. {model_library-0.1.8 → model_library-0.1.9}/setup.cfg +0 -0
  110. {model_library-0.1.8 → model_library-0.1.9}/tests/README.md +0 -0
  111. {model_library-0.1.8 → model_library-0.1.9}/tests/__init__.py +0 -0
  112. {model_library-0.1.8 → model_library-0.1.9}/tests/conftest.py +0 -0
  113. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/__init__.py +0 -0
  114. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/conftest.py +0 -0
  115. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_basic.py +0 -0
  116. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_batch.py +0 -0
  117. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_files.py +0 -0
  118. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_long_problem.py +0 -0
  119. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_reasoning.py +0 -0
  120. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_retry.py +0 -0
  121. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_streaming.py +0 -0
  122. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_structured_output.py +0 -0
  123. {model_library-0.1.8 → model_library-0.1.9}/tests/integration/test_tools.py +0 -0
  124. {model_library-0.1.8 → model_library-0.1.9}/tests/test_helpers.py +0 -0
  125. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/__init__.py +0 -0
  126. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/conftest.py +0 -0
  127. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_batch.py +0 -0
  128. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_count_tokens.py +0 -0
  129. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_deep_research.py +0 -0
  130. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_get_client.py +0 -0
  131. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_openai_config.py +0 -0
  132. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_prompt_caching.py +0 -0
  133. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_query_logger.py +0 -0
  134. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_registry.py +0 -0
  135. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_result_metadata.py +0 -0
  136. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_retry.py +0 -0
  137. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_token_retry.py +0 -0
  138. {model_library-0.1.8 → model_library-0.1.9}/tests/unit/test_tools.py +0 -0

PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.8
+Version: 0.1.9
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT

model_library/base/base.py
@@ -76,6 +76,7 @@ class LLMConfig(BaseModel):
     top_k: int | None = None
     reasoning: bool = False
     reasoning_effort: str | bool | None = None
+    compute_effort: str | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -155,6 +156,7 @@ class LLM(ABC):
 
         self.reasoning: bool = config.reasoning
         self.reasoning_effort: str | bool | None = config.reasoning_effort
+        self.compute_effort: str | None = config.compute_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos

model_library/config/alibaba_models.yaml
@@ -37,6 +37,11 @@ qwen-3-max-models:
     supports:
       tools: true
       images: false
+    # only applies for <32K input tokens
+    # TODO: add thresholds
+    costs_per_million_token:
+      input: 1.2
+      output: 6.0
 
   alibaba/qwen3-max-2026-01-23:
     label: Qwen 3 Max Thinking

model_library/config/all_models.json
@@ -122,7 +122,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {},
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7
@@ -259,7 +262,9 @@
         "write_markup": 1.0
       }
     },
-    "alternative_keys": [],
+    "alternative_keys": [
+      "vercel/zai/glm-4.7"
+    ],
     "default_parameters": {
       "temperature": 1.0,
       "top_p": 1.0
@@ -269,6 +274,94 @@
     "full_key": "zai/glm-4.7",
     "slug": "zai_glm-4.7"
   },
+  "vercel/zai/glm-4.7": {
+    "company": "zAI",
+    "label": "GLM 4.7",
+    "description": "Latest model from ZAI",
+    "release_date": "2025-12-22",
+    "open_source": true,
+    "documentation_url": "https://docs.z.ai/",
+    "properties": {
+      "context_window": 200000,
+      "max_tokens": 128000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.6,
+      "output": 2.2,
+      "cache": {
+        "read": 0.11,
+        "read_discount": 1.0,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 1.0
+    },
+    "provider_endpoint": "zai/glm-4.7",
+    "provider_name": "vercel",
+    "full_key": "vercel/zai/glm-4.7",
+    "slug": "vercel_zai_glm-4.7"
+  },
+  "xiaomi/mimo-v2-flash": {
+    "company": "Xiaomi",
+    "label": "MiMo V2 Flash",
+    "description": "MiMo V2 Flash is Xiaomi's Mixture-of-Experts (MoE) language model with 309B total parameters and 15B active parameters. Designed for high-speed reasoning and agentic workflows, it utilizes a novel hybrid attention architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art performance while significantly reducing inference costs.",
+    "release_date": "2025-12-17",
+    "open_source": true,
+    "documentation_url": "https://platform.xiaomimimo.com/#/docs/",
+    "properties": {
+      "context_window": 256000,
+      "max_tokens": 64000,
+      "training_cutoff": "December 2024"
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.1,
+      "output": 0.3,
+      "cache": {
+        "read": 0.01,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 0.3,
+      "top_p": 0.95
+    },
+    "provider_endpoint": "mimo-v2-flash",
+    "provider_name": "xiaomi",
+    "full_key": "xiaomi/mimo-v2-flash",
+    "slug": "xiaomi_mimo-v2-flash"
+  },
   "google/gemini-3-flash-preview": {
     "company": "Google",
     "label": "Gemini 3 Flash (12/25)",
@@ -856,7 +949,9 @@
       "available_as_evaluator": false,
       "ignored_for_cost": false
     },
-    "provider_properties": {},
+    "provider_properties": {
+      "supports_compute_effort": true
+    },
     "costs_per_million_token": {
       "input": 5.0,
       "output": 25.0,
@@ -906,7 +1001,9 @@
       "available_as_evaluator": false,
       "ignored_for_cost": false
     },
-    "provider_properties": {},
+    "provider_properties": {
+      "supports_compute_effort": true
+    },
     "costs_per_million_token": {
       "input": 5.0,
       "output": 25.0,
@@ -2210,7 +2307,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {},
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7
@@ -2244,7 +2344,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {},
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7
@@ -2611,7 +2714,10 @@
       "ignored_for_cost": false
     },
     "provider_properties": {},
-    "costs_per_million_token": {},
+    "costs_per_million_token": {
+      "input": 1.2,
+      "output": 6.0
+    },
     "alternative_keys": [],
     "default_parameters": {
       "temperature": 0.7
@@ -3920,7 +4026,7 @@
       "tools": true
     },
     "metadata": {
-      "deprecated": false,
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false
@@ -4550,7 +4656,7 @@
       "tools": true
     },
     "metadata": {
-      "deprecated": false,
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false

model_library/config/anthropic_models.yaml
@@ -22,7 +22,6 @@ base-config:
   documentation_url: https://docs.claude.com/en/docs/about-claude/models/overview
   default_parameters:
     temperature: 1
-
 claude-4-models:
   base-config:
     supports:
@@ -40,6 +39,8 @@ claude-4-models:
       cache:
         read: 0.5
         write: 6.25
+    provider_properties:
+      supports_compute_effort: true
     alternative_keys:
       - anthropic/claude-opus-4-5-20251101-thinking:
           properties:

model_library/config/mistral_models.yaml
@@ -38,6 +38,8 @@ magistral-models:
     costs_per_million_token:
       input: 2
       output: 5
+    metadata:
+      deprecated: true
 
   mistralai/magistral-medium-2509:
     label: Magistral Medium 1.2 (09/2025)

model_library/config/together_models.yaml
@@ -45,6 +45,8 @@ kimi-models:
     costs_per_million_token:
       input: 1.00
       output: 3.00
+    metadata:
+      deprecated: true
 
 # Meta Llama Models
 llama-4-models:

model_library/config/xiaomi_models.yaml (new file)
@@ -0,0 +1,43 @@
+base-config:
+  company: Xiaomi
+  open_source: true
+  documentation_url: https://platform.xiaomimimo.com/#/docs/
+  supports:
+    images: false
+    files: false
+    tools: true
+  metadata:
+    available_as_evaluator: false
+    available_for_everyone: true
+    ignored_for_cost: false
+  properties:
+    training_cutoff: December 2024
+
+xiaomi-models:
+  base-config:
+    properties:
+      context_window: 256000
+    supports:
+      temperature: true
+      top_p: true
+    default_parameters:
+      temperature: 0.3
+      top_p: 0.95
+
+  xiaomi/mimo-v2-flash:
+    label: MiMo V2 Flash
+    description:
+      MiMo V2 Flash is Xiaomi's Mixture-of-Experts (MoE) language model with
+      309B total parameters and 15B active parameters. Designed for high-speed
+      reasoning and agentic workflows, it utilizes a novel hybrid attention
+      architecture and Multi-Token Prediction (MTP) to achieve state-of-the-art
+      performance while significantly reducing inference costs.
+    release_date: 2025-12-17
+    properties:
+      context_window: 256000
+      max_tokens: 64000
+    costs_per_million_token:
+      input: 0.10
+      output: 0.30
+      cache:
+        read: 0.01

model_library/config/zai_models.yaml
@@ -51,6 +51,9 @@ zai-models:
         read: 0.11
     default_parameters:
       temperature: 1
+    alternative_keys:
+      - vercel/zai/glm-4.7
+
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"

model_library/providers/anthropic.py
@@ -23,6 +23,7 @@ from model_library.base import (
     InputItem,
     LLMBatchMixin,
     LLMConfig,
+    ProviderConfig,
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,
@@ -48,6 +49,11 @@ from model_library.utils import (
 )
 
 
+class AnthropicConfig(ProviderConfig):
+    supports_compute_effort: bool = False
+    supports_auto_thinking: bool = False
+
+
 class AnthropicBatchMixin(LLMBatchMixin):
     """Batch processing support for Anthropic's Message Batches API."""
 
@@ -251,6 +257,8 @@ class AnthropicBatchMixin(LLMBatchMixin):
 
 @register_provider("anthropic")
 class AnthropicModel(LLM):
+    provider_config = AnthropicConfig()
+
     def _get_default_api_key(self) -> str:
         if self.delegate_config:
             return self.delegate_config.api_key.get_secret_value()
@@ -547,13 +555,22 @@
         body["max_tokens"] = self.max_tokens
 
         if self.reasoning:
-            budget_tokens = kwargs.pop(
-                "budget_tokens", get_default_budget_tokens(self.max_tokens)
-            )
-            body["thinking"] = {
-                "type": "enabled",
-                "budget_tokens": budget_tokens,
-            }
+            if self.provider_config.supports_auto_thinking:
+                body["thinking"] = {"type": "auto"}
+            else:
+                budget_tokens = kwargs.pop(
+                    "budget_tokens", get_default_budget_tokens(self.max_tokens)
+                )
+                body["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": budget_tokens,
+                }
+
+        # effort controls compute allocation for text, tool calls, and thinking. Opus-4.5+
+        # use instead of reasoning_effort with auto_thinking
+        if self.provider_config.supports_compute_effort and self.compute_effort:
+            # default is "high"
+            body["output_config"] = {"effort": self.compute_effort}
 
         # Thinking models don't support temperature: https://docs.claude.com/en/docs/build-with-claude/extended-thinking#feature-compatibility
         if self.supports_temperature and not self.reasoning:
@@ -599,7 +616,17 @@
 
         stream_kwargs = {**body}
         if is_anthropic_endpoint:
-            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            betas = ["files-api-2025-04-14"]
+            if self.provider_config.supports_auto_thinking:
+                betas.extend(
+                    [
+                        "auto-thinking-2026-01-12",
+                        "effort-2025-11-24",
+                        "max-effort-2026-01-24",
+                    ]
+                )
+            else:
+                betas.extend(["interleaved-thinking-2025-05-14"])
             if "sonnet-4-5" in self.model_name:
                 betas.append("context-1m-2025-08-07")
             stream_kwargs["betas"] = betas
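
Note: a condensed, self-contained restatement of the new request-body branching above (simplified names, not the library's actual method signature):

    def build_thinking_fields(reasoning: bool, auto_thinking: bool,
                              supports_effort: bool, compute_effort: str | None,
                              budget_tokens: int) -> dict:
        body: dict = {}
        if reasoning:
            if auto_thinking:
                # newer models let the API choose the thinking budget
                body["thinking"] = {"type": "auto"}
            else:
                body["thinking"] = {"type": "enabled", "budget_tokens": budget_tokens}
        if supports_effort and compute_effort:
            # compute effort scales text, tool-call, and thinking output alike
            body["output_config"] = {"effort": compute_effort}
        return body

    # build_thinking_fields(True, True, True, "medium", 0)
    # -> {"thinking": {"type": "auto"}, "output_config": {"effort": "medium"}}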

model_library/providers/google/google.py
@@ -361,16 +361,21 @@ class GoogleModel(LLM):
         contents: list[Content | None] = []
         finish_reason: FinishReason | None = None
 
+        chunks: list[GenerateContentResponse] = []
+
         async for chunk in stream:
+            chunks.append(chunk)
             candidates = chunk.candidates
             if not candidates:
                 continue
 
             content = candidates[0].content
 
+            meaningful_content = False
             if content and content.parts:
                 for part in content.parts:
                     if part.function_call:
+                        meaningful_content = True
                         if not part.function_call.name:
                             raise Exception(f"Invalid function call: {part}")
@@ -387,13 +392,15 @@
                     if not part.text:
                         continue
                     if part.thought:
+                        meaningful_content = True
                         reasoning += part.text
                     else:
+                        meaningful_content = True
                         text += part.text
 
             if chunk.usage_metadata:
                 metadata = chunk.usage_metadata
-            if content:
+            if content and meaningful_content:
                 contents.append(content)
             if candidates[0].finish_reason:
                 finish_reason = candidates[0].finish_reason
@@ -402,6 +409,7 @@
             self.logger.error(f"Unexpected finish reason: {finish_reason}")
 
         if not text and not reasoning and not tool_calls:
+            self.logger.error(f"Chunks: {chunks}")
             raise ModelNoOutputError("Model returned empty response")
 
         result = QueryResult(

model_library/providers/vercel.py (new file)
@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("vercel")
+class VercelModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["vercel"] = "vercel",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://vercel.com/docs/ai-gateway/sdks-and-apis#quick-start
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://ai-gateway.vercel.sh/v1",
+                api_key=SecretStr(model_library_settings.VERCEL_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )

model_library/providers/xiaomi.py (new file)
@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("xiaomi")
+class XiaomiModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["xiaomi"] = "xiaomi",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://platform.xiaomimimo.com/#/docs/quick-start/first-api-call
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://api.xiaomimimo.com/v1",
+                api_key=SecretStr(model_library_settings.XIAOMI_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )
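
Note: both new providers are thin DelegateOnly wrappers that differ only in base_url and API key; the OpenAI-compatible delegate handles the actual requests. A hypothetical usage sketch (assumes VERCEL_API_KEY and XIAOMI_API_KEY are configured in model_library_settings):

    from model_library.providers.vercel import VercelModel
    from model_library.providers.xiaomi import XiaomiModel

    vercel_model = VercelModel("zai/glm-4.7")    # routed via ai-gateway.vercel.sh
    xiaomi_model = XiaomiModel("mimo-v2-flash")  # routed via api.xiaomimimo.com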

model_library/register_models.py
@@ -170,6 +170,7 @@ class DefaultParameters(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning_effort: str | bool | None = None
+    compute_effort: str | bool | None = None
 
 
 class RawModelConfig(BaseModel):
@@ -338,6 +339,10 @@ def _register_models() -> ModelRegistry:
         copy.slug = key.replace("/", "_")
         copy.full_key = key
         copy.alternative_keys = []
+        copy.provider_properties = ProviderProperties.model_validate(
+            provider_properties
+        )
+
         registry[key] = copy
 
     return registry
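
Note: model_validate converts the raw provider_properties mapping into a typed object, so entries registered under alternative keys carry the same validated provider flags as the primary key. An illustrative pydantic pattern (the field name is taken from this diff; the class body is a guess at the relevant slice of ProviderProperties):

    from pydantic import BaseModel

    class ProviderProperties(BaseModel):
        supports_compute_effort: bool = False

    props = ProviderProperties.model_validate({"supports_compute_effort": True})
    assert props.supports_compute_effort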

model_library/registry_utils.py
@@ -1,9 +1,8 @@
+import tiktoken
 from functools import cache
 from pathlib import Path
 from typing import TypedDict
 
-import tiktoken
-
 from model_library.base import (
     LLM,
     LLMConfig,
@@ -235,6 +234,14 @@ def get_model_names(
     )
 
 
+"""
+everything below this comment is included for legacy support of caselaw/corpfin custom models.
+@orestes please remove this as part of the migration to a standard CorpFin harness.
+"""
+
+DEFAULT_CONTEXT_WINDOW = 128_000
+
+
 @cache
 def _get_tiktoken_encoder():
     """Get cached tiktoken encoder for consistent tokenization."""
@@ -257,7 +264,7 @@
         Trimmed document, or original document if trimming isn't needed
     """
 
-    max_tokens = get_max_document_tokens(model_name)
+    max_tokens = get_max_document_tokens(model_name) or DEFAULT_CONTEXT_WINDOW
 
     encoding = _get_tiktoken_encoder()
     tokens = encoding.encode(document)
@@ -284,5 +291,5 @@ def get_max_document_tokens(model_name: str, output_buffer: int = 10000) -> int:
     # Import here to avoid circular imports
     from model_library.utils import get_context_window_for_model
 
-    context_window = get_context_window_for_model(model_name)
+    context_window = get_context_window_for_model(model_name) or DEFAULT_CONTEXT_WINDOW
     return context_window - output_buffer

model_library/utils.py
@@ -1,6 +1,5 @@
 import logging
 from collections.abc import Mapping, Sequence
-
 import httpx
 from anthropic import AsyncAnthropic
 from openai import AsyncOpenAI
@@ -75,16 +74,15 @@ def create_anthropic_client_with_defaults(
     )
 
 
-def get_context_window_for_model(model_name: str, default: int = 128_000) -> int:
+def get_context_window_for_model(model_name: str) -> int | None:
     """
     Get the context window for a model by looking up its configuration from the registry.
 
     Args:
         model_name: The name of the model in the registry (e.g., "openai/gpt-4o-mini-2024-07-18" or "azure/gpt-4o-mini-2024-07-18")
-        default: Default context window to return if model not found or missing context_window
 
     Returns:
-        Context window size in tokens
+        Context window size in tokens (or `None` if not found)
     """
     # import here to avoid circular imports
     from model_library.register_models import get_model_registry
@@ -98,7 +96,6 @@ def get_context_window_for_model(model_name: str, default: int = 128_000) -> int
         return model_config.properties.context_window
     else:
         logger.warning(
-            f"Model {model_name} not found in registry or missing context_window, "
-            f"using default context length of {default}"
+            f"Model {model_name} not found in registry or missing context_window"
         )
-    return default
+    return None
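
Note: get_context_window_for_model no longer hides a 128k default; callers choose their own fallback, as registry_utils now does with DEFAULT_CONTEXT_WINDOW. A caller-side sketch (model key taken from the new unit test below):

    from model_library.utils import get_context_window_for_model

    FALLBACK = 128_000  # mirrors registry_utils.DEFAULT_CONTEXT_WINDOW
    window = get_context_window_for_model("openai/gpt-4o-mini") or FALLBACK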

model_library.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.8
+Version: 0.1.9
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT

model_library.egg-info/SOURCES.txt
@@ -68,6 +68,7 @@ model_library/config/openai_models.yaml
 model_library/config/perplexity_models.yaml
 model_library/config/together_models.yaml
 model_library/config/xai_models.yaml
+model_library/config/xiaomi_models.yaml
 model_library/config/zai_models.yaml
 model_library/providers/__init__.py
 model_library/providers/ai21labs.py
@@ -87,7 +88,9 @@ model_library/providers/openrouter.py
 model_library/providers/perplexity.py
 model_library/providers/together.py
 model_library/providers/vals.py
+model_library/providers/vercel.py
 model_library/providers/xai.py
+model_library/providers/xiaomi.py
 model_library/providers/zai.py
 model_library/providers/google/__init__.py
 model_library/providers/google/batch.py
@@ -129,4 +132,5 @@ tests/unit/test_registry.py
 tests/unit/test_result_metadata.py
 tests/unit/test_retry.py
 tests/unit/test_token_retry.py
-tests/unit/test_tools.py
+tests/unit/test_tools.py
+tests/unit/test_utils.py

tests/unit/test_utils.py (new file)
@@ -0,0 +1,15 @@
+"""Unit tests for model_library/utils.py"""
+
+from model_library.utils import get_context_window_for_model
+
+
+def test_get_context_window_for_existing_model():
+    """Test that context window is correctly fetched for a model that exists."""
+    context_window = get_context_window_for_model("openai/gpt-4o-mini")
+    assert context_window == 128_000
+
+
+def test_get_context_window_for_nonexistent_model():
+    """Test that None is returned for a model that doesn't exist."""
+    context_window = get_context_window_for_model("nonexistent/fake-model-xyz")
+    assert context_window is None

uv.lock
@@ -157,7 +157,7 @@ wheels = [
 
 [[package]]
 name = "anthropic"
-version = "0.74.0"
+version = "0.77.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -169,9 +169,9 @@ dependencies = [
     { name = "sniffio" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5b/f9/baa1b885c8664b446e6a13003938046901e54ffd70b532bbebd01256e34b/anthropic-0.74.0.tar.gz", hash = "sha256:114ec10cb394b6764e199da06335da4747b019c5629e53add33572f66964ad99", size = 428958, upload-time = "2025-11-18T15:29:47.579Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/eb/85/6cb5da3cf91de2eeea89726316e8c5c8c31e2d61ee7cb1233d7e95512c31/anthropic-0.77.0.tar.gz", hash = "sha256:ce36efeb80cb1e25430a88440dc0f9aa5c87f10d080ab70a1bdfd5c2c5fbedb4", size = 504575, upload-time = "2026-01-29T18:20:41.507Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/61/27/8c404b290ec650e634eacc674df943913722ec21097b0476d68458250c2f/anthropic-0.74.0-py3-none-any.whl", hash = "sha256:df29b8dfcdbd2751fa31177f643d8d8f66c5315fe06bdc42f9139e9f00d181d5", size = 371474, upload-time = "2025-11-18T15:29:45.748Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/27/9df785d3f94df9ac72f43ee9e14b8120b37d992b18f4952774ed46145022/anthropic-0.77.0-py3-none-any.whl", hash = "sha256:65cc83a3c82ce622d5c677d0d7706c77d29dc83958c6b10286e12fda6ffb2651", size = 397867, upload-time = "2026-01-29T18:20:39.481Z" },
 ]
 
 [[package]]