model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. model_library/base/base.py +13 -6
  2. model_library/base/output.py +55 -0
  3. model_library/base/utils.py +3 -2
  4. model_library/config/README.md +169 -0
  5. model_library/config/ai21labs_models.yaml +11 -10
  6. model_library/config/alibaba_models.yaml +21 -22
  7. model_library/config/all_models.json +4708 -2471
  8. model_library/config/amazon_models.yaml +100 -102
  9. model_library/config/anthropic_models.yaml +59 -45
  10. model_library/config/cohere_models.yaml +25 -24
  11. model_library/config/deepseek_models.yaml +28 -25
  12. model_library/config/dummy_model.yaml +9 -7
  13. model_library/config/fireworks_models.yaml +86 -56
  14. model_library/config/google_models.yaml +156 -102
  15. model_library/config/inception_models.yaml +6 -6
  16. model_library/config/kimi_models.yaml +13 -14
  17. model_library/config/minimax_models.yaml +37 -0
  18. model_library/config/mistral_models.yaml +85 -29
  19. model_library/config/openai_models.yaml +192 -159
  20. model_library/config/perplexity_models.yaml +8 -23
  21. model_library/config/together_models.yaml +115 -103
  22. model_library/config/xai_models.yaml +85 -57
  23. model_library/config/zai_models.yaml +23 -15
  24. model_library/exceptions.py +12 -17
  25. model_library/file_utils.py +1 -1
  26. model_library/providers/amazon.py +32 -17
  27. model_library/providers/anthropic.py +2 -6
  28. model_library/providers/google/google.py +35 -29
  29. model_library/providers/minimax.py +33 -0
  30. model_library/providers/mistral.py +10 -1
  31. model_library/providers/openai.py +10 -8
  32. model_library/providers/together.py +18 -211
  33. model_library/register_models.py +36 -38
  34. model_library/registry_utils.py +18 -16
  35. model_library/utils.py +2 -2
  36. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
  37. model_library-0.1.4.dist-info/RECORD +64 -0
  38. model_library-0.1.2.dist-info/RECORD +0 -61
  39. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
  40. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
  41. {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
model_library/config/xai_models.yaml
@@ -2,20 +2,21 @@ base-config:
   company: xAI
   documentation_url: https://docs.x.ai/docs#models
   open_source: false
-  class_properties:
-    supports_images: true
+  supports:
+    images: true
+    files: false
+    tools: true
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: false
   properties:
     reasoning_model: false
 
 xai-models:
   base-config:
-    class_properties:
-      supports_temperature: true
+    supports:
+      temperature: true
     costs_per_million_token:
       cache:
         read_discount: 0.25
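This release replaces the flat class_properties block throughout the provider configs with two nested blocks: capability flags move under supports: with shortened names (supports_images → images), bookkeeping flags such as available_as_evaluator and deprecated move under metadata:, and max_token_output is renamed max_tokens. A minimal sketch of flattening the new shape back into the old flag names, assuming plain dicts from a YAML load (the helper name is illustrative, not part of the package):

from typing import Any

def flatten_flags(model_cfg: dict[str, Any]) -> dict[str, bool]:
    # supports: {images: true} -> supports_images: true
    flat = {f"supports_{k}": bool(v) for k, v in model_cfg.get("supports", {}).items()}
    # metadata carries non-capability flags through unchanged
    flat.update(model_cfg.get("metadata", {}))
    return flat

assert flatten_flags(
    {"supports": {"images": True, "files": False}, "metadata": {"deprecated": True}}
) == {"supports_images": True, "supports_files": False, "deprecated": True}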
@@ -29,20 +30,16 @@ xai-models:
     release_date: 2025-08-25
     properties:
       context_window: 256_000
-      max_token_output: 40_000
+      max_tokens: 40_000
       reasoning_model: true
-    class_properties:
-      supports_images: false
-      supports_tools: true
+    supports:
+      images: false
     costs_per_million_token:
       input: 0.20
       output: 1.50
       cache:
         read: 0.02
     documentation_url: https://docs.x.ai/docs/models/grok-code-fast-1
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 40000
     alternative_keys:
       - grok/grok-code-fast
       - grok/grok-code-fast-1-0825
@@ -52,16 +49,12 @@ xai-models:
     description: Latest advancement in cost-efficient reasoning models with unified architecture. Handles complex requests with deep chain-of-thought reasoning. Features 2M token context window and native tool use.
     release_date: 2025-09-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-reasoning
@@ -74,28 +67,69 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest
 
+  grok/grok-4-1-fast-reasoning:
+    label: Grok 4.1 Fast (Reasoning)
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: true
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
+  grok/grok-4-1-fast-non-reasoning:
+    label: Grok 4.1 Fast Non-Reasoning
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: false
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
     release_date: 2025-09-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: false
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning
@@ -108,9 +142,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 2000000
     alternative_keys:
       - grok/grok-4-fast-non-reasoning-latest
 
@@ -118,13 +149,12 @@ xai-models:
     label: Grok 4
     description: Latest and greatest flagship model offering unparalleled performance in natural language, math and reasoning. The perfect jack of all trades with native tool use and structured outputs support.
     release_date: 2025-07-09
-    class_properties:
-      supports_images: true
-      available_for_everyone: false
-      supports_tools: true
+    supports:
+      images: true
+      tools: true
     properties:
       context_window: 256_000
-      max_token_output: 128_000
+      max_tokens: 128_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-0709
@@ -137,9 +167,6 @@ xai-models:
         threshold: 128_000
         input: 6.00
         output: 30.00
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4
       - grok/grok-4-latest
@@ -150,15 +177,15 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
       reasoning_model: true
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.30
       output: 0.50
-      cached:
+      cache:
         read: 0.075
     documentation_url: https://docs.x.ai/docs/models/grok-3-mini
     default_parameters:
@@ -188,7 +215,7 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
     costs_per_million_token:
       input: 3.00
@@ -211,10 +238,10 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
-      supports_images: true
+    supports:
+      images: true
     costs_per_million_token:
       input: 2.00
       output: 10.00
@@ -228,9 +255,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -242,10 +269,11 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
-      supports_images: true
+    supports:
+      images: true
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
@@ -257,9 +285,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
model_library/config/zai_models.yaml
@@ -2,12 +2,13 @@ base-config:
   company: zAI
   open_source: true
   documentation_url: https://docs.z.ai/
-  class_properties:
-    supports_images: false
-    supports_files: false
+  supports:
+    images: false
+    files: false
+    temperature: true
+    tools: true
+  properties:
     reasoning_model: true
-    supports_temperature: true
-    supports_tools: true
   default_parameters:
     temperature: 0.6
     top_p: 1
@@ -23,14 +24,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-      max_token_output: 81_920
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.6
       output: 2.2
       cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p5
+      - fireworks/glm-4p5:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
 
   zai/glm-4.5-air:
     label: GLM 4.5 Air
@@ -38,14 +42,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
      context_window: 128_000
-      max_token_output: 81_920
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.2
       output: 1.1
-    cache:
+      cache:
         read: 0.03
     alternative_keys:
-      - together/zai-org/GLM-4.5-Air-FP8
+      - together/zai-org/GLM-4.5-Air-FP8:
+          costs_per_million_token:
+            input: 0.22
+            output: 0.88
 
   zai/glm-4.6:
     label: GLM 4.6
@@ -53,13 +60,14 @@ zai-models:
     release_date: 2025-09-30
     properties:
       context_window: 200_000
-      max_token_output: 122_880
+      max_tokens: 122_880
     costs_per_million_token:
       input: 0.6
       output: 2.2
-    cache:
+      cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p6
-
-
+      - fireworks/glm-4p6:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
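The alternative_keys entries above are no longer always bare strings: an alias can now be a single-key mapping that overrides fields of the parent model, here costs_per_million_token for the Fireworks and Together routes. A sketch of consuming both shapes, assuming plain YAML-loaded values (the helper name is illustrative, not part of the package):

from typing import Any

def iter_alternative_keys(entries: list[str | dict[str, Any]]):
    """Yield (alias, overrides) pairs from a mixed alternative_keys list."""
    for entry in entries:
        if isinstance(entry, str):
            yield entry, {}  # bare alias: inherits the parent model's config
        else:
            alias, overrides = next(iter(entry.items()))  # {alias: {...overrides}}
            yield alias, overrides or {}

entries = [
    "grok/grok-4-fast",
    {"fireworks/glm-4p6": {"costs_per_million_token": {"input": 0.55, "output": 2.19}}},
]
print(dict(iter_alternative_keys(entries)))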
model_library/exceptions.py
@@ -5,9 +5,11 @@ from typing import Any, Callable
 
 import backoff
 from ai21 import TooManyRequestsError as AI21RateLimitError
+from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
+from httpx import ConnectError as HTTPXConnectError
 from httpx import ReadError as HTTPXReadError
 from httpx import RemoteProtocolError
 from openai import APIConnectionError as OpenAIAPIConnectionError
@@ -53,20 +55,6 @@ class MaxOutputTokensExceededError(Exception):
         super().__init__(message or MaxOutputTokensExceededError.DEFAULT_MESSAGE)
 
 
-class MaxInputTokensExceededError(Exception):
-    """
-    Raised when the input exceeds the allowed max input tokens limit
-    """
-
-    DEFAULT_MESSAGE: str = (
-        "Input exceeded the maximum allowed input tokens. "
-        "Consider reducing the input size."
-    )
-
-    def __init__(self, message: str | None = None):
-        super().__init__(message or MaxInputTokensExceededError.DEFAULT_MESSAGE)
-
-
 class MaxContextWindowExceededError(Exception):
     """
     Raised when the context window exceeds the allowed max context window limit
@@ -97,7 +85,9 @@ CONTEXT_WINDOW_PATTERN = re.compile(
     r"sent message larger than max|"
     r"input tokens exceeded|"
     r"(messages?|total length).*too long|"
-    r"payload.*too large"
+    r"payload.*too large|"
+    r"string too long|"
+    r"input exceeded the context window"
 )
 
 
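The two added branches cover providers that report oversized inputs as "string too long" or "input exceeded the context window" rather than as a payload-size error. A self-contained check of just the fragment visible in this hunk (compiling with re.IGNORECASE here is an assumption; the full flag set is outside the hunk):

import re

pattern = re.compile(
    r"payload.*too large|"
    r"string too long|"
    r"input exceeded the context window",
    re.IGNORECASE,
)

assert pattern.search("Error code: 400 - string too long, maximum length 1048576")
assert pattern.search("Input exceeded the context window of the model")
assert not pattern.search("temporary rate limit, retry later")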
@@ -166,9 +156,11 @@ RETRIABLE_EXCEPTIONS = [
     OpenAIUnprocessableEntityError,
     OpenAIAPIConnectionError,
     AnthropicRateLimitError,
+    InternalServerError,
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
+    HTTPXConnectError,
     HTTPCoreReadError,
 ]
 
@@ -186,11 +178,13 @@ RETRIABLE_EXCEPTION_CODES = [
     "connection_error",
     "service_unavailable",
     "rate_limit",
+    "rate limit",
     "internal_error",
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
     "throttlingexception",  # AWS throttling errors
+    "internal server error",
 ]
 
 
@@ -239,8 +233,9 @@ def retry_llm_call(
     logger: logging.Logger,
     max_tries: int = RETRY_MAX_TRIES,
     max_time: float | None = None,
-    backoff_callback: Callable[[int, Exception | None, float, float], None]
-    | None = None,
+    backoff_callback: (
+        Callable[[int, Exception | None, float, float], None] | None
+    ) = None,
 ):
     def on_backoff(details: Details):
         exception = details.get("exception")
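The signature change above is purely cosmetic (the union type is parenthesized, Black-style). For orientation, this is roughly how a retry wrapper built on the module's constants plugs into backoff; the function name and give-up policy here are illustrative, not the package's actual implementation:

import backoff

from model_library.exceptions import RETRIABLE_EXCEPTIONS

@backoff.on_exception(
    backoff.expo,                  # exponential wait between tries
    tuple(RETRIABLE_EXCEPTIONS),   # retry only the transport/rate-limit errors listed above
    max_tries=5,
)
async def call_model(prompt: str) -> str:
    ...  # issue the provider request; non-retriable errors propagate immediately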
@@ -56,7 +56,7 @@ def concat_images(
         new_width = int(combined_image.width * scale_factor)
         new_height = int(combined_image.height * scale_factor)
 
-        combined_image = combined_image.resize(
+        combined_image = combined_image.resize(  # type: ignore
             (new_width, new_height), Image.Resampling.LANCZOS
         )
 
model_library/providers/amazon.py
@@ -26,6 +26,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -60,11 +61,13 @@ class AmazonModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-        if self.model_name.endswith("-thinking"):
-            self.model_name = self.model_name.replace("-thinking", "")
-            self.reasoning = True
-        if self.max_tokens < 1024:
-            self.max_tokens = 2048
+        self.supports_cache = "amazon" in self.model_name or "claude" in self.model_name
+        self.supports_cache = (
+            self.supports_cache and "v2" not in self.model_name
+        )  # supported but no access yet
+        self.supports_tool_cache = self.supports_cache and "claude" in self.model_name
+
+    cache_control = {"type": "default"}
 
     @override
     async def parse_input(
@@ -120,6 +123,10 @@ class AmazonModel(LLM):
             new_input.append(item)
 
         if content_user:
+            if self.supports_cache:
+                if not isinstance(input[-1], FileBase):
+                    # last item cannot be file
+                    content_user.append({"cachePoint": self.cache_control})
             new_input.append({"role": "user", "content": content_user})
 
         return new_input
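cachePoint is the Bedrock Converse API's prompt-caching marker: content blocks before the marker become cacheable. With cache_control = {"type": "default"}, the user message assembled above ends up shaped roughly like this (text value illustrative):

message = {
    "role": "user",
    "content": [
        {"text": "Summarize the attached report."},
        # checkpoint appended last; skipped when the trailing item is a file
        {"cachePoint": {"type": "default"}},
    ],
}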
@@ -174,6 +181,8 @@ class AmazonModel(LLM):
                 }
             }
         )
+        if parsed_tools and self.supports_tool_cache:
+            parsed_tools.append({"cachePoint": self.cache_control})
         return parsed_tools
 
     @override
@@ -203,8 +212,12 @@ class AmazonModel(LLM):
 
         if "system_prompt" in kwargs:
             body["system"] = [{"text": kwargs.pop("system_prompt")}]
+            if self.supports_cache:
+                body["system"].append({"cachePoint": self.cache_control})
 
         if self.reasoning:
+            if self.max_tokens < 1024:
+                self.max_tokens = 2048
             budget_tokens = kwargs.pop(
                 "budget_tokens", get_default_budget_tokens(self.max_tokens)
             )
@@ -244,9 +257,8 @@ class AmazonModel(LLM):
         tool_calls: dict[str, Any] = {}
 
         messages: dict[str, Any] = {"content": []}
-        input_tokens = 0
-        output_tokens = 0
         stop_reason: str = ""
+        metadata = QueryResultMetadata()
 
         for chunk in response["stream"]:
             key = list(chunk.keys())[0]
@@ -281,8 +293,16 @@ class AmazonModel(LLM):
                             tool_calls["input"] += delta["toolUse"]["input"]
 
                 case "metadata":
-                    input_tokens = value["usage"]["inputTokens"]
-                    output_tokens = value["usage"]["outputTokens"]
+                    metadata = QueryResultMetadata(
+                        in_tokens=value["usage"]["inputTokens"],
+                        out_tokens=value["usage"]["outputTokens"],
+                    )
+                    metadata.cache_read_tokens = value["usage"].get(
+                        "cacheReadInputTokens", None
+                    )
+                    metadata.cache_write_tokens = value["usage"].get(
+                        "cacheWriteInputTokens", None
+                    )
 
                 case "contentBlockStop":
                     if tool_calls:
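The usage counters arrive in a single metadata stream event, and the cache counters are optional, hence the .get(...) calls above. The event consumed by that branch looks roughly like this (token counts illustrative):

event = {
    "metadata": {
        "usage": {
            "inputTokens": 1200,
            "outputTokens": 345,
            "cacheReadInputTokens": 1024,  # absent when nothing was read from cache
            "cacheWriteInputTokens": 0,
        }
    }
}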
@@ -308,7 +328,7 @@ class AmazonModel(LLM):
                 case "messageStop":
                     stop_reason = value["stopReason"]
 
-        return messages, stop_reason, input_tokens, output_tokens
+        return messages, stop_reason, metadata
 
     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#
     @override
@@ -326,9 +346,7 @@ class AmazonModel(LLM):
             **body,
         )
 
-        messages, stop_reason, input_tokens, output_tokens = await self.stream_response(
-            response
-        )
+        messages, stop_reason, metadata = await self.stream_response(response)
 
         text = " ".join([i["text"] for i in messages["content"] if "text" in i])
         reasoning = " ".join(
@@ -361,10 +379,7 @@ class AmazonModel(LLM):
         return QueryResult(
             output_text=text,
             reasoning=reasoning,
-            metadata=QueryResultMetadata(
-                in_tokens=input_tokens,
-                out_tokens=output_tokens,
-            ),
+            metadata=metadata,
             tool_calls=tool_calls,
             history=[*input, messages],
         )
model_library/providers/anthropic.py
@@ -562,12 +562,8 @@ class AnthropicModel(LLM):
 
         body = await self.create_body(input, tools=tools, **kwargs)
 
-        betas = [
-            "files-api-2025-04-14",
-            "interleaved-thinking-2025-05-14",
-        ]
-
-        if "claude-sonnet-4-5" in self.model_name:
+        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+        if "sonnet-4-5" in self.model_name:
             betas.append("context-1m-2025-08-07")
 
         async with self.get_client().beta.messages.stream(