model-library 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. model_library/base/__init__.py +7 -0
  2. model_library/{base.py → base/base.py} +58 -429
  3. model_library/base/batch.py +121 -0
  4. model_library/base/delegate_only.py +94 -0
  5. model_library/base/input.py +100 -0
  6. model_library/base/output.py +229 -0
  7. model_library/base/utils.py +43 -0
  8. model_library/config/ai21labs_models.yaml +1 -0
  9. model_library/config/all_models.json +461 -36
  10. model_library/config/anthropic_models.yaml +30 -3
  11. model_library/config/deepseek_models.yaml +3 -1
  12. model_library/config/google_models.yaml +49 -0
  13. model_library/config/openai_models.yaml +43 -4
  14. model_library/config/together_models.yaml +1 -0
  15. model_library/config/xai_models.yaml +63 -3
  16. model_library/exceptions.py +8 -2
  17. model_library/file_utils.py +1 -1
  18. model_library/providers/__init__.py +0 -0
  19. model_library/providers/ai21labs.py +2 -0
  20. model_library/providers/alibaba.py +16 -78
  21. model_library/providers/amazon.py +3 -0
  22. model_library/providers/anthropic.py +215 -8
  23. model_library/providers/azure.py +2 -0
  24. model_library/providers/cohere.py +14 -80
  25. model_library/providers/deepseek.py +14 -90
  26. model_library/providers/fireworks.py +17 -81
  27. model_library/providers/google/google.py +55 -47
  28. model_library/providers/inception.py +15 -83
  29. model_library/providers/kimi.py +15 -83
  30. model_library/providers/mistral.py +2 -0
  31. model_library/providers/openai.py +10 -2
  32. model_library/providers/perplexity.py +12 -79
  33. model_library/providers/together.py +19 -210
  34. model_library/providers/vals.py +2 -0
  35. model_library/providers/xai.py +2 -0
  36. model_library/providers/zai.py +15 -83
  37. model_library/register_models.py +75 -57
  38. model_library/registry_utils.py +5 -5
  39. model_library/utils.py +3 -28
  40. {model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/METADATA +2 -3
  41. model_library-0.1.3.dist-info/RECORD +61 -0
  42. model_library-0.1.1.dist-info/RECORD +0 -54
  43. {model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/WHEEL +0 -0
  44. {model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/licenses/LICENSE +0 -0
  45. {model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/top_level.txt +0 -0
@@ -23,6 +23,32 @@ base-config:
23
23
  temperature: 1
24
24
 
25
25
  claude-4-models:
26
+ base-config:
27
+ class_properties:
28
+ supports_batch_requests: true
29
+
30
+ anthropic/claude-opus-4-5-20251101:
31
+ label: Claude Opus 4.5 (Nonthinking)
32
+ release_date: 2025-11-24
33
+ properties:
34
+ context_window: 200_000
35
+ max_token_output: 64_000
36
+ extending_thinking: 64_000
37
+ class_properties:
38
+ available_for_everyone: false
39
+ default_parameters:
40
+ max_output_tokens: 64_000
41
+ costs_per_million_token:
42
+ input: 15.0
43
+ output: 75.0
44
+ cache:
45
+ read: 1.5
46
+ write: 18.75
47
+ alternative_keys:
48
+ - anthropic/claude-opus-4-5-20251101-thinking:
49
+ properties:
50
+ reasoning_model: true
51
+
26
52
  anthropic/claude-opus-4-1-20250805:
27
53
  label: Claude Opus 4.1 (Nonthinking)
28
54
  description: Advanced model for specialized complex
@@ -193,11 +219,12 @@ claude-3-5-models:
193
219
  alternative_keys:
194
220
  - anthropic/claude-3-5-sonnet-latest
195
221
  - anthropic/claude-3.5-sonnet-latest
196
-
222
+
197
223
  anthropic/claude-3-5-sonnet-20240620:
198
224
  label: Claude 3.5 Sonnet
199
225
  release_date: 2024-06-20
200
- description: Claude Sonnet 3.5 (June 2024) variant for code and content generation,
226
+ description:
227
+ Claude Sonnet 3.5 (June 2024) variant for code and content generation,
201
228
  multilingual and vision-capable, deprecated.
202
229
  class_properties:
203
230
  deprecated: true
@@ -335,7 +362,7 @@ claude-2-models:
335
362
  costs_per_million_token:
336
363
  input: 8.0
337
364
  output: 24.0
338
-
365
+
339
366
  anthropic/claude-1.3:
340
367
  label: Claude 1.3
341
368
  release_date: null
@@ -10,7 +10,7 @@ base-config:
10
10
  ignored_for_cost: false
11
11
  properties:
12
12
  reasoning_model: false
13
-
13
+
14
14
  deepseek-v3p2-exp-models:
15
15
  base-config:
16
16
  class_properties:
@@ -23,6 +23,8 @@ deepseek-v3p2-exp-models:
23
23
  costs_per_million_token:
24
24
  input: 0.28
25
25
  output: 0.42
26
+ cache:
27
+ read_discount: 0.1
26
28
 
27
29
  deepseek/deepseek-chat:
28
30
  label: DeepSeek V3.2-Exp (Nonthinking)
@@ -31,6 +31,55 @@ gemma-models:
31
31
  input: 0.00
32
32
  output: 0.00
33
33
 
34
+
35
+ gemini-3-models:
36
+ base-config:
37
+ properties:
38
+ context_window: 1_048_576
39
+ max_token_output: 8_192
40
+ training_cutoff: "2025-01"
41
+ class_properties:
42
+ supports_images: true
43
+ supports_files: true
44
+ supports_videos: true
45
+ supports_tools: true
46
+ supports_batch_requests: true
47
+ supports_temperature: true
48
+ costs_per_million_token:
49
+ cache:
50
+ read_discount: 0.1
51
+ default_parameters:
52
+ temperature: 1
53
+ reasoning_effort: "high"
54
+
55
+ google/gemini-3-pro-preview:
56
+ label: Gemini 3 Pro (11/25)
57
+ description: Gemini 3 Pro, Google's most powerful model.
58
+ release_date: 2025-11-18
59
+ properties:
60
+ context_window: 1048576
61
+ max_token_output: 65536
62
+ training_cutoff: "2025-01"
63
+ reasoning_model: true
64
+ class_properties:
65
+ supports_images: true
66
+ supports_files: true
67
+ supports_videos: true
68
+ supports_tools: true
69
+ supports_batch_requests: true
70
+ supports_temperature: true
71
+ costs_per_million_token:
72
+ input: 2
73
+ output: 12
74
+ cache:
75
+ read: 0.20
76
+ context:
77
+ threshold: 200_000
78
+ input: 2.5
79
+ output: 15.0
80
+ cache:
81
+ read: 0.40
82
+
34
83
  gemini-2.5-models:
35
84
  base-config:
36
85
  properties:
@@ -31,6 +31,31 @@ gpt-5-models:
31
31
  supports_temperature: false
32
32
  supports_files: true
33
33
  supports_tools: true
34
+ openai/gpt-5.1-codex:
35
+ label: GPT 5.1 Codex
36
+ documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex
37
+ description: OpenAI's latest coding model
38
+ release_date: 2025-11-13
39
+ costs_per_million_token:
40
+ input: 1.25
41
+ output: 10.0
42
+ cache:
43
+ read: 0.125
44
+ default_parameters:
45
+ max_output_tokens: 128_000
46
+
47
+ openai/gpt-5.1-codex-mini:
48
+ label: GPT 5.1 Codex Mini
49
+ documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex-mini
50
+ description: OpenAI's miniature coding model
51
+ release_date: 2025-11-13
52
+ costs_per_million_token:
53
+ input: 0.25
54
+ output: 2.00
55
+ cache:
56
+ read: 0.025
57
+ default_parameters:
58
+ max_output_tokens: 128_000
34
59
 
35
60
  openai/gpt-5-codex:
36
61
  label: GPT 5 Codex
@@ -48,7 +73,24 @@ gpt-5-models:
48
73
  available_as_evaluator: true
49
74
  supports_images: true
50
75
  default_parameters:
51
- temperature: 1
76
+ max_output_tokens: 128_000
77
+
78
+ openai/gpt-5.1-2025-11-13:
79
+ label: GPT 5.1
80
+ documentation_url: https://platform.openai.com/docs/models/gpt-5.1
81
+ description: GPT-5.1 is OpenAI's flagship model for coding and agentic tasks with configurable reasoning and non-reasoning effort.
82
+ release_date: 2025-11-13
83
+ costs_per_million_token:
84
+ input: 1.25
85
+ output: 10
86
+ cache:
87
+ read: 0.125
88
+ properties:
89
+ training_cutoff: "2024-09"
90
+ class_properties:
91
+ available_as_evaluator: true
92
+ supports_images: true
93
+ default_parameters:
52
94
  max_output_tokens: 128_000
53
95
 
54
96
  openai/gpt-5-2025-08-07:
@@ -67,7 +109,6 @@ gpt-5-models:
67
109
  available_as_evaluator: true
68
110
  supports_images: true
69
111
  default_parameters:
70
- temperature: 1
71
112
  max_output_tokens: 128_000
72
113
  alternative_keys:
73
114
  - azure/gpt-5-2025-08-07
@@ -85,7 +126,6 @@ gpt-5-models:
85
126
  properties:
86
127
  training_cutoff: "2024-05"
87
128
  default_parameters:
88
- temperature: 1
89
129
  max_output_tokens: 128_000
90
130
  class_properties:
91
131
  supports_images: true
@@ -105,7 +145,6 @@ gpt-5-models:
105
145
  properties:
106
146
  training_cutoff: "2024-05"
107
147
  default_parameters:
108
- temperature: 1
109
148
  max_output_tokens: 128_000
110
149
  class_properties:
111
150
  supports_images: true
@@ -28,6 +28,7 @@ kimi-models:
28
28
  supports_temperature: true
29
29
  default_parameters:
30
30
  temperature: 0.3
31
+ max_output_tokens: 16_384
31
32
 
32
33
  together/moonshotai/Kimi-K2-Instruct:
33
34
  label: Kimi K2 Instruct
@@ -8,7 +8,7 @@ base-config:
8
8
  supports_files: false
9
9
  available_for_everyone: true
10
10
  ignored_for_cost: false
11
- supports_tools: false
11
+ supports_tools: true
12
12
  properties:
13
13
  reasoning_model: false
14
14
 
@@ -33,7 +33,6 @@ xai-models:
33
33
  reasoning_model: true
34
34
  class_properties:
35
35
  supports_images: false
36
- supports_tools: true
37
36
  costs_per_million_token:
38
37
  input: 0.20
39
38
  output: 1.50
@@ -81,6 +80,68 @@ xai-models:
81
80
  - grok/grok-4-fast
82
81
  - grok/grok-4-fast-reasoning-latest
83
82
 
83
+ grok/grok-4-1-fast-reasoning:
84
+ label: Grok 4.1 Fast (Reasoning)
85
+ description: ""
86
+ release_date: 2025-10-19
87
+ open_source: false
88
+ class_properties:
89
+ supports_images: true
90
+ available_as_evaluator: true
91
+ supports_metadata: true
92
+ supports_files: false
93
+ available_for_everyone: true
94
+ ignored_for_cost: false
95
+ properties:
96
+ context_window: 2_000_000
97
+ max_token_output: 2_000_000 # from openrouter
98
+ training_cutoff: null
99
+ reasoning_model: true
100
+ documentation_url: ""
101
+ costs_per_million_token:
102
+ input: 0.20
103
+ output: 0.5
104
+ cache:
105
+ read: 0.05
106
+ context:
107
+ threshold: 128_000
108
+ input: 0.4
109
+ output: 1.0
110
+ default_parameters:
111
+ temperature: 0.7
112
+ max_output_tokens: 128000
113
+
114
+ grok/grok-4-1-fast-non-reasoning:
115
+ label: Grok 4.1 Fast Non-Reasoning
116
+ description: ""
117
+ release_date: 2025-10-19
118
+ open_source: false
119
+ class_properties:
120
+ supports_images: true
121
+ available_as_evaluator: true
122
+ supports_metadata: true
123
+ supports_files: false
124
+ available_for_everyone: true
125
+ ignored_for_cost: false
126
+ properties:
127
+ context_window: 2_000_000
128
+ max_token_output: 2_000_000 # from openrouter
129
+ training_cutoff: null
130
+ reasoning_model: false
131
+ documentation_url: ""
132
+ costs_per_million_token:
133
+ input: 0.20
134
+ output: 0.5
135
+ cache:
136
+ read: 0.05
137
+ context:
138
+ threshold: 128_000
139
+ input: 0.4
140
+ output: 1.0
141
+ default_parameters:
142
+ temperature: 0.7
143
+ max_output_tokens: 128000
144
+
84
145
  grok/grok-4-fast-non-reasoning:
85
146
  label: Grok 4 Fast (Non-Reasoning)
86
147
  description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
@@ -121,7 +182,6 @@ xai-models:
121
182
  class_properties:
122
183
  supports_images: true
123
184
  available_for_everyone: false
124
- supports_tools: true
125
185
  properties:
126
186
  context_window: 256_000
127
187
  max_token_output: 128_000
@@ -5,6 +5,7 @@ from typing import Any, Callable
5
5
 
6
6
  import backoff
7
7
  from ai21 import TooManyRequestsError as AI21RateLimitError
8
+ from anthropic import InternalServerError
8
9
  from anthropic import RateLimitError as AnthropicRateLimitError
9
10
  from backoff._typing import Details
10
11
  from httpcore import ReadError as HTTPCoreReadError
@@ -166,6 +167,7 @@ RETRIABLE_EXCEPTIONS = [
166
167
  OpenAIUnprocessableEntityError,
167
168
  OpenAIAPIConnectionError,
168
169
  AnthropicRateLimitError,
170
+ InternalServerError,
169
171
  AI21RateLimitError,
170
172
  RemoteProtocolError, # httpx connection closing when running models from sdk
171
173
  HTTPXReadError,
@@ -189,6 +191,9 @@ RETRIABLE_EXCEPTION_CODES = [
189
191
  "internal_error",
190
192
  "server_error",
191
193
  "overloaded",
194
+ "throttling", # AWS throttling errors
195
+ "throttlingexception", # AWS throttling errors
196
+ "internal server error",
192
197
  ]
193
198
 
194
199
 
@@ -237,8 +242,9 @@ def retry_llm_call(
237
242
  logger: logging.Logger,
238
243
  max_tries: int = RETRY_MAX_TRIES,
239
244
  max_time: float | None = None,
240
- backoff_callback: Callable[[int, Exception | None, float, float], None]
241
- | None = None,
245
+ backoff_callback: (
246
+ Callable[[int, Exception | None, float, float], None] | None
247
+ ) = None,
242
248
  ):
243
249
  def on_backoff(details: Details):
244
250
  exception = details.get("exception")
@@ -56,7 +56,7 @@ def concat_images(
56
56
  new_width = int(combined_image.width * scale_factor)
57
57
  new_height = int(combined_image.height * scale_factor)
58
58
 
59
- combined_image = combined_image.resize(
59
+ combined_image = combined_image.resize( # type: ignore
60
60
  (new_width, new_height), Image.Resampling.LANCZOS
61
61
  )
62
62
 
File without changes
@@ -26,9 +26,11 @@ from model_library.exceptions import (
26
26
  MaxOutputTokensExceededError,
27
27
  ModelNoOutputError,
28
28
  )
29
+ from model_library.register_models import register_provider
29
30
  from model_library.utils import default_httpx_client
30
31
 
31
32
 
33
+ @register_provider("ai21labs")
32
34
  class AI21LabsModel(LLM):
33
35
  _client: AsyncAI21Client | None = None
34
36
 
@@ -1,29 +1,21 @@
1
- import io
2
- from typing import Any, Literal, Sequence
1
+ from typing import Literal
3
2
 
4
3
  from typing_extensions import override
5
4
 
6
5
  from model_library import model_library_settings
7
6
  from model_library.base import (
8
- LLM,
9
- FileInput,
10
- FileWithId,
11
- InputItem,
7
+ DelegateOnly,
12
8
  LLMConfig,
13
- QueryResult,
14
9
  QueryResultCost,
15
10
  QueryResultMetadata,
16
- ToolDefinition,
17
11
  )
18
12
  from model_library.providers.openai import OpenAIModel
13
+ from model_library.register_models import register_provider
19
14
  from model_library.utils import create_openai_client_with_defaults
20
15
 
21
16
 
22
- class AlibabaModel(LLM):
23
- @override
24
- def get_client(self) -> None:
25
- raise NotImplementedError("Not implemented")
26
-
17
+ @register_provider("alibaba")
18
+ class AlibabaModel(DelegateOnly):
27
19
  def __init__(
28
20
  self,
29
21
  model_name: str,
@@ -32,23 +24,20 @@ class AlibabaModel(LLM):
32
24
  config: LLMConfig | None = None,
33
25
  ):
34
26
  super().__init__(model_name, provider, config=config)
35
- self.native: bool = False
36
27
 
37
- self.delegate: OpenAIModel | None = (
38
- None
39
- if self.native
40
- else OpenAIModel(
41
- model_name=model_name,
42
- provider=provider,
43
- config=config,
44
- custom_client=create_openai_client_with_defaults(
45
- api_key=model_library_settings.DASHSCOPE_API_KEY,
46
- base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
47
- ),
48
- use_completions=True,
49
- )
28
+ # https://www.alibabacloud.com/help/en/model-studio/first-api-call-to-qwen
29
+ self.delegate = OpenAIModel(
30
+ model_name=self.model_name,
31
+ provider=self.provider,
32
+ config=config,
33
+ custom_client=create_openai_client_with_defaults(
34
+ api_key=model_library_settings.DASHSCOPE_API_KEY,
35
+ base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
36
+ ),
37
+ use_completions=True,
50
38
  )
51
39
 
40
+ @override
52
41
  async def _calculate_cost(
53
42
  self,
54
43
  metadata: QueryResultMetadata,
@@ -94,54 +83,3 @@ class AlibabaModel(LLM):
94
83
  else None,
95
84
  cache_write=None,
96
85
  )
97
-
98
- @override
99
- async def parse_input(
100
- self,
101
- input: Sequence[InputItem],
102
- **kwargs: Any,
103
- ) -> Any:
104
- raise NotImplementedError()
105
-
106
- @override
107
- async def parse_image(
108
- self,
109
- image: FileInput,
110
- ) -> Any:
111
- raise NotImplementedError()
112
-
113
- @override
114
- async def parse_file(
115
- self,
116
- file: FileInput,
117
- ) -> Any:
118
- raise NotImplementedError()
119
-
120
- @override
121
- async def parse_tools(
122
- self,
123
- tools: list[ToolDefinition],
124
- ) -> Any:
125
- raise NotImplementedError()
126
-
127
- @override
128
- async def upload_file(
129
- self,
130
- name: str,
131
- mime: str,
132
- bytes: io.BytesIO,
133
- type: Literal["image", "file"] = "file",
134
- ) -> FileWithId:
135
- raise NotImplementedError()
136
-
137
- @override
138
- async def _query_impl(
139
- self,
140
- input: Sequence[InputItem],
141
- *,
142
- tools: list[ToolDefinition],
143
- **kwargs: object,
144
- ) -> QueryResult:
145
- if self.delegate:
146
- return await self.delegate_query(input, tools=tools, **kwargs)
147
- raise NotImplementedError()
@@ -31,8 +31,11 @@ from model_library.exceptions import (
31
31
  MaxOutputTokensExceededError,
32
32
  )
33
33
  from model_library.model_utils import get_default_budget_tokens
34
+ from model_library.register_models import register_provider
34
35
 
35
36
 
37
+ @register_provider("amazon")
38
+ @register_provider("bedrock")
36
39
  class AmazonModel(LLM):
37
40
  _client: BaseClient | None = None
38
41