model-library 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +2 -0
- model_library/base/output.py +1 -0
- model_library/config/README.md +169 -0
- model_library/config/ai21labs_models.yaml +11 -11
- model_library/config/alibaba_models.yaml +21 -22
- model_library/config/all_models.json +4572 -2598
- model_library/config/amazon_models.yaml +100 -102
- model_library/config/anthropic_models.yaml +43 -52
- model_library/config/cohere_models.yaml +25 -24
- model_library/config/deepseek_models.yaml +28 -25
- model_library/config/dummy_model.yaml +9 -7
- model_library/config/fireworks_models.yaml +86 -56
- model_library/config/google_models.yaml +131 -126
- model_library/config/inception_models.yaml +6 -6
- model_library/config/kimi_models.yaml +13 -14
- model_library/config/minimax_models.yaml +37 -0
- model_library/config/mistral_models.yaml +85 -29
- model_library/config/openai_models.yaml +192 -150
- model_library/config/perplexity_models.yaml +8 -23
- model_library/config/together_models.yaml +115 -104
- model_library/config/xai_models.yaml +47 -79
- model_library/config/zai_models.yaml +23 -15
- model_library/exceptions.py +6 -15
- model_library/providers/amazon.py +32 -17
- model_library/providers/minimax.py +33 -0
- model_library/providers/mistral.py +10 -1
- model_library/providers/openai.py +2 -6
- model_library/register_models.py +36 -36
- model_library/registry_utils.py +18 -16
- model_library/utils.py +2 -2
- {model_library-0.1.3.dist-info → model_library-0.1.4.dist-info}/METADATA +2 -2
- model_library-0.1.4.dist-info/RECORD +64 -0
- model_library-0.1.3.dist-info/RECORD +0 -61
- {model_library-0.1.3.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
- {model_library-0.1.3.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.3.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
model_library/config/xai_models.yaml
CHANGED

@@ -2,20 +2,21 @@ base-config:
   company: xAI
   documentation_url: https://docs.x.ai/docs#models
   open_source: false
-
-
+  supports:
+    images: true
+    files: false
+    tools: true
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: true
   properties:
     reasoning_model: false
 
 xai-models:
   base-config:
-
-
+    supports:
+      temperature: true
     costs_per_million_token:
       cache:
         read_discount: 0.25
@@ -29,19 +30,16 @@ xai-models:
     release_date: 2025-08-25
     properties:
       context_window: 256_000
-
+      max_tokens: 40_000
       reasoning_model: true
-
-
+    supports:
+      images: false
     costs_per_million_token:
       input: 0.20
       output: 1.50
       cache:
         read: 0.02
     documentation_url: https://docs.x.ai/docs/models/grok-code-fast-1
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 40000
     alternative_keys:
       - grok/grok-code-fast
       - grok/grok-code-fast-1-0825
@@ -51,16 +49,12 @@ xai-models:
     description: Latest advancement in cost-efficient reasoning models with unified architecture. Handles complex requests with deep chain-of-thought reasoning. Features 2M token context window and native tool use.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-reasoning
@@ -73,9 +67,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4-fast
      - grok/grok-4-fast-reasoning-latest
@@ -83,18 +74,14 @@ xai-models:
   grok/grok-4-1-fast-reasoning:
     label: Grok 4.1 Fast (Reasoning)
     description: ""
-    release_date: 2025-
+    release_date: 2025-11-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000 # from openrouter
       training_cutoff: null
       reasoning_model: true
     documentation_url: ""
@@ -107,25 +94,18 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
 
   grok/grok-4-1-fast-non-reasoning:
     label: Grok 4.1 Fast Non-Reasoning
     description: ""
-    release_date: 2025-
+    release_date: 2025-11-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000 # from openrouter
       training_cutoff: null
       reasoning_model: false
     documentation_url: ""
@@ -138,25 +118,18 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
 
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: false
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning
@@ -169,9 +142,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 2000000
     alternative_keys:
       - grok/grok-4-fast-non-reasoning-latest
 
@@ -179,12 +149,12 @@ xai-models:
     label: Grok 4
     description: Latest and greatest flagship model offering unparalleled performance in natural language, math and reasoning. The perfect jack of all trades with native tool use and structured outputs support.
     release_date: 2025-07-09
-
-
-
+    supports:
+      images: true
+      tools: true
     properties:
       context_window: 256_000
-
+      max_tokens: 128_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-0709
@@ -197,9 +167,6 @@ xai-models:
         threshold: 128_000
         input: 6.00
         output: 30.00
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4
       - grok/grok-4-latest
@@ -210,15 +177,15 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
       reasoning_model: true
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.30
       output: 0.50
-
+      cache:
         read: 0.075
     documentation_url: https://docs.x.ai/docs/models/grok-3-mini
     default_parameters:
@@ -248,7 +215,7 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
     costs_per_million_token:
       input: 3.00
@@ -271,10 +238,10 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
     costs_per_million_token:
       input: 2.00
       output: 10.00
@@ -288,9 +255,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -302,10 +269,11 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
@@ -317,9 +285,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
      deprecated: true
     costs_per_million_token:
       input: 5.00
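Taken together, the xAI hunks above show the 0.1.4 schema change: flat supports_* booleans and loose top-level flags fold into nested supports: and metadata: blocks, per-model default_parameters overrides are dropped in favor of an explicit max_tokens property, and missing release dates are filled in. A minimal sketch of reading the new shape, assuming a PyYAML-style loader (model_library's actual loader in register_models.py may differ):

import yaml

# Illustrative snippet only: mirrors the post-0.1.4 nested layout from the
# diff above; reading it with yaml.safe_load is an assumption, not the
# library's real loading path.
snippet = """
grok/grok-4:
  supports:
    images: true
    tools: true
  metadata:
    deprecated: false
  properties:
    context_window: 256_000
    max_tokens: 128_000
"""

model = yaml.safe_load(snippet)["grok/grok-4"]
print(model["supports"]["images"])        # True
print(model["properties"]["max_tokens"])  # 128000 (YAML 1.1 ints may contain underscores)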
model_library/config/zai_models.yaml
CHANGED

@@ -2,12 +2,13 @@ base-config:
   company: zAI
   open_source: true
   documentation_url: https://docs.z.ai/
-
-
-
+  supports:
+    images: false
+    files: false
+    temperature: true
+    tools: true
+  properties:
     reasoning_model: true
-  supports_temperature: true
-  supports_tools: true
   default_parameters:
     temperature: 0.6
     top_p: 1
@@ -23,14 +24,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.6
       output: 2.2
       cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p5
+      - fireworks/glm-4p5:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
 
   zai/glm-4.5-air:
     label: GLM 4.5 Air
@@ -38,14 +42,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.2
       output: 1.1
-    cache:
+      cache:
         read: 0.03
     alternative_keys:
-      - together/zai-org/GLM-4.5-Air-FP8
+      - together/zai-org/GLM-4.5-Air-FP8:
+          costs_per_million_token:
+            input: 0.22
+            output: 0.88
 
   zai/glm-4.6:
     label: GLM 4.6
@@ -53,13 +60,14 @@ zai-models:
     release_date: 2025-09-30
     properties:
       context_window: 200_000
-
+      max_tokens: 122_880
     costs_per_million_token:
       input: 0.6
       output: 2.2
-    cache:
+      cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p6
-
-
+      - fireworks/glm-4p6:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
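The zAI hunks also show that an alternative key can now carry its own cost table: the Fireworks and Together rehostings of GLM price differently from zAI's first-party endpoint. A rough sketch of resolving such an override, assuming plain-dict config shapes; resolve_costs is hypothetical, not model-library API:

# Hypothetical helper: pick the per-provider cost override when the
# alternative key defines one, else fall back to the canonical costs.
# The dict shapes mirror the YAML above.
def resolve_costs(model: dict, alt_key: str) -> dict:
    for alt in model.get("alternative_keys", []):
        if isinstance(alt, dict) and alt_key in alt:
            override = alt[alt_key].get("costs_per_million_token")
            if override:
                return override
    return model["costs_per_million_token"]

glm_46 = {
    "costs_per_million_token": {"input": 0.6, "output": 2.2},
    "alternative_keys": [
        {"fireworks/glm-4p6": {"costs_per_million_token": {"input": 0.55, "output": 2.19}}}
    ],
}
print(resolve_costs(glm_46, "fireworks/glm-4p6"))  # {'input': 0.55, 'output': 2.19}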
model_library/exceptions.py
CHANGED
@@ -9,6 +9,7 @@ from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
+from httpx import ConnectError as HTTPXConnectError
 from httpx import ReadError as HTTPXReadError
 from httpx import RemoteProtocolError
 from openai import APIConnectionError as OpenAIAPIConnectionError
@@ -54,20 +55,6 @@ class MaxOutputTokensExceededError(Exception):
         super().__init__(message or MaxOutputTokensExceededError.DEFAULT_MESSAGE)
 
 
-class MaxInputTokensExceededError(Exception):
-    """
-    Raised when the input exceeds the allowed max input tokens limit
-    """
-
-    DEFAULT_MESSAGE: str = (
-        "Input exceeded the maximum allowed input tokens. "
-        "Consider reducing the input size."
-    )
-
-    def __init__(self, message: str | None = None):
-        super().__init__(message or MaxInputTokensExceededError.DEFAULT_MESSAGE)
-
-
 class MaxContextWindowExceededError(Exception):
     """
     Raised when the context window exceeds the allowed max context window limit
@@ -98,7 +85,9 @@ CONTEXT_WINDOW_PATTERN = re.compile(
     r"sent message larger than max|"
     r"input tokens exceeded|"
     r"(messages?|total length).*too long|"
-    r"payload.*too large"
+    r"payload.*too large|"
+    r"string too long|"
+    r"input exceeded the context window"
 )
 
 
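The extended pattern now also catches OpenAI-style "string too long" errors and the library's own "input exceeded the context window" wording. A quick check of the new alternations, re-declared locally for demonstration; whether the real CONTEXT_WINDOW_PATTERN compiles with re.IGNORECASE is an assumption here:

import re

# Only the new branches; the real pattern in model_library.exceptions
# carries several more alternations.
pattern = re.compile(
    r"payload.*too large|"
    r"string too long|"
    r"input exceeded the context window",
    re.IGNORECASE,  # assumption: provider error strings vary in casing
)

print(bool(pattern.search("400: string too long, max 10485760 chars")))  # True
print(bool(pattern.search("Input exceeded the context window")))         # True
print(bool(pattern.search("request rate limited")))                      # False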
@@ -171,6 +160,7 @@ RETRIABLE_EXCEPTIONS = [
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
+    HTTPXConnectError,
     HTTPCoreReadError,
 ]
 
@@ -188,6 +178,7 @@ RETRIABLE_EXCEPTION_CODES = [
     "connection_error",
     "service_unavailable",
     "rate_limit",
+    "rate limit",
     "internal_error",
     "server_error",
     "overloaded",
model_library/providers/amazon.py
CHANGED

@@ -26,6 +26,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -60,11 +61,13 @@ class AmazonModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-
-
-        self.
-
-
+        self.supports_cache = "amazon" in self.model_name or "claude" in self.model_name
+        self.supports_cache = (
+            self.supports_cache and "v2" not in self.model_name
+        )  # supported but no access yet
+        self.supports_tool_cache = self.supports_cache and "claude" in self.model_name
+
+    cache_control = {"type": "default"}
 
     @override
     async def parse_input(
@@ -120,6 +123,10 @@ class AmazonModel(LLM):
             new_input.append(item)
 
         if content_user:
+            if self.supports_cache:
+                if not isinstance(input[-1], FileBase):
+                    # last item cannot be file
+                    content_user.append({"cachePoint": self.cache_control})
             new_input.append({"role": "user", "content": content_user})
 
         return new_input
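The cachePoint block is the Bedrock Converse API's prompt-caching marker: appending it after the last user content block (and, below, after the system prompt and tool list) makes everything before it cacheable. A sketch of the resulting request shape; the message text is made up, while "cachePoint" and the {"type": "default"} value come from the diff above:

# Illustrative Converse-API request body after parse_input and query
# append the cache markers. Field names follow the Bedrock Converse API.
body = {
    "system": [
        {"text": "You are a helpful assistant."},
        {"cachePoint": {"type": "default"}},  # cache the system prompt
    ],
    "messages": [
        {
            "role": "user",
            "content": [
                {"text": "Summarize this document..."},
                {"cachePoint": {"type": "default"}},  # cache everything above
            ],
        }
    ],
}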
@@ -174,6 +181,8 @@ class AmazonModel(LLM):
                 }
             }
         )
+        if parsed_tools and self.supports_tool_cache:
+            parsed_tools.append({"cachePoint": self.cache_control})
         return parsed_tools
 
     @override
@@ -203,8 +212,12 @@ class AmazonModel(LLM):
 
         if "system_prompt" in kwargs:
             body["system"] = [{"text": kwargs.pop("system_prompt")}]
+            if self.supports_cache:
+                body["system"].append({"cachePoint": self.cache_control})
 
         if self.reasoning:
+            if self.max_tokens < 1024:
+                self.max_tokens = 2048
             budget_tokens = kwargs.pop(
                 "budget_tokens", get_default_budget_tokens(self.max_tokens)
             )
@@ -244,9 +257,8 @@ class AmazonModel(LLM):
         tool_calls: dict[str, Any] = {}
 
         messages: dict[str, Any] = {"content": []}
-        input_tokens = 0
-        output_tokens = 0
         stop_reason: str = ""
+        metadata = QueryResultMetadata()
 
         for chunk in response["stream"]:
             key = list(chunk.keys())[0]
@@ -281,8 +293,16 @@ class AmazonModel(LLM):
                         tool_calls["input"] += delta["toolUse"]["input"]
 
                 case "metadata":
-                    input_tokens = value["usage"]["inputTokens"]
-                    output_tokens = value["usage"]["outputTokens"]
+                    metadata = QueryResultMetadata(
+                        in_tokens=value["usage"]["inputTokens"],
+                        out_tokens=value["usage"]["outputTokens"],
+                    )
+                    metadata.cache_read_tokens = value["usage"].get(
+                        "cacheReadInputTokens", None
+                    )
+                    metadata.cache_write_tokens = value["usage"].get(
+                        "cacheWriteInputTokens", None
+                    )
 
                 case "contentBlockStop":
                     if tool_calls:
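For reference, a Bedrock converse_stream metadata chunk carries usage in the shape below; the cache fields only appear when a cachePoint was set, which is why the code reads them with .get and a None default. Values here are made up, field names are Converse API usage fields:

# Illustrative metadata chunk from a converse_stream response.
chunk = {
    "metadata": {
        "usage": {
            "inputTokens": 1200,
            "outputTokens": 340,
            "cacheReadInputTokens": 900,  # present only when caching is active
        }
    }
}

usage = chunk["metadata"]["usage"]
print(usage.get("cacheReadInputTokens", None))   # 900
print(usage.get("cacheWriteInputTokens", None))  # None -> no cache write reported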
@@ -308,7 +328,7 @@ class AmazonModel(LLM):
             case "messageStop":
                 stop_reason = value["stopReason"]
 
-        return messages, stop_reason
+        return messages, stop_reason, metadata
 
     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#
     @override
@@ -326,9 +346,7 @@ class AmazonModel(LLM):
             **body,
         )
 
-        messages, stop_reason = await self.stream_response(
-            response
-        )
+        messages, stop_reason, metadata = await self.stream_response(response)
 
         text = " ".join([i["text"] for i in messages["content"] if "text" in i])
         reasoning = " ".join(
@@ -361,10 +379,7 @@ class AmazonModel(LLM):
         return QueryResult(
             output_text=text,
             reasoning=reasoning,
-            metadata=QueryResultMetadata(
-                in_tokens=input_tokens,
-                out_tokens=output_tokens,
-            ),
+            metadata=metadata,
             tool_calls=tool_calls,
             history=[*input, messages],
         )
model_library/providers/minimax.py
ADDED

@@ -0,0 +1,33 @@
+from typing import Literal
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.providers.openai import OpenAIModel
+from model_library.register_models import register_provider
+from model_library.utils import create_openai_client_with_defaults
+
+
+@register_provider("minimax")
+class MinimaxModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["minimax"] = "minimax",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        self.delegate = OpenAIModel(
+            model_name=self.model_name,
+            provider=self.provider,
+            config=config,
+            custom_client=create_openai_client_with_defaults(
+                api_key=model_library_settings.MINIMAX_API_KEY,
+                base_url="https://api.minimax.io/v1",
+            ),
+            use_completions=True,
+        )
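The new provider is a thin delegate over MiniMax's OpenAI-compatible endpoint at api.minimax.io. A hedged usage sketch; it assumes MINIMAX_API_KEY is configured in model_library's settings, and the model name "MiniMax-M2" is an assumption, not taken from this diff:

from model_library.providers.minimax import MinimaxModel

# Direct instantiation of the class added above; requires MINIMAX_API_KEY.
model = MinimaxModel("MiniMax-M2")  # hypothetical model name
# MinimaxModel delegates every call to an OpenAIModel configured with
# use_completions=True, i.e. the classic /v1/chat/completions endpoint,
# so it behaves like any other OpenAI-compatible provider in the library.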
model_library/providers/mistral.py
CHANGED

@@ -29,6 +29,7 @@ from model_library.base import (
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
+    ModelNoOutputError,
 )
 from model_library.file_utils import trim_images
 from model_library.register_models import register_provider
@@ -250,9 +251,17 @@ class MistralModel(LLM):
             self.logger.error(f"Error: {e}", exc_info=True)
             raise e
 
-        if finish_reason == "length":
+        if (
+            finish_reason == "length"
+            and not text
+            and not reasoning
+            and not raw_tool_calls
+        ):
             raise MaxOutputTokensExceededError()
 
+        if not text and not reasoning and not raw_tool_calls:
+            raise ModelNoOutputError()
+
         tool_calls: list[ToolCall] = []
 
         for tool_call in raw_tool_calls or []:
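The reordered checks distinguish two failure modes: a "length" finish that produced no content at all (the token cap consumed everything, so MaxOutputTokensExceededError) versus an empty response with a normal finish (ModelNoOutputError); truncated-but-nonempty output now passes through. A condensed sketch of that triage; the exception names are real, the free function is illustrative only:

# Stand-in for the new error triage in MistralModel.
def classify_empty_response(finish_reason, text, reasoning, tool_calls):
    empty = not text and not reasoning and not tool_calls
    if finish_reason == "length" and empty:
        return "MaxOutputTokensExceededError"  # cap hit before any output survived
    if empty:
        return "ModelNoOutputError"            # model simply returned nothing
    return "ok"                                # partial output passes through

print(classify_empty_response("length", "", "", []))    # MaxOutputTokensExceededError
print(classify_empty_response("stop", "", "", []))      # ModelNoOutputError
print(classify_empty_response("length", "Hi", "", []))  # ok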
model_library/providers/openai.py
CHANGED

@@ -521,10 +521,6 @@ class OpenAIModel(LLM):
         metadata: QueryResultMetadata = QueryResultMetadata()
         raw_tool_calls: list[ChatCompletionMessageToolCall] = []
 
-        # enable usage data in streaming responses
-        if "stream_options" not in body:
-            body["stream_options"] = {"include_usage": True}
-
         stream = await self.get_client().chat.completions.create(
             **body,  # pyright: ignore[reportAny]
             stream=True,
@@ -587,7 +583,7 @@ class OpenAIModel(LLM):
                     cache_read_tokens = (
                         chunk.usage.prompt_tokens_details.cached_tokens or 0
                         if chunk.usage.prompt_tokens_details
-                        else 0
+                        else getattr(chunk.usage, "cached_tokens", 0)  # for kimi
                     )
                     metadata = QueryResultMetadata(
                         in_tokens=chunk.usage.prompt_tokens - cache_read_tokens,
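Kimi's OpenAI-compatible endpoint reports cached tokens as a top-level usage field rather than inside prompt_tokens_details, hence the getattr fallback. A minimal illustration with stand-in usage objects (real ones are OpenAI SDK CompletionUsage instances):

from types import SimpleNamespace

openai_usage = SimpleNamespace(
    prompt_tokens=100,
    prompt_tokens_details=SimpleNamespace(cached_tokens=60),
)
kimi_usage = SimpleNamespace(
    prompt_tokens=100, prompt_tokens_details=None, cached_tokens=60
)

def cache_read(usage):
    return (usage.prompt_tokens_details.cached_tokens or 0
            if usage.prompt_tokens_details
            else getattr(usage, "cached_tokens", 0))

print(cache_read(openai_usage))  # 60, from prompt_tokens_details
print(cache_read(kimi_usage))    # 60, from the top-level field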
@@ -625,7 +621,7 @@ class OpenAIModel(LLM):
             if raw_tool_calls
             else None,
         )
-        if
+        if reasoning_text:
             setattr(final_message, "reasoning_content", reasoning_text)
 
         return QueryResult(