model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +13 -6
- model_library/base/output.py +55 -0
- model_library/base/utils.py +3 -2
- model_library/config/README.md +169 -0
- model_library/config/ai21labs_models.yaml +11 -10
- model_library/config/alibaba_models.yaml +21 -22
- model_library/config/all_models.json +4708 -2471
- model_library/config/amazon_models.yaml +100 -102
- model_library/config/anthropic_models.yaml +59 -45
- model_library/config/cohere_models.yaml +25 -24
- model_library/config/deepseek_models.yaml +28 -25
- model_library/config/dummy_model.yaml +9 -7
- model_library/config/fireworks_models.yaml +86 -56
- model_library/config/google_models.yaml +156 -102
- model_library/config/inception_models.yaml +6 -6
- model_library/config/kimi_models.yaml +13 -14
- model_library/config/minimax_models.yaml +37 -0
- model_library/config/mistral_models.yaml +85 -29
- model_library/config/openai_models.yaml +192 -159
- model_library/config/perplexity_models.yaml +8 -23
- model_library/config/together_models.yaml +115 -103
- model_library/config/xai_models.yaml +85 -57
- model_library/config/zai_models.yaml +23 -15
- model_library/exceptions.py +12 -17
- model_library/file_utils.py +1 -1
- model_library/providers/amazon.py +32 -17
- model_library/providers/anthropic.py +2 -6
- model_library/providers/google/google.py +35 -29
- model_library/providers/minimax.py +33 -0
- model_library/providers/mistral.py +10 -1
- model_library/providers/openai.py +10 -8
- model_library/providers/together.py +18 -211
- model_library/register_models.py +36 -38
- model_library/registry_utils.py +18 -16
- model_library/utils.py +2 -2
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
- model_library-0.1.4.dist-info/RECORD +64 -0
- model_library-0.1.2.dist-info/RECORD +0 -61
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
model_library/config/xai_models.yaml
CHANGED

```diff
@@ -2,20 +2,21 @@ base-config:
   company: xAI
   documentation_url: https://docs.x.ai/docs#models
   open_source: false
-
-
+  supports:
+    images: true
+    files: false
+    tools: true
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: false
   properties:
     reasoning_model: false

 xai-models:
   base-config:
-
-
+    supports:
+      temperature: true
     costs_per_million_token:
       cache:
         read_discount: 0.25
@@ -29,20 +30,16 @@ xai-models:
     release_date: 2025-08-25
     properties:
       context_window: 256_000
-
+      max_tokens: 40_000
       reasoning_model: true
-
-
-    supports_tools: true
+    supports:
+      images: false
    costs_per_million_token:
       input: 0.20
       output: 1.50
       cache:
         read: 0.02
     documentation_url: https://docs.x.ai/docs/models/grok-code-fast-1
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 40000
     alternative_keys:
       - grok/grok-code-fast
       - grok/grok-code-fast-1-0825
@@ -52,16 +49,12 @@ xai-models:
     description: Latest advancement in cost-efficient reasoning models with unified architecture. Handles complex requests with deep chain-of-thought reasoning. Features 2M token context window and native tool use.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-reasoning
@@ -74,28 +67,69 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest

+  grok/grok-4-1-fast-reasoning:
+    label: Grok 4.1 Fast (Reasoning)
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: true
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
+  grok/grok-4-1-fast-non-reasoning:
+    label: Grok 4.1 Fast Non-Reasoning
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: false
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: false
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning
@@ -108,9 +142,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 2000000
     alternative_keys:
       - grok/grok-4-fast-non-reasoning-latest

@@ -118,13 +149,12 @@ xai-models:
     label: Grok 4
     description: Latest and greatest flagship model offering unparalleled performance in natural language, math and reasoning. The perfect jack of all trades with native tool use and structured outputs support.
     release_date: 2025-07-09
-
-
-
-    supports_tools: true
+    supports:
+      images: true
+      tools: true
     properties:
       context_window: 256_000
-
+      max_tokens: 128_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-0709
@@ -137,9 +167,6 @@ xai-models:
         threshold: 128_000
         input: 6.00
         output: 30.00
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4
       - grok/grok-4-latest
@@ -150,15 +177,15 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
       reasoning_model: true
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.30
       output: 0.50
-
+      cache:
         read: 0.075
     documentation_url: https://docs.x.ai/docs/models/grok-3-mini
     default_parameters:
@@ -188,7 +215,7 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
     costs_per_million_token:
       input: 3.00
@@ -211,10 +238,10 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
     costs_per_million_token:
       input: 2.00
       output: 10.00
@@ -228,9 +255,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -242,10 +269,11 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
@@ -257,9 +285,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
```
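The pattern across this file (and the other provider configs in the list above) is a schema migration: flat `supports_*` booleans become a nested `supports:` mapping, bookkeeping flags move under `metadata:`, `max_tokens` joins `properties:`, and per-model `default_parameters` blocks are mostly dropped. A minimal sketch of how the new shape reads, assuming PyYAML; the loader and lookups are illustrative, not model_library's own registry code:

```python
# Parse one reshaped entry and read the new nested fields.
import yaml

doc = yaml.safe_load("""
grok/grok-4-fast-reasoning:
  supports:
    images: true
    files: false
  properties:
    context_window: 2_000_000
    max_tokens: 2_000_000
""")

entry = doc["grok/grok-4-fast-reasoning"]
assert entry["supports"]["images"] is True       # replaces the flat supports_* booleans
assert entry["properties"]["max_tokens"] == 2_000_000  # YAML 1.1 ints may contain "_"
```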
model_library/config/zai_models.yaml
CHANGED

```diff
@@ -2,12 +2,13 @@ base-config:
   company: zAI
   open_source: true
   documentation_url: https://docs.z.ai/
-
-
-
+  supports:
+    images: false
+    files: false
+    temperature: true
+    tools: true
+  properties:
     reasoning_model: true
-  supports_temperature: true
-  supports_tools: true
   default_parameters:
     temperature: 0.6
     top_p: 1
@@ -23,14 +24,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.6
       output: 2.2
       cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p5
+      - fireworks/glm-4p5:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19

   zai/glm-4.5-air:
     label: GLM 4.5 Air
@@ -38,14 +42,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.2
       output: 1.1
-    cache:
+      cache:
         read: 0.03
     alternative_keys:
-      - together/zai-org/GLM-4.5-Air-FP8
+      - together/zai-org/GLM-4.5-Air-FP8:
+          costs_per_million_token:
+            input: 0.22
+            output: 0.88

   zai/glm-4.6:
     label: GLM 4.6
@@ -53,13 +60,14 @@ zai-models:
     release_date: 2025-09-30
     properties:
       context_window: 200_000
-
+      max_tokens: 122_880
     costs_per_million_token:
       input: 0.6
       output: 2.2
-    cache:
+      cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p6
-
-
+      - fireworks/glm-4p6:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
```
model_library/exceptions.py
CHANGED
```diff
@@ -5,9 +5,11 @@ from typing import Any, Callable

 import backoff
 from ai21 import TooManyRequestsError as AI21RateLimitError
+from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
+from httpx import ConnectError as HTTPXConnectError
 from httpx import ReadError as HTTPXReadError
 from httpx import RemoteProtocolError
 from openai import APIConnectionError as OpenAIAPIConnectionError
@@ -53,20 +55,6 @@ class MaxOutputTokensExceededError(Exception):
         super().__init__(message or MaxOutputTokensExceededError.DEFAULT_MESSAGE)


-class MaxInputTokensExceededError(Exception):
-    """
-    Raised when the input exceeds the allowed max input tokens limit
-    """
-
-    DEFAULT_MESSAGE: str = (
-        "Input exceeded the maximum allowed input tokens. "
-        "Consider reducing the input size."
-    )
-
-    def __init__(self, message: str | None = None):
-        super().__init__(message or MaxInputTokensExceededError.DEFAULT_MESSAGE)
-
-
 class MaxContextWindowExceededError(Exception):
     """
     Raised when the context window exceeds the allowed max context window limit
@@ -97,7 +85,9 @@ CONTEXT_WINDOW_PATTERN = re.compile(
     r"sent message larger than max|"
     r"input tokens exceeded|"
     r"(messages?|total length).*too long|"
-    r"payload.*too large"
+    r"payload.*too large|"
+    r"string too long|"
+    r"input exceeded the context window"
 )


@@ -166,9 +156,11 @@ RETRIABLE_EXCEPTIONS = [
     OpenAIUnprocessableEntityError,
     OpenAIAPIConnectionError,
     AnthropicRateLimitError,
+    InternalServerError,
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
+    HTTPXConnectError,
     HTTPCoreReadError,
 ]

@@ -186,11 +178,13 @@ RETRIABLE_EXCEPTION_CODES = [
     "connection_error",
     "service_unavailable",
     "rate_limit",
+    "rate limit",
     "internal_error",
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
     "throttlingexception",  # AWS throttling errors
+    "internal server error",
 ]


@@ -239,8 +233,9 @@ def retry_llm_call(
     logger: logging.Logger,
     max_tries: int = RETRY_MAX_TRIES,
     max_time: float | None = None,
-    backoff_callback:
-
+    backoff_callback: (
+        Callable[[int, Exception | None, float, float], None] | None
+    ) = None,
 ):
     def on_backoff(details: Details):
         exception = details.get("exception")
```
model_library/file_utils.py
CHANGED
```diff
@@ -56,7 +56,7 @@ def concat_images(
         new_width = int(combined_image.width * scale_factor)
         new_height = int(combined_image.height * scale_factor)

-        combined_image = combined_image.resize(
+        combined_image = combined_image.resize(  # type: ignore
             (new_width, new_height), Image.Resampling.LANCZOS
         )

```
model_library/providers/amazon.py
CHANGED

```diff
@@ -26,6 +26,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -60,11 +61,13 @@ class AmazonModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-
-
-        self.
-
-
+        self.supports_cache = "amazon" in self.model_name or "claude" in self.model_name
+        self.supports_cache = (
+            self.supports_cache and "v2" not in self.model_name
+        )  # supported but no access yet
+        self.supports_tool_cache = self.supports_cache and "claude" in self.model_name
+
+    cache_control = {"type": "default"}

     @override
     async def parse_input(
@@ -120,6 +123,10 @@ class AmazonModel(LLM):
                 new_input.append(item)

         if content_user:
+            if self.supports_cache:
+                if not isinstance(input[-1], FileBase):
+                    # last item cannot be file
+                    content_user.append({"cachePoint": self.cache_control})
             new_input.append({"role": "user", "content": content_user})

         return new_input
@@ -174,6 +181,8 @@ class AmazonModel(LLM):
                     }
                 }
             )
+        if parsed_tools and self.supports_tool_cache:
+            parsed_tools.append({"cachePoint": self.cache_control})
         return parsed_tools

     @override
@@ -203,8 +212,12 @@ class AmazonModel(LLM):

         if "system_prompt" in kwargs:
             body["system"] = [{"text": kwargs.pop("system_prompt")}]
+            if self.supports_cache:
+                body["system"].append({"cachePoint": self.cache_control})

         if self.reasoning:
+            if self.max_tokens < 1024:
+                self.max_tokens = 2048
             budget_tokens = kwargs.pop(
                 "budget_tokens", get_default_budget_tokens(self.max_tokens)
             )
@@ -244,9 +257,8 @@ class AmazonModel(LLM):
         tool_calls: dict[str, Any] = {}

         messages: dict[str, Any] = {"content": []}
-        input_tokens = 0
-        output_tokens = 0
         stop_reason: str = ""
+        metadata = QueryResultMetadata()

         for chunk in response["stream"]:
             key = list(chunk.keys())[0]
@@ -281,8 +293,16 @@ class AmazonModel(LLM):
                         tool_calls["input"] += delta["toolUse"]["input"]

                 case "metadata":
-
-
+                    metadata = QueryResultMetadata(
+                        in_tokens=value["usage"]["inputTokens"],
+                        out_tokens=value["usage"]["outputTokens"],
+                    )
+                    metadata.cache_read_tokens = value["usage"].get(
+                        "cacheReadInputTokens", None
+                    )
+                    metadata.cache_write_tokens = value["usage"].get(
+                        "cacheWriteInputTokens", None
+                    )

                 case "contentBlockStop":
                     if tool_calls:
@@ -308,7 +328,7 @@ class AmazonModel(LLM):
                case "messageStop":
                    stop_reason = value["stopReason"]

-        return messages, stop_reason,
+        return messages, stop_reason, metadata

     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#
     @override
@@ -326,9 +346,7 @@ class AmazonModel(LLM):
             **body,
         )

-        messages, stop_reason,
-            response
-        )
+        messages, stop_reason, metadata = await self.stream_response(response)

         text = " ".join([i["text"] for i in messages["content"] if "text" in i])
         reasoning = " ".join(
@@ -361,10 +379,7 @@ class AmazonModel(LLM):
         return QueryResult(
             output_text=text,
             reasoning=reasoning,
-            metadata=
-                in_tokens=input_tokens,
-                out_tokens=output_tokens,
-            ),
+            metadata=metadata,
             tool_calls=tool_calls,
             history=[*input, messages],
         )
```
model_library/providers/anthropic.py
CHANGED

```diff
@@ -562,12 +562,8 @@ class AnthropicModel(LLM):

         body = await self.create_body(input, tools=tools, **kwargs)

-        betas = [
-
-            "interleaved-thinking-2025-05-14",
-        ]
-
-        if "claude-sonnet-4-5" in self.model_name:
+        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+        if "sonnet-4-5" in self.model_name:
             betas.append("context-1m-2025-08-07")

         async with self.get_client().beta.messages.stream(
```