model-library 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +11 -6
- model_library/base/output.py +54 -0
- model_library/base/utils.py +3 -2
- model_library/config/ai21labs_models.yaml +1 -0
- model_library/config/all_models.json +300 -37
- model_library/config/anthropic_models.yaml +26 -3
- model_library/config/google_models.yaml +49 -0
- model_library/config/openai_models.yaml +0 -9
- model_library/config/together_models.yaml +1 -0
- model_library/config/xai_models.yaml +63 -3
- model_library/exceptions.py +6 -2
- model_library/file_utils.py +1 -1
- model_library/providers/anthropic.py +2 -6
- model_library/providers/google/google.py +35 -29
- model_library/providers/openai.py +8 -2
- model_library/providers/together.py +18 -211
- model_library/register_models.py +0 -2
- {model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/METADATA +2 -3
- {model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/RECORD +22 -22
- {model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/WHEEL +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -27,6 +27,28 @@ claude-4-models:
|
|
|
27
27
|
class_properties:
|
|
28
28
|
supports_batch_requests: true
|
|
29
29
|
|
|
30
|
+
anthropic/claude-opus-4-5-20251101:
|
|
31
|
+
label: Claude Opus 4.5 (Nonthinking)
|
|
32
|
+
release_date: 2025-11-24
|
|
33
|
+
properties:
|
|
34
|
+
context_window: 200_000
|
|
35
|
+
max_token_output: 64_000
|
|
36
|
+
extending_thinking: 64_000
|
|
37
|
+
class_properties:
|
|
38
|
+
available_for_everyone: false
|
|
39
|
+
default_parameters:
|
|
40
|
+
max_output_tokens: 64_000
|
|
41
|
+
costs_per_million_token:
|
|
42
|
+
input: 15.0
|
|
43
|
+
output: 75.0
|
|
44
|
+
cache:
|
|
45
|
+
read: 1.5
|
|
46
|
+
write: 18.75
|
|
47
|
+
alternative_keys:
|
|
48
|
+
- anthropic/claude-opus-4-5-20251101-thinking:
|
|
49
|
+
properties:
|
|
50
|
+
reasoning_model: true
|
|
51
|
+
|
|
30
52
|
anthropic/claude-opus-4-1-20250805:
|
|
31
53
|
label: Claude Opus 4.1 (Nonthinking)
|
|
32
54
|
description: Advanced model for specialized complex
|
|
@@ -197,11 +219,12 @@ claude-3-5-models:
|
|
|
197
219
|
alternative_keys:
|
|
198
220
|
- anthropic/claude-3-5-sonnet-latest
|
|
199
221
|
- anthropic/claude-3.5-sonnet-latest
|
|
200
|
-
|
|
222
|
+
|
|
201
223
|
anthropic/claude-3-5-sonnet-20240620:
|
|
202
224
|
label: Claude 3.5 Sonnet
|
|
203
225
|
release_date: 2024-06-20
|
|
204
|
-
description:
|
|
226
|
+
description:
|
|
227
|
+
Claude Sonnet 3.5 (June 2024) variant for code and content generation,
|
|
205
228
|
multilingual and vision-capable, deprecated.
|
|
206
229
|
class_properties:
|
|
207
230
|
deprecated: true
|
|
@@ -339,7 +362,7 @@ claude-2-models:
|
|
|
339
362
|
costs_per_million_token:
|
|
340
363
|
input: 8.0
|
|
341
364
|
output: 24.0
|
|
342
|
-
|
|
365
|
+
|
|
343
366
|
anthropic/claude-1.3:
|
|
344
367
|
label: Claude 1.3
|
|
345
368
|
release_date: null
|
|
@@ -31,6 +31,55 @@ gemma-models:
|
|
|
31
31
|
input: 0.00
|
|
32
32
|
output: 0.00
|
|
33
33
|
|
|
34
|
+
|
|
35
|
+
gemini-3-models:
|
|
36
|
+
base-config:
|
|
37
|
+
properties:
|
|
38
|
+
context_window: 1_048_576
|
|
39
|
+
max_token_output: 8_192
|
|
40
|
+
training_cutoff: "2025-01"
|
|
41
|
+
class_properties:
|
|
42
|
+
supports_images: true
|
|
43
|
+
supports_files: true
|
|
44
|
+
supports_videos: true
|
|
45
|
+
supports_tools: true
|
|
46
|
+
supports_batch_requests: true
|
|
47
|
+
supports_temperature: true
|
|
48
|
+
costs_per_million_token:
|
|
49
|
+
cache:
|
|
50
|
+
read_discount: 0.1
|
|
51
|
+
default_parameters:
|
|
52
|
+
temperature: 1
|
|
53
|
+
reasoning_effort: "high"
|
|
54
|
+
|
|
55
|
+
google/gemini-3-pro-preview:
|
|
56
|
+
label: Gemini 3 Pro (11/25)
|
|
57
|
+
description: Gemini 3 Pro, Google's most powerful model.
|
|
58
|
+
release_date: 2025-11-18
|
|
59
|
+
properties:
|
|
60
|
+
context_window: 1048576
|
|
61
|
+
max_token_output: 65536
|
|
62
|
+
training_cutoff: "2025-01"
|
|
63
|
+
reasoning_model: true
|
|
64
|
+
class_properties:
|
|
65
|
+
supports_images: true
|
|
66
|
+
supports_files: true
|
|
67
|
+
supports_videos: true
|
|
68
|
+
supports_tools: true
|
|
69
|
+
supports_batch_requests: true
|
|
70
|
+
supports_temperature: true
|
|
71
|
+
costs_per_million_token:
|
|
72
|
+
input: 2
|
|
73
|
+
output: 12
|
|
74
|
+
cache:
|
|
75
|
+
read: 0.20
|
|
76
|
+
context:
|
|
77
|
+
threshold: 200_000
|
|
78
|
+
input: 2.5
|
|
79
|
+
output: 15.0
|
|
80
|
+
cache:
|
|
81
|
+
read: 0.40
|
|
82
|
+
|
|
34
83
|
gemini-2.5-models:
|
|
35
84
|
base-config:
|
|
36
85
|
properties:
|
|
@@ -31,7 +31,6 @@ gpt-5-models:
|
|
|
31
31
|
supports_temperature: false
|
|
32
32
|
supports_files: true
|
|
33
33
|
supports_tools: true
|
|
34
|
-
|
|
35
34
|
openai/gpt-5.1-codex:
|
|
36
35
|
label: GPT 5.1 Codex
|
|
37
36
|
documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex
|
|
@@ -43,7 +42,6 @@ gpt-5-models:
|
|
|
43
42
|
cache:
|
|
44
43
|
read: 0.125
|
|
45
44
|
default_parameters:
|
|
46
|
-
temperature: 1
|
|
47
45
|
max_output_tokens: 128_000
|
|
48
46
|
|
|
49
47
|
openai/gpt-5.1-codex-mini:
|
|
@@ -57,7 +55,6 @@ gpt-5-models:
|
|
|
57
55
|
cache:
|
|
58
56
|
read: 0.025
|
|
59
57
|
default_parameters:
|
|
60
|
-
temperature: 1
|
|
61
58
|
max_output_tokens: 128_000
|
|
62
59
|
|
|
63
60
|
openai/gpt-5-codex:
|
|
@@ -76,10 +73,8 @@ gpt-5-models:
|
|
|
76
73
|
available_as_evaluator: true
|
|
77
74
|
supports_images: true
|
|
78
75
|
default_parameters:
|
|
79
|
-
temperature: 1
|
|
80
76
|
max_output_tokens: 128_000
|
|
81
77
|
|
|
82
|
-
|
|
83
78
|
openai/gpt-5.1-2025-11-13:
|
|
84
79
|
label: GPT 5.1
|
|
85
80
|
documentation_url: https://platform.openai.com/docs/models/gpt-5.1
|
|
@@ -96,7 +91,6 @@ gpt-5-models:
|
|
|
96
91
|
available_as_evaluator: true
|
|
97
92
|
supports_images: true
|
|
98
93
|
default_parameters:
|
|
99
|
-
temperature: 1
|
|
100
94
|
max_output_tokens: 128_000
|
|
101
95
|
|
|
102
96
|
openai/gpt-5-2025-08-07:
|
|
@@ -115,7 +109,6 @@ gpt-5-models:
|
|
|
115
109
|
available_as_evaluator: true
|
|
116
110
|
supports_images: true
|
|
117
111
|
default_parameters:
|
|
118
|
-
temperature: 1
|
|
119
112
|
max_output_tokens: 128_000
|
|
120
113
|
alternative_keys:
|
|
121
114
|
- azure/gpt-5-2025-08-07
|
|
@@ -133,7 +126,6 @@ gpt-5-models:
|
|
|
133
126
|
properties:
|
|
134
127
|
training_cutoff: "2024-05"
|
|
135
128
|
default_parameters:
|
|
136
|
-
temperature: 1
|
|
137
129
|
max_output_tokens: 128_000
|
|
138
130
|
class_properties:
|
|
139
131
|
supports_images: true
|
|
@@ -153,7 +145,6 @@ gpt-5-models:
|
|
|
153
145
|
properties:
|
|
154
146
|
training_cutoff: "2024-05"
|
|
155
147
|
default_parameters:
|
|
156
|
-
temperature: 1
|
|
157
148
|
max_output_tokens: 128_000
|
|
158
149
|
class_properties:
|
|
159
150
|
supports_images: true
|
|
@@ -8,7 +8,7 @@ base-config:
|
|
|
8
8
|
supports_files: false
|
|
9
9
|
available_for_everyone: true
|
|
10
10
|
ignored_for_cost: false
|
|
11
|
-
supports_tools:
|
|
11
|
+
supports_tools: true
|
|
12
12
|
properties:
|
|
13
13
|
reasoning_model: false
|
|
14
14
|
|
|
@@ -33,7 +33,6 @@ xai-models:
|
|
|
33
33
|
reasoning_model: true
|
|
34
34
|
class_properties:
|
|
35
35
|
supports_images: false
|
|
36
|
-
supports_tools: true
|
|
37
36
|
costs_per_million_token:
|
|
38
37
|
input: 0.20
|
|
39
38
|
output: 1.50
|
|
@@ -81,6 +80,68 @@ xai-models:
|
|
|
81
80
|
- grok/grok-4-fast
|
|
82
81
|
- grok/grok-4-fast-reasoning-latest
|
|
83
82
|
|
|
83
|
+
grok/grok-4-1-fast-reasoning:
|
|
84
|
+
label: Grok 4.1 Fast (Reasoning)
|
|
85
|
+
description: ""
|
|
86
|
+
release_date: 2025-10-19
|
|
87
|
+
open_source: false
|
|
88
|
+
class_properties:
|
|
89
|
+
supports_images: true
|
|
90
|
+
available_as_evaluator: true
|
|
91
|
+
supports_metadata: true
|
|
92
|
+
supports_files: false
|
|
93
|
+
available_for_everyone: true
|
|
94
|
+
ignored_for_cost: false
|
|
95
|
+
properties:
|
|
96
|
+
context_window: 2_000_000
|
|
97
|
+
max_token_output: 2_000_000 # from openrouter
|
|
98
|
+
training_cutoff: null
|
|
99
|
+
reasoning_model: true
|
|
100
|
+
documentation_url: ""
|
|
101
|
+
costs_per_million_token:
|
|
102
|
+
input: 0.20
|
|
103
|
+
output: 0.5
|
|
104
|
+
cache:
|
|
105
|
+
read: 0.05
|
|
106
|
+
context:
|
|
107
|
+
threshold: 128_000
|
|
108
|
+
input: 0.4
|
|
109
|
+
output: 1.0
|
|
110
|
+
default_parameters:
|
|
111
|
+
temperature: 0.7
|
|
112
|
+
max_output_tokens: 128000
|
|
113
|
+
|
|
114
|
+
grok/grok-4-1-fast-non-reasoning:
|
|
115
|
+
label: Grok 4.1 Fast Non-Reasoning
|
|
116
|
+
description: ""
|
|
117
|
+
release_date: 2025-10-19
|
|
118
|
+
open_source: false
|
|
119
|
+
class_properties:
|
|
120
|
+
supports_images: true
|
|
121
|
+
available_as_evaluator: true
|
|
122
|
+
supports_metadata: true
|
|
123
|
+
supports_files: false
|
|
124
|
+
available_for_everyone: true
|
|
125
|
+
ignored_for_cost: false
|
|
126
|
+
properties:
|
|
127
|
+
context_window: 2_000_000
|
|
128
|
+
max_token_output: 2_000_000 # from openrouter
|
|
129
|
+
training_cutoff: null
|
|
130
|
+
reasoning_model: false
|
|
131
|
+
documentation_url: ""
|
|
132
|
+
costs_per_million_token:
|
|
133
|
+
input: 0.20
|
|
134
|
+
output: 0.5
|
|
135
|
+
cache:
|
|
136
|
+
read: 0.05
|
|
137
|
+
context:
|
|
138
|
+
threshold: 128_000
|
|
139
|
+
input: 0.4
|
|
140
|
+
output: 1.0
|
|
141
|
+
default_parameters:
|
|
142
|
+
temperature: 0.7
|
|
143
|
+
max_output_tokens: 128000
|
|
144
|
+
|
|
84
145
|
grok/grok-4-fast-non-reasoning:
|
|
85
146
|
label: Grok 4 Fast (Non-Reasoning)
|
|
86
147
|
description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
|
|
@@ -121,7 +182,6 @@ xai-models:
|
|
|
121
182
|
class_properties:
|
|
122
183
|
supports_images: true
|
|
123
184
|
available_for_everyone: false
|
|
124
|
-
supports_tools: true
|
|
125
185
|
properties:
|
|
126
186
|
context_window: 256_000
|
|
127
187
|
max_token_output: 128_000
|
model_library/exceptions.py
CHANGED
|
@@ -5,6 +5,7 @@ from typing import Any, Callable
|
|
|
5
5
|
|
|
6
6
|
import backoff
|
|
7
7
|
from ai21 import TooManyRequestsError as AI21RateLimitError
|
|
8
|
+
from anthropic import InternalServerError
|
|
8
9
|
from anthropic import RateLimitError as AnthropicRateLimitError
|
|
9
10
|
from backoff._typing import Details
|
|
10
11
|
from httpcore import ReadError as HTTPCoreReadError
|
|
@@ -166,6 +167,7 @@ RETRIABLE_EXCEPTIONS = [
|
|
|
166
167
|
OpenAIUnprocessableEntityError,
|
|
167
168
|
OpenAIAPIConnectionError,
|
|
168
169
|
AnthropicRateLimitError,
|
|
170
|
+
InternalServerError,
|
|
169
171
|
AI21RateLimitError,
|
|
170
172
|
RemoteProtocolError, # httpx connection closing when running models from sdk
|
|
171
173
|
HTTPXReadError,
|
|
@@ -191,6 +193,7 @@ RETRIABLE_EXCEPTION_CODES = [
|
|
|
191
193
|
"overloaded",
|
|
192
194
|
"throttling", # AWS throttling errors
|
|
193
195
|
"throttlingexception", # AWS throttling errors
|
|
196
|
+
"internal server error",
|
|
194
197
|
]
|
|
195
198
|
|
|
196
199
|
|
|
@@ -239,8 +242,9 @@ def retry_llm_call(
|
|
|
239
242
|
logger: logging.Logger,
|
|
240
243
|
max_tries: int = RETRY_MAX_TRIES,
|
|
241
244
|
max_time: float | None = None,
|
|
242
|
-
backoff_callback:
|
|
243
|
-
|
|
245
|
+
backoff_callback: (
|
|
246
|
+
Callable[[int, Exception | None, float, float], None] | None
|
|
247
|
+
) = None,
|
|
244
248
|
):
|
|
245
249
|
def on_backoff(details: Details):
|
|
246
250
|
exception = details.get("exception")
|
model_library/file_utils.py
CHANGED
|
@@ -56,7 +56,7 @@ def concat_images(
|
|
|
56
56
|
new_width = int(combined_image.width * scale_factor)
|
|
57
57
|
new_height = int(combined_image.height * scale_factor)
|
|
58
58
|
|
|
59
|
-
combined_image = combined_image.resize(
|
|
59
|
+
combined_image = combined_image.resize( # type: ignore
|
|
60
60
|
(new_width, new_height), Image.Resampling.LANCZOS
|
|
61
61
|
)
|
|
62
62
|
|
|
@@ -562,12 +562,8 @@ class AnthropicModel(LLM):
|
|
|
562
562
|
|
|
563
563
|
body = await self.create_body(input, tools=tools, **kwargs)
|
|
564
564
|
|
|
565
|
-
betas = [
|
|
566
|
-
|
|
567
|
-
"interleaved-thinking-2025-05-14",
|
|
568
|
-
]
|
|
569
|
-
|
|
570
|
-
if "claude-sonnet-4-5" in self.model_name:
|
|
565
|
+
betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
|
|
566
|
+
if "sonnet-4-5" in self.model_name:
|
|
571
567
|
betas.append("context-1m-2025-08-07")
|
|
572
568
|
|
|
573
569
|
async with self.get_client().beta.messages.stream(
|
|
@@ -2,8 +2,6 @@ import base64
|
|
|
2
2
|
import io
|
|
3
3
|
from typing import Any, Literal, Sequence, cast
|
|
4
4
|
|
|
5
|
-
from typing_extensions import override
|
|
6
|
-
|
|
7
5
|
from google.genai import Client
|
|
8
6
|
from google.genai import errors as genai_errors
|
|
9
7
|
from google.genai.types import (
|
|
@@ -18,10 +16,14 @@ from google.genai.types import (
|
|
|
18
16
|
Part,
|
|
19
17
|
SafetySetting,
|
|
20
18
|
ThinkingConfig,
|
|
19
|
+
ThinkingLevel,
|
|
21
20
|
Tool,
|
|
22
21
|
ToolListUnion,
|
|
23
22
|
UploadFileConfig,
|
|
23
|
+
FinishReason,
|
|
24
24
|
)
|
|
25
|
+
from typing_extensions import override
|
|
26
|
+
|
|
25
27
|
from model_library import model_library_settings
|
|
26
28
|
from model_library.base import (
|
|
27
29
|
LLM,
|
|
@@ -119,15 +121,6 @@ class GoogleModel(LLM):
|
|
|
119
121
|
):
|
|
120
122
|
super().__init__(model_name, provider, config=config)
|
|
121
123
|
|
|
122
|
-
# thinking tag
|
|
123
|
-
if self.model_name.endswith("-thinking"):
|
|
124
|
-
original_name = self.model_name
|
|
125
|
-
self.model_name = self.model_name.replace("-thinking", "")
|
|
126
|
-
self.reasoning = True
|
|
127
|
-
self.logger.info(
|
|
128
|
-
f"Enabled thinking mode for {original_name} -> {self.model_name}"
|
|
129
|
-
)
|
|
130
|
-
|
|
131
124
|
if self.provider_config.use_vertex:
|
|
132
125
|
self.supports_batch = False
|
|
133
126
|
|
|
@@ -261,14 +254,12 @@ class GoogleModel(LLM):
|
|
|
261
254
|
bytes: io.BytesIO,
|
|
262
255
|
type: Literal["image", "file"] = "file",
|
|
263
256
|
) -> FileWithId:
|
|
264
|
-
if
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
)
|
|
271
|
-
raise Exception("Model does not support batching")
|
|
257
|
+
if self.provider_config.use_vertex:
|
|
258
|
+
raise Exception(
|
|
259
|
+
"Vertex AI does not support file uploads. "
|
|
260
|
+
"use FileWithBase64 to pass files as inline data"
|
|
261
|
+
"or use genai for file uploads"
|
|
262
|
+
)
|
|
272
263
|
|
|
273
264
|
mime = f"image/{mime}" if type == "image" else mime # TODO:
|
|
274
265
|
response: File = self.client.files.upload(
|
|
@@ -294,7 +285,6 @@ class GoogleModel(LLM):
|
|
|
294
285
|
tools: list[ToolDefinition],
|
|
295
286
|
**kwargs: object,
|
|
296
287
|
) -> dict[str, Any]:
|
|
297
|
-
self.logger.debug(f"Creating request body for {self.model_name}")
|
|
298
288
|
generation_config = GenerateContentConfig(
|
|
299
289
|
max_output_tokens=self.max_tokens,
|
|
300
290
|
)
|
|
@@ -310,13 +300,15 @@ class GoogleModel(LLM):
|
|
|
310
300
|
if system_prompt and isinstance(system_prompt, str) and system_prompt.strip():
|
|
311
301
|
generation_config.system_instruction = str(system_prompt)
|
|
312
302
|
|
|
313
|
-
if
|
|
314
|
-
|
|
315
|
-
|
|
303
|
+
if self.reasoning:
|
|
304
|
+
reasoning_config = ThinkingConfig(include_thoughts=True)
|
|
305
|
+
if self.reasoning_effort:
|
|
306
|
+
reasoning_config.thinking_level = ThinkingLevel(self.reasoning_effort)
|
|
307
|
+
else:
|
|
308
|
+
reasoning_config.thinking_budget = cast(
|
|
316
309
|
int, kwargs.pop("thinking_budget", self.DEFAULT_THINKING_BUDGET)
|
|
317
|
-
)
|
|
318
|
-
|
|
319
|
-
)
|
|
310
|
+
)
|
|
311
|
+
generation_config.thinking_config = reasoning_config
|
|
320
312
|
|
|
321
313
|
if tools:
|
|
322
314
|
generation_config.tools = cast(ToolListUnion, await self.parse_tools(tools))
|
|
@@ -343,17 +335,20 @@ class GoogleModel(LLM):
|
|
|
343
335
|
text: str = ""
|
|
344
336
|
reasoning: str = ""
|
|
345
337
|
tool_calls: list[ToolCall] = []
|
|
346
|
-
last_content: Content | None = None
|
|
347
338
|
|
|
348
339
|
metadata: GenerateContentResponseUsageMetadata | None = None
|
|
349
340
|
|
|
350
341
|
stream = await self.client.aio.models.generate_content_stream(**body)
|
|
342
|
+
contents: list[Content | None] = []
|
|
343
|
+
finish_reason: FinishReason | None = None
|
|
344
|
+
|
|
351
345
|
async for chunk in stream:
|
|
352
346
|
candidates = chunk.candidates
|
|
353
347
|
if not candidates:
|
|
354
348
|
continue
|
|
355
349
|
|
|
356
350
|
content = candidates[0].content
|
|
351
|
+
|
|
357
352
|
if content and content.parts:
|
|
358
353
|
for part in content.parts:
|
|
359
354
|
if part.function_call:
|
|
@@ -378,14 +373,24 @@ class GoogleModel(LLM):
|
|
|
378
373
|
|
|
379
374
|
if chunk.usage_metadata:
|
|
380
375
|
metadata = chunk.usage_metadata
|
|
381
|
-
|
|
376
|
+
if content:
|
|
377
|
+
contents.append(content)
|
|
378
|
+
if candidates[0].finish_reason:
|
|
379
|
+
finish_reason = candidates[0].finish_reason
|
|
380
|
+
|
|
381
|
+
if finish_reason != FinishReason.STOP:
|
|
382
|
+
self.logger.error(f"Unexpected finish reason: {finish_reason}")
|
|
383
|
+
|
|
384
|
+
if not text and not reasoning and not tool_calls:
|
|
385
|
+
raise ModelNoOutputError("Model returned empty response")
|
|
382
386
|
|
|
383
387
|
result = QueryResult(
|
|
384
388
|
output_text=text,
|
|
385
389
|
reasoning=reasoning,
|
|
386
|
-
history=[*input,
|
|
390
|
+
history=[*input, *contents],
|
|
387
391
|
tool_calls=tool_calls,
|
|
388
392
|
)
|
|
393
|
+
|
|
389
394
|
if metadata:
|
|
390
395
|
# see _calculate_cost
|
|
391
396
|
cache_read_tokens = metadata.cached_content_token_count or 0
|
|
@@ -446,6 +451,7 @@ class GoogleModel(LLM):
|
|
|
446
451
|
"response_mime_type": "application/json",
|
|
447
452
|
}
|
|
448
453
|
)
|
|
454
|
+
|
|
449
455
|
body["config"] = config
|
|
450
456
|
|
|
451
457
|
# Make the request with retry wrapper
|
|
@@ -5,7 +5,11 @@ import json
|
|
|
5
5
|
from typing import Any, Literal, Sequence, cast
|
|
6
6
|
|
|
7
7
|
from openai import APIConnectionError, AsyncOpenAI
|
|
8
|
-
from openai.types.chat import
|
|
8
|
+
from openai.types.chat import (
|
|
9
|
+
ChatCompletionMessage,
|
|
10
|
+
ChatCompletionMessageToolCall,
|
|
11
|
+
ChatCompletionMessageToolCallUnion,
|
|
12
|
+
)
|
|
9
13
|
from openai.types.chat.chat_completion_message_tool_call import Function
|
|
10
14
|
from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
|
|
11
15
|
from openai.types.create_embedding_response import CreateEmbeddingResponse
|
|
@@ -617,7 +621,9 @@ class OpenAIModel(LLM):
|
|
|
617
621
|
final_message = ChatCompletionMessage(
|
|
618
622
|
role="assistant",
|
|
619
623
|
content=output_text if output_text else None,
|
|
620
|
-
tool_calls=
|
|
624
|
+
tool_calls=cast(list[ChatCompletionMessageToolCallUnion], raw_tool_calls)
|
|
625
|
+
if raw_tool_calls
|
|
626
|
+
else None,
|
|
621
627
|
)
|
|
622
628
|
if hasattr(final_message, "reasoning_content") and reasoning_text:
|
|
623
629
|
setattr(final_message, "reasoning_content", reasoning_text)
|