model-library 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -27,6 +27,28 @@ claude-4-models:
27
27
  class_properties:
28
28
  supports_batch_requests: true
29
29
 
30
+ anthropic/claude-opus-4-5-20251101:
31
+ label: Claude Opus 4.5 (Nonthinking)
32
+ release_date: 2025-11-24
33
+ properties:
34
+ context_window: 200_000
35
+ max_token_output: 64_000
36
+ extending_thinking: 64_000
37
+ class_properties:
38
+ available_for_everyone: false
39
+ default_parameters:
40
+ max_output_tokens: 64_000
41
+ costs_per_million_token:
42
+ input: 15.0
43
+ output: 75.0
44
+ cache:
45
+ read: 1.5
46
+ write: 18.75
47
+ alternative_keys:
48
+ - anthropic/claude-opus-4-5-20251101-thinking:
49
+ properties:
50
+ reasoning_model: true
51
+
30
52
  anthropic/claude-opus-4-1-20250805:
31
53
  label: Claude Opus 4.1 (Nonthinking)
32
54
  description: Advanced model for specialized complex
@@ -197,11 +219,12 @@ claude-3-5-models:
197
219
  alternative_keys:
198
220
  - anthropic/claude-3-5-sonnet-latest
199
221
  - anthropic/claude-3.5-sonnet-latest
200
-
222
+
201
223
  anthropic/claude-3-5-sonnet-20240620:
202
224
  label: Claude 3.5 Sonnet
203
225
  release_date: 2024-06-20
204
- description: Claude Sonnet 3.5 (June 2024) variant for code and content generation,
226
+ description:
227
+ Claude Sonnet 3.5 (June 2024) variant for code and content generation,
205
228
  multilingual and vision-capable, deprecated.
206
229
  class_properties:
207
230
  deprecated: true
@@ -339,7 +362,7 @@ claude-2-models:
339
362
  costs_per_million_token:
340
363
  input: 8.0
341
364
  output: 24.0
342
-
365
+
343
366
  anthropic/claude-1.3:
344
367
  label: Claude 1.3
345
368
  release_date: null
@@ -31,6 +31,55 @@ gemma-models:
31
31
  input: 0.00
32
32
  output: 0.00
33
33
 
34
+
35
+ gemini-3-models:
36
+ base-config:
37
+ properties:
38
+ context_window: 1_048_576
39
+ max_token_output: 8_192
40
+ training_cutoff: "2025-01"
41
+ class_properties:
42
+ supports_images: true
43
+ supports_files: true
44
+ supports_videos: true
45
+ supports_tools: true
46
+ supports_batch_requests: true
47
+ supports_temperature: true
48
+ costs_per_million_token:
49
+ cache:
50
+ read_discount: 0.1
51
+ default_parameters:
52
+ temperature: 1
53
+ reasoning_effort: "high"
54
+
55
+ google/gemini-3-pro-preview:
56
+ label: Gemini 3 Pro (11/25)
57
+ description: Gemini 3 Pro, Google's most powerful model.
58
+ release_date: 2025-11-18
59
+ properties:
60
+ context_window: 1048576
61
+ max_token_output: 65536
62
+ training_cutoff: "2025-01"
63
+ reasoning_model: true
64
+ class_properties:
65
+ supports_images: true
66
+ supports_files: true
67
+ supports_videos: true
68
+ supports_tools: true
69
+ supports_batch_requests: true
70
+ supports_temperature: true
71
+ costs_per_million_token:
72
+ input: 2
73
+ output: 12
74
+ cache:
75
+ read: 0.20
76
+ context:
77
+ threshold: 200_000
78
+ input: 2.5
79
+ output: 15.0
80
+ cache:
81
+ read: 0.40
82
+
34
83
  gemini-2.5-models:
35
84
  base-config:
36
85
  properties:
@@ -31,7 +31,6 @@ gpt-5-models:
31
31
  supports_temperature: false
32
32
  supports_files: true
33
33
  supports_tools: true
34
-
35
34
  openai/gpt-5.1-codex:
36
35
  label: GPT 5.1 Codex
37
36
  documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex
@@ -43,7 +42,6 @@ gpt-5-models:
43
42
  cache:
44
43
  read: 0.125
45
44
  default_parameters:
46
- temperature: 1
47
45
  max_output_tokens: 128_000
48
46
 
49
47
  openai/gpt-5.1-codex-mini:
@@ -57,7 +55,6 @@ gpt-5-models:
57
55
  cache:
58
56
  read: 0.025
59
57
  default_parameters:
60
- temperature: 1
61
58
  max_output_tokens: 128_000
62
59
 
63
60
  openai/gpt-5-codex:
@@ -76,10 +73,8 @@ gpt-5-models:
76
73
  available_as_evaluator: true
77
74
  supports_images: true
78
75
  default_parameters:
79
- temperature: 1
80
76
  max_output_tokens: 128_000
81
77
 
82
-
83
78
  openai/gpt-5.1-2025-11-13:
84
79
  label: GPT 5.1
85
80
  documentation_url: https://platform.openai.com/docs/models/gpt-5.1
@@ -96,7 +91,6 @@ gpt-5-models:
96
91
  available_as_evaluator: true
97
92
  supports_images: true
98
93
  default_parameters:
99
- temperature: 1
100
94
  max_output_tokens: 128_000
101
95
 
102
96
  openai/gpt-5-2025-08-07:
@@ -115,7 +109,6 @@ gpt-5-models:
115
109
  available_as_evaluator: true
116
110
  supports_images: true
117
111
  default_parameters:
118
- temperature: 1
119
112
  max_output_tokens: 128_000
120
113
  alternative_keys:
121
114
  - azure/gpt-5-2025-08-07
@@ -133,7 +126,6 @@ gpt-5-models:
133
126
  properties:
134
127
  training_cutoff: "2024-05"
135
128
  default_parameters:
136
- temperature: 1
137
129
  max_output_tokens: 128_000
138
130
  class_properties:
139
131
  supports_images: true
@@ -153,7 +145,6 @@ gpt-5-models:
153
145
  properties:
154
146
  training_cutoff: "2024-05"
155
147
  default_parameters:
156
- temperature: 1
157
148
  max_output_tokens: 128_000
158
149
  class_properties:
159
150
  supports_images: true
@@ -28,6 +28,7 @@ kimi-models:
28
28
  supports_temperature: true
29
29
  default_parameters:
30
30
  temperature: 0.3
31
+ max_output_tokens: 16_384
31
32
 
32
33
  together/moonshotai/Kimi-K2-Instruct:
33
34
  label: Kimi K2 Instruct
@@ -8,7 +8,7 @@ base-config:
8
8
  supports_files: false
9
9
  available_for_everyone: true
10
10
  ignored_for_cost: false
11
- supports_tools: false
11
+ supports_tools: true
12
12
  properties:
13
13
  reasoning_model: false
14
14
 
@@ -33,7 +33,6 @@ xai-models:
33
33
  reasoning_model: true
34
34
  class_properties:
35
35
  supports_images: false
36
- supports_tools: true
37
36
  costs_per_million_token:
38
37
  input: 0.20
39
38
  output: 1.50
@@ -81,6 +80,68 @@ xai-models:
81
80
  - grok/grok-4-fast
82
81
  - grok/grok-4-fast-reasoning-latest
83
82
 
83
+ grok/grok-4-1-fast-reasoning:
84
+ label: Grok 4.1 Fast (Reasoning)
85
+ description: ""
86
+ release_date: 2025-10-19
87
+ open_source: false
88
+ class_properties:
89
+ supports_images: true
90
+ available_as_evaluator: true
91
+ supports_metadata: true
92
+ supports_files: false
93
+ available_for_everyone: true
94
+ ignored_for_cost: false
95
+ properties:
96
+ context_window: 2_000_000
97
+ max_token_output: 2_000_000 # from openrouter
98
+ training_cutoff: null
99
+ reasoning_model: true
100
+ documentation_url: ""
101
+ costs_per_million_token:
102
+ input: 0.20
103
+ output: 0.5
104
+ cache:
105
+ read: 0.05
106
+ context:
107
+ threshold: 128_000
108
+ input: 0.4
109
+ output: 1.0
110
+ default_parameters:
111
+ temperature: 0.7
112
+ max_output_tokens: 128000
113
+
114
+ grok/grok-4-1-fast-non-reasoning:
115
+ label: Grok 4.1 Fast Non-Reasoning
116
+ description: ""
117
+ release_date: 2025-10-19
118
+ open_source: false
119
+ class_properties:
120
+ supports_images: true
121
+ available_as_evaluator: true
122
+ supports_metadata: true
123
+ supports_files: false
124
+ available_for_everyone: true
125
+ ignored_for_cost: false
126
+ properties:
127
+ context_window: 2_000_000
128
+ max_token_output: 2_000_000 # from openrouter
129
+ training_cutoff: null
130
+ reasoning_model: false
131
+ documentation_url: ""
132
+ costs_per_million_token:
133
+ input: 0.20
134
+ output: 0.5
135
+ cache:
136
+ read: 0.05
137
+ context:
138
+ threshold: 128_000
139
+ input: 0.4
140
+ output: 1.0
141
+ default_parameters:
142
+ temperature: 0.7
143
+ max_output_tokens: 128000
144
+
84
145
  grok/grok-4-fast-non-reasoning:
85
146
  label: Grok 4 Fast (Non-Reasoning)
86
147
  description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
@@ -121,7 +182,6 @@ xai-models:
121
182
  class_properties:
122
183
  supports_images: true
123
184
  available_for_everyone: false
124
- supports_tools: true
125
185
  properties:
126
186
  context_window: 256_000
127
187
  max_token_output: 128_000
@@ -5,6 +5,7 @@ from typing import Any, Callable
5
5
 
6
6
  import backoff
7
7
  from ai21 import TooManyRequestsError as AI21RateLimitError
8
+ from anthropic import InternalServerError
8
9
  from anthropic import RateLimitError as AnthropicRateLimitError
9
10
  from backoff._typing import Details
10
11
  from httpcore import ReadError as HTTPCoreReadError
@@ -166,6 +167,7 @@ RETRIABLE_EXCEPTIONS = [
166
167
  OpenAIUnprocessableEntityError,
167
168
  OpenAIAPIConnectionError,
168
169
  AnthropicRateLimitError,
170
+ InternalServerError,
169
171
  AI21RateLimitError,
170
172
  RemoteProtocolError, # httpx connection closing when running models from sdk
171
173
  HTTPXReadError,
@@ -191,6 +193,7 @@ RETRIABLE_EXCEPTION_CODES = [
191
193
  "overloaded",
192
194
  "throttling", # AWS throttling errors
193
195
  "throttlingexception", # AWS throttling errors
196
+ "internal server error",
194
197
  ]
195
198
 
196
199
 
@@ -239,8 +242,9 @@ def retry_llm_call(
239
242
  logger: logging.Logger,
240
243
  max_tries: int = RETRY_MAX_TRIES,
241
244
  max_time: float | None = None,
242
- backoff_callback: Callable[[int, Exception | None, float, float], None]
243
- | None = None,
245
+ backoff_callback: (
246
+ Callable[[int, Exception | None, float, float], None] | None
247
+ ) = None,
244
248
  ):
245
249
  def on_backoff(details: Details):
246
250
  exception = details.get("exception")
@@ -56,7 +56,7 @@ def concat_images(
56
56
  new_width = int(combined_image.width * scale_factor)
57
57
  new_height = int(combined_image.height * scale_factor)
58
58
 
59
- combined_image = combined_image.resize(
59
+ combined_image = combined_image.resize( # type: ignore
60
60
  (new_width, new_height), Image.Resampling.LANCZOS
61
61
  )
62
62
 
@@ -562,12 +562,8 @@ class AnthropicModel(LLM):
562
562
 
563
563
  body = await self.create_body(input, tools=tools, **kwargs)
564
564
 
565
- betas = [
566
- "files-api-2025-04-14",
567
- "interleaved-thinking-2025-05-14",
568
- ]
569
-
570
- if "claude-sonnet-4-5" in self.model_name:
565
+ betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
566
+ if "sonnet-4-5" in self.model_name:
571
567
  betas.append("context-1m-2025-08-07")
572
568
 
573
569
  async with self.get_client().beta.messages.stream(
@@ -2,8 +2,6 @@ import base64
2
2
  import io
3
3
  from typing import Any, Literal, Sequence, cast
4
4
 
5
- from typing_extensions import override
6
-
7
5
  from google.genai import Client
8
6
  from google.genai import errors as genai_errors
9
7
  from google.genai.types import (
@@ -18,10 +16,14 @@ from google.genai.types import (
18
16
  Part,
19
17
  SafetySetting,
20
18
  ThinkingConfig,
19
+ ThinkingLevel,
21
20
  Tool,
22
21
  ToolListUnion,
23
22
  UploadFileConfig,
23
+ FinishReason,
24
24
  )
25
+ from typing_extensions import override
26
+
25
27
  from model_library import model_library_settings
26
28
  from model_library.base import (
27
29
  LLM,
@@ -119,15 +121,6 @@ class GoogleModel(LLM):
119
121
  ):
120
122
  super().__init__(model_name, provider, config=config)
121
123
 
122
- # thinking tag
123
- if self.model_name.endswith("-thinking"):
124
- original_name = self.model_name
125
- self.model_name = self.model_name.replace("-thinking", "")
126
- self.reasoning = True
127
- self.logger.info(
128
- f"Enabled thinking mode for {original_name} -> {self.model_name}"
129
- )
130
-
131
124
  if self.provider_config.use_vertex:
132
125
  self.supports_batch = False
133
126
 
@@ -261,14 +254,12 @@ class GoogleModel(LLM):
261
254
  bytes: io.BytesIO,
262
255
  type: Literal["image", "file"] = "file",
263
256
  ) -> FileWithId:
264
- if not self.supports_batch:
265
- if self.provider_config.use_vertex:
266
- raise Exception(
267
- "Vertex AI does not support file uploads. "
268
- "use FileWithBase64 to pass files as inline data"
269
- "or use genai for file uploads"
270
- )
271
- raise Exception("Model does not support batching")
257
+ if self.provider_config.use_vertex:
258
+ raise Exception(
259
+ "Vertex AI does not support file uploads. "
260
+ "use FileWithBase64 to pass files as inline data"
261
+ "or use genai for file uploads"
262
+ )
272
263
 
273
264
  mime = f"image/{mime}" if type == "image" else mime # TODO:
274
265
  response: File = self.client.files.upload(
@@ -294,7 +285,6 @@ class GoogleModel(LLM):
294
285
  tools: list[ToolDefinition],
295
286
  **kwargs: object,
296
287
  ) -> dict[str, Any]:
297
- self.logger.debug(f"Creating request body for {self.model_name}")
298
288
  generation_config = GenerateContentConfig(
299
289
  max_output_tokens=self.max_tokens,
300
290
  )
@@ -310,13 +300,15 @@ class GoogleModel(LLM):
310
300
  if system_prompt and isinstance(system_prompt, str) and system_prompt.strip():
311
301
  generation_config.system_instruction = str(system_prompt)
312
302
 
313
- if "gemini-2.5" in self.model_name and self.reasoning:
314
- generation_config.thinking_config = ThinkingConfig(
315
- thinking_budget=cast(
303
+ if self.reasoning:
304
+ reasoning_config = ThinkingConfig(include_thoughts=True)
305
+ if self.reasoning_effort:
306
+ reasoning_config.thinking_level = ThinkingLevel(self.reasoning_effort)
307
+ else:
308
+ reasoning_config.thinking_budget = cast(
316
309
  int, kwargs.pop("thinking_budget", self.DEFAULT_THINKING_BUDGET)
317
- ),
318
- include_thoughts=True,
319
- )
310
+ )
311
+ generation_config.thinking_config = reasoning_config
320
312
 
321
313
  if tools:
322
314
  generation_config.tools = cast(ToolListUnion, await self.parse_tools(tools))
@@ -343,17 +335,20 @@ class GoogleModel(LLM):
343
335
  text: str = ""
344
336
  reasoning: str = ""
345
337
  tool_calls: list[ToolCall] = []
346
- last_content: Content | None = None
347
338
 
348
339
  metadata: GenerateContentResponseUsageMetadata | None = None
349
340
 
350
341
  stream = await self.client.aio.models.generate_content_stream(**body)
342
+ contents: list[Content | None] = []
343
+ finish_reason: FinishReason | None = None
344
+
351
345
  async for chunk in stream:
352
346
  candidates = chunk.candidates
353
347
  if not candidates:
354
348
  continue
355
349
 
356
350
  content = candidates[0].content
351
+
357
352
  if content and content.parts:
358
353
  for part in content.parts:
359
354
  if part.function_call:
@@ -378,14 +373,24 @@ class GoogleModel(LLM):
378
373
 
379
374
  if chunk.usage_metadata:
380
375
  metadata = chunk.usage_metadata
381
- last_content = content
376
+ if content:
377
+ contents.append(content)
378
+ if candidates[0].finish_reason:
379
+ finish_reason = candidates[0].finish_reason
380
+
381
+ if finish_reason != FinishReason.STOP:
382
+ self.logger.error(f"Unexpected finish reason: {finish_reason}")
383
+
384
+ if not text and not reasoning and not tool_calls:
385
+ raise ModelNoOutputError("Model returned empty response")
382
386
 
383
387
  result = QueryResult(
384
388
  output_text=text,
385
389
  reasoning=reasoning,
386
- history=[*input, last_content],
390
+ history=[*input, *contents],
387
391
  tool_calls=tool_calls,
388
392
  )
393
+
389
394
  if metadata:
390
395
  # see _calculate_cost
391
396
  cache_read_tokens = metadata.cached_content_token_count or 0
@@ -446,6 +451,7 @@ class GoogleModel(LLM):
446
451
  "response_mime_type": "application/json",
447
452
  }
448
453
  )
454
+
449
455
  body["config"] = config
450
456
 
451
457
  # Make the request with retry wrapper
@@ -5,7 +5,11 @@ import json
5
5
  from typing import Any, Literal, Sequence, cast
6
6
 
7
7
  from openai import APIConnectionError, AsyncOpenAI
8
- from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall
8
+ from openai.types.chat import (
9
+ ChatCompletionMessage,
10
+ ChatCompletionMessageToolCall,
11
+ ChatCompletionMessageToolCallUnion,
12
+ )
9
13
  from openai.types.chat.chat_completion_message_tool_call import Function
10
14
  from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
11
15
  from openai.types.create_embedding_response import CreateEmbeddingResponse
@@ -617,7 +621,9 @@ class OpenAIModel(LLM):
617
621
  final_message = ChatCompletionMessage(
618
622
  role="assistant",
619
623
  content=output_text if output_text else None,
620
- tool_calls=raw_tool_calls if raw_tool_calls else None,
624
+ tool_calls=cast(list[ChatCompletionMessageToolCallUnion], raw_tool_calls)
625
+ if raw_tool_calls
626
+ else None,
621
627
  )
622
628
  if hasattr(final_message, "reasoning_content") and reasoning_text:
623
629
  setattr(final_message, "reasoning_content", reasoning_text)