PyPI - model-library - Versions diffs - 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

model-library 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

model_library/base/base.py +11 -6
model_library/base/output.py +54 -0
model_library/base/utils.py +3 -2
model_library/config/ai21labs_models.yaml +1 -0
model_library/config/all_models.json +300 -37
model_library/config/anthropic_models.yaml +26 -3
model_library/config/google_models.yaml +49 -0
model_library/config/openai_models.yaml +0 -9
model_library/config/together_models.yaml +1 -0
model_library/config/xai_models.yaml +63 -3
model_library/exceptions.py +6 -2
model_library/file_utils.py +1 -1
model_library/providers/anthropic.py +2 -6
model_library/providers/google/google.py +35 -29
model_library/providers/openai.py +8 -2
model_library/providers/together.py +18 -211
model_library/register_models.py +0 -2
{model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/METADATA +2 -3
{model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/RECORD +22 -22
{model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/WHEEL +0 -0
{model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/licenses/LICENSE +0 -0
{model_library-0.1.2.dist-info → model_library-0.1.3.dist-info}/top_level.txt +0 -0

model_library/config/anthropic_models.yaml CHANGED Viewed

@@ -27,6 +27,28 @@ claude-4-models:
     class_properties:
       supports_batch_requests: true
+  anthropic/claude-opus-4-5-20251101:
+    label: Claude Opus 4.5 (Nonthinking)
+    release_date: 2025-11-24
+    properties:
+      context_window: 200_000
+      max_token_output: 64_000
+      extending_thinking: 64_000
+    class_properties:
+      available_for_everyone: false
+    default_parameters:
+      max_output_tokens: 64_000
+    costs_per_million_token:
+      input: 15.0
+      output: 75.0
+      cache:
+        read: 1.5
+        write: 18.75
+    alternative_keys:
+      - anthropic/claude-opus-4-5-20251101-thinking:
+          properties:
+            reasoning_model: true
   anthropic/claude-opus-4-1-20250805:
     label: Claude Opus 4.1 (Nonthinking)
     description: Advanced model for specialized complex
@@ -197,11 +219,12 @@ claude-3-5-models:
     alternative_keys:
       - anthropic/claude-3-5-sonnet-latest
       - anthropic/claude-3.5-sonnet-latest
   anthropic/claude-3-5-sonnet-20240620:
     label: Claude 3.5 Sonnet
     release_date: 2024-06-20
-    description: Claude Sonnet 3.5 (June 2024) variant for code and content generation,
+    description:
+      Claude Sonnet 3.5 (June 2024) variant for code and content generation,
       multilingual and vision-capable, deprecated.
     class_properties:
       deprecated: true
@@ -339,7 +362,7 @@ claude-2-models:
     costs_per_million_token:
       input: 8.0
       output: 24.0
   anthropic/claude-1.3:
     label: Claude 1.3
     release_date: null

model_library/config/google_models.yaml CHANGED Viewed

@@ -31,6 +31,55 @@ gemma-models:
       input: 0.00
       output: 0.00
+gemini-3-models:
+  base-config:
+    properties:
+      context_window: 1_048_576
+      max_token_output: 8_192
+      training_cutoff: "2025-01"
+    class_properties:
+      supports_images: true
+      supports_files: true
+      supports_videos: true
+      supports_tools: true
+      supports_batch_requests: true
+      supports_temperature: true
+    costs_per_million_token:
+      cache:
+        read_discount: 0.1
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+  google/gemini-3-pro-preview:
+    label: Gemini 3 Pro (11/25)
+    description: Gemini 3 Pro, Google's most powerful model.
+    release_date: 2025-11-18
+    properties:
+      context_window: 1048576
+      max_token_output: 65536
+      training_cutoff: "2025-01"
+      reasoning_model: true
+    class_properties:
+      supports_images: true
+      supports_files: true
+      supports_videos: true
+      supports_tools: true
+      supports_batch_requests: true
+      supports_temperature: true
+    costs_per_million_token:
+      input: 2
+      output: 12
+      cache:
+        read: 0.20
+      context:
+        threshold: 200_000
+        input: 2.5
+        output: 15.0
+        cache:
+          read: 0.40
 gemini-2.5-models:
   base-config:
     properties:

model_library/config/openai_models.yaml CHANGED Viewed

@@ -31,7 +31,6 @@ gpt-5-models:
       supports_temperature: false
       supports_files: true
       supports_tools: true
   openai/gpt-5.1-codex:
     label: GPT 5.1 Codex
     documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex
@@ -43,7 +42,6 @@ gpt-5-models:
       cache:
         read: 0.125
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
   openai/gpt-5.1-codex-mini:
@@ -57,7 +55,6 @@ gpt-5-models:
       cache:
         read: 0.025
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
   openai/gpt-5-codex:
@@ -76,10 +73,8 @@ gpt-5-models:
       available_as_evaluator: true
       supports_images: true
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
   openai/gpt-5.1-2025-11-13:
     label: GPT 5.1
     documentation_url: https://platform.openai.com/docs/models/gpt-5.1
@@ -96,7 +91,6 @@ gpt-5-models:
       available_as_evaluator: true
       supports_images: true
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
   openai/gpt-5-2025-08-07:
@@ -115,7 +109,6 @@ gpt-5-models:
       available_as_evaluator: true
       supports_images: true
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     alternative_keys:
       - azure/gpt-5-2025-08-07
@@ -133,7 +126,6 @@ gpt-5-models:
     properties:
       training_cutoff: "2024-05"
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     class_properties:
       supports_images: true
@@ -153,7 +145,6 @@ gpt-5-models:
     properties:
       training_cutoff: "2024-05"
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     class_properties:
       supports_images: true

model_library/config/together_models.yaml CHANGED Viewed

@@ -28,6 +28,7 @@ kimi-models:
       supports_temperature: true
     default_parameters:
       temperature: 0.3
+      max_output_tokens: 16_384
   together/moonshotai/Kimi-K2-Instruct:
     label: Kimi K2 Instruct

model_library/config/xai_models.yaml CHANGED Viewed

@@ -8,7 +8,7 @@ base-config:
     supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: false
+    supports_tools: true
   properties:
     reasoning_model: false
@@ -33,7 +33,6 @@ xai-models:
       reasoning_model: true
     class_properties:
       supports_images: false
-      supports_tools: true
     costs_per_million_token:
       input: 0.20
       output: 1.50
@@ -81,6 +80,68 @@ xai-models:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest
+  grok/grok-4-1-fast-reasoning:
+    label: Grok 4.1 Fast (Reasoning)
+    description: ""
+    release_date: 2025-10-19
+    open_source: false
+    class_properties:
+      supports_images: true
+      available_as_evaluator: true
+      supports_metadata: true
+      supports_files: false
+      available_for_everyone: true
+      ignored_for_cost: false
+    properties:
+      context_window: 2_000_000
+      max_token_output: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: true
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+    default_parameters:
+      temperature: 0.7
+      max_output_tokens: 128000
+  grok/grok-4-1-fast-non-reasoning:
+    label: Grok 4.1 Fast Non-Reasoning
+    description: ""
+    release_date: 2025-10-19
+    open_source: false
+    class_properties:
+      supports_images: true
+      available_as_evaluator: true
+      supports_metadata: true
+      supports_files: false
+      available_for_everyone: true
+      ignored_for_cost: false
+    properties:
+      context_window: 2_000_000
+      max_token_output: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: false
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+    default_parameters:
+      temperature: 0.7
+      max_output_tokens: 128000
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
@@ -121,7 +182,6 @@ xai-models:
     class_properties:
       supports_images: true
       available_for_everyone: false
-      supports_tools: true
     properties:
       context_window: 256_000
       max_token_output: 128_000

model_library/exceptions.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, Callable
 import backoff
 from ai21 import TooManyRequestsError as AI21RateLimitError
+from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
@@ -166,6 +167,7 @@ RETRIABLE_EXCEPTIONS = [
     OpenAIUnprocessableEntityError,
     OpenAIAPIConnectionError,
     AnthropicRateLimitError,
+    InternalServerError,
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
@@ -191,6 +193,7 @@ RETRIABLE_EXCEPTION_CODES = [
     "overloaded",
     "throttling",  # AWS throttling errors
     "throttlingexception",  # AWS throttling errors
+    "internal server error",
 ]
@@ -239,8 +242,9 @@ def retry_llm_call(
     logger: logging.Logger,
     max_tries: int = RETRY_MAX_TRIES,
     max_time: float | None = None,
-    backoff_callback: Callable[[int, Exception | None, float, float], None]
-    | None = None,
+    backoff_callback: (
+        Callable[[int, Exception | None, float, float], None] | None
+    ) = None,
 ):
     def on_backoff(details: Details):
         exception = details.get("exception")

model_library/file_utils.py CHANGED Viewed

@@ -56,7 +56,7 @@ def concat_images(
         new_width = int(combined_image.width * scale_factor)
         new_height = int(combined_image.height * scale_factor)
-        combined_image = combined_image.resize(
+        combined_image = combined_image.resize(  # type: ignore
             (new_width, new_height), Image.Resampling.LANCZOS
         )

model_library/providers/anthropic.py CHANGED Viewed

@@ -562,12 +562,8 @@ class AnthropicModel(LLM):
         body = await self.create_body(input, tools=tools, **kwargs)
-        betas = [
-            "files-api-2025-04-14",
-            "interleaved-thinking-2025-05-14",
-        ]
-        if "claude-sonnet-4-5" in self.model_name:
+        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+        if "sonnet-4-5" in self.model_name:
             betas.append("context-1m-2025-08-07")
         async with self.get_client().beta.messages.stream(

model_library/providers/google/google.py CHANGED Viewed

@@ -2,8 +2,6 @@ import base64
 import io
 from typing import Any, Literal, Sequence, cast
-from typing_extensions import override
 from google.genai import Client
 from google.genai import errors as genai_errors
 from google.genai.types import (
@@ -18,10 +16,14 @@ from google.genai.types import (
     Part,
     SafetySetting,
     ThinkingConfig,
+    ThinkingLevel,
     Tool,
     ToolListUnion,
     UploadFileConfig,
+    FinishReason,
 )
+from typing_extensions import override
 from model_library import model_library_settings
 from model_library.base import (
     LLM,
@@ -119,15 +121,6 @@ class GoogleModel(LLM):
     ):
         super().__init__(model_name, provider, config=config)
-        # thinking tag
-        if self.model_name.endswith("-thinking"):
-            original_name = self.model_name
-            self.model_name = self.model_name.replace("-thinking", "")
-            self.reasoning = True
-            self.logger.info(
-                f"Enabled thinking mode for {original_name} -> {self.model_name}"
-            )
         if self.provider_config.use_vertex:
             self.supports_batch = False
@@ -261,14 +254,12 @@ class GoogleModel(LLM):
         bytes: io.BytesIO,
         type: Literal["image", "file"] = "file",
     ) -> FileWithId:
-        if not self.supports_batch:
-            if self.provider_config.use_vertex:
-                raise Exception(
-                    "Vertex AI does not support file uploads. "
-                    "use FileWithBase64 to pass files as inline data"
-                    "or use genai for file uploads"
-                )
-            raise Exception("Model does not support batching")
+        if self.provider_config.use_vertex:
+            raise Exception(
+                "Vertex AI does not support file uploads. "
+                "use FileWithBase64 to pass files as inline data"
+                "or use genai for file uploads"
+            )
         mime = f"image/{mime}" if type == "image" else mime  # TODO:
         response: File = self.client.files.upload(
@@ -294,7 +285,6 @@ class GoogleModel(LLM):
         tools: list[ToolDefinition],
         **kwargs: object,
     ) -> dict[str, Any]:
-        self.logger.debug(f"Creating request body for {self.model_name}")
         generation_config = GenerateContentConfig(
             max_output_tokens=self.max_tokens,
         )
@@ -310,13 +300,15 @@ class GoogleModel(LLM):
         if system_prompt and isinstance(system_prompt, str) and system_prompt.strip():
             generation_config.system_instruction = str(system_prompt)
-        if "gemini-2.5" in self.model_name and self.reasoning:
-            generation_config.thinking_config = ThinkingConfig(
-                thinking_budget=cast(
+        if self.reasoning:
+            reasoning_config = ThinkingConfig(include_thoughts=True)
+            if self.reasoning_effort:
+                reasoning_config.thinking_level = ThinkingLevel(self.reasoning_effort)
+            else:
+                reasoning_config.thinking_budget = cast(
                     int, kwargs.pop("thinking_budget", self.DEFAULT_THINKING_BUDGET)
-                ),
-                include_thoughts=True,
-            )
+                )
+            generation_config.thinking_config = reasoning_config
         if tools:
             generation_config.tools = cast(ToolListUnion, await self.parse_tools(tools))
@@ -343,17 +335,20 @@ class GoogleModel(LLM):
         text: str = ""
         reasoning: str = ""
         tool_calls: list[ToolCall] = []
-        last_content: Content | None = None
         metadata: GenerateContentResponseUsageMetadata | None = None
         stream = await self.client.aio.models.generate_content_stream(**body)
+        contents: list[Content | None] = []
+        finish_reason: FinishReason | None = None
         async for chunk in stream:
             candidates = chunk.candidates
             if not candidates:
                 continue
             content = candidates[0].content
             if content and content.parts:
                 for part in content.parts:
                     if part.function_call:
@@ -378,14 +373,24 @@ class GoogleModel(LLM):
             if chunk.usage_metadata:
                 metadata = chunk.usage_metadata
-            last_content = content
+            if content:
+                contents.append(content)
+            if candidates[0].finish_reason:
+                finish_reason = candidates[0].finish_reason
+        if finish_reason != FinishReason.STOP:
+            self.logger.error(f"Unexpected finish reason: {finish_reason}")
+        if not text and not reasoning and not tool_calls:
+            raise ModelNoOutputError("Model returned empty response")
         result = QueryResult(
             output_text=text,
             reasoning=reasoning,
-            history=[*input, last_content],
+            history=[*input, *contents],
             tool_calls=tool_calls,
         )
         if metadata:
             # see _calculate_cost
             cache_read_tokens = metadata.cached_content_token_count or 0
@@ -446,6 +451,7 @@ class GoogleModel(LLM):
                 "response_mime_type": "application/json",
             }
         )
         body["config"] = config
         # Make the request with retry wrapper

model_library/providers/openai.py CHANGED Viewed

@@ -5,7 +5,11 @@ import json
 from typing import Any, Literal, Sequence, cast
 from openai import APIConnectionError, AsyncOpenAI
-from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageToolCall
+from openai.types.chat import (
+    ChatCompletionMessage,
+    ChatCompletionMessageToolCall,
+    ChatCompletionMessageToolCallUnion,
+)
 from openai.types.chat.chat_completion_message_tool_call import Function
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.create_embedding_response import CreateEmbeddingResponse
@@ -617,7 +621,9 @@ class OpenAIModel(LLM):
         final_message = ChatCompletionMessage(
             role="assistant",
             content=output_text if output_text else None,
-            tool_calls=raw_tool_calls if raw_tool_calls else None,
+            tool_calls=cast(list[ChatCompletionMessageToolCallUnion], raw_tool_calls)
+            if raw_tool_calls
+            else None,
         )
         if hasattr(final_message, "reasoning_content") and reasoning_text:
             setattr(final_message, "reasoning_content", reasoning_text)

model-library 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

model-library 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl