model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +13 -6
- model_library/base/output.py +55 -0
- model_library/base/utils.py +3 -2
- model_library/config/README.md +169 -0
- model_library/config/ai21labs_models.yaml +11 -10
- model_library/config/alibaba_models.yaml +21 -22
- model_library/config/all_models.json +4708 -2471
- model_library/config/amazon_models.yaml +100 -102
- model_library/config/anthropic_models.yaml +59 -45
- model_library/config/cohere_models.yaml +25 -24
- model_library/config/deepseek_models.yaml +28 -25
- model_library/config/dummy_model.yaml +9 -7
- model_library/config/fireworks_models.yaml +86 -56
- model_library/config/google_models.yaml +156 -102
- model_library/config/inception_models.yaml +6 -6
- model_library/config/kimi_models.yaml +13 -14
- model_library/config/minimax_models.yaml +37 -0
- model_library/config/mistral_models.yaml +85 -29
- model_library/config/openai_models.yaml +192 -159
- model_library/config/perplexity_models.yaml +8 -23
- model_library/config/together_models.yaml +115 -103
- model_library/config/xai_models.yaml +85 -57
- model_library/config/zai_models.yaml +23 -15
- model_library/exceptions.py +12 -17
- model_library/file_utils.py +1 -1
- model_library/providers/amazon.py +32 -17
- model_library/providers/anthropic.py +2 -6
- model_library/providers/google/google.py +35 -29
- model_library/providers/minimax.py +33 -0
- model_library/providers/mistral.py +10 -1
- model_library/providers/openai.py +10 -8
- model_library/providers/together.py +18 -211
- model_library/register_models.py +36 -38
- model_library/registry_utils.py +18 -16
- model_library/utils.py +2 -2
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
- model_library-0.1.4.dist-info/RECORD +64 -0
- model_library-0.1.2.dist-info/RECORD +0 -61
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
model_library/config/xai_models.yaml
CHANGED

```diff
@@ -2,20 +2,21 @@ base-config:
   company: xAI
   documentation_url: https://docs.x.ai/docs#models
   open_source: false
-
-
+  supports:
+    images: true
+    files: false
+    tools: true
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: false
   properties:
     reasoning_model: false

 xai-models:
   base-config:
-
-
+    supports:
+      temperature: true
     costs_per_million_token:
       cache:
         read_discount: 0.25
@@ -29,20 +30,16 @@ xai-models:
     release_date: 2025-08-25
     properties:
       context_window: 256_000
-
+      max_tokens: 40_000
       reasoning_model: true
-
-
-    supports_tools: true
+    supports:
+      images: false
    costs_per_million_token:
       input: 0.20
       output: 1.50
       cache:
         read: 0.02
     documentation_url: https://docs.x.ai/docs/models/grok-code-fast-1
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 40000
     alternative_keys:
       - grok/grok-code-fast
       - grok/grok-code-fast-1-0825
@@ -52,16 +49,12 @@ xai-models:
     description: Latest advancement in cost-efficient reasoning models with unified architecture. Handles complex requests with deep chain-of-thought reasoning. Features 2M token context window and native tool use.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-reasoning
@@ -74,28 +67,69 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest

+  grok/grok-4-1-fast-reasoning:
+    label: Grok 4.1 Fast (Reasoning)
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: true
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
+  grok/grok-4-1-fast-non-reasoning:
+    label: Grok 4.1 Fast Non-Reasoning
+    description: ""
+    release_date: 2025-11-19
+    open_source: false
+    supports:
+      images: true
+      files: false
+    properties:
+      context_window: 2_000_000
+      max_tokens: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: false
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
     release_date: 2025-09-19
     open_source: false
-
-
-
-    supports_metadata: true
-    supports_files: false
-    available_for_everyone: true
-    ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: false
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning
@@ -108,9 +142,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 2000000
     alternative_keys:
       - grok/grok-4-fast-non-reasoning-latest

@@ -118,13 +149,12 @@ xai-models:
     label: Grok 4
     description: Latest and greatest flagship model offering unparalleled performance in natural language, math and reasoning. The perfect jack of all trades with native tool use and structured outputs support.
     release_date: 2025-07-09
-
-
-
-    supports_tools: true
+    supports:
+      images: true
+      tools: true
     properties:
       context_window: 256_000
-
+      max_tokens: 128_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-0709
@@ -137,9 +167,6 @@ xai-models:
         threshold: 128_000
         input: 6.00
         output: 30.00
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4
       - grok/grok-4-latest
@@ -150,15 +177,15 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
       reasoning_model: true
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.30
       output: 0.50
-
+      cache:
         read: 0.075
     documentation_url: https://docs.x.ai/docs/models/grok-3-mini
     default_parameters:
@@ -188,7 +215,7 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
     costs_per_million_token:
       input: 3.00
@@ -211,10 +238,10 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
     costs_per_million_token:
       input: 2.00
       output: 10.00
@@ -228,9 +255,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -242,10 +269,11 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-
+      max_tokens: null
       training_cutoff: null
-
-
+    supports:
+      images: true
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
@@ -257,9 +285,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-
+      max_tokens: null
       training_cutoff: null
-
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
```
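The pattern across this file (and the other provider configs in the list above) is a schema migration: flat `supports_*` booleans become a nested `supports:` mapping, bookkeeping flags move under `metadata:`, `max_tokens` joins `properties:`, and per-model `default_parameters` blocks are mostly dropped. A minimal sketch of how the new shape reads, assuming PyYAML; the loader and lookups are illustrative, not model_library's own registry code:

```python
# Parse one reshaped entry and read the new nested fields.
import yaml

doc = yaml.safe_load("""
grok/grok-4-fast-reasoning:
  supports:
    images: true
    files: false
  properties:
    context_window: 2_000_000
    max_tokens: 2_000_000
""")

entry = doc["grok/grok-4-fast-reasoning"]
assert entry["supports"]["images"] is True       # replaces the flat supports_* booleans
assert entry["properties"]["max_tokens"] == 2_000_000  # YAML 1.1 ints may contain "_"
```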
model_library/config/zai_models.yaml
CHANGED

```diff
@@ -2,12 +2,13 @@ base-config:
   company: zAI
   open_source: true
   documentation_url: https://docs.z.ai/
-
-
-
+  supports:
+    images: false
+    files: false
+    temperature: true
+    tools: true
+  properties:
     reasoning_model: true
-  supports_temperature: true
-  supports_tools: true
   default_parameters:
     temperature: 0.6
     top_p: 1
@@ -23,14 +24,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.6
       output: 2.2
       cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p5
+      - fireworks/glm-4p5:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19

   zai/glm-4.5-air:
     label: GLM 4.5 Air
@@ -38,14 +42,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.2
       output: 1.1
-    cache:
+      cache:
         read: 0.03
     alternative_keys:
-      - together/zai-org/GLM-4.5-Air-FP8
+      - together/zai-org/GLM-4.5-Air-FP8:
+          costs_per_million_token:
+            input: 0.22
+            output: 0.88

   zai/glm-4.6:
     label: GLM 4.6
@@ -53,13 +60,14 @@ zai-models:
     release_date: 2025-09-30
     properties:
       context_window: 200_000
-
+      max_tokens: 122_880
     costs_per_million_token:
       input: 0.6
       output: 2.2
-    cache:
+      cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p6
-
-
+      - fireworks/glm-4p6:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
```
model_library/exceptions.py
CHANGED
```diff
@@ -5,9 +5,11 @@ from typing import Any, Callable

 import backoff
 from ai21 import TooManyRequestsError as AI21RateLimitError
+from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
+from httpx import ConnectError as HTTPXConnectError
 from httpx import ReadError as HTTPXReadError
 from httpx import RemoteProtocolError
 from openai import APIConnectionError as OpenAIAPIConnectionError
@@ -53,20 +55,6 @@ class MaxOutputTokensExceededError(Exception):
         super().__init__(message or MaxOutputTokensExceededError.DEFAULT_MESSAGE)


-class MaxInputTokensExceededError(Exception):
-    """
-    Raised when the input exceeds the allowed max input tokens limit
-    """
-
-    DEFAULT_MESSAGE: str = (
-        "Input exceeded the maximum allowed input tokens. "
-        "Consider reducing the input size."
-    )
-
-    def __init__(self, message: str | None = None):
-        super().__init__(message or MaxInputTokensExceededError.DEFAULT_MESSAGE)
-
-
 class MaxContextWindowExceededError(Exception):
     """
     Raised when the context window exceeds the allowed max context window limit
@@ -97,7 +85,9 @@ CONTEXT_WINDOW_PATTERN = re.compile(
     r"sent message larger than max|"
     r"input tokens exceeded|"
     r"(messages?|total length).*too long|"
-    r"payload.*too large"
+    r"payload.*too large|"
+    r"string too long|"
+    r"input exceeded the context window"
 )


@@ -166,9 +156,11 @@ RETRIABLE_EXCEPTIONS = [
     OpenAIUnprocessableEntityError,
     OpenAIAPIConnectionError,
     AnthropicRateLimitError,
+    InternalServerError,
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
+    HTTPXConnectError,
     HTTPCoreReadError,
 ]

@@ -186,11 +178,13 @@ RETRIABLE_EXCEPTION_CODES = [
     "connection_error",
     "service_unavailable",
     "rate_limit",
+    "rate limit",
     "internal_error",
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
     "throttlingexception",  # AWS throttling errors
+    "internal server error",
 ]


@@ -239,8 +233,9 @@ def retry_llm_call(
     logger: logging.Logger,
     max_tries: int = RETRY_MAX_TRIES,
     max_time: float | None = None,
-    backoff_callback:
-
+    backoff_callback: (
+        Callable[[int, Exception | None, float, float], None] | None
+    ) = None,
 ):
     def on_backoff(details: Details):
         exception = details.get("exception")
```
model_library/file_utils.py
CHANGED
```diff
@@ -56,7 +56,7 @@ def concat_images(
         new_width = int(combined_image.width * scale_factor)
         new_height = int(combined_image.height * scale_factor)

-        combined_image = combined_image.resize(
+        combined_image = combined_image.resize(  # type: ignore
             (new_width, new_height), Image.Resampling.LANCZOS
         )

```
model_library/providers/amazon.py
CHANGED

```diff
@@ -26,6 +26,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -60,11 +61,13 @@ class AmazonModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-
-
-        self.
-
-
+        self.supports_cache = "amazon" in self.model_name or "claude" in self.model_name
+        self.supports_cache = (
+            self.supports_cache and "v2" not in self.model_name
+        )  # supported but no access yet
+        self.supports_tool_cache = self.supports_cache and "claude" in self.model_name
+
+    cache_control = {"type": "default"}

     @override
     async def parse_input(
@@ -120,6 +123,10 @@ class AmazonModel(LLM):
                 new_input.append(item)

         if content_user:
+            if self.supports_cache:
+                if not isinstance(input[-1], FileBase):
+                    # last item cannot be file
+                    content_user.append({"cachePoint": self.cache_control})
             new_input.append({"role": "user", "content": content_user})

         return new_input
@@ -174,6 +181,8 @@ class AmazonModel(LLM):
                     }
                 }
             )
+        if parsed_tools and self.supports_tool_cache:
+            parsed_tools.append({"cachePoint": self.cache_control})
         return parsed_tools

     @override
@@ -203,8 +212,12 @@ class AmazonModel(LLM):

         if "system_prompt" in kwargs:
             body["system"] = [{"text": kwargs.pop("system_prompt")}]
+            if self.supports_cache:
+                body["system"].append({"cachePoint": self.cache_control})

         if self.reasoning:
+            if self.max_tokens < 1024:
+                self.max_tokens = 2048
             budget_tokens = kwargs.pop(
                 "budget_tokens", get_default_budget_tokens(self.max_tokens)
             )
@@ -244,9 +257,8 @@ class AmazonModel(LLM):
         tool_calls: dict[str, Any] = {}

         messages: dict[str, Any] = {"content": []}
-        input_tokens = 0
-        output_tokens = 0
         stop_reason: str = ""
+        metadata = QueryResultMetadata()

         for chunk in response["stream"]:
             key = list(chunk.keys())[0]
@@ -281,8 +293,16 @@ class AmazonModel(LLM):
                         tool_calls["input"] += delta["toolUse"]["input"]

                 case "metadata":
-
-
+                    metadata = QueryResultMetadata(
+                        in_tokens=value["usage"]["inputTokens"],
+                        out_tokens=value["usage"]["outputTokens"],
+                    )
+                    metadata.cache_read_tokens = value["usage"].get(
+                        "cacheReadInputTokens", None
+                    )
+                    metadata.cache_write_tokens = value["usage"].get(
+                        "cacheWriteInputTokens", None
+                    )

                 case "contentBlockStop":
                     if tool_calls:
@@ -308,7 +328,7 @@ class AmazonModel(LLM):
                case "messageStop":
                    stop_reason = value["stopReason"]

-        return messages, stop_reason,
+        return messages, stop_reason, metadata

     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#
     @override
@@ -326,9 +346,7 @@ class AmazonModel(LLM):
             **body,
         )

-        messages, stop_reason,
-            response
-        )
+        messages, stop_reason, metadata = await self.stream_response(response)

         text = " ".join([i["text"] for i in messages["content"] if "text" in i])
         reasoning = " ".join(
@@ -361,10 +379,7 @@ class AmazonModel(LLM):
         return QueryResult(
             output_text=text,
             reasoning=reasoning,
-            metadata=
-                in_tokens=input_tokens,
-                out_tokens=output_tokens,
-            ),
+            metadata=metadata,
             tool_calls=tool_calls,
             history=[*input, messages],
         )
```
model_library/providers/anthropic.py
CHANGED

```diff
@@ -562,12 +562,8 @@ class AnthropicModel(LLM):

         body = await self.create_body(input, tools=tools, **kwargs)

-        betas = [
-
-            "interleaved-thinking-2025-05-14",
-        ]
-
-        if "claude-sonnet-4-5" in self.model_name:
+        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+        if "sonnet-4-5" in self.model_name:
             betas.append("context-1m-2025-08-07")

         async with self.get_client().beta.messages.stream(
```