model-library 0.1.3-py3-none-any.whl → 0.1.5-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (37)
  1. model_library/base/base.py +2 -0
  2. model_library/base/output.py +24 -9
  3. model_library/base/utils.py +27 -5
  4. model_library/config/README.md +169 -0
  5. model_library/config/ai21labs_models.yaml +11 -11
  6. model_library/config/alibaba_models.yaml +21 -22
  7. model_library/config/all_models.json +4623 -2599
  8. model_library/config/amazon_models.yaml +100 -102
  9. model_library/config/anthropic_models.yaml +43 -52
  10. model_library/config/cohere_models.yaml +25 -24
  11. model_library/config/deepseek_models.yaml +28 -25
  12. model_library/config/dummy_model.yaml +9 -7
  13. model_library/config/fireworks_models.yaml +86 -56
  14. model_library/config/google_models.yaml +146 -126
  15. model_library/config/inception_models.yaml +6 -6
  16. model_library/config/kimi_models.yaml +13 -14
  17. model_library/config/minimax_models.yaml +37 -0
  18. model_library/config/mistral_models.yaml +85 -29
  19. model_library/config/openai_models.yaml +192 -150
  20. model_library/config/perplexity_models.yaml +10 -23
  21. model_library/config/together_models.yaml +115 -104
  22. model_library/config/xai_models.yaml +47 -79
  23. model_library/config/zai_models.yaml +23 -15
  24. model_library/exceptions.py +7 -16
  25. model_library/providers/amazon.py +32 -17
  26. model_library/providers/minimax.py +33 -0
  27. model_library/providers/mistral.py +10 -1
  28. model_library/providers/openai.py +2 -6
  29. model_library/register_models.py +36 -36
  30. model_library/registry_utils.py +78 -16
  31. model_library/utils.py +2 -2
  32. {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/METADATA +2 -2
  33. model_library-0.1.5.dist-info/RECORD +64 -0
  34. model_library-0.1.3.dist-info/RECORD +0 -61
  35. {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/WHEEL +0 -0
  36. {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/licenses/LICENSE +0 -0
  37. {model_library-0.1.3.dist-info → model_library-0.1.5.dist-info}/top_level.txt +0 -0
model_library/config/xai_models.yaml

```diff
@@ -2,20 +2,21 @@ base-config:
   company: xAI
   documentation_url: https://docs.x.ai/docs#models
   open_source: false
-  class_properties:
-    supports_images: true
+  supports:
+    images: true
+    files: false
+    tools: true
+  metadata:
     available_as_evaluator: false
-    supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: true
   properties:
     reasoning_model: false
 
 xai-models:
   base-config:
-    class_properties:
-      supports_temperature: true
+    supports:
+      temperature: true
     costs_per_million_token:
       cache:
         read_discount: 0.25
```
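The recurring change across these config files is a schema migration: the flat `class_properties` block with `supports_*` booleans is split into a `supports:` mapping for capability flags and a `metadata:` mapping for bookkeeping flags like `deprecated`. A minimal sketch of how a loader might read the new shape — `ModelCapabilities` and `load_capabilities` are illustrative names, not part of model-library's API:

```python
# Hypothetical sketch of reading the new `supports:` / `metadata:` layout.
from dataclasses import dataclass

import yaml  # assumes PyYAML is available


@dataclass
class ModelCapabilities:
    images: bool = False
    files: bool = False
    tools: bool = False
    temperature: bool = False
    deprecated: bool = False


def load_capabilities(raw: dict) -> ModelCapabilities:
    # New schema: capability flags live under `supports`,
    # bookkeeping flags under `metadata`.
    supports = raw.get("supports", {})
    metadata = raw.get("metadata", {})
    return ModelCapabilities(
        images=supports.get("images", False),
        files=supports.get("files", False),
        tools=supports.get("tools", False),
        temperature=supports.get("temperature", False),
        deprecated=metadata.get("deprecated", False),
    )


doc = yaml.safe_load("""
base-config:
  supports:
    images: true
    tools: true
  metadata:
    deprecated: false
""")
print(load_capabilities(doc["base-config"]))  # images/tools True, rest False
```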
model_library/config/xai_models.yaml (continued)

```diff
@@ -29,19 +30,16 @@ xai-models:
     release_date: 2025-08-25
     properties:
       context_window: 256_000
-      max_token_output: 40_000
+      max_tokens: 40_000
       reasoning_model: true
-    class_properties:
-      supports_images: false
+    supports:
+      images: false
     costs_per_million_token:
       input: 0.20
       output: 1.50
       cache:
         read: 0.02
     documentation_url: https://docs.x.ai/docs/models/grok-code-fast-1
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 40000
     alternative_keys:
       - grok/grok-code-fast
       - grok/grok-code-fast-1-0825
@@ -51,16 +49,12 @@ xai-models:
     description: Latest advancement in cost-efficient reasoning models with unified architecture. Handles complex requests with deep chain-of-thought reasoning. Features 2M token context window and native tool use.
     release_date: 2025-09-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-reasoning
@@ -73,9 +67,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest
@@ -83,18 +74,14 @@ xai-models:
   grok/grok-4-1-fast-reasoning:
     label: Grok 4.1 Fast (Reasoning)
     description: ""
-    release_date: 2025-10-19
+    release_date: 2025-11-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000 # from openrouter
       training_cutoff: null
       reasoning_model: true
     documentation_url: ""
@@ -107,25 +94,18 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
 
   grok/grok-4-1-fast-non-reasoning:
     label: Grok 4.1 Fast Non-Reasoning
     description: ""
-    release_date: 2025-10-19
+    release_date: 2025-11-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000 # from openrouter
       training_cutoff: null
       reasoning_model: false
     documentation_url: ""
@@ -138,25 +118,18 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
 
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
     release_date: 2025-09-19
     open_source: false
-    class_properties:
-      supports_images: true
-      available_as_evaluator: true
-      supports_metadata: true
-      supports_files: false
-      available_for_everyone: true
-      ignored_for_cost: false
+    supports:
+      images: true
+      files: false
     properties:
       context_window: 2_000_000
-      max_token_output: 2_000_000 # from openrouter
+      max_tokens: 2_000_000
       training_cutoff: null
       reasoning_model: false
     documentation_url: https://docs.x.ai/docs/models/grok-4-fast-non-reasoning
@@ -169,9 +142,6 @@ xai-models:
         threshold: 128_000
         input: 0.4
         output: 1.0
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 2000000
     alternative_keys:
       - grok/grok-4-fast-non-reasoning-latest
 
@@ -179,12 +149,12 @@ xai-models:
     label: Grok 4
     description: Latest and greatest flagship model offering unparalleled performance in natural language, math and reasoning. The perfect jack of all trades with native tool use and structured outputs support.
     release_date: 2025-07-09
-    class_properties:
-      supports_images: true
-      available_for_everyone: false
+    supports:
+      images: true
+      tools: true
     properties:
       context_window: 256_000
-      max_token_output: 128_000
+      max_tokens: 128_000
       training_cutoff: null
       reasoning_model: true
     documentation_url: https://docs.x.ai/docs/models/grok-4-0709
@@ -197,9 +167,6 @@ xai-models:
         threshold: 128_000
         input: 6.00
         output: 30.00
-    default_parameters:
-      temperature: 0.7
-      max_output_tokens: 128000
     alternative_keys:
       - grok/grok-4
       - grok/grok-4-latest
@@ -210,15 +177,15 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
       reasoning_model: true
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 0.30
       output: 0.50
-      cached:
+      cache:
         read: 0.075
     documentation_url: https://docs.x.ai/docs/models/grok-3-mini
     default_parameters:
@@ -248,7 +215,7 @@ xai-models:
     release_date: 2025-04-09
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
     costs_per_million_token:
       input: 3.00
@@ -271,10 +238,10 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
-      supports_images: true
+    supports:
+      images: true
     costs_per_million_token:
       input: 2.00
       output: 10.00
@@ -288,9 +255,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
       context_window: 131_072
-      max_token_output: null
+      max_tokens: null
      training_cutoff: null
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 2.00
@@ -302,10 +269,11 @@ xai-models:
     release_date: 2024-12-12
     properties:
       context_window: 8_192
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
-      supports_images: true
+    supports:
+      images: true
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
@@ -317,9 +285,9 @@ xai-models:
     release_date: 2024-12-11
     properties:
      context_window: 131_072
-      max_token_output: null
+      max_tokens: null
       training_cutoff: null
-    class_properties:
+    metadata:
       deprecated: true
     costs_per_million_token:
       input: 5.00
```
model_library/config/zai_models.yaml

```diff
@@ -2,12 +2,13 @@ base-config:
   company: zAI
   open_source: true
   documentation_url: https://docs.z.ai/
-  class_properties:
-    supports_images: false
-    supports_files: false
+  supports:
+    images: false
+    files: false
+    temperature: true
+    tools: true
+  properties:
     reasoning_model: true
-    supports_temperature: true
-    supports_tools: true
   default_parameters:
     temperature: 0.6
     top_p: 1
@@ -23,14 +24,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-      max_token_output: 81_920
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.6
       output: 2.2
       cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p5
+      - fireworks/glm-4p5:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
 
   zai/glm-4.5-air:
     label: GLM 4.5 Air
@@ -38,14 +42,17 @@ zai-models:
     release_date: 2025-07-28
     properties:
       context_window: 128_000
-      max_token_output: 81_920
+      max_tokens: 81_920
     costs_per_million_token:
       input: 0.2
       output: 1.1
-    cache:
+      cache:
         read: 0.03
     alternative_keys:
-      - together/zai-org/GLM-4.5-Air-FP8
+      - together/zai-org/GLM-4.5-Air-FP8:
+          costs_per_million_token:
+            input: 0.22
+            output: 0.88
 
   zai/glm-4.6:
     label: GLM 4.6
@@ -53,13 +60,14 @@ zai-models:
     release_date: 2025-09-30
     properties:
       context_window: 200_000
-      max_token_output: 122_880
+      max_tokens: 122_880
     costs_per_million_token:
       input: 0.6
       output: 2.2
-    cache:
+      cache:
         read: 0.11
     alternative_keys:
-      - fireworks/glm-4p6
-
-
+      - fireworks/glm-4p6:
+          costs_per_million_token:
+            input: 0.55
+            output: 2.19
```
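The `alternative_keys` entries change shape here: a plain string key becomes a single-key mapping carrying per-provider cost overrides. A loader that accepts both forms could look like this sketch — `normalize_alternative_key` is an illustrative helper, not model-library's actual API:

```python
# Hypothetical sketch: accept both the old (string) and new (mapping with
# overrides) forms of an `alternative_keys` entry.
from typing import Any


def normalize_alternative_key(entry: str | dict[str, Any]) -> tuple[str, dict[str, Any]]:
    """Return (key, overrides); overrides are empty for plain-string entries."""
    if isinstance(entry, str):
        return entry, {}
    # New form: a single-key mapping, e.g.
    # {"fireworks/glm-4p5": {"costs_per_million_token": {"input": 0.55, ...}}}
    (key, overrides), = entry.items()
    return key, overrides or {}


print(normalize_alternative_key("fireworks/glm-4p5"))
print(normalize_alternative_key(
    {"fireworks/glm-4p5": {"costs_per_million_token": {"input": 0.55, "output": 2.19}}}
))
```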
model_library/exceptions.py

```diff
@@ -9,6 +9,7 @@ from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
+from httpx import ConnectError as HTTPXConnectError
 from httpx import ReadError as HTTPXReadError
 from httpx import RemoteProtocolError
 from openai import APIConnectionError as OpenAIAPIConnectionError
@@ -54,20 +55,6 @@ class MaxOutputTokensExceededError(Exception):
         super().__init__(message or MaxOutputTokensExceededError.DEFAULT_MESSAGE)
 
 
-class MaxInputTokensExceededError(Exception):
-    """
-    Raised when the input exceeds the allowed max input tokens limit
-    """
-
-    DEFAULT_MESSAGE: str = (
-        "Input exceeded the maximum allowed input tokens. "
-        "Consider reducing the input size."
-    )
-
-    def __init__(self, message: str | None = None):
-        super().__init__(message or MaxInputTokensExceededError.DEFAULT_MESSAGE)
-
-
 class MaxContextWindowExceededError(Exception):
     """
     Raised when the context window exceeds the allowed max context window limit
@@ -98,7 +85,9 @@ CONTEXT_WINDOW_PATTERN = re.compile(
     r"sent message larger than max|"
     r"input tokens exceeded|"
     r"(messages?|total length).*too long|"
-    r"payload.*too large"
+    r"payload.*too large|"
+    r"string too long|"
+    r"input exceeded the context window"
 )
 
 
```
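The extended `CONTEXT_WINDOW_PATTERN` now also catches "string too long" and "input exceeded the context window" provider messages. A quick sketch of the pattern in isolation — the `re.IGNORECASE` flag and lower-casing are assumptions about how the library normalizes error text:

```python
import re

# Stand-alone copy of the extended pattern; the flag is an assumption.
CONTEXT_WINDOW_PATTERN = re.compile(
    r"sent message larger than max|"
    r"input tokens exceeded|"
    r"(messages?|total length).*too long|"
    r"payload.*too large|"
    r"string too long|"
    r"input exceeded the context window",
    re.IGNORECASE,
)

for msg in (
    "400: string too long, please shorten it",
    "Input exceeded the context window of the model",
    "sent message larger than max allowed",
):
    # search() rather than match(): the phrase can appear anywhere in the error
    assert CONTEXT_WINDOW_PATTERN.search(msg), msg
print("all messages classified as context-window errors")
```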
model_library/exceptions.py (continued)

```diff
@@ -171,6 +160,7 @@ RETRIABLE_EXCEPTIONS = [
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
+    HTTPXConnectError,
     HTTPCoreReadError,
 ]
 
@@ -188,12 +178,13 @@ RETRIABLE_EXCEPTION_CODES = [
     "connection_error",
     "service_unavailable",
     "rate_limit",
+    "rate limit",
     "internal_error",
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
-    "throttlingexception",  # AWS throttling errors
     "internal server error",
+    "InternalServerError",
 ]
 
 
```
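Dropping `"throttlingexception"` makes sense if these codes are matched as case-normalized substrings, since any message containing it already contains `"throttling"`. A hedged sketch of that matching style — `is_retriable_code` is an illustrative helper; the library's real lookup may normalize differently, which would explain keeping both `"internal server error"` and `"InternalServerError"`:

```python
RETRIABLE_EXCEPTION_CODES = [
    "connection_error",
    "service_unavailable",
    "rate_limit",
    "rate limit",
    "internal_error",
    "server_error",
    "overloaded",
    "throttling",
    "internal server error",
    "InternalServerError",
]


def is_retriable_code(error_text: str) -> bool:
    # Substring match: "ThrottlingException" already contains "throttling"
    # once lower-cased, so a separate "throttlingexception" entry is redundant.
    lowered = error_text.lower()
    return any(code.lower() in lowered for code in RETRIABLE_EXCEPTION_CODES)


assert is_retriable_code("ThrottlingException: Rate exceeded")
assert is_retriable_code("Too Many Requests: rate limit reached")
```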
model_library/providers/amazon.py

```diff
@@ -26,6 +26,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -60,11 +61,13 @@ class AmazonModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-        if self.model_name.endswith("-thinking"):
-            self.model_name = self.model_name.replace("-thinking", "")
-            self.reasoning = True
-            if self.max_tokens < 1024:
-                self.max_tokens = 2048
+        self.supports_cache = "amazon" in self.model_name or "claude" in self.model_name
+        self.supports_cache = (
+            self.supports_cache and "v2" not in self.model_name
+        )  # supported but no access yet
+        self.supports_tool_cache = self.supports_cache and "claude" in self.model_name
+
+    cache_control = {"type": "default"}
 
     @override
     async def parse_input(
@@ -120,6 +123,10 @@ class AmazonModel(LLM):
                 new_input.append(item)
 
         if content_user:
+            if self.supports_cache:
+                if not isinstance(input[-1], FileBase):
+                    # last item cannot be file
+                    content_user.append({"cachePoint": self.cache_control})
             new_input.append({"role": "user", "content": content_user})
 
         return new_input
```
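The new `supports_cache` path injects Bedrock prompt-caching checkpoints into the Converse payload. A hedged sketch of what the resulting user message looks like on the wire — the request shape follows the Bedrock Converse API, and the text content is illustrative:

```python
# Illustrative Converse-style message after parse_input appends a cache point.
# The {"cachePoint": {"type": "default"}} block asks Bedrock to cache the
# prompt prefix up to this marker; it must come after the content it caches,
# which is why the code skips it when the final input item is a file.
message = {
    "role": "user",
    "content": [
        {"text": "Summarize the attached report."},
        {"cachePoint": {"type": "default"}},
    ],
}
```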
model_library/providers/amazon.py (continued)

```diff
@@ -174,6 +181,8 @@ class AmazonModel(LLM):
                 }
             }
         )
+        if parsed_tools and self.supports_tool_cache:
+            parsed_tools.append({"cachePoint": self.cache_control})
         return parsed_tools
 
     @override
@@ -203,8 +212,12 @@ class AmazonModel(LLM):
 
         if "system_prompt" in kwargs:
             body["system"] = [{"text": kwargs.pop("system_prompt")}]
+            if self.supports_cache:
+                body["system"].append({"cachePoint": self.cache_control})
 
         if self.reasoning:
+            if self.max_tokens < 1024:
+                self.max_tokens = 2048
             budget_tokens = kwargs.pop(
                 "budget_tokens", get_default_budget_tokens(self.max_tokens)
             )
@@ -244,9 +257,8 @@ class AmazonModel(LLM):
         tool_calls: dict[str, Any] = {}
 
         messages: dict[str, Any] = {"content": []}
-        input_tokens = 0
-        output_tokens = 0
         stop_reason: str = ""
+        metadata = QueryResultMetadata()
 
         for chunk in response["stream"]:
             key = list(chunk.keys())[0]
@@ -281,8 +293,16 @@ class AmazonModel(LLM):
                        tool_calls["input"] += delta["toolUse"]["input"]
 
                 case "metadata":
-                    input_tokens = value["usage"]["inputTokens"]
-                    output_tokens = value["usage"]["outputTokens"]
+                    metadata = QueryResultMetadata(
+                        in_tokens=value["usage"]["inputTokens"],
+                        out_tokens=value["usage"]["outputTokens"],
+                    )
+                    metadata.cache_read_tokens = value["usage"].get(
+                        "cacheReadInputTokens", None
+                    )
+                    metadata.cache_write_tokens = value["usage"].get(
+                        "cacheWriteInputTokens", None
+                    )
 
                 case "contentBlockStop":
                     if tool_calls:
@@ -308,7 +328,7 @@ class AmazonModel(LLM):
                 case "messageStop":
                     stop_reason = value["stopReason"]
 
-        return messages, stop_reason, input_tokens, output_tokens
+        return messages, stop_reason, metadata
 
     # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/bedrock-runtime/client/converse.html#
     @override
@@ -326,9 +346,7 @@ class AmazonModel(LLM):
             **body,
         )
 
-        messages, stop_reason, input_tokens, output_tokens = await self.stream_response(
-            response
-        )
+        messages, stop_reason, metadata = await self.stream_response(response)
 
         text = " ".join([i["text"] for i in messages["content"] if "text" in i])
         reasoning = " ".join(
@@ -361,10 +379,7 @@ class AmazonModel(LLM):
         return QueryResult(
             output_text=text,
             reasoning=reasoning,
-            metadata=QueryResultMetadata(
-                in_tokens=input_tokens,
-                out_tokens=output_tokens,
-            ),
+            metadata=metadata,
             tool_calls=tool_calls,
             history=[*input, messages],
         )
```
model_library/providers/minimax.py (new file)

```diff
@@ -0,0 +1,33 @@
+from typing import Literal
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.providers.openai import OpenAIModel
+from model_library.register_models import register_provider
+from model_library.utils import create_openai_client_with_defaults
+
+
+@register_provider("minimax")
+class MinimaxModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["minimax"] = "minimax",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        self.delegate = OpenAIModel(
+            model_name=self.model_name,
+            provider=self.provider,
+            config=config,
+            custom_client=create_openai_client_with_defaults(
+                api_key=model_library_settings.MINIMAX_API_KEY,
+                base_url="https://api.minimax.io/v1",
+            ),
+            use_completions=True,
+        )
```
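The new MiniMax provider is a thin delegate over MiniMax's OpenAI-compatible chat completions endpoint. A hedged usage sketch — the model key and the `query` call shape are assumptions, not confirmed by this diff; the real keys live in the new `minimax_models.yaml`:

```python
# Hypothetical usage sketch; model key and query interface are assumptions.
import asyncio

from model_library.providers.minimax import MinimaxModel


async def main() -> None:
    # Requires MINIMAX_API_KEY to be set in model-library's settings/env.
    model = MinimaxModel("MiniMax-M1")
    result = await model.query("Say hello in one short sentence.")
    print(result.output_text)


asyncio.run(main())
```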
model_library/providers/mistral.py

```diff
@@ -29,6 +29,7 @@ from model_library.base import (
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
+    ModelNoOutputError,
 )
 from model_library.file_utils import trim_images
 from model_library.register_models import register_provider
@@ -250,9 +251,17 @@ class MistralModel(LLM):
             self.logger.error(f"Error: {e}", exc_info=True)
             raise e
 
-        if finish_reason == "length" and not text and not reasoning:
+        if (
+            finish_reason == "length"
+            and not text
+            and not reasoning
+            and not raw_tool_calls
+        ):
             raise MaxOutputTokensExceededError()
 
+        if not text and not reasoning and not raw_tool_calls:
+            raise ModelNoOutputError()
+
         tool_calls: list[ToolCall] = []
 
         for tool_call in raw_tool_calls or []:
```
model_library/providers/openai.py

```diff
@@ -521,10 +521,6 @@ class OpenAIModel(LLM):
         metadata: QueryResultMetadata = QueryResultMetadata()
         raw_tool_calls: list[ChatCompletionMessageToolCall] = []
 
-        # enable usage data in streaming responses
-        if "stream_options" not in body:
-            body["stream_options"] = {"include_usage": True}
-
         stream = await self.get_client().chat.completions.create(
             **body,  # pyright: ignore[reportAny]
             stream=True,
@@ -587,7 +583,7 @@ class OpenAIModel(LLM):
                 cache_read_tokens = (
                     chunk.usage.prompt_tokens_details.cached_tokens or 0
                     if chunk.usage.prompt_tokens_details
-                    else 0
+                    else getattr(chunk.usage, "cached_tokens", 0)  # for kimi
                 )
                 metadata = QueryResultMetadata(
                     in_tokens=chunk.usage.prompt_tokens - cache_read_tokens,
```
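The `getattr` fallback covers OpenAI-compatible backends (Kimi here) that report cached tokens as a top-level `usage.cached_tokens` field instead of under `usage.prompt_tokens_details`. A small sketch of the two shapes — the `SimpleNamespace` objects stand in for the real SDK usage objects:

```python
# Illustrative only: SimpleNamespace stands in for the SDK's usage objects.
from types import SimpleNamespace

openai_style = SimpleNamespace(
    prompt_tokens=100,
    prompt_tokens_details=SimpleNamespace(cached_tokens=40),
)
kimi_style = SimpleNamespace(
    prompt_tokens=100,
    cached_tokens=40,
    prompt_tokens_details=None,
)

for usage in (openai_style, kimi_style):
    cache_read_tokens = (
        usage.prompt_tokens_details.cached_tokens or 0
        if usage.prompt_tokens_details
        else getattr(usage, "cached_tokens", 0)
    )
    print(cache_read_tokens)  # 40 in both cases
```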
model_library/providers/openai.py (continued)

```diff
@@ -625,7 +621,7 @@ class OpenAIModel(LLM):
             if raw_tool_calls
             else None,
         )
-        if hasattr(final_message, "reasoning_content") and reasoning_text:
+        if reasoning_text:
             setattr(final_message, "reasoning_content", reasoning_text)
 
         return QueryResult(
```