PyPI - model-library - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl - Mend

model-library 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

model_library/base/__init__.py +7 -0
model_library/{base.py → base/base.py} +58 -429
model_library/base/batch.py +121 -0
model_library/base/delegate_only.py +94 -0
model_library/base/input.py +100 -0
model_library/base/output.py +229 -0
model_library/base/utils.py +43 -0
model_library/config/ai21labs_models.yaml +1 -0
model_library/config/all_models.json +461 -36
model_library/config/anthropic_models.yaml +30 -3
model_library/config/deepseek_models.yaml +3 -1
model_library/config/google_models.yaml +49 -0
model_library/config/openai_models.yaml +43 -4
model_library/config/together_models.yaml +1 -0
model_library/config/xai_models.yaml +63 -3
model_library/exceptions.py +8 -2
model_library/file_utils.py +1 -1
model_library/providers/__init__.py +0 -0
model_library/providers/ai21labs.py +2 -0
model_library/providers/alibaba.py +16 -78
model_library/providers/amazon.py +3 -0
model_library/providers/anthropic.py +215 -8
model_library/providers/azure.py +2 -0
model_library/providers/cohere.py +14 -80
model_library/providers/deepseek.py +14 -90
model_library/providers/fireworks.py +17 -81
model_library/providers/google/google.py +55 -47
model_library/providers/inception.py +15 -83
model_library/providers/kimi.py +15 -83
model_library/providers/mistral.py +2 -0
model_library/providers/openai.py +10 -2
model_library/providers/perplexity.py +12 -79
model_library/providers/together.py +19 -210
model_library/providers/vals.py +2 -0
model_library/providers/xai.py +2 -0
model_library/providers/zai.py +15 -83
model_library/register_models.py +75 -57
model_library/registry_utils.py +5 -5
model_library/utils.py +3 -28
{model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/METADATA +2 -3
model_library-0.1.3.dist-info/RECORD +61 -0
model_library-0.1.1.dist-info/RECORD +0 -54
{model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/WHEEL +0 -0
{model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/licenses/LICENSE +0 -0
{model_library-0.1.1.dist-info → model_library-0.1.3.dist-info}/top_level.txt +0 -0

model_library/config/anthropic_models.yaml CHANGED Viewed

@@ -23,6 +23,32 @@ base-config:
     temperature: 1
 claude-4-models:
+  base-config:
+    class_properties:
+      supports_batch_requests: true
+  anthropic/claude-opus-4-5-20251101:
+    label: Claude Opus 4.5 (Nonthinking)
+    release_date: 2025-11-24
+    properties:
+      context_window: 200_000
+      max_token_output: 64_000
+      extending_thinking: 64_000
+    class_properties:
+      available_for_everyone: false
+    default_parameters:
+      max_output_tokens: 64_000
+    costs_per_million_token:
+      input: 15.0
+      output: 75.0
+      cache:
+        read: 1.5
+        write: 18.75
+    alternative_keys:
+      - anthropic/claude-opus-4-5-20251101-thinking:
+          properties:
+            reasoning_model: true
   anthropic/claude-opus-4-1-20250805:
     label: Claude Opus 4.1 (Nonthinking)
     description: Advanced model for specialized complex
@@ -193,11 +219,12 @@ claude-3-5-models:
     alternative_keys:
       - anthropic/claude-3-5-sonnet-latest
       - anthropic/claude-3.5-sonnet-latest
   anthropic/claude-3-5-sonnet-20240620:
     label: Claude 3.5 Sonnet
     release_date: 2024-06-20
-    description: Claude Sonnet 3.5 (June 2024) variant for code and content generation,
+    description:
+      Claude Sonnet 3.5 (June 2024) variant for code and content generation,
       multilingual and vision-capable, deprecated.
     class_properties:
       deprecated: true
@@ -335,7 +362,7 @@ claude-2-models:
     costs_per_million_token:
       input: 8.0
       output: 24.0
   anthropic/claude-1.3:
     label: Claude 1.3
     release_date: null

model_library/config/deepseek_models.yaml CHANGED Viewed

@@ -10,7 +10,7 @@ base-config:
     ignored_for_cost: false
   properties:
     reasoning_model: false
 deepseek-v3p2-exp-models:
   base-config:
     class_properties:
@@ -23,6 +23,8 @@ deepseek-v3p2-exp-models:
     costs_per_million_token:
       input: 0.28
       output: 0.42
+      cache:
+        read_discount: 0.1
   deepseek/deepseek-chat:
     label: DeepSeek V3.2-Exp (Nonthinking)

model_library/config/google_models.yaml CHANGED Viewed

@@ -31,6 +31,55 @@ gemma-models:
       input: 0.00
       output: 0.00
+gemini-3-models:
+  base-config:
+    properties:
+      context_window: 1_048_576
+      max_token_output: 8_192
+      training_cutoff: "2025-01"
+    class_properties:
+      supports_images: true
+      supports_files: true
+      supports_videos: true
+      supports_tools: true
+      supports_batch_requests: true
+      supports_temperature: true
+    costs_per_million_token:
+      cache:
+        read_discount: 0.1
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+  google/gemini-3-pro-preview:
+    label: Gemini 3 Pro (11/25)
+    description: Gemini 3 Pro, Google's most powerful model.
+    release_date: 2025-11-18
+    properties:
+      context_window: 1048576
+      max_token_output: 65536
+      training_cutoff: "2025-01"
+      reasoning_model: true
+    class_properties:
+      supports_images: true
+      supports_files: true
+      supports_videos: true
+      supports_tools: true
+      supports_batch_requests: true
+      supports_temperature: true
+    costs_per_million_token:
+      input: 2
+      output: 12
+      cache:
+        read: 0.20
+      context:
+        threshold: 200_000
+        input: 2.5
+        output: 15.0
+        cache:
+          read: 0.40
 gemini-2.5-models:
   base-config:
     properties:

model_library/config/openai_models.yaml CHANGED Viewed

@@ -31,6 +31,31 @@ gpt-5-models:
       supports_temperature: false
       supports_files: true
       supports_tools: true
+  openai/gpt-5.1-codex:
+    label: GPT 5.1 Codex
+    documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex
+    description: OpenAI's latest coding model
+    release_date: 2025-11-13
+    costs_per_million_token:
+      input: 1.25
+      output: 10.0
+      cache:
+        read: 0.125
+    default_parameters:
+      max_output_tokens: 128_000
+  openai/gpt-5.1-codex-mini:
+    label: GPT 5.1 Codex Mini
+    documentation_url: https://platform.openai.com/docs/models/gpt-5.1-codex-mini
+    description: OpenAI's miniature coding model
+    release_date: 2025-11-13
+    costs_per_million_token:
+      input: 0.25
+      output: 2.00
+      cache:
+        read: 0.025
+    default_parameters:
+      max_output_tokens: 128_000
   openai/gpt-5-codex:
     label: GPT 5 Codex
@@ -48,7 +73,24 @@ gpt-5-models:
       available_as_evaluator: true
       supports_images: true
     default_parameters:
-      temperature: 1
+      max_output_tokens: 128_000
+  openai/gpt-5.1-2025-11-13:
+    label: GPT 5.1
+    documentation_url: https://platform.openai.com/docs/models/gpt-5.1
+    description: GPT-5.1 is OpenAI's flagship model for coding and agentic tasks with configurable reasoning and non-reasoning effort.
+    release_date: 2025-11-13
+    costs_per_million_token:
+      input: 1.25
+      output: 10
+      cache:
+        read: 0.125
+    properties:
+      training_cutoff: "2024-09"
+    class_properties:
+      available_as_evaluator: true
+      supports_images: true
+    default_parameters:
       max_output_tokens: 128_000
   openai/gpt-5-2025-08-07:
@@ -67,7 +109,6 @@ gpt-5-models:
       available_as_evaluator: true
       supports_images: true
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     alternative_keys:
       - azure/gpt-5-2025-08-07
@@ -85,7 +126,6 @@ gpt-5-models:
     properties:
       training_cutoff: "2024-05"
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     class_properties:
       supports_images: true
@@ -105,7 +145,6 @@ gpt-5-models:
     properties:
       training_cutoff: "2024-05"
     default_parameters:
-      temperature: 1
       max_output_tokens: 128_000
     class_properties:
       supports_images: true

model_library/config/together_models.yaml CHANGED Viewed

@@ -28,6 +28,7 @@ kimi-models:
       supports_temperature: true
     default_parameters:
       temperature: 0.3
+      max_output_tokens: 16_384
   together/moonshotai/Kimi-K2-Instruct:
     label: Kimi K2 Instruct

model_library/config/xai_models.yaml CHANGED Viewed

@@ -8,7 +8,7 @@ base-config:
     supports_files: false
     available_for_everyone: true
     ignored_for_cost: false
-    supports_tools: false
+    supports_tools: true
   properties:
     reasoning_model: false
@@ -33,7 +33,6 @@ xai-models:
       reasoning_model: true
     class_properties:
       supports_images: false
-      supports_tools: true
     costs_per_million_token:
       input: 0.20
       output: 1.50
@@ -81,6 +80,68 @@ xai-models:
       - grok/grok-4-fast
       - grok/grok-4-fast-reasoning-latest
+  grok/grok-4-1-fast-reasoning:
+    label: Grok 4.1 Fast (Reasoning)
+    description: ""
+    release_date: 2025-10-19
+    open_source: false
+    class_properties:
+      supports_images: true
+      available_as_evaluator: true
+      supports_metadata: true
+      supports_files: false
+      available_for_everyone: true
+      ignored_for_cost: false
+    properties:
+      context_window: 2_000_000
+      max_token_output: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: true
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+    default_parameters:
+      temperature: 0.7
+      max_output_tokens: 128000
+  grok/grok-4-1-fast-non-reasoning:
+    label: Grok 4.1 Fast Non-Reasoning
+    description: ""
+    release_date: 2025-10-19
+    open_source: false
+    class_properties:
+      supports_images: true
+      available_as_evaluator: true
+      supports_metadata: true
+      supports_files: false
+      available_for_everyone: true
+      ignored_for_cost: false
+    properties:
+      context_window: 2_000_000
+      max_token_output: 2_000_000 # from openrouter
+      training_cutoff: null
+      reasoning_model: false
+    documentation_url: ""
+    costs_per_million_token:
+      input: 0.20
+      output: 0.5
+      cache:
+        read: 0.05
+      context:
+        threshold: 128_000
+        input: 0.4
+        output: 1.0
+    default_parameters:
+      temperature: 0.7
+      max_output_tokens: 128000
   grok/grok-4-fast-non-reasoning:
     label: Grok 4 Fast (Non-Reasoning)
     description: Cost-efficient model focused on speed and efficiency for straightforward tasks like summarization or classification without deep logical processing. Unified architecture with reasoning variant, steered via system prompts.
@@ -121,7 +182,6 @@ xai-models:
     class_properties:
       supports_images: true
       available_for_everyone: false
-      supports_tools: true
     properties:
       context_window: 256_000
       max_token_output: 128_000

model_library/exceptions.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, Callable
 import backoff
 from ai21 import TooManyRequestsError as AI21RateLimitError
+from anthropic import InternalServerError
 from anthropic import RateLimitError as AnthropicRateLimitError
 from backoff._typing import Details
 from httpcore import ReadError as HTTPCoreReadError
@@ -166,6 +167,7 @@ RETRIABLE_EXCEPTIONS = [
     OpenAIUnprocessableEntityError,
     OpenAIAPIConnectionError,
     AnthropicRateLimitError,
+    InternalServerError,
     AI21RateLimitError,
     RemoteProtocolError,  # httpx connection closing when running models from sdk
     HTTPXReadError,
@@ -189,6 +191,9 @@ RETRIABLE_EXCEPTION_CODES = [
     "internal_error",
     "server_error",
     "overloaded",
+    "throttling",  # AWS throttling errors
+    "throttlingexception",  # AWS throttling errors
+    "internal server error",
 ]
@@ -237,8 +242,9 @@ def retry_llm_call(
     logger: logging.Logger,
     max_tries: int = RETRY_MAX_TRIES,
     max_time: float | None = None,
-    backoff_callback: Callable[[int, Exception | None, float, float], None]
-    | None = None,
+    backoff_callback: (
+        Callable[[int, Exception | None, float, float], None] | None
+    ) = None,
 ):
     def on_backoff(details: Details):
         exception = details.get("exception")

model_library/file_utils.py CHANGED Viewed

@@ -56,7 +56,7 @@ def concat_images(
         new_width = int(combined_image.width * scale_factor)
         new_height = int(combined_image.height * scale_factor)
-        combined_image = combined_image.resize(
+        combined_image = combined_image.resize(  # type: ignore
             (new_width, new_height), Image.Resampling.LANCZOS
         )

model_library/providers/__init__.py ADDED Viewed

File without changes

model_library/providers/ai21labs.py CHANGED Viewed

@@ -26,9 +26,11 @@ from model_library.exceptions import (
     MaxOutputTokensExceededError,
     ModelNoOutputError,
 )
+from model_library.register_models import register_provider
 from model_library.utils import default_httpx_client
+@register_provider("ai21labs")
 class AI21LabsModel(LLM):
     _client: AsyncAI21Client | None = None

model_library/providers/alibaba.py CHANGED Viewed

@@ -1,29 +1,21 @@
-import io
-from typing import Any, Literal, Sequence
+from typing import Literal
 from typing_extensions import override
 from model_library import model_library_settings
 from model_library.base import (
-    LLM,
-    FileInput,
-    FileWithId,
-    InputItem,
+    DelegateOnly,
     LLMConfig,
-    QueryResult,
     QueryResultCost,
     QueryResultMetadata,
-    ToolDefinition,
 )
 from model_library.providers.openai import OpenAIModel
+from model_library.register_models import register_provider
 from model_library.utils import create_openai_client_with_defaults
-class AlibabaModel(LLM):
-    @override
-    def get_client(self) -> None:
-        raise NotImplementedError("Not implemented")
+@register_provider("alibaba")
+class AlibabaModel(DelegateOnly):
     def __init__(
         self,
         model_name: str,
@@ -32,23 +24,20 @@ class AlibabaModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-        self.native: bool = False
-        self.delegate: OpenAIModel | None = (
-            None
-            if self.native
-            else OpenAIModel(
-                model_name=model_name,
-                provider=provider,
-                config=config,
-                custom_client=create_openai_client_with_defaults(
-                    api_key=model_library_settings.DASHSCOPE_API_KEY,
-                    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
-                ),
-                use_completions=True,
-            )
+        # https://www.alibabacloud.com/help/en/model-studio/first-api-call-to-qwen
+        self.delegate = OpenAIModel(
+            model_name=self.model_name,
+            provider=self.provider,
+            config=config,
+            custom_client=create_openai_client_with_defaults(
+                api_key=model_library_settings.DASHSCOPE_API_KEY,
+                base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+            ),
+            use_completions=True,
         )
+    @override
     async def _calculate_cost(
         self,
         metadata: QueryResultMetadata,
@@ -94,54 +83,3 @@ class AlibabaModel(LLM):
             else None,
             cache_write=None,
         )
-    @override
-    async def parse_input(
-        self,
-        input: Sequence[InputItem],
-        **kwargs: Any,
-    ) -> Any:
-        raise NotImplementedError()
-    @override
-    async def parse_image(
-        self,
-        image: FileInput,
-    ) -> Any:
-        raise NotImplementedError()
-    @override
-    async def parse_file(
-        self,
-        file: FileInput,
-    ) -> Any:
-        raise NotImplementedError()
-    @override
-    async def parse_tools(
-        self,
-        tools: list[ToolDefinition],
-    ) -> Any:
-        raise NotImplementedError()
-    @override
-    async def upload_file(
-        self,
-        name: str,
-        mime: str,
-        bytes: io.BytesIO,
-        type: Literal["image", "file"] = "file",
-    ) -> FileWithId:
-        raise NotImplementedError()
-    @override
-    async def _query_impl(
-        self,
-        input: Sequence[InputItem],
-        *,
-        tools: list[ToolDefinition],
-        **kwargs: object,
-    ) -> QueryResult:
-        if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
-        raise NotImplementedError()

model_library/providers/amazon.py CHANGED Viewed

@@ -31,8 +31,11 @@ from model_library.exceptions import (
     MaxOutputTokensExceededError,
 )
 from model_library.model_utils import get_default_budget_tokens
+from model_library.register_models import register_provider
+@register_provider("amazon")
+@register_provider("bedrock")
 class AmazonModel(LLM):
     _client: BaseClient | None = None

model-library 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl

model-library 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl