model-library 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +141 -62
- model_library/base/delegate_only.py +77 -10
- model_library/base/output.py +43 -0
- model_library/base/utils.py +35 -0
- model_library/config/alibaba_models.yaml +49 -57
- model_library/config/all_models.json +353 -120
- model_library/config/anthropic_models.yaml +2 -1
- model_library/config/kimi_models.yaml +30 -3
- model_library/config/mistral_models.yaml +2 -0
- model_library/config/openai_models.yaml +15 -23
- model_library/config/together_models.yaml +2 -0
- model_library/config/xiaomi_models.yaml +43 -0
- model_library/config/zai_models.yaml +27 -3
- model_library/exceptions.py +3 -77
- model_library/providers/ai21labs.py +12 -8
- model_library/providers/alibaba.py +17 -8
- model_library/providers/amazon.py +49 -16
- model_library/providers/anthropic.py +128 -48
- model_library/providers/azure.py +22 -10
- model_library/providers/cohere.py +7 -7
- model_library/providers/deepseek.py +8 -8
- model_library/providers/fireworks.py +7 -8
- model_library/providers/google/batch.py +14 -10
- model_library/providers/google/google.py +57 -30
- model_library/providers/inception.py +7 -7
- model_library/providers/kimi.py +18 -8
- model_library/providers/minimax.py +15 -17
- model_library/providers/mistral.py +20 -8
- model_library/providers/openai.py +99 -22
- model_library/providers/openrouter.py +34 -0
- model_library/providers/perplexity.py +7 -7
- model_library/providers/together.py +7 -8
- model_library/providers/vals.py +12 -6
- model_library/providers/vercel.py +34 -0
- model_library/providers/xai.py +47 -42
- model_library/providers/xiaomi.py +34 -0
- model_library/providers/zai.py +38 -8
- model_library/register_models.py +5 -0
- model_library/registry_utils.py +48 -17
- model_library/retriers/__init__.py +0 -0
- model_library/retriers/backoff.py +73 -0
- model_library/retriers/base.py +225 -0
- model_library/retriers/token.py +427 -0
- model_library/retriers/utils.py +11 -0
- model_library/settings.py +1 -1
- model_library/utils.py +17 -7
- {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/METADATA +2 -1
- model_library-0.1.9.dist-info/RECORD +73 -0
- {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/WHEEL +1 -1
- model_library-0.1.7.dist-info/RECORD +0 -64
- {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/top_level.txt +0 -0

model_library/providers/anthropic.py
CHANGED

@@ -1,16 +1,20 @@
+import datetime
 import io
 import logging
+import time
 from typing import Any, Literal, Sequence, cast
 
-from anthropic import AsyncAnthropic
+from anthropic import APIConnectionError, AsyncAnthropic
 from anthropic.types.beta.beta_tool_use_block import BetaToolUseBlock
 from anthropic.types.beta.parsed_beta_message import ParsedBetaMessage
+from pydantic import SecretStr
 from typing_extensions import override
 
 from model_library import model_library_settings
 from model_library.base import (
     LLM,
     BatchResult,
+    DelegateConfig,
     FileBase,
     FileInput,
     FileWithBase64,
@@ -19,9 +23,11 @@ from model_library.base import (
     InputItem,
     LLMBatchMixin,
     LLMConfig,
+    ProviderConfig,
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,
+    RateLimit,
     RawInput,
     RawResponse,
     TextInput,
@@ -31,6 +37,7 @@ from model_library.base import (
     ToolResult,
 )
 from model_library.exceptions import (
+    ImmediateRetryException,
     MaxOutputTokensExceededError,
     NoMatchingToolCallError,
 )
@@ -38,11 +45,15 @@ from model_library.model_utils import get_default_budget_tokens
 from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
 from model_library.utils import (
-
-    default_httpx_client,
+    create_anthropic_client_with_defaults,
 )
 
 
+class AnthropicConfig(ProviderConfig):
+    supports_compute_effort: bool = False
+    supports_auto_thinking: bool = False
+
+
 class AnthropicBatchMixin(LLMBatchMixin):
     """Batch processing support for Anthropic's Message Batches API."""
 
@@ -246,21 +257,27 @@ class AnthropicBatchMixin(LLMBatchMixin):
 
 @register_provider("anthropic")
 class AnthropicModel(LLM):
-
+    provider_config = AnthropicConfig()
+
+    def _get_default_api_key(self) -> str:
+        if self.delegate_config:
+            return self.delegate_config.api_key.get_secret_value()
+        return model_library_settings.ANTHROPIC_API_KEY
 
     @override
-    def get_client(self) -> AsyncAnthropic:
-        if self.
-
-        if not AnthropicModel._client:
+    def get_client(self, api_key: str | None = None) -> AsyncAnthropic:
+        if not self.has_client():
+            assert api_key
             headers: dict[str, str] = {}
-
-
-
-
+            client = create_anthropic_client_with_defaults(
+                base_url=self.delegate_config.base_url
+                if self.delegate_config
+                else None,
+                api_key=api_key,
                 default_headers=headers,
             )
-
+            self.assign_client(client)
+        return super().get_client()
 
     def __init__(
         self,
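
The hunk above replaces the class-level client cache with the shared `has_client()` / `assign_client()` / `get_client()` helpers plus a `_get_default_api_key()` hook. A minimal, self-contained sketch of that lazy-client pattern follows; the real helpers live in `model_library.base` and are not shown in this diff, so their behaviour here is an assumption.

```python
# Sketch only: stand-in for the has_client()/assign_client() helpers assumed
# to exist on the LLM base class; not the package's actual implementation.
class LazyClientSketch:
    def __init__(self) -> None:
        self._client: object | None = None  # hypothetical per-instance cache

    def has_client(self) -> bool:
        return self._client is not None

    def assign_client(self, client: object) -> None:
        self._client = client

    def get_client(self, api_key: str | None = None) -> object:
        if not self.has_client():
            assert api_key, "first call must supply a key to build the client"
            self.assign_client(object())  # real code builds an AsyncAnthropic here
        assert self._client is not None
        return self._client
```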
@@ -268,33 +285,32 @@ class AnthropicModel(LLM):
         provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
-
+        delegate_config: DelegateConfig | None = None,
     ):
-
+        self.delegate_config = delegate_config
 
-
-        self._delegate_client: AsyncAnthropic | None = custom_client
+        super().__init__(model_name, provider, config=config)
 
         # https://docs.anthropic.com/en/api/openai-sdk
         self.delegate = (
             None
-            if self.native or
+            if self.native or self.delegate_config
             else OpenAIModel(
                 model_name=self.model_name,
-                provider=provider,
+                provider=self.provider,
                 config=config,
-
-
+                use_completions=True,
+                delegate_config=DelegateConfig(
                    base_url="https://api.anthropic.com/v1/",
+                    api_key=SecretStr(model_library_settings.ANTHROPIC_API_KEY),
                ),
-                use_completions=True,
             )
         )
 
         # Initialize batch support if enabled
         # Disable batch when using custom_client (similar to OpenAI)
         self.supports_batch: bool = (
-            self.supports_batch and self.native and not
+            self.supports_batch and self.native and not self.delegate_config
         )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
@@ -520,7 +536,6 @@ class AnthropicModel(LLM):
         **kwargs: object,
     ) -> dict[str, Any]:
         body: dict[str, Any] = {
-            "max_tokens": self.max_tokens,
             "model": self.model_name,
             "messages": await self.parse_input(input),
         }
@@ -534,14 +549,28 @@ class AnthropicModel(LLM):
             }
         ]
 
+        if not self.max_tokens:
+            raise Exception("Anthropic models require a max_tokens parameter")
+
+        body["max_tokens"] = self.max_tokens
+
         if self.reasoning:
-
-            "
-
-
-
-
-
+            if self.provider_config.supports_auto_thinking:
+                body["thinking"] = {"type": "auto"}
+            else:
+                budget_tokens = kwargs.pop(
+                    "budget_tokens", get_default_budget_tokens(self.max_tokens)
+                )
+                body["thinking"] = {
+                    "type": "enabled",
+                    "budget_tokens": budget_tokens,
+                }
+
+        # effort controls compute allocation for text, tool calls, and thinking. Opus-4.5+
+        # use instead of reasoning_effort with auto_thinking
+        if self.provider_config.supports_compute_effort and self.compute_effort:
+            # default is "high"
+            body["output_config"] = {"effort": self.compute_effort}
 
         # Thinking models don't support temperature: https://docs.claude.com/en/docs/build-with-claude/extended-thinking#feature-compatibility
         if self.supports_temperature and not self.reasoning:
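
For reference, the branch added above yields one of two `thinking` configurations depending on `supports_auto_thinking`. The sketch below shows the resulting body shapes; the model name and token budgets are invented examples, not library defaults.

```python
# Illustration of the body shapes produced by the reasoning branch above.
# "claude-example", 8192 and 4096 are placeholders.
def example_thinking_body(supports_auto_thinking: bool) -> dict:
    body: dict = {"model": "claude-example", "max_tokens": 8192}
    if supports_auto_thinking:
        body["thinking"] = {"type": "auto"}
    else:
        body["thinking"] = {"type": "enabled", "budget_tokens": 4096}
    return body

assert example_thinking_body(True)["thinking"] == {"type": "auto"}
assert example_thinking_body(False)["thinking"]["budget_tokens"] == 4096
```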
@@ -577,7 +606,7 @@ class AnthropicModel(LLM):
         client = self.get_client()
 
         # only send betas for the official Anthropic endpoint
-        is_anthropic_endpoint = self.
+        is_anthropic_endpoint = self.delegate_config is None
         if not is_anthropic_endpoint:
             client_base_url = getattr(client, "_base_url", None) or getattr(
                 client, "base_url", None
@@ -587,16 +616,29 @@ class AnthropicModel(LLM):
 
         stream_kwargs = {**body}
         if is_anthropic_endpoint:
-            betas = ["files-api-2025-04-14"
+            betas = ["files-api-2025-04-14"]
+            if self.provider_config.supports_auto_thinking:
+                betas.extend(
+                    [
+                        "auto-thinking-2026-01-12",
+                        "effort-2025-11-24",
+                        "max-effort-2026-01-24",
+                    ]
+                )
+            else:
+                betas.extend(["interleaved-thinking-2025-05-14"])
             if "sonnet-4-5" in self.model_name:
                 betas.append("context-1m-2025-08-07")
             stream_kwargs["betas"] = betas
 
-
-
-
-
-
+        try:
+            async with client.beta.messages.stream(
+                **stream_kwargs,
+            ) as stream:  # pyright: ignore[reportAny]
+                message = await stream.get_final_message()
+                self.logger.info(f"Anthropic Response finished: {message.id}")
+        except APIConnectionError:
+            raise ImmediateRetryException("Failed to connect to Anthropic")
 
         text = ""
         reasoning = ""
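
The new `try`/`except` maps the SDK's `APIConnectionError` onto `ImmediateRetryException`. How that exception is consumed is not shown here (retry logic lives in the new `model_library/retriers/` package listed above), so the sketch below is only one plausible way a caller could treat it: retry immediately on connection failures, back off for everything else.

```python
import asyncio

class ImmediateRetryException(Exception):
    """Stand-in for model_library.exceptions.ImmediateRetryException."""

async def call_with_retries(fn, attempts: int = 3, backoff_s: float = 2.0):
    # Sketch under assumptions; the real retriers/ package may behave differently.
    last_exc: Exception | None = None
    for attempt in range(attempts):
        try:
            return await fn()
        except ImmediateRetryException as exc:
            last_exc = exc  # transient connection failure: retry right away
        except Exception as exc:
            last_exc = exc
            await asyncio.sleep(backoff_s * (attempt + 1))  # back off, then retry
    assert last_exc is not None
    raise last_exc
```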
@@ -632,6 +674,38 @@ class AnthropicModel(LLM):
             history=[*input, RawResponse(response=message)],
         )
 
+    @override
+    async def get_rate_limit(self) -> RateLimit:
+        response = await self.get_client().messages.with_raw_response.create(
+            max_tokens=1,
+            messages=[
+                {
+                    "role": "user",
+                    "content": "Ping",
+                }
+            ],
+            model=self.model_name,
+        )
+        headers = response.headers
+
+        server_time_str = headers.get("date")
+        if server_time_str:
+            server_time = datetime.datetime.strptime(
+                server_time_str, "%a, %d %b %Y %H:%M:%S GMT"
+            ).replace(tzinfo=datetime.timezone.utc)
+            timestamp = server_time.timestamp()
+        else:
+            timestamp = time.time()
+
+        return RateLimit(
+            unix_timestamp=timestamp,
+            raw=headers,
+            request_limit=int(headers["anthropic-ratelimit-requests-limit"]),
+            request_remaining=int(headers["anthropic-ratelimit-requests-remaining"]),
+            token_limit=int(response.headers["anthropic-ratelimit-tokens-limit"]),
+            token_remaining=int(headers["anthropic-ratelimit-tokens-remaining"]),
+        )
+
     @override
     async def count_tokens(
         self,
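
The new `get_rate_limit()` issues a 1-token "Ping" request purely to read Anthropic's rate-limit response headers. Below is a small, hedged example of parsing such a header set; the values are invented, only the header names and the date format come from the code above.

```python
import datetime

headers = {  # invented example values
    "date": "Mon, 06 Jan 2025 12:00:00 GMT",
    "anthropic-ratelimit-requests-limit": "4000",
    "anthropic-ratelimit-requests-remaining": "3999",
    "anthropic-ratelimit-tokens-limit": "400000",
    "anthropic-ratelimit-tokens-remaining": "399000",
}

server_time = datetime.datetime.strptime(
    headers["date"], "%a, %d %b %Y %H:%M:%S GMT"
).replace(tzinfo=datetime.timezone.utc)

snapshot = {
    "unix_timestamp": server_time.timestamp(),
    "request_remaining": int(headers["anthropic-ratelimit-requests-remaining"]),
    "token_remaining": int(headers["anthropic-ratelimit-tokens-remaining"]),
}
print(snapshot)
```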
@@ -645,20 +719,26 @@ class AnthropicModel(LLM):
         Count the number of tokens using Anthropic's native token counting API.
         https://docs.anthropic.com/en/docs/build-with-claude/token-counting
         """
-
-
-
+        try:
+            input = [*history, *input]
+            if not input:
+                return 0
 
-
+            body = await self.build_body(input, tools=tools, **kwargs)
 
-
-
-
+            # Remove fields not supported by count_tokens endpoint
+            body.pop("max_tokens", None)
+            body.pop("temperature", None)
 
-
-
+            client = self.get_client()
+            response = await client.messages.count_tokens(**body)
 
-
+            return response.input_tokens
+        except Exception as e:
+            self.logger.error(f"Error counting tokens: {e}")
+            return await super().count_tokens(
+                input, history=history, tools=tools, **kwargs
+            )
 
     @override
     async def _calculate_cost(

model_library/providers/azure.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 from typing import Literal
 
 from openai.lib.azure import AsyncAzureOpenAI
@@ -14,21 +15,32 @@ from model_library.utils import default_httpx_client
 
 @register_provider("azure")
 class AzureOpenAIModel(OpenAIModel):
-    _azure_client: AsyncAzureOpenAI | None = None
-
     @override
-    def 
-
-
-
-
-
+    def _get_default_api_key(self) -> str:
+        return json.dumps(
+            {
+                "AZURE_API_KEY": model_library_settings.AZURE_API_KEY,
+                "AZURE_ENDPOINT": model_library_settings.AZURE_ENDPOINT,
+                "AZURE_API_VERSION": model_library_settings.get(
                     "AZURE_API_VERSION", "2025-04-01-preview"
                 ),
+            }
+        )
+
+    @override
+    def get_client(self, api_key: str | None = None) -> AsyncAzureOpenAI:
+        if not self.has_client():
+            assert api_key
+            creds = json.loads(api_key)
+            client = AsyncAzureOpenAI(
+                api_key=creds["AZURE_API_KEY"],
+                azure_endpoint=creds["AZURE_ENDPOINT"],
+                api_version=creds["AZURE_API_VERSION"],
                 http_client=default_httpx_client(),
-                max_retries=
+                max_retries=3,
             )
-
+            self.assign_client(client)
+        return super(OpenAIModel, self).get_client(api_key)
 
     def __init__(
         self,
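
Azure needs three values (key, endpoint, API version) rather than a single API key, so `_get_default_api_key()` packs them into one JSON string and `get_client()` unpacks it. A sketch of that round-trip, with placeholder values:

```python
import json

# Placeholder credentials; the real values come from model_library_settings.
packed = json.dumps(
    {
        "AZURE_API_KEY": "example-key",
        "AZURE_ENDPOINT": "https://my-resource.openai.azure.com",
        "AZURE_API_VERSION": "2025-04-01-preview",
    }
)

creds = json.loads(packed)  # what get_client() does with its api_key argument
assert creds["AZURE_API_VERSION"] == "2025-04-01-preview"
```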

model_library/providers/cohere.py
CHANGED

@@ -1,13 +1,14 @@
 from typing import Literal
 
+from pydantic import SecretStr
+
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
 
 
 @register_provider("cohere")
@@ -22,13 +23,12 @@ class CohereModel(DelegateOnly):
         super().__init__(model_name, provider, config=config)
 
         # https://docs.cohere.com/docs/compatibility-api
-        self.
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-
-            api_key=model_library_settings.COHERE_API_KEY,
+            delegate_config=DelegateConfig(
                base_url="https://api.cohere.ai/compatibility/v1",
+                api_key=SecretStr(model_library_settings.COHERE_API_KEY),
            ),
            use_completions=True,
+            delegate_provider="openai",
        )
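
CohereModel (and DeepSeek and Fireworks below) now hands a `DelegateConfig` to `init_delegate()` instead of building an OpenAI client itself. Only `base_url` and a `SecretStr` `api_key` are visible in this diff; the sketch below models just those two fields and is not the package's actual `DelegateConfig`.

```python
from pydantic import BaseModel, SecretStr

class DelegateConfigSketch(BaseModel):
    # Only the fields visible in this diff; the real DelegateConfig may have more.
    base_url: str
    api_key: SecretStr

cfg = DelegateConfigSketch(
    base_url="https://api.cohere.ai/compatibility/v1",
    api_key=SecretStr("example-key"),  # placeholder
)
assert cfg.api_key.get_secret_value() == "example-key"
```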

model_library/providers/deepseek.py
CHANGED

@@ -5,14 +5,15 @@ https://cdn.deepseek.com/policies/en-US/deepseek-privacy-policy.html
 
 from typing import Literal
 
+from pydantic import SecretStr
+
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
 
 
 @register_provider("deepseek")
@@ -27,13 +28,12 @@ class DeepSeekModel(DelegateOnly):
         super().__init__(model_name, provider, config=config)
 
         # https://api-docs.deepseek.com/
-        self.
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-
-
-
+            delegate_config=DelegateConfig(
+                base_url="https://api.deepseek.com/v1",
+                api_key=SecretStr(model_library_settings.DEEPSEEK_API_KEY),
            ),
            use_completions=True,
+            delegate_provider="openai",
        )

model_library/providers/fireworks.py
CHANGED

@@ -1,18 +1,18 @@
 from typing import Literal
 
+from pydantic import SecretStr
 from typing_extensions import override
 
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
     LLMConfig,
     ProviderConfig,
     QueryResultCost,
     QueryResultMetadata,
 )
-from model_library.base.delegate_only import DelegateOnly
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
 
 
 class FireworksConfig(ProviderConfig):
@@ -38,15 +38,14 @@ class FireworksModel(DelegateOnly):
         self.model_name = "accounts/rayan-936e28/deployedModels/" + self.model_name
 
         # https://docs.fireworks.ai/tools-sdks/openai-compatibility
-        self.
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-
-            api_key=model_library_settings.FIREWORKS_API_KEY,
+            delegate_config=DelegateConfig(
                base_url="https://api.fireworks.ai/inference/v1",
+                api_key=SecretStr(model_library_settings.FIREWORKS_API_KEY),
            ),
            use_completions=True,
+            delegate_provider="openai",
        )
 
     @override

model_library/providers/google/batch.py
CHANGED

@@ -24,16 +24,19 @@ from google.genai.types import (
 )
 
 
-def extract_text_from_json_response(response: dict[str, Any]) -> str:
+def extract_text_from_json_response(response: dict[str, Any]) -> tuple[str, str]:
     """Extract concatenated non-thought text from a JSON response structure."""
     # TODO: fix the typing we always ignore
     text = ""
+    reasoning = ""
     for candidate in response.get("candidates", []) or []:  # type: ignore
         content = (candidate or {}).get("content") or {}  # type: ignore
         for part in content.get("parts", []) or []:  # type: ignore
-            if 
+            if part.get("thought", False):  # type: ignore
+                reasoning += part.get("text", "")  # type: ignore
+            else:
                 text += part.get("text", "")  # type: ignore
-    return text  # type: ignore
+    return text, reasoning  # type: ignore
 
 
 def parse_predictions_jsonl(jsonl: str) -> list[BatchResult]:
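
`extract_text_from_json_response` now returns a `(text, reasoning)` pair, routing parts flagged as `thought` into the reasoning string. A self-contained example with a hand-made payload in the same shape:

```python
response = {  # invented payload, same shape the helper walks
    "candidates": [
        {
            "content": {
                "parts": [
                    {"text": "Let me think this through. ", "thought": True},
                    {"text": "The answer is 42."},
                ]
            }
        }
    ]
}

text, reasoning = "", ""
for candidate in response.get("candidates", []) or []:
    parts = ((candidate or {}).get("content") or {}).get("parts", []) or []
    for part in parts:
        if part.get("thought", False):
            reasoning += part.get("text", "")
        else:
            text += part.get("text", "")

assert text == "The answer is 42."
assert reasoning == "Let me think this through. "
```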
@@ -48,9 +51,10 @@ def parse_predictions_jsonl(jsonl: str) -> list[BatchResult]:
         custom_id = data.get("key", "unknown")
         if "response" in data:
             response = data["response"]
-            text = extract_text_from_json_response(response)
+            text, reasoning = extract_text_from_json_response(response)
             output = QueryResult()
             output.output_text = text
+            output.reasoning = reasoning
             if "usageMetadata" in response:
                 output.metadata.in_tokens = response["usageMetadata"].get(
                     "promptTokenCount", 0
@@ -196,7 +200,7 @@ class GoogleBatchMixin(LLMBatchMixin):
             custom_id = labels.get("qa_pair_id", f"request-{i}")
             jsonl_lines.append(json.dumps({"key": custom_id, "request": request_data}))
 
-        batch_request_file = self._root.
+        batch_request_file = self._root.get_client().files.upload(
             file=io.StringIO("\n".join(jsonl_lines)),
             config=UploadFileConfig(mime_type="application/jsonl"),
         )
@@ -205,7 +209,7 @@ class GoogleBatchMixin(LLMBatchMixin):
             raise Exception("Failed to upload batch jsonl")
 
         try:
-            job: BatchJob = await self._root.
+            job: BatchJob = await self._root.get_client().aio.batches.create(
                 model=self._root.model_name,
                 src=batch_request_file.name,
                 config={"display_name": batch_name},
@@ -224,14 +228,14 @@ class GoogleBatchMixin(LLMBatchMixin):
     async def get_batch_results(self, batch_id: str) -> list[BatchResult]:
         self._root.logger.info(f"Retrieving batch results for {batch_id}")
 
-        job = await self._root.
+        job = await self._root.get_client().aio.batches.get(name=batch_id)
 
         results: list[BatchResult] = []
 
         if job.state == JobState.JOB_STATE_SUCCEEDED:
             if job.dest and job.dest.file_name:
                 results_file_name = job.dest.file_name
-                file_content = await self._root.
+                file_content = await self._root.get_client().aio.files.download(
                     file=results_file_name
                 )
                 decoded = file_content.decode("utf-8")
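
Every batch call in this mixin now goes through `self._root.get_client()` rather than a cached attribute. Below is a hedged polling sketch built only from the method signatures visible in this file (`get_batch_progress`, `get_batch_results`, `cancel_batch_request`); whether progress counts up to 100 is an assumption.

```python
import asyncio

async def wait_for_batch(batch, batch_id: str, poll_s: float = 30.0, max_polls: int = 20):
    # Sketch only: `batch` is a GoogleBatchMixin-like object; names come from the
    # signatures above, the 100-means-done convention is assumed.
    for _ in range(max_polls):
        progress = await batch.get_batch_progress(batch_id)
        if progress >= 100:
            return await batch.get_batch_results(batch_id)
        await asyncio.sleep(poll_s)
    await batch.cancel_batch_request(batch_id)
    raise TimeoutError(f"batch {batch_id} did not finish after {max_polls} polls")
```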
@@ -250,7 +254,7 @@ class GoogleBatchMixin(LLMBatchMixin):
     @override
     async def cancel_batch_request(self, batch_id: str):
         self._root.logger.info(f"Cancelling batch {batch_id}")
-        await self._root.
+        await self._root.get_client().aio.batches.cancel(name=batch_id)
 
     @override
     async def get_batch_progress(self, batch_id: str) -> int:
@@ -262,7 +266,7 @@ class GoogleBatchMixin(LLMBatchMixin):
 
         try:
             self._root.logger.debug(f"Checking batch status for {batch_id}")
-            job: BatchJob = await self._root.
+            job: BatchJob = await self._root.get_client().aio.batches.get(name=batch_id)
             state = job.state
 
             if not state:
|