model-library 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. model_library/base/base.py +141 -62
  2. model_library/base/delegate_only.py +77 -10
  3. model_library/base/output.py +43 -0
  4. model_library/base/utils.py +35 -0
  5. model_library/config/alibaba_models.yaml +49 -57
  6. model_library/config/all_models.json +353 -120
  7. model_library/config/anthropic_models.yaml +2 -1
  8. model_library/config/kimi_models.yaml +30 -3
  9. model_library/config/mistral_models.yaml +2 -0
  10. model_library/config/openai_models.yaml +15 -23
  11. model_library/config/together_models.yaml +2 -0
  12. model_library/config/xiaomi_models.yaml +43 -0
  13. model_library/config/zai_models.yaml +27 -3
  14. model_library/exceptions.py +3 -77
  15. model_library/providers/ai21labs.py +12 -8
  16. model_library/providers/alibaba.py +17 -8
  17. model_library/providers/amazon.py +49 -16
  18. model_library/providers/anthropic.py +128 -48
  19. model_library/providers/azure.py +22 -10
  20. model_library/providers/cohere.py +7 -7
  21. model_library/providers/deepseek.py +8 -8
  22. model_library/providers/fireworks.py +7 -8
  23. model_library/providers/google/batch.py +14 -10
  24. model_library/providers/google/google.py +57 -30
  25. model_library/providers/inception.py +7 -7
  26. model_library/providers/kimi.py +18 -8
  27. model_library/providers/minimax.py +15 -17
  28. model_library/providers/mistral.py +20 -8
  29. model_library/providers/openai.py +99 -22
  30. model_library/providers/openrouter.py +34 -0
  31. model_library/providers/perplexity.py +7 -7
  32. model_library/providers/together.py +7 -8
  33. model_library/providers/vals.py +12 -6
  34. model_library/providers/vercel.py +34 -0
  35. model_library/providers/xai.py +47 -42
  36. model_library/providers/xiaomi.py +34 -0
  37. model_library/providers/zai.py +38 -8
  38. model_library/register_models.py +5 -0
  39. model_library/registry_utils.py +48 -17
  40. model_library/retriers/__init__.py +0 -0
  41. model_library/retriers/backoff.py +73 -0
  42. model_library/retriers/base.py +225 -0
  43. model_library/retriers/token.py +427 -0
  44. model_library/retriers/utils.py +11 -0
  45. model_library/settings.py +1 -1
  46. model_library/utils.py +17 -7
  47. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/METADATA +2 -1
  48. model_library-0.1.9.dist-info/RECORD +73 -0
  49. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/WHEEL +1 -1
  50. model_library-0.1.7.dist-info/RECORD +0 -64
  51. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/licenses/LICENSE +0 -0
  52. {model_library-0.1.7.dist-info → model_library-0.1.9.dist-info}/top_level.txt +0 -0
@@ -1,18 +1,18 @@
 from typing import Literal
 
+from pydantic import SecretStr
 from typing_extensions import override
 
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
     ProviderConfig,
     QueryResultCost,
     QueryResultMetadata,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
 
 
 class TogetherConfig(ProviderConfig):
@@ -32,15 +32,14 @@ class TogetherModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)
         # https://docs.together.ai/docs/openai-api-compatibility
-        self.delegate = OpenAIModel(
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-            custom_client=create_openai_client_with_defaults(
-                api_key=model_library_settings.TOGETHER_API_KEY,
-                base_url="https://api.together.xyz/v1",
+            delegate_config=DelegateConfig(
+                base_url="https://api.together.xyz/v1/",
+                api_key=SecretStr(model_library_settings.TOGETHER_API_KEY),
            ),
             use_completions=True,
+            delegate_provider="openai",
         )
 
     @override
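Note: providers now hand credentials to their delegate through a DelegateConfig whose api_key is a pydantic SecretStr rather than a raw string passed to a custom OpenAI client. A minimal sketch of the masking behaviour that motivates this (standard pydantic, independent of this package; the key shown is a placeholder):

    from pydantic import SecretStr

    key = SecretStr("sk-example-not-a-real-key")
    print(key)                     # prints '**********', safe to log or repr
    print(key.get_secret_value())  # the raw value, only when explicitly requested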
@@ -151,13 +151,17 @@ class DummyAIBatchMixin(LLMBatchMixin):
 class DummyAIModel(LLM):
     _client: Redis | None = None
 
-    @override
-    def get_client(self) -> Redis:
-        if not DummyAIModel._client:
-            DummyAIModel._client = redis.from_url(  # pyright: ignore[reportUnknownMemberType]
+    def _get_default_api_key(self) -> str:
+        return model_library_settings.REDIS_URL
+
+    def get_client(self, api_key: str | None = None) -> Redis:
+        if not self.has_client():
+            assert api_key
+            client = redis.from_url(  # pyright: ignore[reportUnknownMemberType]
                 model_library_settings.REDIS_URL, decode_responses=True
             )
-        return DummyAIModel._client
+            self.assign_client(client)
+        return super().get_client()
 
     def __init__(
         self,
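Note: this get_client rewrite (and the XAIModel one further down) replaces a class-level singleton with has_client/assign_client helpers defined on the base LLM class. Those helpers are not shown in this diff, so the sketch below only illustrates the same lazy-initialization idea in a self-contained form; it is not the library's actual implementation:

    class LazyClientHolder:
        """Illustrative only: cache an expensive client on first use."""

        def __init__(self) -> None:
            self._client = None

        def has_client(self) -> bool:
            return self._client is not None

        def assign_client(self, client) -> None:
            self._client = client

        def get_client(self):
            if not self.has_client():
                # stand-in for redis.from_url(...) or AsyncClient(...)
                self.assign_client(object())
            return self._client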
@@ -238,12 +242,14 @@ class DummyAIModel(LLM):
         messages = await self.parse_input(input)
         body: dict[str, Any] = {
             "model": self.model_name,
-            "max_tokens": self.max_tokens,
             "seed": 0,
             "messages": messages,
             "tools": await self.parse_tools(tools),
         }
 
+        if self.max_tokens:
+            body["max_tokens"] = self.max_tokens
+
         if self.supports_temperature:
             if self.temperature is not None:
                 body["temperature"] = self.temperature
@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("vercel")
+class VercelModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["vercel"] = "vercel",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://vercel.com/docs/ai-gateway/sdks-and-apis#quick-start
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://ai-gateway.vercel.sh/v1",
+                api_key=SecretStr(model_library_settings.VERCEL_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )
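Note: the two new gateway providers (vercel here, xiaomi further down) are thin DelegateOnly wrappers over an OpenAI-compatible chat-completions endpoint, so constructing one only takes a model name. A hedged usage sketch (the model key is a placeholder, and the corresponding *_API_KEY setting must be configured):

    from model_library.providers.vercel import VercelModel

    model = VercelModel("some-gateway-model")  # placeholder model name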
@@ -2,7 +2,7 @@ import io
 import logging
 from typing import Any, Literal, Sequence
 
-import grpc
+from pydantic import SecretStr
 from typing_extensions import override
 from xai_sdk import AsyncClient
 from xai_sdk.aio.chat import Chat
@@ -14,6 +14,7 @@ from xai_sdk.proto.v6.chat_pb2 import Message, Tool
 from model_library import model_library_settings
 from model_library.base import (
     LLM,
+    DelegateConfig,
     FileBase,
     FileInput,
     FileWithBase64,
@@ -36,24 +37,26 @@ from model_library.exceptions import (
     MaxOutputTokensExceededError,
     ModelNoOutputError,
     NoMatchingToolCallError,
-    RateLimitException,
 )
 from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
 
 
 @register_provider("grok")
 class XAIModel(LLM):
-    _client: AsyncClient | None = None
+    @override
+    def _get_default_api_key(self) -> str:
+        return model_library_settings.XAI_API_KEY
 
     @override
-    def get_client(self) -> AsyncClient:
-        if not XAIModel._client:
-            XAIModel._client = AsyncClient(
-                api_key=model_library_settings.XAI_API_KEY,
+    def get_client(self, api_key: str | None = None) -> AsyncClient:
+        if not self.has_client():
+            assert api_key
+            client = AsyncClient(
+                api_key=api_key,
             )
-        return XAIModel._client
+            self.assign_client(client)
+        return super().get_client()
 
     @override
     def __init__(
@@ -73,13 +76,13 @@ class XAIModel(LLM):
             model_name=self.model_name,
             provider=provider,
             config=config,
-            custom_client=create_openai_client_with_defaults(
-                api_key=model_library_settings.XAI_API_KEY,
+            delegate_config=DelegateConfig(
                 base_url=(
                     "https://us-west-1.api.x.ai/v1"
                     if "grok-3-mini-reasoning" in self.model_name
                     else "https://api.x.ai/v1"
                 ),
+                api_key=SecretStr(model_library_settings.XAI_API_KEY),
             ),
             use_completions=True,
         )
@@ -210,12 +213,14 @@ class XAIModel(LLM):
             messages.append(system(str(kwargs.pop("system_prompt"))))
 
         body: dict[str, Any] = {
-            "max_tokens": self.max_tokens,
             "model": self.model_name,
             "tools": await self.parse_tools(tools),
             "messages": messages,
         }
 
+        if self.max_tokens:
+            body["max_tokens"] = self.max_tokens
+
         if self.supports_temperature:
             if self.temperature is not None:
                 body["temperature"] = self.temperature
@@ -253,38 +258,35 @@ class XAIModel(LLM):
 
         body = await self.build_body(input, tools=tools, **kwargs)
 
-        try:
-            chat: Chat = self.get_client().chat.create(**body)
-
-            latest_response: Response | None = None
-            async for response, _ in chat.stream():
-                latest_response = response
-
-            if not latest_response:
-                raise ModelNoOutputError("Model failed to produce a response")
-
-            tool_calls: list[ToolCall] = []
-            if (
-                latest_response.finish_reason == "REASON_TOOL_CALLS"
-                and latest_response.tool_calls
-            ):
-                for tool_call in latest_response.tool_calls:
-                    tool_calls.append(
-                        ToolCall(
-                            id=tool_call.id,
-                            name=tool_call.function.name,
-                            args=tool_call.function.arguments,
-                        )
+        chat: Chat = self.get_client().chat.create(**body)
+
+        latest_response: Response | None = None
+        async for response, _ in chat.stream():
+            latest_response = response
+
+        if not latest_response:
+            raise ModelNoOutputError("Model failed to produce a response")
+
+        tool_calls: list[ToolCall] = []
+        if (
+            latest_response.finish_reason == "REASON_TOOL_CALLS"
+            and latest_response.tool_calls
+        ):
+            for tool_call in latest_response.tool_calls:
+                tool_calls.append(
+                    ToolCall(
+                        id=tool_call.id,
+                        name=tool_call.function.name,
+                        args=tool_call.function.arguments,
                     )
+                )
 
-            if (
-                latest_response.finish_reason == "REASON_MAX_LEN"
-                and not latest_response.content
-                and not latest_response.reasoning_content
-            ):
-                raise MaxOutputTokensExceededError()
-        except grpc.RpcError as e:
-            raise RateLimitException(e.details())
+        if (
+            latest_response.finish_reason == "REASON_MAX_LEN"
+            and not latest_response.content
+            and not latest_response.reasoning_content
+        ):
+            raise MaxOutputTokensExceededError()
 
         return QueryResult(
             output_text=latest_response.content,
@@ -310,6 +312,9 @@ class XAIModel(LLM):
         tools: list[ToolDefinition] = [],
         **kwargs: object,
     ) -> int:
+        if not input and not history:
+            return 0
+
         string_input = await self.stringify_input(input, history=history, tools=tools)
         self.logger.debug(string_input)
 
@@ -0,0 +1,34 @@
+from typing import Literal
+
+from pydantic import SecretStr
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.register_models import register_provider
+
+
+@register_provider("xiaomi")
+class XiaomiModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["xiaomi"] = "xiaomi",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        # https://platform.xiaomimimo.com/#/docs/quick-start/first-api-call
+        self.init_delegate(
+            config=config,
+            delegate_config=DelegateConfig(
+                base_url="https://api.xiaomimimo.com/v1",
+                api_key=SecretStr(model_library_settings.XIAOMI_API_KEY),
+            ),
+            use_completions=True,
+            delegate_provider="openai",
+        )
@@ -1,17 +1,36 @@
-from typing import Literal
+from typing import Any, Literal
+
+from pydantic import SecretStr
+from typing_extensions import override
 
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
+    ProviderConfig,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
+
+
+class ZAIConfig(ProviderConfig):
+    """Configuration for ZAI (GLM) models.
+
+    Attributes:
+        clear_thinking: When disabled, reasoning content from previous turns is
+            preserved in context. This is useful for multi-turn conversations where
+            you want the model to maintain coherent reasoning across turns.
+            Enabled by default on the standard API endpoint.
+            See: https://docs.z.ai/guides/capabilities/thinking-mode
+    """
+
+    clear_thinking: bool = True
 
 
 @register_provider("zai")
 class ZAIModel(DelegateOnly):
+    provider_config = ZAIConfig()
+
     def __init__(
         self,
         model_name: str,
@@ -21,14 +40,25 @@ class ZAIModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)
 
+        self.clear_thinking = self.provider_config.clear_thinking
+
         # https://docs.z.ai/guides/develop/openai/python
-        self.delegate = OpenAIModel(
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-            custom_client=create_openai_client_with_defaults(
-                api_key=model_library_settings.ZAI_API_KEY,
+            delegate_config=DelegateConfig(
                 base_url="https://open.bigmodel.cn/api/paas/v4/",
+                api_key=SecretStr(model_library_settings.ZAI_API_KEY),
             ),
             use_completions=True,
+            delegate_provider="openai",
         )
+
+    @override
+    def _get_extra_body(self) -> dict[str, Any]:
+        """Build extra body parameters for GLM-specific features."""
+        return {
+            "thinking": {
+                "type": "enabled" if self.reasoning else "disabled",
+                "clear_thinking": self.clear_thinking,
+            }
+        }
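Note: per the _get_extra_body override above, the OpenAI-compatible request to the GLM endpoint carries a "thinking" block in its extra body. A small sketch of the resulting payload for both reasoning settings (values follow directly from the code in the hunk; the helper name is illustrative only):

    def glm_extra_body(reasoning: bool, clear_thinking: bool = True) -> dict:
        # Mirrors ZAIModel._get_extra_body from the hunk above
        return {
            "thinking": {
                "type": "enabled" if reasoning else "disabled",
                "clear_thinking": clear_thinking,
            }
        }

    assert glm_extra_body(True) == {"thinking": {"type": "enabled", "clear_thinking": True}}
    assert glm_extra_body(False)["thinking"]["type"] == "disabled"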
@@ -170,6 +170,7 @@ class DefaultParameters(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning_effort: str | bool | None = None
+    compute_effort: str | bool | None = None
 
 
 class RawModelConfig(BaseModel):
@@ -338,6 +339,10 @@ def _register_models() -> ModelRegistry:
         copy.slug = key.replace("/", "_")
         copy.full_key = key
         copy.alternative_keys = []
+        copy.provider_properties = ProviderProperties.model_validate(
+            provider_properties
+        )
+
         registry[key] = copy
 
     return registry
@@ -1,11 +1,15 @@
+import tiktoken
 from functools import cache
 from pathlib import Path
 from typing import TypedDict
 
-import tiktoken
-
-from model_library.base import LLM, LLMConfig, ProviderConfig
-from model_library.base.output import QueryResultCost, QueryResultMetadata
+from model_library.base import (
+    LLM,
+    LLMConfig,
+    ProviderConfig,
+    QueryResultCost,
+    QueryResultMetadata,
+)
 from model_library.register_models import (
     CostProperties,
     ModelConfig,
@@ -196,19 +200,46 @@ def get_provider_names() -> list[str]:
 
 
 @cache
-def get_model_names() -> list[str]:
-    """Return all model names in the registry"""
-    return sorted([model_name for model_name in get_model_registry().keys()])
+def get_model_names(
+    provider: str | None = None,
+    include_deprecated: bool = False,
+    include_alt_keys: bool = True,
+) -> list[str]:
+    """
+    Return model names in the registry
+    - provider: Filter by provider name
+    - include_deprecated: Include deprecated models
+    - include_alt_keys: Include alternative keys from the same provider
+    """
+    registry = get_model_registry()
+    alternative_keys_set: set[str] = set()
+
+    if not include_alt_keys:
+        for model in registry.values():
+            for alt_item in model.alternative_keys:
+                alt_key = (
+                    alt_item if isinstance(alt_item, str) else list(alt_item.keys())[0]
+                )
+                if alt_key.split("/")[0] == model.provider_name:
+                    alternative_keys_set.add(alt_key)
+
+    return sorted(
+        [
+            model.full_key
+            for model in get_model_registry().values()
+            if (not provider or model.provider_name.lower() == provider.lower())
+            and (not model.metadata.deprecated or include_deprecated)
+            and model.full_key not in alternative_keys_set
+        ]
+    )
 
 
-@cache
-def get_model_names_by_provider(provider_name: str) -> list[str]:
-    """Return all models in the registry from a provider"""
-    return [
-        model.full_key
-        for model in get_model_registry().values()
-        if model.provider_name.lower() == provider_name.lower()
-    ]
+"""
+everything below this comment is included for legacy support of caselaw/corpfin custom models.
+@orestes please remove this as part of the migration to a standard CorpFin harness.
+"""
+
+DEFAULT_CONTEXT_WINDOW = 128_000
 
 
 @cache
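Note: the old get_model_names_by_provider helper is folded into get_model_names, which now also filters deprecated models and provider-local alternative keys. A hedged usage sketch (import path inferred from registry_utils.py in the file list above; returned keys depend on the registry contents):

    from model_library.registry_utils import get_model_names

    all_keys = get_model_names()                      # non-deprecated models, alternative keys included
    openai_keys = get_model_names(provider="openai")  # replaces get_model_names_by_provider("openai")
    everything = get_model_names(include_deprecated=True, include_alt_keys=True)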
@@ -233,7 +264,7 @@ def auto_trim_document(
         Trimmed document, or original document if trimming isn't needed
     """
 
-    max_tokens = get_max_document_tokens(model_name)
+    max_tokens = get_max_document_tokens(model_name) or DEFAULT_CONTEXT_WINDOW
     encoding = _get_tiktoken_encoder()
     tokens = encoding.encode(document)
  tokens = encoding.encode(document)
@@ -260,5 +291,5 @@ def get_max_document_tokens(model_name: str, output_buffer: int = 10000) -> int:
     # Import here to avoid circular imports
     from model_library.utils import get_context_window_for_model
 
-    context_window = get_context_window_for_model(model_name)
+    context_window = get_context_window_for_model(model_name) or DEFAULT_CONTEXT_WINDOW
     return context_window - output_buffer
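Note: with the new DEFAULT_CONTEXT_WINDOW fallback, a model whose context window is unknown now trims against 128,000 tokens instead of failing; with the default output_buffer of 10,000 that leaves 118,000 tokens for the document itself. A one-line check of that arithmetic:

    DEFAULT_CONTEXT_WINDOW = 128_000
    output_buffer = 10_000
    assert DEFAULT_CONTEXT_WINDOW - output_buffer == 118_000  # max document tokens when the window is unknown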
File without changes
@@ -0,0 +1,73 @@
+import logging
+from typing import Callable
+
+from model_library.base.base import QueryResult
+from model_library.exceptions import exception_message
+from model_library.retriers.base import BaseRetrier
+from model_library.retriers.utils import jitter
+
+RETRY_MAX_TRIES: int = 20
+RETRY_INITIAL: float = 10.0
+RETRY_EXPO: float = 1.4
+RETRY_MAX_BACKOFF_WAIT: float = 240.0
+
+
+class ExponentialBackoffRetrier(BaseRetrier):
+    """
+    Exponential backoff retry strategy.
+    Uses exponential backoff with jitter for wait times.
+    """
+
+    def __init__(
+        self,
+        logger: logging.Logger,
+        max_tries: int = RETRY_MAX_TRIES,
+        max_time: float | None = None,
+        retry_callback: Callable[[int, Exception | None, float, float], None]
+        | None = None,
+        *,
+        initial: float = RETRY_INITIAL,
+        expo: float = RETRY_EXPO,
+        max_backoff_wait: float = RETRY_MAX_BACKOFF_WAIT,
+    ):
+        super().__init__(
+            strategy="backoff",
+            logger=logger,
+            max_tries=max_tries,
+            max_time=max_time,
+            retry_callback=retry_callback,
+        )
+
+        self.initial = initial
+        self.expo = expo
+        self.max_backoff_wait = max_backoff_wait
+
+    async def _calculate_wait_time(
+        self, attempt: int, exception: Exception | None = None
+    ) -> float:
+        """Calculate exponential backoff wait time with jitter"""
+
+        exponential_wait = self.initial * (self.expo**attempt)
+        capped_wait = min(exponential_wait, self.max_backoff_wait)
+        return jitter(capped_wait)
+
+    async def _on_retry(
+        self, exception: Exception | None, elapsed: float, wait_time: float
+    ) -> None:
+        """Increment attempt counter and log retry attempt"""
+
+        logger_msg = f"[Retry] | {self.strategy} | Attempt: {self.attempts} | Elapsed: {elapsed:.1f}s | Next wait: {wait_time:.1f}s | Exception: {exception_message(exception)} "
+
+        self.logger.warning(logger_msg)
+
+        if self.retry_callback:
+            self.retry_callback(self.attempts, exception, elapsed, wait_time)
+
+    async def _pre_function(self) -> None:
+        return
+
+    async def _post_function(self, result: tuple[QueryResult, float]) -> None:
+        return
+
+    async def validate(self) -> None:
+        return
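Note: for reference, the deterministic part of the schedule implied by the defaults above (initial 10 s, factor 1.4, cap 240 s) before jitter is applied; the helper below simply restates the _calculate_wait_time formula:

    RETRY_INITIAL, RETRY_EXPO, RETRY_MAX_BACKOFF_WAIT = 10.0, 1.4, 240.0

    def wait_before_jitter(attempt: int) -> float:
        # Same formula as ExponentialBackoffRetrier._calculate_wait_time, minus the jitter step
        return min(RETRY_INITIAL * (RETRY_EXPO ** attempt), RETRY_MAX_BACKOFF_WAIT)

    print([round(wait_before_jitter(a), 1) for a in range(11)])
    # [10.0, 14.0, 19.6, 27.4, 38.4, 53.8, 75.3, 105.4, 147.6, 206.6, 240.0]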