model-library 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- model_library/base/base.py +13 -6
- model_library/base/output.py +55 -0
- model_library/base/utils.py +3 -2
- model_library/config/README.md +169 -0
- model_library/config/ai21labs_models.yaml +11 -10
- model_library/config/alibaba_models.yaml +21 -22
- model_library/config/all_models.json +4708 -2471
- model_library/config/amazon_models.yaml +100 -102
- model_library/config/anthropic_models.yaml +59 -45
- model_library/config/cohere_models.yaml +25 -24
- model_library/config/deepseek_models.yaml +28 -25
- model_library/config/dummy_model.yaml +9 -7
- model_library/config/fireworks_models.yaml +86 -56
- model_library/config/google_models.yaml +156 -102
- model_library/config/inception_models.yaml +6 -6
- model_library/config/kimi_models.yaml +13 -14
- model_library/config/minimax_models.yaml +37 -0
- model_library/config/mistral_models.yaml +85 -29
- model_library/config/openai_models.yaml +192 -159
- model_library/config/perplexity_models.yaml +8 -23
- model_library/config/together_models.yaml +115 -103
- model_library/config/xai_models.yaml +85 -57
- model_library/config/zai_models.yaml +23 -15
- model_library/exceptions.py +12 -17
- model_library/file_utils.py +1 -1
- model_library/providers/amazon.py +32 -17
- model_library/providers/anthropic.py +2 -6
- model_library/providers/google/google.py +35 -29
- model_library/providers/minimax.py +33 -0
- model_library/providers/mistral.py +10 -1
- model_library/providers/openai.py +10 -8
- model_library/providers/together.py +18 -211
- model_library/register_models.py +36 -38
- model_library/registry_utils.py +18 -16
- model_library/utils.py +2 -2
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/METADATA +3 -4
- model_library-0.1.4.dist-info/RECORD +64 -0
- model_library-0.1.2.dist-info/RECORD +0 -61
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/WHEEL +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/licenses/LICENSE +0 -0
- {model_library-0.1.2.dist-info → model_library-0.1.4.dist-info}/top_level.txt +0 -0
model_library/providers/google/google.py
CHANGED

@@ -2,8 +2,6 @@ import base64
 import io
 from typing import Any, Literal, Sequence, cast
 
-from typing_extensions import override
-
 from google.genai import Client
 from google.genai import errors as genai_errors
 from google.genai.types import (
@@ -18,10 +16,14 @@ from google.genai.types import (
     Part,
     SafetySetting,
     ThinkingConfig,
+    ThinkingLevel,
     Tool,
     ToolListUnion,
     UploadFileConfig,
+    FinishReason,
 )
+from typing_extensions import override
+
 from model_library import model_library_settings
 from model_library.base import (
     LLM,
@@ -119,15 +121,6 @@ class GoogleModel(LLM):
     ):
         super().__init__(model_name, provider, config=config)
 
-        # thinking tag
-        if self.model_name.endswith("-thinking"):
-            original_name = self.model_name
-            self.model_name = self.model_name.replace("-thinking", "")
-            self.reasoning = True
-            self.logger.info(
-                f"Enabled thinking mode for {original_name} -> {self.model_name}"
-            )
-
         if self.provider_config.use_vertex:
             self.supports_batch = False
 
@@ -261,14 +254,12 @@ class GoogleModel(LLM):
         bytes: io.BytesIO,
         type: Literal["image", "file"] = "file",
     ) -> FileWithId:
-        if
-
-
-
-
-
-        )
-            raise Exception("Model does not support batching")
+        if self.provider_config.use_vertex:
+            raise Exception(
+                "Vertex AI does not support file uploads. "
+                "use FileWithBase64 to pass files as inline data"
+                "or use genai for file uploads"
+            )
 
         mime = f"image/{mime}" if type == "image" else mime  # TODO:
         response: File = self.client.files.upload(
@@ -294,7 +285,6 @@ class GoogleModel(LLM):
         tools: list[ToolDefinition],
         **kwargs: object,
     ) -> dict[str, Any]:
-        self.logger.debug(f"Creating request body for {self.model_name}")
         generation_config = GenerateContentConfig(
             max_output_tokens=self.max_tokens,
         )
@@ -310,13 +300,15 @@ class GoogleModel(LLM):
         if system_prompt and isinstance(system_prompt, str) and system_prompt.strip():
             generation_config.system_instruction = str(system_prompt)
 
-        if
-
-
+        if self.reasoning:
+            reasoning_config = ThinkingConfig(include_thoughts=True)
+            if self.reasoning_effort:
+                reasoning_config.thinking_level = ThinkingLevel(self.reasoning_effort)
+            else:
+                reasoning_config.thinking_budget = cast(
                     int, kwargs.pop("thinking_budget", self.DEFAULT_THINKING_BUDGET)
-            )
-
-            )
+                )
+            generation_config.thinking_config = reasoning_config
 
         if tools:
             generation_config.tools = cast(ToolListUnion, await self.parse_tools(tools))
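The reasoning branch above now picks between the two thinking controls: a configured `reasoning_effort` is mapped onto the `ThinkingLevel` enum, otherwise a token `thinking_budget` is used. A minimal standalone sketch of the same shape, using only the `google-genai` types this file imports (the default budget and max-token values below are illustrative placeholders, not the library's defaults):

```python
from google.genai.types import GenerateContentConfig, ThinkingConfig, ThinkingLevel


def build_generation_config(
    reasoning: bool,
    reasoning_effort: str | None,
    thinking_budget: int = 8192,  # placeholder, not the library's DEFAULT_THINKING_BUDGET
) -> GenerateContentConfig:
    config = GenerateContentConfig(max_output_tokens=4096)
    if reasoning:
        thinking = ThinkingConfig(include_thoughts=True)
        if reasoning_effort:
            # as in the hunk: the configured effort string is handed straight to the enum
            thinking.thinking_level = ThinkingLevel(reasoning_effort)
        else:
            thinking.thinking_budget = thinking_budget
        config.thinking_config = thinking
    return config
```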
@@ -343,17 +335,20 @@ class GoogleModel(LLM):
         text: str = ""
         reasoning: str = ""
         tool_calls: list[ToolCall] = []
-        last_content: Content | None = None
 
         metadata: GenerateContentResponseUsageMetadata | None = None
 
         stream = await self.client.aio.models.generate_content_stream(**body)
+        contents: list[Content | None] = []
+        finish_reason: FinishReason | None = None
+
         async for chunk in stream:
             candidates = chunk.candidates
             if not candidates:
                 continue
 
             content = candidates[0].content
+
             if content and content.parts:
                 for part in content.parts:
                     if part.function_call:
@@ -378,14 +373,24 @@ class GoogleModel(LLM):
 
             if chunk.usage_metadata:
                 metadata = chunk.usage_metadata
-
+            if content:
+                contents.append(content)
+            if candidates[0].finish_reason:
+                finish_reason = candidates[0].finish_reason
+
+        if finish_reason != FinishReason.STOP:
+            self.logger.error(f"Unexpected finish reason: {finish_reason}")
+
+        if not text and not reasoning and not tool_calls:
+            raise ModelNoOutputError("Model returned empty response")
 
         result = QueryResult(
             output_text=text,
             reasoning=reasoning,
-            history=[*input,
+            history=[*input, *contents],
             tool_calls=tool_calls,
         )
+
         if metadata:
             # see _calculate_cost
             cache_read_tokens = metadata.cached_content_token_count or 0
@@ -446,6 +451,7 @@ class GoogleModel(LLM):
                 "response_mime_type": "application/json",
             }
         )
+
        body["config"] = config
 
        # Make the request with retry wrapper
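The streaming changes above accumulate every chunk's `Content` into `contents` (so the full model turn can be appended to `history`), track the final `FinishReason`, and raise `ModelNoOutputError` when nothing was produced. A sketch of the same accumulation pattern against the `google-genai` streaming API, assuming a configured `Client`; the model name and prompt are illustrative:

```python
from google.genai import Client
from google.genai.types import Content, FinishReason


async def collect_stream(client: Client) -> list[Content]:
    contents: list[Content] = []
    finish_reason: FinishReason | None = None
    stream = await client.aio.models.generate_content_stream(
        model="gemini-2.5-flash", contents="Hello"
    )
    async for chunk in stream:
        if not chunk.candidates:
            continue
        candidate = chunk.candidates[0]
        if candidate.content:
            contents.append(candidate.content)
        if candidate.finish_reason:
            finish_reason = candidate.finish_reason
    if finish_reason != FinishReason.STOP:
        print(f"Unexpected finish reason: {finish_reason}")
    return contents
```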
model_library/providers/minimax.py
ADDED

@@ -0,0 +1,33 @@
+from typing import Literal
+
+from model_library import model_library_settings
+from model_library.base import (
+    DelegateOnly,
+    LLMConfig,
+)
+from model_library.providers.openai import OpenAIModel
+from model_library.register_models import register_provider
+from model_library.utils import create_openai_client_with_defaults
+
+
+@register_provider("minimax")
+class MinimaxModel(DelegateOnly):
+    def __init__(
+        self,
+        model_name: str,
+        provider: Literal["minimax"] = "minimax",
+        *,
+        config: LLMConfig | None = None,
+    ):
+        super().__init__(model_name, provider, config=config)
+
+        self.delegate = OpenAIModel(
+            model_name=self.model_name,
+            provider=self.provider,
+            config=config,
+            custom_client=create_openai_client_with_defaults(
+                api_key=model_library_settings.MINIMAX_API_KEY,
+                base_url="https://api.minimax.io/v1",
+            ),
+            use_completions=True,
+        )
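The new provider is a thin `DelegateOnly` wrapper: every request is forwarded to an `OpenAIModel` pointed at MiniMax's OpenAI-compatible endpoint via the chat-completions API. A hypothetical usage sketch; the model name is illustrative and `MINIMAX_API_KEY` must be configured for the client to be built:

```python
from model_library.providers.minimax import MinimaxModel

model = MinimaxModel("MiniMax-M1")  # illustrative name, not necessarily a registry key
print(type(model.delegate).__name__)  # OpenAIModel, targeting https://api.minimax.io/v1
```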
model_library/providers/mistral.py
CHANGED

@@ -29,6 +29,7 @@ from model_library.base import (
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
+    ModelNoOutputError,
 )
 from model_library.file_utils import trim_images
 from model_library.register_models import register_provider
@@ -250,9 +251,17 @@ class MistralModel(LLM):
             self.logger.error(f"Error: {e}", exc_info=True)
             raise e
 
-        if
+        if (
+            finish_reason == "length"
+            and not text
+            and not reasoning
+            and not raw_tool_calls
+        ):
             raise MaxOutputTokensExceededError()
 
+        if not text and not reasoning and not raw_tool_calls:
+            raise ModelNoOutputError()
+
         tool_calls: list[ToolCall] = []
 
         for tool_call in raw_tool_calls or []:
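The new guards separate two failure modes: a `length` finish with nothing produced raises `MaxOutputTokensExceededError`, while any other empty completion raises the new `ModelNoOutputError`. The same decision, restated as a standalone helper (a sketch, not library code):

```python
from model_library.exceptions import MaxOutputTokensExceededError, ModelNoOutputError


def check_completion(
    finish_reason: str | None,
    text: str,
    reasoning: str | None,
    raw_tool_calls: list[object] | None,
) -> None:
    empty = not text and not reasoning and not raw_tool_calls
    if finish_reason == "length" and empty:
        # the whole output budget was spent without producing usable content
        raise MaxOutputTokensExceededError()
    if empty:
        raise ModelNoOutputError()
```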
model_library/providers/openai.py
CHANGED

@@ -5,7 +5,11 @@ import json
 from typing import Any, Literal, Sequence, cast
 
 from openai import APIConnectionError, AsyncOpenAI
-from openai.types.chat import
+from openai.types.chat import (
+    ChatCompletionMessage,
+    ChatCompletionMessageToolCall,
+    ChatCompletionMessageToolCallUnion,
+)
 from openai.types.chat.chat_completion_message_tool_call import Function
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.create_embedding_response import CreateEmbeddingResponse
@@ -517,10 +521,6 @@ class OpenAIModel(LLM):
         metadata: QueryResultMetadata = QueryResultMetadata()
         raw_tool_calls: list[ChatCompletionMessageToolCall] = []
 
-        # enable usage data in streaming responses
-        if "stream_options" not in body:
-            body["stream_options"] = {"include_usage": True}
-
         stream = await self.get_client().chat.completions.create(
             **body,  # pyright: ignore[reportAny]
             stream=True,
@@ -583,7 +583,7 @@ class OpenAIModel(LLM):
                 cache_read_tokens = (
                     chunk.usage.prompt_tokens_details.cached_tokens or 0
                     if chunk.usage.prompt_tokens_details
-                    else 0
+                    else getattr(chunk.usage, "cached_tokens", 0)  # for kimi
                 )
                 metadata = QueryResultMetadata(
                     in_tokens=chunk.usage.prompt_tokens - cache_read_tokens,
@@ -617,9 +617,11 @@ class OpenAIModel(LLM):
         final_message = ChatCompletionMessage(
             role="assistant",
             content=output_text if output_text else None,
-            tool_calls=
+            tool_calls=cast(list[ChatCompletionMessageToolCallUnion], raw_tool_calls)
+            if raw_tool_calls
+            else None,
         )
-        if
+        if reasoning_text:
             setattr(final_message, "reasoning_content", reasoning_text)
 
         return QueryResult(
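The cache-accounting tweak keeps reading `prompt_tokens_details.cached_tokens` when the provider reports it and otherwise falls back to a top-level `cached_tokens` attribute (the shape Kimi's OpenAI-compatible endpoint returns). A sketch of the same fallback against an `openai` usage object; the top-level attribute is non-standard and taken from the hunk, not from the OpenAI SDK:

```python
from openai.types import CompletionUsage


def uncached_prompt_tokens(usage: CompletionUsage) -> int:
    cached = (
        usage.prompt_tokens_details.cached_tokens or 0
        if usage.prompt_tokens_details
        else getattr(usage, "cached_tokens", 0)  # non-standard field, e.g. Kimi
    )
    return usage.prompt_tokens - cached
```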
model_library/providers/together.py
CHANGED

@@ -1,51 +1,27 @@
-import
-from typing import Any, Literal, Sequence, cast
+from typing import Literal
 
-from together import AsyncTogether
-from together.types.chat_completions import (
-    ChatCompletionMessage,
-    ChatCompletionResponse,
-)
 from typing_extensions import override
 
 from model_library import model_library_settings
 from model_library.base import (
-
-    FileInput,
-    FileWithBase64,
-    FileWithId,
-    FileWithUrl,
-    InputItem,
+    DelegateOnly,
     LLMConfig,
-
+    ProviderConfig,
     QueryResultCost,
     QueryResultMetadata,
-    TextInput,
-    ToolDefinition,
-)
-from model_library.exceptions import (
-    BadInputError,
-    MaxOutputTokensExceededError,
-    ModelNoOutputError,
 )
-from model_library.file_utils import trim_images
-from model_library.model_utils import get_reasoning_in_tag
 from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
 from model_library.utils import create_openai_client_with_defaults
 
 
-
-
-    _client: AsyncTogether | None = None
+class TogetherConfig(ProviderConfig):
+    serverless: bool = True
 
-
-
-
-
-            api_key=model_library_settings.TOGETHER_API_KEY,
-        )
-        return TogetherModel._client
+
+@register_provider("together")
+class TogetherModel(DelegateOnly):
+    provider_config = TogetherConfig()
 
     def __init__(
         self,
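With the `together` SDK gone from the imports, the provider relies entirely on Together's OpenAI-compatible endpoint (see the delegate set up in the next hunk). A sketch of what the delegate's client amounts to, assuming `create_openai_client_with_defaults` is a thin wrapper around `AsyncOpenAI` (its exact defaults are not shown in this diff; the key placeholder is illustrative):

```python
from openai import AsyncOpenAI

client = AsyncOpenAI(
    api_key="<TOGETHER_API_KEY>",
    base_url="https://api.together.xyz/v1",  # same base_url as the delegate below
)
```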
@@ -55,187 +31,18 @@ class TogetherModel(LLM):
         config: LLMConfig | None = None,
     ):
         super().__init__(model_name, provider, config=config)
-
         # https://docs.together.ai/docs/openai-api-compatibility
-        self.delegate
-
-
-
-
-
-
-
-
-                base_url="https://api.together.xyz/v1",
-            ),
-            use_completions=False,
-        )
-        )
-
-    @override
-    async def parse_input(
-        self,
-        input: Sequence[InputItem],
-        **kwargs: Any,
-    ) -> list[dict[str, Any] | Any]:
-        new_input: list[dict[str, Any] | Any] = []
-        content_user: list[dict[str, Any]] = []
-
-        def flush_content_user():
-            nonlocal content_user
-
-            if content_user:
-                new_input.append({"role": "user", "content": content_user})
-                content_user = []
-
-        for item in input:
-            match item:
-                case TextInput():
-                    content_user.append({"type": "text", "text": item.text})
-                case FileWithBase64() | FileWithUrl() | FileWithId():
-                    match item.type:
-                        case "image":
-                            content_user.append(await self.parse_image(item))
-                        case "file":
-                            content_user.append(await self.parse_file(item))
-                case ChatCompletionMessage():
-                    flush_content_user()
-                    new_input.append(item)
-                case _:
-                    raise BadInputError("Unsupported input type")
-
-        flush_content_user()
-
-        return new_input
-
-    @override
-    async def parse_image(
-        self,
-        image: FileInput,
-    ) -> dict[str, Any]:
-        match image:
-            case FileWithBase64():
-                return {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/{image.mime};base64,{image.base64}"
-                    },
-                }
-            case _:
-                # docs show that we can pass in s3 location somehow
-                raise BadInputError("Unsupported image type")
-
-    @override
-    async def parse_file(
-        self,
-        file: FileInput,
-    ) -> Any:
-        raise NotImplementedError()
-
-    @override
-    async def parse_tools(
-        self,
-        tools: list[ToolDefinition],
-    ) -> Any:
-        raise NotImplementedError()
-
-    @override
-    async def upload_file(
-        self,
-        name: str,
-        mime: str,
-        bytes: io.BytesIO,
-        type: Literal["image", "file"] = "file",
-    ) -> FileWithId:
-        raise NotImplementedError()
-
-    @override
-    async def _query_impl(
-        self,
-        input: Sequence[InputItem],
-        *,
-        tools: list[ToolDefinition],
-        **kwargs: object,
-    ) -> QueryResult:
-        if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
-
-        # llama supports max 5 images
-        if "lama-4" in self.model_name:
-            input = trim_images(input, max_images=5)
-
-        messages: list[dict[str, Any]] = []
-
-        if "nemotron-super" in self.model_name:
-            # move system prompt to prompt
-            if "system_prompt" in kwargs:
-                first_text_item = next(
-                    (item for item in input if isinstance(item, TextInput)), None
-                )
-                if not first_text_item:
-                    raise Exception(
-                        "Given system prompt for nemotron-super model, but no text input found"
-                    )
-                system_prompt = kwargs.pop("system_prompt")
-                first_text_item.text = f"SYSTEM PROMPT: {system_prompt}\nUSER PROMPT: {first_text_item.text}"
-
-            # set system prompt to detailed thinking
-            mode = "on" if self.reasoning else "off"
-            kwargs["system_prompt"] = f"detailed thinking {mode}"
-            messages.append(
-                {
-                    "role": "system",
-                    "content": f"detailed thinking {mode}",
-                }
-            )
-
-        if "system_prompt" in kwargs:
-            messages.append({"role": "system", "content": kwargs.pop("system_prompt")})
-
-        messages.extend(await self.parse_input(input))
-
-        body: dict[str, Any] = {
-            "max_tokens": self.max_tokens,
-            "model": self.model_name,
-            "messages": messages,
-        }
-
-        if self.supports_temperature:
-            if self.temperature is not None:
-                body["temperature"] = self.temperature
-            if self.top_p is not None:
-                body["top_p"] = self.top_p
-
-        body.update(kwargs)
-
-        response = await self.get_client().chat.completions.create(**body, stream=False)  # pyright: ignore[reportAny]
-
-        response = cast(ChatCompletionResponse, response)
-
-        if not response or not response.choices or not response.choices[0].message:
-            raise ModelNoOutputError("Model returned no completions")
-
-        text = str(response.choices[0].message.content)
-        reasoning = None
-
-        if response.choices[0].finish_reason == "length" and not text:
-            raise MaxOutputTokensExceededError()
-
-        if self.reasoning:
-            text, reasoning = get_reasoning_in_tag(text)
-
-        output = QueryResult(
-            output_text=text,
-            reasoning=reasoning,
-            history=[*input, response.choices[0].message],
+        self.delegate = OpenAIModel(
+            model_name=self.model_name,
+            provider=self.provider,
+            config=config,
+            custom_client=create_openai_client_with_defaults(
+                api_key=model_library_settings.TOGETHER_API_KEY,
+                base_url="https://api.together.xyz/v1",
+            ),
+            use_completions=True,
         )
 
-        if response.usage:
-            output.metadata.in_tokens = response.usage.prompt_tokens
-            output.metadata.out_tokens = response.usage.completion_tokens
-            # no cache tokens it seems
-        return output
-
     @override
     async def _calculate_cost(
         self,
model_library/register_models.py
CHANGED
@@ -27,9 +27,25 @@ You can set metadata configs that are not passed into the LLMConfig class here,
 """
 
 
+class Supports(BaseModel):
+    images: bool | None = None
+    videos: bool | None = None
+    files: bool | None = None
+    batch: bool | None = None
+    temperature: bool | None = None
+    tools: bool | None = None
+
+
+class Metadata(BaseModel):
+    deprecated: bool = False
+    available_for_everyone: bool = True
+    available_as_evaluator: bool = False
+    ignored_for_cost: bool = False
+
+
 class Properties(BaseModel):
     context_window: int | None = None
-
+    max_tokens: int | None = None
     training_cutoff: str | None = None
     reasoning_model: bool | None = None
 
@@ -118,33 +134,9 @@ class CostProperties(BaseModel):
     context: ContextCost | None = None
 
 
-class ClassProperties(BaseModel):
-    supports_images: bool | None = None
-    supports_videos: bool | None = None
-    supports_files: bool | None = None
-    supports_batch_requests: bool | None = None
-    supports_temperature: bool | None = None
-    supports_tools: bool | None = None
-    # vals specific
-    deprecated: bool = False
-    available_for_everyone: bool = True
-    available_as_evaluator: bool = False
-    ignored_for_cost: bool = False
-
-
-"""
-Each provider can have a set of provider-specific properties, we however want to accept
-any possible property from a provider in the yaml, and validate later. So we join all
-provider-specific properties into a single class.
-This has no effect on runtime use of ProviderConfig, only used to load the yaml
-"""
-
-
 class BaseProviderProperties(BaseModel):
     """Static base class for dynamic ProviderProperties."""
 
-    pass
-
 
 def all_subclasses(cls: type) -> list[type]:
     """Recursively find all subclasses of a class."""
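Taken together, these two hunks split the old flat `ClassProperties` into two nested models: the capability flags move to `Supports` and the vals-specific flags to `Metadata`. An illustrative mapping; the field correspondence is inferred from the matching names rather than stated in the diff:

```python
from model_library.register_models import Metadata, Supports

old_class_properties = {
    "supports_images": True,
    "supports_tools": True,
    "deprecated": False,
    "available_as_evaluator": True,
}

supports = Supports(
    images=old_class_properties["supports_images"],
    tools=old_class_properties["supports_tools"],
)
metadata = Metadata(
    deprecated=old_class_properties["deprecated"],
    available_as_evaluator=old_class_properties["available_as_evaluator"],
)
```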
@@ -174,9 +166,9 @@ def get_dynamic_provider_properties_model() -> type[BaseProviderProperties]:
 
 
 class DefaultParameters(BaseModel):
-    max_output_tokens: int | None = None
     temperature: float | None = None
     top_p: float | None = None
+    top_k: int | None = None
     reasoning_effort: str | None = None
 
 
@@ -188,26 +180,29 @@ class RawModelConfig(BaseModel):
     open_source: bool
     documentation_url: str | None = None
     properties: Properties = Field(default_factory=Properties)
-
-
+    supports: Supports
+    metadata: Metadata = Field(default_factory=Metadata)
+    provider_properties: BaseProviderProperties = Field(
+        default_factory=BaseProviderProperties
+    )
     costs_per_million_token: CostProperties = Field(default_factory=CostProperties)
     alternative_keys: list[str | dict[str, Any]] = Field(default_factory=list)
     default_parameters: DefaultParameters = Field(default_factory=DefaultParameters)
+    provider_endpoint: str | None = None
 
     def model_dump(self, *args: object, **kwargs: object):
         data = super().model_dump(*args, **kwargs)
-
-
-
-
-        )
+        # explicitly dump dynamic ProviderProperties instance
+        data["provider_properties"] = self.provider_properties.model_dump(
+            *args, **kwargs
+        )
         return data
 
 
 class ModelConfig(RawModelConfig):
     # post processing fields
+    provider_endpoint: str  # pyright: ignore[reportIncompatibleVariableOverride, reportGeneralTypeIssues]
     provider_name: str
-    provider_endpoint: str
     full_key: str
     slug: str
 
@@ -274,14 +269,17 @@ def _register_models() -> ModelRegistry:
                 current_model_config, model_config
             )
 
+            provider_properties = current_model_config.pop(
+                "provider_properties", {}
+            )
+
             # create model config object
             raw_model_obj: RawModelConfig = RawModelConfig.model_validate(
-                current_model_config, strict=True
+                current_model_config, strict=True, extra="forbid"
             )
 
             provider_endpoint = (
-
-                or model_name.split("/", 1)[1]
+                raw_model_obj.provider_endpoint or model_name.split("/", 1)[1]
             )
             # add provider metadata
             model_obj = ModelConfig.model_validate(
@@ -295,7 +293,7 @@ def _register_models() -> ModelRegistry:
             )
             # load provider properties separately since the model was generated at runtime
             model_obj.provider_properties = ProviderProperties.model_validate(
-
+                provider_properties
             )
 
             registry[model_name] = model_obj
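Registration now also lets a YAML entry pin `provider_endpoint` explicitly, falling back to everything after the first `/` of the model key. A worked example of that fallback rule; the model key is illustrative:

```python
model_name = "together/meta-llama/Llama-3.3-70B-Instruct-Turbo"  # illustrative key
explicit_endpoint = None  # what raw_model_obj.provider_endpoint holds when the YAML omits it

provider_endpoint = explicit_endpoint or model_name.split("/", 1)[1]
print(provider_endpoint)  # meta-llama/Llama-3.3-70B-Instruct-Turbo
```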