model-library 0.1.4__tar.gz → 0.1.6__tar.gz

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
Files changed (127)
  1. {model_library-0.1.4 → model_library-0.1.6}/Makefile +3 -1
  2. {model_library-0.1.4 → model_library-0.1.6}/PKG-INFO +1 -1
  3. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/base.py +16 -12
  4. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/delegate_only.py +5 -1
  5. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/output.py +24 -10
  6. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/utils.py +27 -5
  7. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/all_models.json +143 -2
  8. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/fireworks_models.yaml +2 -0
  9. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/google_models.yaml +15 -0
  10. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/minimax_models.yaml +18 -0
  11. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/perplexity_models.yaml +2 -0
  12. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/zai_models.yaml +14 -0
  13. {model_library-0.1.4 → model_library-0.1.6}/model_library/exceptions.py +1 -1
  14. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/ai21labs.py +2 -0
  15. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/amazon.py +2 -0
  16. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/anthropic.py +37 -11
  17. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/google.py +10 -2
  18. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/minimax.py +10 -10
  19. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/mistral.py +2 -0
  20. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/openai.py +9 -4
  21. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/vals.py +2 -0
  22. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/xai.py +6 -2
  23. {model_library-0.1.4 → model_library-0.1.6}/model_library/register_models.py +1 -1
  24. {model_library-0.1.4 → model_library-0.1.6}/model_library/registry_utils.py +60 -0
  25. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/PKG-INFO +1 -1
  26. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/SOURCES.txt +2 -0
  27. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/test_fireworks_provider.py +1 -0
  28. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_deep_research.py +4 -2
  29. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_perplexity_provider.py +3 -1
  30. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_prompt_caching.py +4 -1
  31. model_library-0.1.6/tests/unit/test_query_logger.py +21 -0
  32. model_library-0.1.6/tests/unit/test_result_metadata.py +206 -0
  33. {model_library-0.1.4 → model_library-0.1.6}/.gitattributes +0 -0
  34. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/publish.yml +0 -0
  35. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/style.yaml +0 -0
  36. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/test.yaml +0 -0
  37. {model_library-0.1.4 → model_library-0.1.6}/.github/workflows/typecheck.yml +0 -0
  38. {model_library-0.1.4 → model_library-0.1.6}/.gitignore +0 -0
  39. {model_library-0.1.4 → model_library-0.1.6}/LICENSE +0 -0
  40. {model_library-0.1.4 → model_library-0.1.6}/README.md +0 -0
  41. {model_library-0.1.4 → model_library-0.1.6}/examples/README.md +0 -0
  42. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/batch.py +0 -0
  43. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/custom_retrier.py +0 -0
  44. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/deep_research.py +0 -0
  45. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/stress.py +0 -0
  46. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/structured_output.py +0 -0
  47. {model_library-0.1.4 → model_library-0.1.6}/examples/advanced/web_search.py +0 -0
  48. {model_library-0.1.4 → model_library-0.1.6}/examples/basics.py +0 -0
  49. {model_library-0.1.4 → model_library-0.1.6}/examples/data/files.py +0 -0
  50. {model_library-0.1.4 → model_library-0.1.6}/examples/data/images.py +0 -0
  51. {model_library-0.1.4 → model_library-0.1.6}/examples/embeddings.py +0 -0
  52. {model_library-0.1.4 → model_library-0.1.6}/examples/files.py +0 -0
  53. {model_library-0.1.4 → model_library-0.1.6}/examples/images.py +0 -0
  54. {model_library-0.1.4 → model_library-0.1.6}/examples/prompt_caching.py +0 -0
  55. {model_library-0.1.4 → model_library-0.1.6}/examples/setup.py +0 -0
  56. {model_library-0.1.4 → model_library-0.1.6}/examples/tool_calls.py +0 -0
  57. {model_library-0.1.4 → model_library-0.1.6}/model_library/__init__.py +0 -0
  58. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/__init__.py +0 -0
  59. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/batch.py +0 -0
  60. {model_library-0.1.4 → model_library-0.1.6}/model_library/base/input.py +0 -0
  61. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/README.md +0 -0
  62. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/ai21labs_models.yaml +0 -0
  63. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/alibaba_models.yaml +0 -0
  64. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/amazon_models.yaml +0 -0
  65. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/anthropic_models.yaml +0 -0
  66. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/cohere_models.yaml +0 -0
  67. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/deepseek_models.yaml +0 -0
  68. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/dummy_model.yaml +0 -0
  69. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/inception_models.yaml +0 -0
  70. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/kimi_models.yaml +0 -0
  71. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/mistral_models.yaml +0 -0
  72. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/openai_models.yaml +0 -0
  73. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/together_models.yaml +0 -0
  74. {model_library-0.1.4 → model_library-0.1.6}/model_library/config/xai_models.yaml +0 -0
  75. {model_library-0.1.4 → model_library-0.1.6}/model_library/file_utils.py +0 -0
  76. {model_library-0.1.4 → model_library-0.1.6}/model_library/logging.py +0 -0
  77. {model_library-0.1.4 → model_library-0.1.6}/model_library/model_utils.py +0 -0
  78. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/__init__.py +0 -0
  79. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/alibaba.py +0 -0
  80. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/azure.py +0 -0
  81. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/cohere.py +0 -0
  82. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/deepseek.py +0 -0
  83. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/fireworks.py +0 -0
  84. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/__init__.py +0 -0
  85. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/google/batch.py +0 -0
  86. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/inception.py +0 -0
  87. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/kimi.py +0 -0
  88. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/perplexity.py +0 -0
  89. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/together.py +0 -0
  90. {model_library-0.1.4 → model_library-0.1.6}/model_library/providers/zai.py +0 -0
  91. {model_library-0.1.4 → model_library-0.1.6}/model_library/py.typed +0 -0
  92. {model_library-0.1.4 → model_library-0.1.6}/model_library/settings.py +0 -0
  93. {model_library-0.1.4 → model_library-0.1.6}/model_library/utils.py +0 -0
  94. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/dependency_links.txt +0 -0
  95. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/requires.txt +0 -0
  96. {model_library-0.1.4 → model_library-0.1.6}/model_library.egg-info/top_level.txt +0 -0
  97. {model_library-0.1.4 → model_library-0.1.6}/pyproject.toml +0 -0
  98. {model_library-0.1.4 → model_library-0.1.6}/scripts/browse_models.py +0 -0
  99. {model_library-0.1.4 → model_library-0.1.6}/scripts/config.py +0 -0
  100. {model_library-0.1.4 → model_library-0.1.6}/scripts/publish.py +0 -0
  101. {model_library-0.1.4 → model_library-0.1.6}/scripts/run_models.py +0 -0
  102. {model_library-0.1.4 → model_library-0.1.6}/setup.cfg +0 -0
  103. {model_library-0.1.4 → model_library-0.1.6}/tests/README.md +0 -0
  104. {model_library-0.1.4 → model_library-0.1.6}/tests/__init__.py +0 -0
  105. {model_library-0.1.4 → model_library-0.1.6}/tests/conftest.py +0 -0
  106. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/__init__.py +0 -0
  107. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/conftest.py +0 -0
  108. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_batch.py +0 -0
  109. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_completion.py +0 -0
  110. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_files.py +0 -0
  111. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_reasoning.py +0 -0
  112. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_retry.py +0 -0
  113. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_streaming.py +0 -0
  114. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_structured_output.py +0 -0
  115. {model_library-0.1.4 → model_library-0.1.6}/tests/integration/test_tools.py +0 -0
  116. {model_library-0.1.4 → model_library-0.1.6}/tests/test_helpers.py +0 -0
  117. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/__init__.py +0 -0
  118. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/conftest.py +0 -0
  119. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/__init__.py +0 -0
  120. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/providers/test_google_provider.py +0 -0
  121. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_batch.py +0 -0
  122. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_context_window.py +0 -0
  123. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_registry.py +0 -0
  124. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_retry.py +0 -0
  125. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_streaming.py +0 -0
  126. {model_library-0.1.4 → model_library-0.1.6}/tests/unit/test_tools.py +0 -0
  127. {model_library-0.1.4 → model_library-0.1.6}/uv.lock +0 -0
Makefile
@@ -16,8 +16,10 @@ help:
 	@echo " make examples <model> Run all examples with specified model"
 	@echo " make browse_models Interactively browse models and their configurations"
 
+PYTHON_VERSION ?= 3.11
+
 install:
-	uv venv
+	uv venv --python $(PYTHON_VERSION)
 	uv sync --dev
 	@echo "🎉 Done! Run 'source .venv/bin/activate' to activate the environment locally."
 
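Because PYTHON_VERSION is declared with ?=, it can be overridden per invocation, e.g. make install PYTHON_VERSION=3.12.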
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.4
+Version: 0.1.6
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT
model_library/base/base.py
@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
 from collections.abc import Awaitable
 from pprint import pformat
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Literal,
@@ -43,9 +42,6 @@ from model_library.exceptions import (
 )
 from model_library.utils import truncate_str
 
-if TYPE_CHECKING:
-    from model_library.providers.openai import OpenAIModel
-
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
 
 
@@ -66,7 +62,7 @@ class LLMConfig(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -110,7 +106,7 @@ class LLM(ABC):
         self.top_k: int | None = config.top_k
 
         self.reasoning: bool = config.reasoning
-        self.reasoning_effort: str | None = config.reasoning_effort
+        self.reasoning_effort: str | bool | None = config.reasoning_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -120,7 +116,7 @@ class LLM(ABC):
         self.supports_tools: bool = config.supports_tools
 
         self.native: bool = config.native
-        self.delegate: "OpenAIModel | None" = None
+        self.delegate: "LLM | None" = None
         self.batch: LLMBatchMixin | None = None
 
         if config.provider_config:
@@ -198,11 +194,14 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition] = [],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if not self.delegate:
             raise Exception("Delegate not set")
-        return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+        return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     async def query(
         self,
@@ -213,6 +212,7 @@ class LLM(ABC):
         # for backwards compatibility
         files: list[FileInput] = [],
         images: list[FileInput] = [],
+        query_logger: logging.Logger | None = None,
         **kwargs: object,
     ) -> QueryResult:
         """
@@ -256,15 +256,18 @@ class LLM(ABC):
         input = [*history, *input]
 
         # unique logger for the query
-        query_id = uuid.uuid4().hex[:14]
-        query_logger = self.logger.getChild(f"query={query_id}")
+        if not query_logger:
+            query_id = uuid.uuid4().hex[:14]
+            query_logger = self.logger.getChild(f"query={query_id}")
 
         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
         )
 
         async def query_func() -> QueryResult:
-            return await self._query_impl(input, tools=tools, **kwargs)
+            return await self._query_impl(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         async def timed_query() -> tuple[QueryResult, float]:
             return await LLM.timer_wrapper(query_func)
@@ -361,7 +364,8 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        **kwargs: object,  # TODO: pass in query logger
+        query_logger: logging.Logger,
+        **kwargs: object,
     ) -> QueryResult:
         """
         Query the model with input
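Taken together, these base.py changes thread a caller-supplied logger from query() down to _query_impl. A minimal usage sketch, assuming a model instance obtained elsewhere; the get_model factory, model key, and plain-string input are illustrative, not part of this diff:

    import asyncio
    import logging

    async def main() -> None:
        model = get_model("openai/gpt-5.2-pro-2025-12-11")  # hypothetical factory
        app_logger = logging.getLogger("my_app.llm")
        # As of 0.1.6, query() reuses a caller-supplied logger instead of always
        # minting a per-query child logger; providers receive it via _query_impl.
        result = await model.query("Hello!", query_logger=app_logger)
        print(result)

    asyncio.run(main())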
model_library/base/delegate_only.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from typing_extensions import override
@@ -48,11 +49,14 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate
 
-        return await self.delegate_query(input, tools=tools, **kwargs)
+        return await self.delegate_query(
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     @override
     async def parse_input(
model_library/base/output.py
@@ -9,9 +9,7 @@ from pydantic import BaseModel, Field, computed_field, field_validator
 from typing_extensions import override
 
 from model_library.base.input import InputItem, ToolCall
-from model_library.base.utils import (
-    sum_optional,
-)
+from model_library.base.utils import add_optional
 from model_library.utils import truncate_str
 
 
@@ -42,10 +40,14 @@ class QueryResultCost(BaseModel):
     reasoning: float | None = None
     cache_read: float | None = None
     cache_write: float | None = None
+    total_override: float | None = None
 
     @computed_field
     @property
     def total(self) -> float:
+        if self.total_override is not None:
+            return self.total_override
+
         return sum(
             filter(
                 None,
@@ -86,6 +88,16 @@ class QueryResultCost(BaseModel):
             )
         )
 
+    def __add__(self, other: "QueryResultCost") -> "QueryResultCost":
+        return QueryResultCost(
+            input=self.input + other.input,
+            output=self.output + other.output,
+            reasoning=add_optional(self.reasoning, other.reasoning),
+            cache_read=add_optional(self.cache_read, other.cache_read),
+            cache_write=add_optional(self.cache_write, other.cache_write),
+            total_override=add_optional(self.total_override, other.total_override),
+        )
+
     @override
     def __repr__(self):
         use_cents = self.total < 1
@@ -150,18 +162,20 @@ class QueryResultMetadata(BaseModel):
         return QueryResultMetadata(
             in_tokens=self.in_tokens + other.in_tokens,
             out_tokens=self.out_tokens + other.out_tokens,
-            reasoning_tokens=sum_optional(
-                self.reasoning_tokens, other.reasoning_tokens
+            reasoning_tokens=cast(
+                int | None, add_optional(self.reasoning_tokens, other.reasoning_tokens)
             ),
-            cache_read_tokens=sum_optional(
-                self.cache_read_tokens, other.cache_read_tokens
+            cache_read_tokens=cast(
+                int | None,
+                add_optional(self.cache_read_tokens, other.cache_read_tokens),
             ),
-            cache_write_tokens=sum_optional(
-                self.cache_write_tokens, other.cache_write_tokens
+            cache_write_tokens=cast(
+                int | None,
+                add_optional(self.cache_write_tokens, other.cache_write_tokens),
             ),
             duration_seconds=self.default_duration_seconds
             + other.default_duration_seconds,
-            cost=self.cost,
+            cost=cast(QueryResultCost | None, add_optional(self.cost, other.cost)),
         )
 
     @override
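A short sketch of the new cost semantics; the values are illustrative, and it assumes input and output are required float fields and that total sums the component fields, as the code above implies:

    from model_library.base.output import QueryResultCost

    a = QueryResultCost(input=0.010, output=0.020, cache_read=0.001)
    b = QueryResultCost(input=0.005, output=0.010)  # cache fields stay None

    merged = a + b                     # new __add__: None-aware per-field merge
    assert merged.cache_read == 0.001  # None on one side keeps the known value
    assert abs(merged.total - 0.046) < 1e-9

    # total_override short-circuits the computed sum, e.g. when a provider
    # reports one authoritative figure that does not decompose cleanly.
    fixed = QueryResultCost(input=0.0, output=0.0, total_override=1.25)
    assert fixed.total == 1.25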
model_library/base/utils.py
@@ -1,4 +1,4 @@
-from typing import Sequence, cast
+from typing import Sequence, TypeVar, cast
 
 from model_library.base.input import (
     FileBase,
@@ -8,17 +8,39 @@ from model_library.base.input import (
     ToolResult,
 )
 from model_library.utils import truncate_str
+from pydantic import BaseModel
 
+T = TypeVar("T", bound=BaseModel)
 
-def sum_optional(a: int | None, b: int | None) -> int | None:
-    """Sum two optional integers, returning None if both are None.
+
+def add_optional(
+    a: int | float | T | None, b: int | float | T | None
+) -> int | float | T | None:
+    """Add two optional objects, returning None if both are None.
 
     Preserves None to indicate "unknown/not provided" when both inputs are None,
-    otherwise treats None as 0 for summation.
+    otherwise returns the non-None value or their sum.
     """
     if a is None and b is None:
         return None
-    return (a or 0) + (b or 0)
+
+    if a is None or b is None:
+        return a or b
+
+    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+        return a + b
+
+    # NOTE: Ensure that the subtypes are the same so we can use the __add__ method just from one
+    if type(a) is type(b):
+        add_method = getattr(a, "__add__", None)
+        if add_method is not None:
+            return add_method(b)
+    else:
+        raise ValueError(
+            f"Cannot add {type(a)} and {type(b)} because they are not the same subclass"
+        )
+
+    return None
 
 
 def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
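The behavior of the widened helper, in brief; the last line is an observation about the `a or b` branch, not documented behavior:

    from model_library.base.utils import add_optional

    assert add_optional(None, None) is None  # "unknown" stays unknown
    assert add_optional(3, None) == 3        # a single known value passes through
    assert add_optional(3, 4) == 7           # plain numbers are summed
    # Same-class pydantic models combine via their own __add__ (QueryResultCost
    # above defines one); mixing two different classes raises ValueError.
    assert add_optional(0, None) is None     # falsy zero falls through `a or b`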
model_library/config/all_models.json
@@ -1,4 +1,144 @@
 {
+  "minimax/MiniMax-M2.1": {
+    "company": "MiniMax",
+    "label": "MiniMax-M2.1",
+    "description": null,
+    "release_date": "2025-12-23",
+    "open_source": true,
+    "documentation_url": "https://platform.minimax.io/docs",
+    "properties": {
+      "context_window": 204800,
+      "max_tokens": 131000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.3,
+      "output": 1.2,
+      "cache": {
+        "read": 0.03,
+        "write": 0.375,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 0.95
+    },
+    "provider_endpoint": "MiniMax-M2.1",
+    "provider_name": "minimax",
+    "full_key": "minimax/MiniMax-M2.1",
+    "slug": "minimax_MiniMax-M2.1"
+  },
+  "zai/glm-4.7": {
+    "company": "zAI",
+    "label": "GLM 4.7",
+    "description": "Latest model from ZAI",
+    "release_date": "2025-12-22",
+    "open_source": true,
+    "documentation_url": "https://docs.z.ai/",
+    "properties": {
+      "context_window": 200000,
+      "max_tokens": 128000,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": false,
+      "files": false,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.6,
+      "output": 2.2,
+      "cache": {
+        "read": 0.11,
+        "read_discount": 1.0,
+        "write_markup": 1.0
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "top_p": 1.0
+    },
+    "provider_endpoint": "glm-4.7",
+    "provider_name": "zai",
+    "full_key": "zai/glm-4.7",
+    "slug": "zai_glm-4.7"
+  },
+  "google/gemini-3-flash-preview": {
+    "company": "Google",
+    "label": "Gemini 3 Flash (12/25)",
+    "description": "Google's newest budget workhorse model",
+    "release_date": "2025-12-17",
+    "open_source": false,
+    "documentation_url": "https://ai.google.dev/gemini-api/docs/models",
+    "properties": {
+      "context_window": 1048576,
+      "max_tokens": 65536,
+      "training_cutoff": null,
+      "reasoning_model": true
+    },
+    "supports": {
+      "images": true,
+      "videos": true,
+      "files": true,
+      "batch": true,
+      "temperature": true,
+      "tools": true
+    },
+    "metadata": {
+      "deprecated": false,
+      "available_for_everyone": true,
+      "available_as_evaluator": false,
+      "ignored_for_cost": false
+    },
+    "provider_properties": {},
+    "costs_per_million_token": {
+      "input": 0.5,
+      "output": 3.0,
+      "cache": {
+        "read_discount": 0.1,
+        "write_markup": 1.0
+      },
+      "batch": {
+        "input_discount": 0.5,
+        "output_discount": 0.5
+      }
+    },
+    "alternative_keys": [],
+    "default_parameters": {
+      "temperature": 1.0,
+      "reasoning_effort": "high"
+    },
+    "provider_endpoint": "gemini-3-flash-preview",
+    "provider_name": "google",
+    "full_key": "google/gemini-3-flash-preview",
+    "slug": "google_gemini-3-flash-preview"
+  },
   "openai/gpt-5.2-pro-2025-12-11": {
     "company": "OpenAI",
     "label": "GPT 5.2 Pro",
@@ -454,7 +594,8 @@
       }
     ],
     "default_parameters": {
-      "temperature": 1.0
+      "temperature": 1.0,
+      "reasoning_effort": "none"
     },
     "provider_endpoint": "deepseek-v3p2",
     "provider_name": "fireworks",
@@ -15428,7 +15569,7 @@
       "tools": false
     },
     "metadata": {
-      "deprecated": false,
+      "deprecated": true,
       "available_for_everyone": true,
       "available_as_evaluator": false,
       "ignored_for_cost": false
model_library/config/fireworks_models.yaml
@@ -150,6 +150,8 @@ deepseek-models:
       context_window: 160_000
       max_tokens: 20_480
       reasoning_model: false
+    default_parameters:
+      reasoning_effort: "none"
     costs_per_million_token:
       input: 0.56
       output: 1.68
model_library/config/google_models.yaml
@@ -54,6 +54,21 @@ gemini-3-models:
       temperature: 1
       reasoning_effort: "high"
 
+  google/gemini-3-flash-preview:
+    label: Gemini 3 Flash (12/25)
+    description: Google's newest budget workhorse model
+    release_date: 2025-12-17
+    properties:
+      context_window: 1048576
+      max_tokens: 65536
+      reasoning_model: true
+    costs_per_million_token:
+      input: 0.50
+      output: 3.00
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+
   google/gemini-3-pro-preview:
     label: Gemini 3 Pro (11/25)
     description: Gemini 3 Pro, Google's most powerful model.
model_library/config/minimax_models.yaml
@@ -16,6 +16,24 @@ base-config:
 
 minimax-m2-models:
 
+  minimax/MiniMax-M2.1:
+    label: MiniMax-M2.1
+    release_date: 2025-12-23
+    properties:
+      context_window: 204_800
+      max_tokens: 131_000
+      reasoning_model: true
+      training_cutoff: null
+    default_parameters:
+      temperature: 1.0
+      top_p: 0.95
+    costs_per_million_token:
+      input: 0.30
+      output: 1.20
+      cache:
+        read: 0.03
+        write: 0.375
+
   minimax/MiniMax-M2:
     label: MiniMax-M2
     description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.
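As a sanity check on the MiniMax-M2.1 rates above, per-request cost is a straight application of the per-million-token prices. The token counts below are made up, and billing cached reads at the cache read rate instead of the input rate is an assumption about the billing convention:

    # Rates from the MiniMax-M2.1 entry: $0.30/M input, $1.20/M output, $0.03/M cache read.
    in_tokens, out_tokens, cached = 800_000, 50_000, 200_000
    cost = (
        (in_tokens - cached) / 1e6 * 0.30  # uncached input
        + cached / 1e6 * 0.03              # cache reads
        + out_tokens / 1e6 * 1.20          # output
    )
    print(f"${cost:.3f}")  # $0.246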
model_library/config/perplexity_models.yaml
@@ -46,6 +46,8 @@ perplexity-models:
     label: Sonar Reasoning
     description: Reasoning-focused search model that exposes intermediate thinking for step-by-step answers.
     documentation_url: https://docs.perplexity.ai/models/models/sonar-reasoning
+    metadata:
+      deprecated: true
     properties:
       context_window: 128000
       reasoning_model: true
model_library/config/zai_models.yaml
@@ -18,6 +18,20 @@ base-config:
         write_markup: 1
 
 zai-models:
+  zai/glm-4.7:
+    label: GLM 4.7
+    description: "Latest model from ZAI"
+    release_date: 2025-12-22
+    properties:
+      context_window: 200_000
+      max_tokens: 128_000
+    costs_per_million_token:
+      input: 0.6
+      output: 2.2
+      cache:
+        read: 0.11
+    default_parameters:
+      temperature: 1
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"
model_library/exceptions.py
@@ -183,8 +183,8 @@ RETRIABLE_EXCEPTION_CODES = [
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
-    "throttlingexception",  # AWS throttling errors
     "internal server error",
+    "InternalServerError",
 ]
 
 
model_library/providers/ai21labs.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from ai21 import AsyncAI21Client
@@ -137,6 +138,7 @@ class AI21LabsModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         messages: list[ChatMessage] = []
model_library/providers/amazon.py
@@ -3,6 +3,7 @@ import asyncio
 import base64
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import boto3
@@ -337,6 +338,7 @@ class AmazonModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.build_body(input, tools=tools, **kwargs)
model_library/providers/anthropic.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from anthropic import AsyncAnthropic
@@ -249,6 +250,8 @@ class AnthropicModel(LLM):
 
     @override
     def get_client(self) -> AsyncAnthropic:
+        if self._delegate_client:
+            return self._delegate_client
         if not AnthropicModel._client:
             headers: dict[str, str] = {}
             AnthropicModel._client = AsyncAnthropic(
@@ -262,16 +265,20 @@ class AnthropicModel(LLM):
     def __init__(
         self,
         model_name: str,
-        provider: Literal["anthropic"] = "anthropic",
+        provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
+        custom_client: AsyncAnthropic | None = None,
     ):
         super().__init__(model_name, provider, config=config)
 
+        # allow custom client to act as delegate (native)
+        self._delegate_client: AsyncAnthropic | None = custom_client
+
         # https://docs.anthropic.com/en/api/openai-sdk
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
-            if self.native
+            if self.native or custom_client
             else OpenAIModel(
                 model_name=self.model_name,
                 provider=provider,
@@ -285,7 +292,10 @@ class AnthropicModel(LLM):
         )
 
         # Initialize batch support if enabled
-        self.supports_batch: bool = self.supports_batch and self.native
+        # Disable batch when using custom_client (similar to OpenAI)
+        self.supports_batch: bool = (
+            self.supports_batch and self.native and not custom_client
+        )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
         )
@@ -555,20 +565,36 @@ class AnthropicModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         body = await self.create_body(input, tools=tools, **kwargs)
 
-        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
-        if "sonnet-4-5" in self.model_name:
-            betas.append("context-1m-2025-08-07")
+        client = self.get_client()
 
-        async with self.get_client().beta.messages.stream(
-            **body,
-            betas=betas,
+        # only send betas for the official Anthropic endpoint
+        is_anthropic_endpoint = self._delegate_client is None
+        if not is_anthropic_endpoint:
+            client_base_url = getattr(client, "_base_url", None) or getattr(
+                client, "base_url", None
+            )
+            if client_base_url:
+                is_anthropic_endpoint = "api.anthropic.com" in str(client_base_url)
+
+        stream_kwargs = {**body}
+        if is_anthropic_endpoint:
+            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            if "sonnet-4-5" in self.model_name:
+                betas.append("context-1m-2025-08-07")
+            stream_kwargs["betas"] = betas
+
+        async with client.beta.messages.stream(
+            **stream_kwargs,
        ) as stream:  # pyright: ignore[reportAny]
             message = await stream.get_final_message()
             self.logger.info(f"Anthropic Response finished: {message.id}")
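A sketch of the new custom_client hook, pointing AnthropicModel at an Anthropic-compatible gateway. The base URL, key, model name, and provider label are placeholders, and it assumes the model key resolves in the registry:

    from anthropic import AsyncAnthropic
    from model_library.providers.anthropic import AnthropicModel

    gateway = AsyncAnthropic(
        base_url="https://llm-gateway.example.com",  # placeholder endpoint
        api_key="...",
    )
    model = AnthropicModel(
        "claude-sonnet-4-5",    # placeholder model name
        provider="my-gateway",  # provider is now any str, not just "anthropic"
        custom_client=gateway,
    )
    # With a custom client: get_client() returns it, no OpenAI delegate is
    # created, batch support is disabled, and the beta flags (files API,
    # interleaved thinking, 1M context) are only sent when the client's base
    # URL is api.anthropic.com.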