model-library 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/model_library/base/base.py
+++ b/model_library/base/base.py
@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
 from collections.abc import Awaitable
 from pprint import pformat
 from typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Literal,
@@ -43,9 +42,6 @@ from model_library.exceptions import (
 )
 from model_library.utils import truncate_str
 
-if TYPE_CHECKING:
-    from model_library.providers.openai import OpenAIModel
-
 PydanticT = TypeVar("PydanticT", bound=BaseModel)
 
 
@@ -66,7 +62,7 @@ class LLMConfig(BaseModel):
     top_p: float | None = None
     top_k: int | None = None
     reasoning: bool = False
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
     supports_images: bool = False
     supports_files: bool = False
     supports_videos: bool = False
@@ -110,7 +106,7 @@ class LLM(ABC):
         self.top_k: int | None = config.top_k
 
         self.reasoning: bool = config.reasoning
-        self.reasoning_effort: str | None = config.reasoning_effort
+        self.reasoning_effort: str | bool | None = config.reasoning_effort
 
         self.supports_files: bool = config.supports_files
         self.supports_videos: bool = config.supports_videos
@@ -120,7 +116,7 @@ class LLM(ABC):
         self.supports_tools: bool = config.supports_tools
 
         self.native: bool = config.native
-        self.delegate: "OpenAIModel | None" = None
+        self.delegate: "LLM | None" = None
         self.batch: LLMBatchMixin | None = None
 
         if config.provider_config:
@@ -198,11 +194,14 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition] = [],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if not self.delegate:
             raise Exception("Delegate not set")
-        return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+        return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     async def query(
         self,
@@ -213,6 +212,7 @@ class LLM(ABC):
         # for backwards compatibility
         files: list[FileInput] = [],
         images: list[FileInput] = [],
+        query_logger: logging.Logger | None = None,
         **kwargs: object,
     ) -> QueryResult:
         """
@@ -256,15 +256,18 @@ class LLM(ABC):
         input = [*history, *input]
 
         # unique logger for the query
-        query_id = uuid.uuid4().hex[:14]
-        query_logger = self.logger.getChild(f"query={query_id}")
+        if not query_logger:
+            query_id = uuid.uuid4().hex[:14]
+            query_logger = self.logger.getChild(f"query={query_id}")
 
         query_logger.info(
             "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
         )
 
         async def query_func() -> QueryResult:
-            return await self._query_impl(input, tools=tools, **kwargs)
+            return await self._query_impl(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         async def timed_query() -> tuple[QueryResult, float]:
             return await LLM.timer_wrapper(query_func)
@@ -361,7 +364,8 @@ class LLM(ABC):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        **kwargs: object,  # TODO: pass in query logger
+        query_logger: logging.Logger,
+        **kwargs: object,
     ) -> QueryResult:
         """
         Query the model with input
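The thread running through these hunks: `_query_impl` now takes a required `query_logger`, and `query()` only mints its uuid-suffixed child logger when the caller has not supplied one, so a single logger can follow a query through retries and delegate hops. A minimal usage sketch; the concrete model class, model key, and the plain-string input are assumptions for illustration, not taken from this diff:

```python
import asyncio
import logging

from model_library.providers.openai import OpenAIModel  # import path per this diff


async def main() -> None:
    model = OpenAIModel(model_name="gpt-4o")  # hypothetical model key
    app_logger = logging.getLogger("myapp.llm")

    # With query_logger supplied, LLM.query() skips the uuid-based child
    # logger and threads this one down into _query_impl and any delegate.
    result = await model.query("Say hi", query_logger=app_logger)
    print(result)


asyncio.run(main())
```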
--- a/model_library/base/delegate_only.py
+++ b/model_library/base/delegate_only.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from typing_extensions import override
@@ -48,11 +49,14 @@ class DelegateOnly(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         assert self.delegate
 
-        return await self.delegate_query(input, tools=tools, **kwargs)
+        return await self.delegate_query(
+            input, tools=tools, query_logger=query_logger, **kwargs
+        )
 
     @override
     async def parse_input(
--- a/model_library/base/output.py
+++ b/model_library/base/output.py
@@ -9,9 +9,7 @@ from pydantic import BaseModel, Field, computed_field, field_validator
 from typing_extensions import override
 
 from model_library.base.input import InputItem, ToolCall
-from model_library.base.utils import (
-    sum_optional,
-)
+from model_library.base.utils import add_optional
 from model_library.utils import truncate_str
 
 
@@ -42,10 +40,14 @@ class QueryResultCost(BaseModel):
     reasoning: float | None = None
     cache_read: float | None = None
     cache_write: float | None = None
+    total_override: float | None = None
 
     @computed_field
     @property
     def total(self) -> float:
+        if self.total_override is not None:
+            return self.total_override
+
         return sum(
             filter(
                 None,
@@ -86,6 +88,16 @@
             )
         )
 
+    def __add__(self, other: "QueryResultCost") -> "QueryResultCost":
+        return QueryResultCost(
+            input=self.input + other.input,
+            output=self.output + other.output,
+            reasoning=add_optional(self.reasoning, other.reasoning),
+            cache_read=add_optional(self.cache_read, other.cache_read),
+            cache_write=add_optional(self.cache_write, other.cache_write),
+            total_override=add_optional(self.total_override, other.total_override),
+        )
+
     @override
     def __repr__(self):
         use_cents = self.total < 1
@@ -150,18 +162,20 @@ class QueryResultMetadata(BaseModel):
         return QueryResultMetadata(
             in_tokens=self.in_tokens + other.in_tokens,
             out_tokens=self.out_tokens + other.out_tokens,
-            reasoning_tokens=sum_optional(
-                self.reasoning_tokens, other.reasoning_tokens
+            reasoning_tokens=cast(
+                int | None, add_optional(self.reasoning_tokens, other.reasoning_tokens)
             ),
-            cache_read_tokens=sum_optional(
-                self.cache_read_tokens, other.cache_read_tokens
+            cache_read_tokens=cast(
+                int | None,
+                add_optional(self.cache_read_tokens, other.cache_read_tokens),
             ),
-            cache_write_tokens=sum_optional(
-                self.cache_write_tokens, other.cache_write_tokens
+            cache_write_tokens=cast(
+                int | None,
+                add_optional(self.cache_write_tokens, other.cache_write_tokens),
             ),
             duration_seconds=self.default_duration_seconds
             + other.default_duration_seconds,
-            cost=self.cost,
+            cost=cast(QueryResultCost | None, add_optional(self.cost, other.cost)),
        )
 
     @override
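Taken together: `QueryResultCost` instances are now additive, which `QueryResultMetadata.__add__` uses to merge `cost` instead of keeping only the left-hand side, and `total_override` lets a provider-supplied total win over the computed sum. A behavior sketch with invented numbers:

```python
from model_library.base.output import QueryResultCost

a = QueryResultCost(input=0.010, output=0.020)
b = QueryResultCost(input=0.005, output=0.001, cache_read=0.002)

merged = a + b  # the new __add__
# input/output are summed; optional fields merge via add_optional,
# so cache_read stays 0.002 rather than collapsing to None.
print(merged.input, merged.output, merged.cache_read)

# total_override short-circuits the computed total
fixed = QueryResultCost(input=0.0, output=0.0, total_override=0.42)
assert fixed.total == 0.42
```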
--- a/model_library/base/utils.py
+++ b/model_library/base/utils.py
@@ -1,4 +1,4 @@
-from typing import Sequence, cast
+from typing import Sequence, TypeVar, cast
 
 from model_library.base.input import (
     FileBase,
@@ -8,17 +8,39 @@ from model_library.base.input import (
     ToolResult,
 )
 from model_library.utils import truncate_str
+from pydantic import BaseModel
 
+T = TypeVar("T", bound=BaseModel)
 
-def sum_optional(a: int | None, b: int | None) -> int | None:
-    """Sum two optional integers, returning None if both are None.
+
+def add_optional(
+    a: int | float | T | None, b: int | float | T | None
+) -> int | float | T | None:
+    """Add two optional objects, returning None if both are None.
 
     Preserves None to indicate "unknown/not provided" when both inputs are None,
-    otherwise treats None as 0 for summation.
+    otherwise returns the non-None value or their sum.
     """
     if a is None and b is None:
         return None
-    return (a or 0) + (b or 0)
+
+    if a is None or b is None:
+        return a or b
+
+    if isinstance(a, (int, float)) and isinstance(b, (int, float)):
+        return a + b
+
+    # NOTE: Ensure that the subtypes are the same so we can use the __add__ method just from one
+    if type(a) is type(b):
+        add_method = getattr(a, "__add__", None)
+        if add_method is not None:
+            return add_method(b)
+    else:
+        raise ValueError(
+            f"Cannot add {type(a)} and {type(b)} because they are not the same subclass"
+        )
+
+    return None
 
 
 def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False) -> str:
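`add_optional` generalizes the old `sum_optional`: numbers still sum, a lone non-None operand passes through, and same-typed pydantic models are combined via their own `__add__` (mixing different subclasses raises). A quick behavior sketch; note that the `a or b` branch maps a falsy-but-present value like `0` to the other operand:

```python
from model_library.base.utils import add_optional

assert add_optional(None, None) is None  # both unknown -> unknown
assert add_optional(3, None) == 3        # lone value passes through
assert add_optional(2, 2.5) == 4.5       # plain numeric sum
assert add_optional(0, None) is None     # quirk: 0 is falsy, so `a or b` yields None
# QueryResultCost + QueryResultCost goes through the model's __add__;
# this is what QueryResultMetadata.__add__ relies on to merge costs.
```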
--- a/model_library/config/all_models.json
+++ b/model_library/config/all_models.json
@@ -1,4 +1,144 @@
 {
+    "minimax/MiniMax-M2.1": {
+        "company": "MiniMax",
+        "label": "MiniMax-M2.1",
+        "description": null,
+        "release_date": "2025-12-23",
+        "open_source": true,
+        "documentation_url": "https://platform.minimax.io/docs",
+        "properties": {
+            "context_window": 204800,
+            "max_tokens": 131000,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": false,
+            "files": false,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.3,
+            "output": 1.2,
+            "cache": {
+                "read": 0.03,
+                "write": 0.375,
+                "write_markup": 1.0
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "top_p": 0.95
+        },
+        "provider_endpoint": "MiniMax-M2.1",
+        "provider_name": "minimax",
+        "full_key": "minimax/MiniMax-M2.1",
+        "slug": "minimax_MiniMax-M2.1"
+    },
+    "zai/glm-4.7": {
+        "company": "zAI",
+        "label": "GLM 4.7",
+        "description": "Latest model from ZAI",
+        "release_date": "2025-12-22",
+        "open_source": true,
+        "documentation_url": "https://docs.z.ai/",
+        "properties": {
+            "context_window": 200000,
+            "max_tokens": 128000,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": false,
+            "files": false,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.6,
+            "output": 2.2,
+            "cache": {
+                "read": 0.11,
+                "read_discount": 1.0,
+                "write_markup": 1.0
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "top_p": 1.0
+        },
+        "provider_endpoint": "glm-4.7",
+        "provider_name": "zai",
+        "full_key": "zai/glm-4.7",
+        "slug": "zai_glm-4.7"
+    },
+    "google/gemini-3-flash-preview": {
+        "company": "Google",
+        "label": "Gemini 3 Flash (12/25)",
+        "description": "Google's newest budget workhorse model",
+        "release_date": "2025-12-17",
+        "open_source": false,
+        "documentation_url": "https://ai.google.dev/gemini-api/docs/models",
+        "properties": {
+            "context_window": 1048576,
+            "max_tokens": 65536,
+            "training_cutoff": null,
+            "reasoning_model": true
+        },
+        "supports": {
+            "images": true,
+            "videos": true,
+            "files": true,
+            "batch": true,
+            "temperature": true,
+            "tools": true
+        },
+        "metadata": {
+            "deprecated": false,
+            "available_for_everyone": true,
+            "available_as_evaluator": false,
+            "ignored_for_cost": false
+        },
+        "provider_properties": {},
+        "costs_per_million_token": {
+            "input": 0.5,
+            "output": 3.0,
+            "cache": {
+                "read_discount": 0.1,
+                "write_markup": 1.0
+            },
+            "batch": {
+                "input_discount": 0.5,
+                "output_discount": 0.5
+            }
+        },
+        "alternative_keys": [],
+        "default_parameters": {
+            "temperature": 1.0,
+            "reasoning_effort": "high"
+        },
+        "provider_endpoint": "gemini-3-flash-preview",
+        "provider_name": "google",
+        "full_key": "google/gemini-3-flash-preview",
+        "slug": "google_gemini-3-flash-preview"
+    },
     "openai/gpt-5.2-pro-2025-12-11": {
         "company": "OpenAI",
         "label": "GPT 5.2 Pro",
@@ -454,7 +594,8 @@
             }
         ],
         "default_parameters": {
-            "temperature": 1.0
+            "temperature": 1.0,
+            "reasoning_effort": "none"
         },
         "provider_endpoint": "deepseek-v3p2",
         "provider_name": "fireworks",
@@ -15428,7 +15569,7 @@
             "tools": false
         },
         "metadata": {
-            "deprecated": false,
+            "deprecated": true,
             "available_for_everyone": true,
             "available_as_evaluator": false,
             "ignored_for_cost": false
--- a/model_library/config/fireworks_models.yaml
+++ b/model_library/config/fireworks_models.yaml
@@ -150,6 +150,8 @@ deepseek-models:
       context_window: 160_000
       max_tokens: 20_480
       reasoning_model: false
+    default_parameters:
+      reasoning_effort: "none"
     costs_per_million_token:
       input: 0.56
       output: 1.68
--- a/model_library/config/google_models.yaml
+++ b/model_library/config/google_models.yaml
@@ -54,6 +54,21 @@ gemini-3-models:
       temperature: 1
       reasoning_effort: "high"
 
+  google/gemini-3-flash-preview:
+    label: Gemini 3 Flash (12/25)
+    description: Google's newest budget workhorse model
+    release_date: 2025-12-17
+    properties:
+      context_window: 1048576
+      max_tokens: 65536
+      reasoning_model: true
+    costs_per_million_token:
+      input: 0.50
+      output: 3.00
+    default_parameters:
+      temperature: 1
+      reasoning_effort: "high"
+
   google/gemini-3-pro-preview:
     label: Gemini 3 Pro (11/25)
     description: Gemini 3 Pro, Google's most powerful model.
--- a/model_library/config/minimax_models.yaml
+++ b/model_library/config/minimax_models.yaml
@@ -16,6 +16,24 @@ base-config:
 
 minimax-m2-models:
 
+  minimax/MiniMax-M2.1:
+    label: MiniMax-M2.1
+    release_date: 2025-12-23
+    properties:
+      context_window: 204_800
+      max_tokens: 131_000
+      reasoning_model: true
+      training_cutoff: null
+    default_parameters:
+      temperature: 1.0
+      top_p: 0.95
+    costs_per_million_token:
+      input: 0.30
+      output: 1.20
+      cache:
+        read: 0.03
+        write: 0.375
+
   minimax/MiniMax-M2:
     label: MiniMax-M2
     description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.
--- a/model_library/config/perplexity_models.yaml
+++ b/model_library/config/perplexity_models.yaml
@@ -46,6 +46,8 @@ perplexity-models:
     label: Sonar Reasoning
     description: Reasoning-focused search model that exposes intermediate thinking for step-by-step answers.
     documentation_url: https://docs.perplexity.ai/models/models/sonar-reasoning
+    metadata:
+      deprecated: true
     properties:
       context_window: 128000
       reasoning_model: true
--- a/model_library/config/zai_models.yaml
+++ b/model_library/config/zai_models.yaml
@@ -18,6 +18,20 @@ base-config:
       write_markup: 1
 
 zai-models:
+  zai/glm-4.7:
+    label: GLM 4.7
+    description: "Latest model from ZAI"
+    release_date: 2025-12-22
+    properties:
+      context_window: 200_000
+      max_tokens: 128_000
+    costs_per_million_token:
+      input: 0.6
+      output: 2.2
+      cache:
+        read: 0.11
+    default_parameters:
+      temperature: 1
   zai/glm-4.5:
     label: GLM 4.5
     description: "z.AI old model"
--- a/model_library/exceptions.py
+++ b/model_library/exceptions.py
@@ -183,8 +183,8 @@ RETRIABLE_EXCEPTION_CODES = [
     "server_error",
     "overloaded",
     "throttling",  # AWS throttling errors
-    "throttlingexception",  # AWS throttling errors
     "internal server error",
+    "InternalServerError",
 ]
 
 
--- a/model_library/providers/ai21labs.py
+++ b/model_library/providers/ai21labs.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence
 
 from ai21 import AsyncAI21Client
@@ -137,6 +138,7 @@ class AI21LabsModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         messages: list[ChatMessage] = []
--- a/model_library/providers/amazon.py
+++ b/model_library/providers/amazon.py
@@ -3,6 +3,7 @@ import asyncio
 import base64
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import boto3
@@ -337,6 +338,7 @@ class AmazonModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.build_body(input, tools=tools, **kwargs)
--- a/model_library/providers/anthropic.py
+++ b/model_library/providers/anthropic.py
@@ -1,4 +1,5 @@
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from anthropic import AsyncAnthropic
@@ -249,6 +250,8 @@ class AnthropicModel(LLM):
 
     @override
     def get_client(self) -> AsyncAnthropic:
+        if self._delegate_client:
+            return self._delegate_client
         if not AnthropicModel._client:
             headers: dict[str, str] = {}
             AnthropicModel._client = AsyncAnthropic(
@@ -262,16 +265,20 @@ class AnthropicModel(LLM):
     def __init__(
         self,
         model_name: str,
-        provider: Literal["anthropic"] = "anthropic",
+        provider: str = "anthropic",
         *,
         config: LLMConfig | None = None,
+        custom_client: AsyncAnthropic | None = None,
     ):
         super().__init__(model_name, provider, config=config)
 
+        # allow custom client to act as delegate (native)
+        self._delegate_client: AsyncAnthropic | None = custom_client
+
         # https://docs.anthropic.com/en/api/openai-sdk
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
-            if self.native
+            if self.native or custom_client
             else OpenAIModel(
                 model_name=self.model_name,
                 provider=provider,
@@ -285,7 +292,10 @@ class AnthropicModel(LLM):
         )
 
         # Initialize batch support if enabled
-        self.supports_batch: bool = self.supports_batch and self.native
+        # Disable batch when using custom_client (similar to OpenAI)
+        self.supports_batch: bool = (
+            self.supports_batch and self.native and not custom_client
+        )
         self.batch: LLMBatchMixin | None = (
             AnthropicBatchMixin(self) if self.supports_batch else None
         )
@@ -555,20 +565,36 @@ class AnthropicModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         body = await self.create_body(input, tools=tools, **kwargs)
 
-        betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
-        if "sonnet-4-5" in self.model_name:
-            betas.append("context-1m-2025-08-07")
+        client = self.get_client()
 
-        async with self.get_client().beta.messages.stream(
-            **body,
-            betas=betas,
+        # only send betas for the official Anthropic endpoint
+        is_anthropic_endpoint = self._delegate_client is None
+        if not is_anthropic_endpoint:
+            client_base_url = getattr(client, "_base_url", None) or getattr(
+                client, "base_url", None
+            )
+            if client_base_url:
+                is_anthropic_endpoint = "api.anthropic.com" in str(client_base_url)
+
+        stream_kwargs = {**body}
+        if is_anthropic_endpoint:
+            betas = ["files-api-2025-04-14", "interleaved-thinking-2025-05-14"]
+            if "sonnet-4-5" in self.model_name:
+                betas.append("context-1m-2025-08-07")
+            stream_kwargs["betas"] = betas
+
+        async with client.beta.messages.stream(
+            **stream_kwargs,
         ) as stream:  # pyright: ignore[reportAny]
             message = await stream.get_final_message()
             self.logger.info(f"Anthropic Response finished: {message.id}")
--- a/model_library/providers/google/google.py
+++ b/model_library/providers/google/google.py
@@ -1,5 +1,6 @@
 import base64
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from google.genai import Client
@@ -54,6 +55,11 @@ from model_library.exceptions import (
 from model_library.providers.google.batch import GoogleBatchMixin
 from model_library.register_models import register_provider
 from model_library.utils import normalize_tool_result
+import uuid
+
+
+def generate_tool_call_id(tool_name: str) -> str:
+    return str(tool_name + "_" + str(uuid.uuid4()))
 
 
 class GoogleConfig(ProviderConfig):
@@ -328,6 +334,7 @@ class GoogleModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body: dict[str, Any] = await self.create_body(input, tools=tools, **kwargs)
@@ -357,9 +364,10 @@ class GoogleModel(LLM):
 
             call_args = part.function_call.args or {}
             tool_calls.append(
-                # weirdly, id is not required
+                # Weirdly, id is not required. If not provided, we generate one.
                 ToolCall(
-                    id=part.function_call.id or "",
+                    id=part.function_call.id
+                    or generate_tool_call_id(part.function_call.name),
                     name=part.function_call.name,
                    args=call_args,
                 )
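Because Gemini may omit `function_call.id`, the fallback is now a name-prefixed UUID rather than an empty string, which keeps tool calls and their results correlatable across turns. For example:

```python
>>> generate_tool_call_id("get_weather")
'get_weather_1b9d6bcd-bbfd-4b2d-9b5d-ab8dfbbd4bed'  # UUID suffix varies per call
```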
--- a/model_library/providers/minimax.py
+++ b/model_library/providers/minimax.py
@@ -1,13 +1,12 @@
 from typing import Literal
 
 from model_library import model_library_settings
-from model_library.base import (
-    DelegateOnly,
-    LLMConfig,
-)
-from model_library.providers.openai import OpenAIModel
+from model_library.base import DelegateOnly, LLMConfig
+from model_library.providers.anthropic import AnthropicModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults
+from model_library.utils import default_httpx_client
+
+from anthropic import AsyncAnthropic
 
 
 @register_provider("minimax")
@@ -21,13 +20,14 @@ class MinimaxModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)
 
-        self.delegate = OpenAIModel(
+        self.delegate = AnthropicModel(
             model_name=self.model_name,
             provider=self.provider,
             config=config,
-            custom_client=create_openai_client_with_defaults(
+            custom_client=AsyncAnthropic(
                 api_key=model_library_settings.MINIMAX_API_KEY,
-                base_url="https://api.minimax.io/v1",
+                base_url="https://api.minimax.io/anthropic",
+                http_client=default_httpx_client(),
+                max_retries=1,
             ),
-            use_completions=True,
         )
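MiniMax thus delegates through its Anthropic-compatible endpoint instead of the OpenAI-compatible one, reusing the `custom_client` plumbing added to `AnthropicModel` above. A resolution sketch, with construction details assumed from the surrounding code:

```python
from model_library.providers.minimax import MinimaxModel

# Assumes MINIMAX_API_KEY is configured in model_library_settings.
model = MinimaxModel("MiniMax-M2.1")

assert model.delegate is not None  # an AnthropicModel under the hood
# Query flow: MinimaxModel._query_impl -> delegate_query
#   -> AnthropicModel._query_impl -> https://api.minimax.io/anthropic
```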
--- a/model_library/providers/mistral.py
+++ b/model_library/providers/mistral.py
@@ -1,4 +1,5 @@
 import io
+import logging
 import time
 from collections.abc import Sequence
 from typing import Any, Literal
@@ -171,6 +172,7 @@ class MistralModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         # mistral supports max 8 images, merge extra images into the 8th image
--- a/model_library/providers/openai.py
+++ b/model_library/providers/openai.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import io
 import json
+import logging
 from typing import Any, Literal, Sequence, cast
 
 from openai import APIConnectionError, AsyncOpenAI
@@ -505,8 +506,11 @@ class OpenAIModel(LLM):
         if self.reasoning:
             del body["max_tokens"]
             body["max_completion_tokens"] = self.max_tokens
-        if self.reasoning_effort:
-            body["reasoning_effort"] = self.reasoning_effort
+
+        # some model endpoints (like `fireworks/deepseek-v3p2`)
+        # require explicitly setting reasoning effort to disable thinking
+        if self.reasoning_effort is not None:
+            body["reasoning_effort"] = self.reasoning_effort
 
         if self.supports_temperature:
             if self.temperature is not None:
@@ -701,8 +705,8 @@ class OpenAIModel(LLM):
 
         if self.reasoning:
             body["reasoning"] = {"summary": "auto"}
-            if self.reasoning_effort:
-                body["reasoning"]["effort"] = self.reasoning_effort
+            if self.reasoning_effort is not None:
+                body["reasoning"]["effort"] = self.reasoning_effort  # type: ignore[reportArgumentType]
 
         if self.supports_temperature:
             if self.temperature is not None:
@@ -722,6 +726,7 @@ class OpenAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.use_completions:
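These `is not None` checks pair with the `str | bool | None` widening in `LLMConfig`: a configured effort is now forwarded whenever it is present, so boolean values such as `False` are no longer silently dropped and a registry entry can pin an explicit effort (the new fireworks/deepseek-v3p2 default is `"none"`) to disable thinking on endpoints that require it. A sketch of the difference; the config construction is illustrative:

```python
from model_library.base import LLMConfig  # import path per this diff

config = LLMConfig(reasoning_effort="none")  # may now also be a bool

# before: body["reasoning_effort"] was set only for truthy values
# after:  it is set for every non-None value, so "none" / False
#         reach the endpoint and turn thinking off explicitly
```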
--- a/model_library/providers/vals.py
+++ b/model_library/providers/vals.py
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import io
 import json
+import logging
 import random
 import re
 import time
@@ -271,6 +272,7 @@ class DummyAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         body = await self.create_body(input, tools=tools, **kwargs)
--- a/model_library/providers/xai.py
+++ b/model_library/providers/xai.py
@@ -1,5 +1,6 @@
 import asyncio
 import io
+import logging
 from typing import Any, Literal, Sequence, cast
 
 import grpc
@@ -78,7 +79,7 @@ class XAIModel(LLM):
         super().__init__(model_name, provider, config=config)
 
         # https://docs.x.ai/docs/guides/migration
-        self.delegate: OpenAIModel | None = (
+        self.delegate = (
             None
             if self.native
             else OpenAIModel(
@@ -225,13 +226,16 @@ class XAIModel(LLM):
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
+        query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
         if self.reasoning_effort:
             kwargs["reasoning_effort"] = self.reasoning_effort
 
         if self.delegate:
-            return await self.delegate_query(input, tools=tools, **kwargs)
+            return await self.delegate_query(
+                input, tools=tools, query_logger=query_logger, **kwargs
+            )
 
         messages: Sequence[Message] = []
         if "system_prompt" in kwargs:
--- a/model_library/register_models.py
+++ b/model_library/register_models.py
@@ -169,7 +169,7 @@ class DefaultParameters(BaseModel):
     temperature: float | None = None
     top_p: float | None = None
     top_k: int | None = None
-    reasoning_effort: str | None = None
+    reasoning_effort: str | bool | None = None
 
 
 class RawModelConfig(BaseModel):
--- a/model_library/registry_utils.py
+++ b/model_library/registry_utils.py
@@ -1,9 +1,11 @@
 from functools import cache
 from pathlib import Path
+from typing import TypedDict
 
 import tiktoken
 
 from model_library.base import LLM, LLMConfig, ProviderConfig
+from model_library.base.output import QueryResultCost, QueryResultMetadata
 from model_library.register_models import (
     CostProperties,
     ModelConfig,
@@ -129,6 +131,64 @@ def get_model_cost(model_str: str) -> CostProperties | None:
     return model_config.costs_per_million_token
 
 
+class TokenDict(TypedDict, total=False):
+    """Token counts for cost calculation."""
+
+    in_tokens: int
+    out_tokens: int
+    reasoning_tokens: int | None
+    cache_read_tokens: int | None
+    cache_write_tokens: int | None
+
+
+async def recompute_cost(
+    model_str: str,
+    tokens: TokenDict,
+) -> QueryResultCost:
+    """
+    Recompute the cost for a model based on token information.
+
+    Uses the model provider's existing _calculate_cost method to ensure
+    provider-specific cost calculations are applied.
+
+    Args:
+        model_str: The model identifier (e.g., "openai/gpt-4o")
+        tokens: Dictionary containing token counts with keys:
+            - in_tokens (required): Number of input tokens
+            - out_tokens (required): Number of output tokens
+            - reasoning_tokens (optional): Number of reasoning tokens
+            - cache_read_tokens (optional): Number of cache read tokens
+            - cache_write_tokens (optional): Number of cache write tokens
+
+    Returns:
+        QueryResultCost with computed costs
+
+    Raises:
+        ValueError: If required token parameters are missing
+        Exception: If model not found in registry or costs not configured
+    """
+    if "in_tokens" not in tokens:
+        raise ValueError("Token dict must contain 'in_tokens'")
+    if "out_tokens" not in tokens:
+        raise ValueError("Token dict must contain 'out_tokens'")
+
+    model = get_registry_model(model_str)
+
+    metadata = QueryResultMetadata(
+        in_tokens=tokens["in_tokens"],
+        out_tokens=tokens["out_tokens"],
+        reasoning_tokens=tokens.get("reasoning_tokens"),
+        cache_read_tokens=tokens.get("cache_read_tokens"),
+        cache_write_tokens=tokens.get("cache_write_tokens"),
+    )
+
+    cost = await model._calculate_cost(metadata)  # type: ignore[arg-type]
+    if cost is None:
+        raise Exception(f"No cost information available for model {model_str}")
+
+    return cost
+
+
 @cache
 def get_provider_names() -> list[str]:
     """Return all provider names in the registry"""
--- a/model_library-0.1.4.dist-info/METADATA
+++ b/model_library-0.1.6.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: model-library
-Version: 0.1.4
+Version: 0.1.6
 Summary: Model Library for vals.ai
 Author-email: "Vals AI, Inc." <contact@vals.ai>
 License: MIT
--- a/model_library-0.1.4.dist-info/RECORD
+++ b/model_library-0.1.6.dist-info/RECORD
@@ -1,64 +1,64 @@
 model_library/__init__.py,sha256=AKc_15aklOf-LbcS9z1Xer_moRWNpG6Dh3kqvSQ0nOI,714
-model_library/exceptions.py,sha256=I9wquqj5hE640OfwVjUFtQUuu_potWAejLcOQCpDxIg,8705
+model_library/exceptions.py,sha256=ZHMr6lloXZz4V4Wy1UP8zc1CdUHx6-IS9_rOi6oN45s,8680
 model_library/file_utils.py,sha256=FAZRRtDT8c4Rjfoj64Te3knEHggXAAfRRuS8WLCsSe8,3682
 model_library/logging.py,sha256=McyaPHUk7RkB38-LrfnudrrU1B62ta8wAbbIBwLRmj0,853
 model_library/model_utils.py,sha256=l8oCltGeimMGtnne_3Q1EguVtzCj61UMsLsma-1czwg,753
 model_library/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/register_models.py,sha256=CY3Wd16AcWf7tYu_O2I2_kg_hdvQJFcvyQQA2OUu2SA,13646
-model_library/registry_utils.py,sha256=-ut95Aup5RYrZdv5Aih3bbYhe2vw9V0l4EFyH_1ONsQ,6797
+model_library/register_models.py,sha256=3FeFrcS2qRpAhj9ahXNuZ6jcH5UEks3I_PaT6rPvKgs,13653
+model_library/registry_utils.py,sha256=BVauHcP02Et2maLxowNBbdpGd32cnLz1_zSjDLVJjp0,8843
 model_library/settings.py,sha256=QyeUqzWBpexFi014L_mZkoXP49no3SAQNJRObATXrL8,873
 model_library/utils.py,sha256=T91ACGTc-KtksVyMFspt-vJtR5I-xcO3nVfH6SltmMU,3988
 model_library/base/__init__.py,sha256=TtxCXGUtkEqWZNMMofLPuC4orN7Ja2hemtbtHitt_UA,266
-model_library/base/base.py,sha256=HXxImh2H-GIIiVGNqV7gRPi0HH1KJxB_4ckuKyEqAYo,14139
+model_library/base/base.py,sha256=mvubt5VC1eM8cuLw_RHP04hTgNEcULzIBiJcHqKF--c,14289
 model_library/base/batch.py,sha256=-jd6L0ECc5pkj73zoX2ZYcv_9iQdqxEi1kEilwaXWSA,2895
-model_library/base/delegate_only.py,sha256=V2MzENtvBg0pySKncgE-mfCLBhhRZ0y4BntQwQsxbqU,2111
+model_library/base/delegate_only.py,sha256=YJUvP9k8x2kBsI-0ACNmx1Jx77zdZSBWCMSpx0LAyXE,2213
 model_library/base/input.py,sha256=Nhg8Ril1kFau1DnE8u102JC1l-vxNd-v9e3SjovR-Do,1876
-model_library/base/output.py,sha256=9pQZSOskkLDd_MAuDbYSimrbEcBL6x_3z6nLrPUnCOw,6701
-model_library/base/utils.py,sha256=KJZRVWr38Tik3yNJvTXnBy62ccilzzmSxHZFpQBJMPo,1330
+model_library/base/output.py,sha256=Ak6CJRYqtjYILsSWkfE70fSK3yvP7v_n5NYfysMaIL4,7464
+model_library/base/utils.py,sha256=YGQLPyQgCbfHNBxyTxCvpZNZ-ctEji258IdfMiXUJXs,1962
 model_library/config/README.md,sha256=i8_wHnlI6uHIqWN9fYBkDCglZM2p5ZMVD3SLlxiwUVk,4274
 model_library/config/ai21labs_models.yaml,sha256=ZWHhk1cep2GQIYHqkTS_0152mF3oZg2tSzMPmvfMRSI,2478
 model_library/config/alibaba_models.yaml,sha256=-RLWOwh3ZaCQqjaZ-4Zw0BJNVE6JVHJ8Ggm9gQJZ6QI,2082
-model_library/config/all_models.json,sha256=HuTWNX-noeGfLNoWuzLVjhjXqkFGJX0CgBMt01Ejy3A,529312
+model_library/config/all_models.json,sha256=U-XQrbaWWhjmkawg0Bd9NTxoDN-DT0WPhmDLF6OALR4,533621
 model_library/config/amazon_models.yaml,sha256=HgLmhpfedHCQtkPEviEJCBbAb-dNQPOnVtf4UnwrDds,7654
 model_library/config/anthropic_models.yaml,sha256=bTc_3Oqn4wCdq-dcWcEfmXrPVZjcR8-V6pTao7sGa_E,10475
 model_library/config/cohere_models.yaml,sha256=ZfWrS1K45Hxd5nT_gpP5YGAovJcBIlLNIdaRyE3V-7o,5022
 model_library/config/deepseek_models.yaml,sha256=4CCrf-4UPBgFCrS6CQa3vzNiaYlD4B6dFJFK_kIYBWY,1156
 model_library/config/dummy_model.yaml,sha256=lImYJBtBVJk_jgnLbkuSyOshQphVlYCMkw-UiJIBYhY,877
-model_library/config/fireworks_models.yaml,sha256=BMyQqjEpayNfSVGekzOFNIx7Ng3QOfPtldw5k2msqX0,6269
-model_library/config/google_models.yaml,sha256=n6yPRSVLyKGoJQW7L3UiVmb182zKiYhVLbmiUQDwXiY,16101
+model_library/config/fireworks_models.yaml,sha256=bAlXvjkdt-CnRp66WbfDv2qTrF5UHceRd2pvrsBERMk,6324
+model_library/config/google_models.yaml,sha256=Rg127nsBbHpk62X7WBq2ckdHo0bwYM0NVjF7T2h_1c0,16494
 model_library/config/inception_models.yaml,sha256=YCqfQlkH_pTdHIKee5dP_aRFXw_fTIEQCpUvX2bwO0M,560
 model_library/config/kimi_models.yaml,sha256=AAqse_BCE-lrHkJHIWJVqMtttnZQCa-5Qy5qiLUJjYs,755
-model_library/config/minimax_models.yaml,sha256=IttkpdBrp75J9WZQ0IRE4m4eSfd0LonfcA9OtrzJrMY,873
+model_library/config/minimax_models.yaml,sha256=gWTuTcl1-zyCF6KRuU6DSre2Cw5gXC-TeKV2Qp4StnQ,1263
 model_library/config/mistral_models.yaml,sha256=mYKYSzJl24lUiA_erSkom7nCBxAoeJ57Mi3090q1ArM,5162
 model_library/config/openai_models.yaml,sha256=1lKsTQwsxMMJqXtEoYs3liy6NcaK4p8NN7b-GSFnl8k,25261
-model_library/config/perplexity_models.yaml,sha256=XEvs3fXrsSYjYNHLJuGSlTW7biHMaXpZaW4Q-aVn6wU,2299
+model_library/config/perplexity_models.yaml,sha256=WUDqhLvnB0kQhCCwPv19FYLHozet3m33Spdo6bGff3Q,2336
 model_library/config/together_models.yaml,sha256=BeqRJonYzPvWwoLfkyH0RMRKBYUrCSEQhg_25Nvx97M,23867
 model_library/config/xai_models.yaml,sha256=2KRNNQy3kV-4xxSfhj7Uhp9TZF-S5qPlM8Ef-04zv8Y,7985
-model_library/config/zai_models.yaml,sha256=Esa4P-zc5K1pejQTylKPe-uiH9AnvB_Zn7RB_sAZ5mU,1577
+model_library/config/zai_models.yaml,sha256=lcYMh2FCrLWkKqdCnarRlwDoL3SbutRBNAiMPBUYQiw,1894
 model_library/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-model_library/providers/ai21labs.py,sha256=7PnXKl-Fv8KlE95eBv2izbFg1u7utDRQPdWXYVl_-as,5832
+model_library/providers/ai21labs.py,sha256=sqmu9R7owZZQLxOkNV9dhSeZVAlTMDCNFVdxJyJo6UA,5885
 model_library/providers/alibaba.py,sha256=k6LZErV_l9oTFTdKTwyw1SXD509Rl3AqFbN8umCryEE,2941
-model_library/providers/amazon.py,sha256=jRqOYCnxiONlbjT2C0UuFIrFOMU4d-hvLElPp41n5Ds,14015
-model_library/providers/anthropic.py,sha256=6YI04jdDDtDjLS17jThVYlNvLbqd9THrKAtaVTYL6eg,22194
+model_library/providers/amazon.py,sha256=U0tH5mw8dAMDg06BtnVoR-RxYlcAL1-7ZR737sR8xgU,14068
+model_library/providers/anthropic.py,sha256=G94hFkRFTWutq9XYd3321KkPrxVHnR6fs_h2AdkVqx4,23197
 model_library/providers/azure.py,sha256=brQNCED-zHvYjL5K5hdjFBNso6hJZg0HTHNnAgJPPG0,1408
 model_library/providers/cohere.py,sha256=lCBm1PP1l_UOa1pKFMIZM3C0wCv3QWB6UP0-jvjkFa4,1066
 model_library/providers/deepseek.py,sha256=7T4lxDiV5wmWUK7TAKwr332_T6uyXNCOiirZOCCETL0,1159
 model_library/providers/fireworks.py,sha256=w-5mOF5oNzqx_0ijCoTm1lSn2ZHwhp6fURKhV3LEqIc,2309
 model_library/providers/inception.py,sha256=Nrky53iujIM9spAWoNRtoJg2inFiL0li6E75vT3b6V8,1107
 model_library/providers/kimi.py,sha256=zzvcKpZLsM1xPebpLeMxNKTt_FRiLN1rFWrIly7wfXA,1092
-model_library/providers/minimax.py,sha256=HkM601mxTC0tpDGtxLTGq5IwnCfFfHG4EF6l1Bg77K4,1001
-model_library/providers/mistral.py,sha256=9zGYLpkn436ahZ716-5R5AQzn7htwVres1IjP5x5bFw,9745
-model_library/providers/openai.py,sha256=1PNmS-0ERjqLzWS9Prr1_cUpctyEj_xp15XOpl9-IGE,33421
+model_library/providers/minimax.py,sha256=YRtJW2wgiu6KXEBScYETeVMNTfhPvpjL2J-oo0wE_BI,1057
+model_library/providers/mistral.py,sha256=r0PY30kHY-guaSzIEahdp2I45meJzo71Ql97NfkPv-8,9798
+model_library/providers/openai.py,sha256=MMm6K4iewhSpPzEeRhrPRYf_txrpklCrefNHiUly8S8,33665
 model_library/providers/perplexity.py,sha256=eIzzkaZ4ZMlRKFVI9bnwyo91iJkh7aEmJ-0_4OKeAWc,1083
 model_library/providers/together.py,sha256=7Y4QLnX8c_fyXUud-W_C1gidmROQainTgODBwbvFyXQ,2033
-model_library/providers/vals.py,sha256=VLF1rsCR13a_kmtZfboDzJJ64Io_tBFe60vf-0BdYPc,9830
-model_library/providers/xai.py,sha256=oJiMICYLkybHpLv77PmMbi1Xj9IUZmKX3kANksjjFEQ,10828
+model_library/providers/vals.py,sha256=mKaItg_g9RJeaIDhoBu7ksTe42P0MRYFI4X1cla8YC0,9883
+model_library/providers/xai.py,sha256=toSqWBHUaHE000aMdOayAW3-_ZmDUotWEpZ4-X33LuY,10918
 model_library/providers/zai.py,sha256=O_GM6KlJ0fM2wYoxO9xrCWfnpYH7IpoKEzjiD4jB8Kc,1050
 model_library/providers/google/__init__.py,sha256=ypuLVL_QJEQ7C3S47FhC9y4wyawYOdGikAViJmACI0U,115
 model_library/providers/google/batch.py,sha256=4TE90Uo1adi54dVtGcGyUAxw11YExJq-Y4KmkQ-cyHA,9978
-model_library/providers/google/google.py,sha256=s9vky9r5SVNhBvMXcIr0_h0MlKLXwx_tQlZzs57xXYo,16507
-model_library-0.1.4.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
-model_library-0.1.4.dist-info/METADATA,sha256=4XPEbWSeOBYYoQ3ZYsdktZSnrDz2YbZixPIW7wTqJfw,6989
-model_library-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-model_library-0.1.4.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
-model_library-0.1.4.dist-info/RECORD,,
+model_library/providers/google/google.py,sha256=xmiktN-Z9W1fC1jHUT_m6x5fTpI6-mWpKvbMGg9kgXE,16787
+model_library-0.1.6.dist-info/licenses/LICENSE,sha256=x6mf4o7U_wHaaqcfxoU-0R6uYJLbqL_TNuoULP3asaA,1070
+model_library-0.1.6.dist-info/METADATA,sha256=sNWBOgDqydFI184UERputqhulBz0olrbye-fO7owrCE,6989
+model_library-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+model_library-0.1.6.dist-info/top_level.txt,sha256=HtQYxA_7RP8UT35I6VcUw20L6edI0Zf2t5Ys1uDGVjs,14
+model_library-0.1.6.dist-info/RECORD,,