model-library 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130)
  1. {model_library-0.1.5 → model_library-0.1.7}/Makefile +3 -1
  2. {model_library-0.1.5 → model_library-0.1.7}/PKG-INFO +3 -3
  3. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/web_search.py +3 -26
  4. model_library-0.1.7/examples/count_tokens.py +95 -0
  5. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/base.py +114 -12
  6. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/delegate_only.py +15 -1
  7. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/input.py +10 -7
  8. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/output.py +5 -0
  9. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/utils.py +21 -7
  10. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/all_models.json +92 -1
  11. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/fireworks_models.yaml +2 -0
  12. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/minimax_models.yaml +18 -0
  13. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/zai_models.yaml +14 -0
  14. {model_library-0.1.5 → model_library-0.1.7}/model_library/exceptions.py +11 -0
  15. {model_library-0.1.5 → model_library-0.1.7}/model_library/logging.py +6 -2
  16. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/ai21labs.py +20 -6
  17. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/amazon.py +72 -48
  18. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/anthropic.py +138 -85
  19. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/batch.py +3 -3
  20. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/google.py +92 -46
  21. model_library-0.1.7/model_library/providers/minimax.py +52 -0
  22. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/mistral.py +42 -26
  23. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/openai.py +131 -77
  24. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/vals.py +6 -3
  25. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/xai.py +125 -113
  26. {model_library-0.1.5 → model_library-0.1.7}/model_library/register_models.py +5 -3
  27. {model_library-0.1.5 → model_library-0.1.7}/model_library/utils.py +0 -35
  28. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/PKG-INFO +3 -3
  29. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/SOURCES.txt +3 -0
  30. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/requires.txt +2 -2
  31. {model_library-0.1.5 → model_library-0.1.7}/pyproject.toml +2 -2
  32. {model_library-0.1.5 → model_library-0.1.7}/scripts/run_models.py +1 -4
  33. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/conftest.py +1 -0
  34. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/test_fireworks_provider.py +1 -0
  35. model_library-0.1.7/tests/unit/test_count_tokens.py +67 -0
  36. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_deep_research.py +4 -2
  37. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_perplexity_provider.py +3 -1
  38. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_prompt_caching.py +9 -6
  39. model_library-0.1.7/tests/unit/test_query_logger.py +21 -0
  40. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_tools.py +5 -9
  41. {model_library-0.1.5 → model_library-0.1.7}/uv.lock +47 -23
  42. model_library-0.1.5/model_library/providers/minimax.py +0 -33
  43. {model_library-0.1.5 → model_library-0.1.7}/.gitattributes +0 -0
  44. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/publish.yml +0 -0
  45. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/style.yaml +0 -0
  46. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/test.yaml +0 -0
  47. {model_library-0.1.5 → model_library-0.1.7}/.github/workflows/typecheck.yml +0 -0
  48. {model_library-0.1.5 → model_library-0.1.7}/.gitignore +0 -0
  49. {model_library-0.1.5 → model_library-0.1.7}/LICENSE +0 -0
  50. {model_library-0.1.5 → model_library-0.1.7}/README.md +0 -0
  51. {model_library-0.1.5 → model_library-0.1.7}/examples/README.md +0 -0
  52. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/batch.py +0 -0
  53. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/custom_retrier.py +0 -0
  54. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/deep_research.py +0 -0
  55. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/stress.py +0 -0
  56. {model_library-0.1.5 → model_library-0.1.7}/examples/advanced/structured_output.py +0 -0
  57. {model_library-0.1.5 → model_library-0.1.7}/examples/basics.py +0 -0
  58. {model_library-0.1.5 → model_library-0.1.7}/examples/data/files.py +0 -0
  59. {model_library-0.1.5 → model_library-0.1.7}/examples/data/images.py +0 -0
  60. {model_library-0.1.5 → model_library-0.1.7}/examples/embeddings.py +0 -0
  61. {model_library-0.1.5 → model_library-0.1.7}/examples/files.py +0 -0
  62. {model_library-0.1.5 → model_library-0.1.7}/examples/images.py +0 -0
  63. {model_library-0.1.5 → model_library-0.1.7}/examples/prompt_caching.py +0 -0
  64. {model_library-0.1.5 → model_library-0.1.7}/examples/setup.py +0 -0
  65. {model_library-0.1.5 → model_library-0.1.7}/examples/tool_calls.py +0 -0
  66. {model_library-0.1.5 → model_library-0.1.7}/model_library/__init__.py +0 -0
  67. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/__init__.py +0 -0
  68. {model_library-0.1.5 → model_library-0.1.7}/model_library/base/batch.py +0 -0
  69. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/README.md +0 -0
  70. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/ai21labs_models.yaml +0 -0
  71. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/alibaba_models.yaml +0 -0
  72. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/amazon_models.yaml +0 -0
  73. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/anthropic_models.yaml +0 -0
  74. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/cohere_models.yaml +0 -0
  75. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/deepseek_models.yaml +0 -0
  76. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/dummy_model.yaml +0 -0
  77. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/google_models.yaml +0 -0
  78. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/inception_models.yaml +0 -0
  79. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/kimi_models.yaml +0 -0
  80. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/mistral_models.yaml +0 -0
  81. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/openai_models.yaml +0 -0
  82. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/perplexity_models.yaml +0 -0
  83. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/together_models.yaml +0 -0
  84. {model_library-0.1.5 → model_library-0.1.7}/model_library/config/xai_models.yaml +0 -0
  85. {model_library-0.1.5 → model_library-0.1.7}/model_library/file_utils.py +0 -0
  86. {model_library-0.1.5 → model_library-0.1.7}/model_library/model_utils.py +0 -0
  87. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/__init__.py +0 -0
  88. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/alibaba.py +0 -0
  89. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/azure.py +0 -0
  90. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/cohere.py +0 -0
  91. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/deepseek.py +0 -0
  92. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/fireworks.py +0 -0
  93. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/google/__init__.py +0 -0
  94. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/inception.py +0 -0
  95. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/kimi.py +0 -0
  96. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/perplexity.py +0 -0
  97. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/together.py +0 -0
  98. {model_library-0.1.5 → model_library-0.1.7}/model_library/providers/zai.py +0 -0
  99. {model_library-0.1.5 → model_library-0.1.7}/model_library/py.typed +0 -0
  100. {model_library-0.1.5 → model_library-0.1.7}/model_library/registry_utils.py +0 -0
  101. {model_library-0.1.5 → model_library-0.1.7}/model_library/settings.py +0 -0
  102. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/dependency_links.txt +0 -0
  103. {model_library-0.1.5 → model_library-0.1.7}/model_library.egg-info/top_level.txt +0 -0
  104. {model_library-0.1.5 → model_library-0.1.7}/scripts/browse_models.py +0 -0
  105. {model_library-0.1.5 → model_library-0.1.7}/scripts/config.py +0 -0
  106. {model_library-0.1.5 → model_library-0.1.7}/scripts/publish.py +0 -0
  107. {model_library-0.1.5 → model_library-0.1.7}/setup.cfg +0 -0
  108. {model_library-0.1.5 → model_library-0.1.7}/tests/README.md +0 -0
  109. {model_library-0.1.5 → model_library-0.1.7}/tests/__init__.py +0 -0
  110. {model_library-0.1.5 → model_library-0.1.7}/tests/conftest.py +0 -0
  111. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/__init__.py +0 -0
  112. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/conftest.py +0 -0
  113. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_batch.py +0 -0
  114. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_completion.py +0 -0
  115. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_files.py +0 -0
  116. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_reasoning.py +0 -0
  117. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_retry.py +0 -0
  118. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_streaming.py +0 -0
  119. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_structured_output.py +0 -0
  120. {model_library-0.1.5 → model_library-0.1.7}/tests/integration/test_tools.py +0 -0
  121. {model_library-0.1.5 → model_library-0.1.7}/tests/test_helpers.py +0 -0
  122. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/__init__.py +0 -0
  123. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/__init__.py +0 -0
  124. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/providers/test_google_provider.py +0 -0
  125. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_batch.py +0 -0
  126. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_context_window.py +0 -0
  127. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_registry.py +0 -0
  128. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_result_metadata.py +0 -0
  129. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_retry.py +0 -0
  130. {model_library-0.1.5 → model_library-0.1.7}/tests/unit/test_streaming.py +0 -0
{model_library-0.1.5 → model_library-0.1.7}/Makefile

@@ -16,8 +16,10 @@ help:
  	@echo " make examples <model> Run all examples with specified model"
  	@echo " make browse_models Interactively browse models and their configurations"

+ PYTHON_VERSION ?= 3.11
+
  install:
- 	uv venv
+ 	uv venv --python $(PYTHON_VERSION)
  	uv sync --dev
  	@echo "🎉 Done! Run 'source .venv/bin/activate' to activate the environment locally."
{model_library-0.1.5 → model_library-0.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: model-library
- Version: 0.1.5
+ Version: 0.1.7
  Summary: Model Library for vals.ai
  Author-email: "Vals AI, Inc." <contact@vals.ai>
  License: MIT
@@ -13,13 +13,13 @@ Requires-Dist: pyyaml>=6.0.2
  Requires-Dist: rich
  Requires-Dist: backoff<3.0,>=2.2.1
  Requires-Dist: redis<7.0,>=6.2.0
- Requires-Dist: tiktoken==0.11.0
+ Requires-Dist: tiktoken>=0.12.0
  Requires-Dist: pillow
  Requires-Dist: openai<3.0,>=2.0
  Requires-Dist: anthropic<1.0,>=0.57.1
  Requires-Dist: mistralai<2.0,>=1.9.10
  Requires-Dist: xai-sdk<2.0,>=1.0.0
- Requires-Dist: ai21<5.0,>=4.0.3
+ Requires-Dist: ai21<5.0,>=4.3.0
  Requires-Dist: boto3<2.0,>=1.38.27
  Requires-Dist: google-genai[aiohttp]>=1.51.0
  Requires-Dist: google-cloud-storage>=1.26.0
{model_library-0.1.5 → model_library-0.1.7}/examples/advanced/web_search.py

@@ -2,6 +2,7 @@ import asyncio
  from typing import Any, cast

  from model_library.base import LLM, ToolDefinition
+ from model_library.base.output import QueryResult
  from model_library.registry_utils import get_registry_model

  from ..setup import console_log, setup
@@ -41,31 +42,7 @@ def print_search_details(tool_call: Any) -> None:
          console_log(f" - {source}")


- def print_citations(response: Any) -> None:
-     """Extract and print citations from response history."""
-     if not response.history:
-         return
-
-     for item in response.history:
-         if not (hasattr(item, "content") and isinstance(item.content, list)):
-             continue
-
-         content_list = cast(list[Any], item.content)
-         for content_item in content_list:
-             if not (hasattr(content_item, "annotations") and content_item.annotations):
-                 continue
-
-             console_log("\nCitations:")
-             annotations = cast(list[Any], content_item.annotations)
-             for annotation in annotations:
-                 if hasattr(annotation, "url") and annotation.url:
-                     title = getattr(annotation, "title", "Untitled")
-                     url = annotation.url
-                     location = getattr(annotation, "location", "Unknown")
-                     console_log(f"- {title}: {url} (Location: {location})")
-
-
- def print_web_search_results(response: Any) -> None:
+ def print_web_search_results(response: QueryResult) -> None:
      """Print comprehensive web search results."""
      console_log(f"Response: {response.output_text}")
@@ -74,7 +51,7 @@ def print_web_search_results(response: Any) -> None:
      for tool_call in response.tool_calls:
          print_search_details(tool_call)

-     print_citations(response)
+     print(response.extras.citations)


  async def web_search_domain_filtered(model: LLM) -> None:
model_library-0.1.7/examples/count_tokens.py

@@ -0,0 +1,95 @@
+ import asyncio
+ import logging
+
+ from model_library import set_logging
+ from model_library.base import (
+     LLM,
+     QueryResult,
+     TextInput,
+     ToolBody,
+     ToolDefinition,
+ )
+ from model_library.registry_utils import get_registry_model
+
+ from .setup import console_log, setup
+
+
+ async def count_tokens(model: LLM):
+     console_log("\n--- Count Tokens ---\n")
+
+     tools = [
+         ToolDefinition(
+             name="get_weather",
+             body=ToolBody(
+                 name="get_weather",
+                 description="Get current temperature in a given location",
+                 properties={
+                     "location": {
+                         "type": "string",
+                         "description": "City and country e.g. Bogotá, Colombia",
+                     },
+                 },
+                 required=["location"],
+             ),
+         ),
+         ToolDefinition(
+             name="get_danger",
+             body=ToolBody(
+                 name="get_danger",
+                 description="Get current danger in a given location",
+                 properties={
+                     "location": {
+                         "type": "string",
+                         "description": "City and country e.g. Bogotá, Colombia",
+                     },
+                 },
+                 required=["location"],
+             ),
+         ),
+     ]
+
+     system_prompt = "You must make exactly 0 or 1 tool calls per answer. You must not make more than 1 tool call per answer."
+     user_prompt = "What is the weather in San Francisco right now?"
+
+     predicted_tokens = await model.count_tokens(
+         [TextInput(text=user_prompt)],
+         tools=tools,
+         system_prompt=system_prompt,
+     )
+
+     response: QueryResult = await model.query(
+         [TextInput(text=user_prompt)],
+         tools=tools,
+         system_prompt=system_prompt,
+     )
+
+     actual_tokens = response.metadata.total_input_tokens
+
+     console_log(f"Predicted Token Count: {predicted_tokens}")
+     console_log(f"Actual Token Count: {actual_tokens}\n")
+
+
+ async def main():
+     import argparse
+
+     parser = argparse.ArgumentParser(description="Example of counting tokens")
+     parser.add_argument(
+         "model",
+         nargs="?",
+         default="google/gemini-2.5-flash",
+         type=str,
+         help="Model endpoint (default: google/gemini-2.5-flash)",
+     )
+     args = parser.parse_args()
+
+     model = get_registry_model(args.model)
+     model.logger.info(model)
+
+     set_logging(enable=True, level=logging.INFO)
+
+     await count_tokens(model)
+
+
+ if __name__ == "__main__":
+     setup()
+     asyncio.run(main())
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/base.py

@@ -6,7 +6,6 @@ from abc import ABC, abstractmethod
  from collections.abc import Awaitable
  from pprint import pformat
  from typing import (
-     TYPE_CHECKING,
      Any,
      Callable,
      Literal,
@@ -14,8 +13,10 @@ from typing import (
      TypeVar,
  )

+ import tiktoken
  from pydantic import model_serializer
  from pydantic.main import BaseModel
+ from tiktoken.core import Encoding
  from typing_extensions import override

  from model_library.base.batch import (
@@ -36,6 +37,7 @@ from model_library.base.output import (
  )
  from model_library.base.utils import (
      get_pretty_input_types,
+     serialize_for_tokenizing,
  )
  from model_library.exceptions import (
      ImmediateRetryException,
@@ -43,9 +45,6 @@ from model_library.exceptions import (
  )
  from model_library.utils import truncate_str

- if TYPE_CHECKING:
-     from model_library.providers.openai import OpenAIModel
-
  PydanticT = TypeVar("PydanticT", bound=BaseModel)


@@ -66,7 +65,7 @@ class LLMConfig(BaseModel):
      top_p: float | None = None
      top_k: int | None = None
      reasoning: bool = False
-     reasoning_effort: str | None = None
+     reasoning_effort: str | bool | None = None
      supports_images: bool = False
      supports_files: bool = False
      supports_videos: bool = False
@@ -110,7 +109,7 @@ class LLM(ABC):
          self.top_k: int | None = config.top_k

          self.reasoning: bool = config.reasoning
-         self.reasoning_effort: str | None = config.reasoning_effort
+         self.reasoning_effort: str | bool | None = config.reasoning_effort

          self.supports_files: bool = config.supports_files
          self.supports_videos: bool = config.supports_videos
@@ -120,7 +119,7 @@ class LLM(ABC):
          self.supports_tools: bool = config.supports_tools

          self.native: bool = config.native
-         self.delegate: "OpenAIModel | None" = None
+         self.delegate: "LLM | None" = None
          self.batch: LLMBatchMixin | None = None

          if config.provider_config:
@@ -198,11 +197,14 @@ class LLM(ABC):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition] = [],
+         query_logger: logging.Logger,
          **kwargs: object,
      ) -> QueryResult:
          if not self.delegate:
              raise Exception("Delegate not set")
-         return await self.delegate._query_impl(input, tools=tools, **kwargs)  # pyright: ignore[reportPrivateUsage]
+         return await self.delegate._query_impl(  # pyright: ignore[reportPrivateUsage]
+             input, tools=tools, query_logger=query_logger, **kwargs
+         )

      async def query(
          self,
@@ -213,6 +215,7 @@ class LLM(ABC):
          # for backwards compatibility
          files: list[FileInput] = [],
          images: list[FileInput] = [],
+         query_logger: logging.Logger | None = None,
          **kwargs: object,
      ) -> QueryResult:
          """
@@ -256,15 +259,18 @@ class LLM(ABC):
          input = [*history, *input]

          # unique logger for the query
-         query_id = uuid.uuid4().hex[:14]
-         query_logger = self.logger.getChild(f"query={query_id}")
+         if not query_logger:
+             query_id = uuid.uuid4().hex[:14]
+             query_logger = self.logger.getChild(f"query={query_id}")

          query_logger.info(
              "Query started:\n" + item_info + tool_info + f"--- kwargs: {short_kwargs}\n"
          )

          async def query_func() -> QueryResult:
-             return await self._query_impl(input, tools=tools, **kwargs)
+             return await self._query_impl(
+                 input, tools=tools, query_logger=query_logger, **kwargs
+             )

          async def timed_query() -> tuple[QueryResult, float]:
              return await LLM.timer_wrapper(query_func)
@@ -361,7 +367,8 @@ class LLM(ABC):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
-         **kwargs: object,  # TODO: pass in query logger
+         query_logger: logging.Logger,
+         **kwargs: object,
      ) -> QueryResult:
          """
          Query the model with input
@@ -375,6 +382,20 @@ class LLM(ABC):
          """
          ...

+     @abstractmethod
+     async def build_body(
+         self,
+         input: Sequence[InputItem],
+         *,
+         tools: list[ToolDefinition],
+         **kwargs: Any,
+     ) -> dict[str, Any]:
+         """
+         Builds the body of the request to the model provider
+         Calls parse_input
+         """
+         ...
+
      @abstractmethod
      async def parse_input(
          self,
@@ -417,6 +438,87 @@ class LLM(ABC):
          """Upload a file to the model provider"""
          ...

+     async def get_encoding(self) -> Encoding:
+         """Get the appropriate tokenizer"""
+
+         model = self.model_name.lower()
+
+         if any(x in model for x in ["gpt-4o", "o1", "o3", "gpt-4.1", "gpt-5"]):
+             return tiktoken.get_encoding("o200k_base")
+         elif "gpt-4" in model or "gpt-3.5" in model:
+             try:
+                 return tiktoken.encoding_for_model(self.model_name)
+             except KeyError:
+                 return tiktoken.get_encoding("cl100k_base")
+         elif "claude" in model:
+             return tiktoken.get_encoding("cl100k_base")
+         elif "gemini" in model:
+             return tiktoken.get_encoding("o200k_base")
+         elif "llama" in model or "mistral" in model:
+             return tiktoken.get_encoding("cl100k_base")
+         else:
+             return tiktoken.get_encoding("cl100k_base")
+
+     async def stringify_input(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> str:
+         input = [*history, *input]
+
+         system_prompt = kwargs.pop(
+             "system_prompt", ""
+         )  # TODO: refactor along with system prompt arg change
+
+         # special case if using a delegate
+         # don't inherit method override by default
+         if self.delegate:
+             parsed_input = await self.delegate.parse_input(input, **kwargs)
+             parsed_tools = await self.delegate.parse_tools(tools)
+         else:
+             parsed_input = await self.parse_input(input, **kwargs)
+             parsed_tools = await self.parse_tools(tools)
+
+         serialized_input = serialize_for_tokenizing(parsed_input)
+         serialized_tools = serialize_for_tokenizing(parsed_tools)
+
+         combined = f"{system_prompt}\n{serialized_input}\n{serialized_tools}"
+
+         return combined
+
+     async def count_tokens(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> int:
+         """
+         Count the number of tokens for a query.
+         Combines parsed input and tools, then tokenizes the result.
+         """
+
+         if not input and not history:
+             return 0
+
+         if self.delegate:
+             encoding = await self.delegate.get_encoding()
+         else:
+             encoding = await self.get_encoding()
+         self.logger.debug(f"Token Count Encoding: {encoding}")
+
+         string_input = await self.stringify_input(
+             input, history=history, tools=tools, **kwargs
+         )
+
+         count = len(encoding.encode(string_input, disallowed_special=()))
+         self.logger.debug(f"Combined Token Count Input: {count}")
+         return count
+
      async def query_json(
          self,
          input: Sequence[InputItem],
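
The new count_tokens path is a purely local, pre-flight estimate: stringify_input parses the input and tools (via the delegate when one is set), serialize_for_tokenizing flattens the result to text, and get_encoding picks a tiktoken encoding by model-name heuristics, so non-OpenAI families only get approximate counts. A minimal usage sketch (the registry key is illustrative; any registered model works):

    import asyncio

    from model_library.base import TextInput
    from model_library.registry_utils import get_registry_model


    async def main() -> None:
        model = get_registry_model("openai/gpt-4o")  # illustrative registry key
        # Local estimate via tiktoken; no provider API call is made.
        estimate = await model.count_tokens([TextInput(text="Hello!")])
        print(estimate)


    asyncio.run(main())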
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/delegate_only.py

@@ -1,4 +1,5 @@
  import io
+ import logging
  from typing import Any, Literal, Sequence

  from typing_extensions import override
@@ -48,11 +49,24 @@ class DelegateOnly(LLM):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
+         query_logger: logging.Logger,
          **kwargs: object,
      ) -> QueryResult:
          assert self.delegate

-         return await self.delegate_query(input, tools=tools, **kwargs)
+         return await self.delegate_query(
+             input, tools=tools, query_logger=query_logger, **kwargs
+         )
+
+     @override
+     async def build_body(
+         self,
+         input: Sequence[InputItem],
+         *,
+         tools: list[ToolDefinition],
+         **kwargs: object,
+     ) -> dict[str, Any]:
+         raise DelegateOnlyException()

      @override
      async def parse_input(
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/input.py

@@ -74,8 +74,6 @@ class ToolCall(BaseModel):
  --- INPUT ---
  """

- RawResponse = Any
-

  class ToolInput(BaseModel):
      tools: list[ToolDefinition] = []
@@ -90,11 +88,16 @@ class TextInput(BaseModel):
      text: str


- RawInputItem = dict[
-     str, Any
- ]  # to pass in, for example, a mock convertsation with {"role": "user", "content": "Hello"}
+ class RawResponse(BaseModel):
+     # used to store a received response
+     response: Any
+
+
+ class RawInput(BaseModel):
+     # used to pass in anything provider specific (e.g. a mock conversation)
+     input: Any


  InputItem = (
-     TextInput | FileInput | ToolResult | RawInputItem | RawResponse
- )  # input item can either be a prompt, a file (image or file), a tool call result, raw input, or a previous response
+     TextInput | FileInput | ToolResult | RawInput | RawResponse
+ )  # input item can either be a prompt, a file (image or file), a tool call result, a previous response, or raw input
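
The bare RawInputItem dict alias is gone; raw provider payloads and stored responses now travel in explicit Pydantic wrappers. A hedged migration sketch, using only the field names visible in the hunk above:

    from model_library.base.input import RawInput

    # 0.1.5: raw items were plain dicts
    # item = {"role": "user", "content": "Hello"}

    # 0.1.7: the same payload is wrapped explicitly
    item = RawInput(input={"role": "user", "content": "Hello"})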
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/output.py

@@ -24,6 +24,11 @@ class Citation(BaseModel):
      index: int | None = None
      container_id: str | None = None

+     @override
+     def __repr__(self):
+         attrs = vars(self).copy()
+         return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2)}\n)"
+

  class QueryResultExtras(BaseModel):
      citations: list[Citation] = Field(default_factory=list)
{model_library-0.1.5 → model_library-0.1.7}/model_library/base/utils.py

@@ -1,18 +1,34 @@
- from typing import Sequence, TypeVar, cast
+ import json
+ from typing import Any, Sequence, TypeVar
+
+ from pydantic import BaseModel

  from model_library.base.input import (
      FileBase,
      InputItem,
-     RawInputItem,
+     RawInput,
+     RawResponse,
      TextInput,
      ToolResult,
  )
  from model_library.utils import truncate_str
- from pydantic import BaseModel

  T = TypeVar("T", bound=BaseModel)


+ def serialize_for_tokenizing(content: Any) -> str:
+     """
+     Serialize parsed content into a string for tokenization
+     """
+     parts: list[str] = []
+     if content:
+         if isinstance(content, str):
+             parts.append(content)
+         else:
+             parts.append(json.dumps(content, default=str))
+     return "\n".join(parts)
+
+
  def add_optional(
      a: int | float | T | None, b: int | float | T | None
  ) -> int | float | T | None:
@@ -54,11 +70,9 @@ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False)
              return repr(item)
          case ToolResult():
              return repr(item)
-         case dict():
-             item = cast(RawInputItem, item)
+         case RawInput():
              return repr(item)
-         case _:
-             # RawResponse
+         case RawResponse():
              return repr(item)

      processed_items = [f" {process_item(item)}" for item in input]
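
As a quick sketch of the new helper's contract (derived from the code above, not part of the package): strings pass through unchanged, falsy content yields an empty string, and anything else is JSON-encoded with default=str as the fallback serializer.

    from model_library.base.utils import serialize_for_tokenizing

    assert serialize_for_tokenizing("hello") == "hello"
    assert serialize_for_tokenizing(None) == ""
    assert serialize_for_tokenizing({"role": "user"}) == '{"role": "user"}'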
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/all_models.json

@@ -1,4 +1,94 @@
  {
+     "minimax/MiniMax-M2.1": {
+         "company": "MiniMax",
+         "label": "MiniMax-M2.1",
+         "description": null,
+         "release_date": "2025-12-23",
+         "open_source": true,
+         "documentation_url": "https://platform.minimax.io/docs",
+         "properties": {
+             "context_window": 204800,
+             "max_tokens": 131000,
+             "training_cutoff": null,
+             "reasoning_model": true
+         },
+         "supports": {
+             "images": false,
+             "files": false,
+             "temperature": true,
+             "tools": true
+         },
+         "metadata": {
+             "deprecated": false,
+             "available_for_everyone": true,
+             "available_as_evaluator": false,
+             "ignored_for_cost": false
+         },
+         "provider_properties": {},
+         "costs_per_million_token": {
+             "input": 0.3,
+             "output": 1.2,
+             "cache": {
+                 "read": 0.03,
+                 "write": 0.375,
+                 "write_markup": 1.0
+             }
+         },
+         "alternative_keys": [],
+         "default_parameters": {
+             "temperature": 1.0,
+             "top_p": 0.95
+         },
+         "provider_endpoint": "MiniMax-M2.1",
+         "provider_name": "minimax",
+         "full_key": "minimax/MiniMax-M2.1",
+         "slug": "minimax_MiniMax-M2.1"
+     },
+     "zai/glm-4.7": {
+         "company": "zAI",
+         "label": "GLM 4.7",
+         "description": "Latest model from ZAI",
+         "release_date": "2025-12-22",
+         "open_source": true,
+         "documentation_url": "https://docs.z.ai/",
+         "properties": {
+             "context_window": 200000,
+             "max_tokens": 128000,
+             "training_cutoff": null,
+             "reasoning_model": true
+         },
+         "supports": {
+             "images": false,
+             "files": false,
+             "temperature": true,
+             "tools": true
+         },
+         "metadata": {
+             "deprecated": false,
+             "available_for_everyone": true,
+             "available_as_evaluator": false,
+             "ignored_for_cost": false
+         },
+         "provider_properties": {},
+         "costs_per_million_token": {
+             "input": 0.6,
+             "output": 2.2,
+             "cache": {
+                 "read": 0.11,
+                 "read_discount": 1.0,
+                 "write_markup": 1.0
+             }
+         },
+         "alternative_keys": [],
+         "default_parameters": {
+             "temperature": 1.0,
+             "top_p": 1.0
+         },
+         "provider_endpoint": "glm-4.7",
+         "provider_name": "zai",
+         "full_key": "zai/glm-4.7",
+         "slug": "zai_glm-4.7"
+     },
      "google/gemini-3-flash-preview": {
          "company": "Google",
          "label": "Gemini 3 Flash (12/25)",
@@ -504,7 +594,8 @@
          }
      ],
      "default_parameters": {
-         "temperature": 1.0
+         "temperature": 1.0,
+         "reasoning_effort": "none"
      },
      "provider_endpoint": "deepseek-v3p2",
      "provider_name": "fireworks",
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/fireworks_models.yaml

@@ -150,6 +150,8 @@ deepseek-models:
        context_window: 160_000
        max_tokens: 20_480
        reasoning_model: false
+     default_parameters:
+       reasoning_effort: "none"
      costs_per_million_token:
        input: 0.56
        output: 1.68
{model_library-0.1.5 → model_library-0.1.7}/model_library/config/minimax_models.yaml

@@ -16,6 +16,24 @@ base-config:

  minimax-m2-models:

+   minimax/MiniMax-M2.1:
+     label: MiniMax-M2.1
+     release_date: 2025-12-23
+     properties:
+       context_window: 204_800
+       max_tokens: 131_000
+       reasoning_model: true
+       training_cutoff: null
+     default_parameters:
+       temperature: 1.0
+       top_p: 0.95
+     costs_per_million_token:
+       input: 0.30
+       output: 1.20
+       cache:
+         read: 0.03
+         write: 0.375
+
    minimax/MiniMax-M2:
      label: MiniMax-M2
      description: MiniMax-M2 is a cost-efficient open-source model optimized for agentic applications and coding in particular.