model-library 0.1.6__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
@@ -13,8 +13,10 @@ from typing import (
     TypeVar,
 )
 
+import tiktoken
 from pydantic import model_serializer
 from pydantic.main import BaseModel
+from tiktoken.core import Encoding
 from typing_extensions import override
 
 from model_library.base.batch import (
@@ -35,6 +37,7 @@ from model_library.base.output import (
 )
 from model_library.base.utils import (
     get_pretty_input_types,
+    serialize_for_tokenizing,
 )
 from model_library.exceptions import (
     ImmediateRetryException,
@@ -379,6 +382,20 @@ class LLM(ABC):
         """
         ...
 
+    @abstractmethod
+    async def build_body(
+        self,
+        input: Sequence[InputItem],
+        *,
+        tools: list[ToolDefinition],
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        """
+        Builds the body of the request to the model provider
+        Calls parse_input
+        """
+        ...
+
     @abstractmethod
     async def parse_input(
         self,
@@ -421,6 +438,87 @@ class LLM(ABC):
         """Upload a file to the model provider"""
         ...
 
+    async def get_encoding(self) -> Encoding:
+        """Get the appropriate tokenizer"""
+
+        model = self.model_name.lower()
+
+        if any(x in model for x in ["gpt-4o", "o1", "o3", "gpt-4.1", "gpt-5"]):
+            return tiktoken.get_encoding("o200k_base")
+        elif "gpt-4" in model or "gpt-3.5" in model:
+            try:
+                return tiktoken.encoding_for_model(self.model_name)
+            except KeyError:
+                return tiktoken.get_encoding("cl100k_base")
+        elif "claude" in model:
+            return tiktoken.get_encoding("cl100k_base")
+        elif "gemini" in model:
+            return tiktoken.get_encoding("o200k_base")
+        elif "llama" in model or "mistral" in model:
+            return tiktoken.get_encoding("cl100k_base")
+        else:
+            return tiktoken.get_encoding("cl100k_base")
+
+    async def stringify_input(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> str:
+        input = [*history, *input]
+
+        system_prompt = kwargs.pop(
+            "system_prompt", ""
+        )  # TODO: refactor along with system prompt arg change
+
+        # special case if using a delegate
+        # don't inherit method override by default
+        if self.delegate:
+            parsed_input = await self.delegate.parse_input(input, **kwargs)
+            parsed_tools = await self.delegate.parse_tools(tools)
+        else:
+            parsed_input = await self.parse_input(input, **kwargs)
+            parsed_tools = await self.parse_tools(tools)
+
+        serialized_input = serialize_for_tokenizing(parsed_input)
+        serialized_tools = serialize_for_tokenizing(parsed_tools)
+
+        combined = f"{system_prompt}\n{serialized_input}\n{serialized_tools}"
+
+        return combined
+
+    async def count_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> int:
+        """
+        Count the number of tokens for a query.
+        Combines parsed input and tools, then tokenizes the result.
+        """
+
+        if not input and not history:
+            return 0
+
+        if self.delegate:
+            encoding = await self.delegate.get_encoding()
+        else:
+            encoding = await self.get_encoding()
+        self.logger.debug(f"Token Count Encoding: {encoding}")
+
+        string_input = await self.stringify_input(
+            input, history=history, tools=tools, **kwargs
+        )
+
+        count = len(encoding.encode(string_input, disallowed_special=()))
+        self.logger.debug(f"Combined Token Count Input: {count}")
+        return count
+
     async def query_json(
         self,
         input: Sequence[InputItem],
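Worth noting: tiktoken ships only OpenAI vocabularies, so the encodings chosen above for Claude, Gemini, Llama, and Mistral are approximations rather than those providers' real tokenizers. A standalone sketch of the same heuristic (collapsing the per-model `encoding_for_model` lookup for brevity; the model id below is illustrative):

```python
import tiktoken

def pick_encoding(model_name: str) -> tiktoken.Encoding:
    """Mirror of get_encoding above, minus the encoding_for_model lookup."""
    model = model_name.lower()
    if any(x in model for x in ["gpt-4o", "o1", "o3", "gpt-4.1", "gpt-5", "gemini"]):
        return tiktoken.get_encoding("o200k_base")
    # gpt-4/gpt-3.5 fallback, claude, llama, mistral, and the default case
    return tiktoken.get_encoding("cl100k_base")

enc = pick_encoding("claude-sonnet-4")  # approximation for non-OpenAI models
print(len(enc.encode("Hello, world!", disallowed_special=())))
```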
@@ -58,6 +58,16 @@ class DelegateOnly(LLM):
             input, tools=tools, query_logger=query_logger, **kwargs
         )
 
+    @override
+    async def build_body(
+        self,
+        input: Sequence[InputItem],
+        *,
+        tools: list[ToolDefinition],
+        **kwargs: object,
+    ) -> dict[str, Any]:
+        raise DelegateOnlyException()
+
     @override
     async def parse_input(
         self,
@@ -74,8 +74,6 @@ class ToolCall(BaseModel):
     --- INPUT ---
     """
 
-RawResponse = Any
-
 
 class ToolInput(BaseModel):
     tools: list[ToolDefinition] = []
@@ -90,11 +88,16 @@ class TextInput(BaseModel):
     text: str
 
 
-RawInputItem = dict[
-    str, Any
-]  # to pass in, for example, a mock convertsation with {"role": "user", "content": "Hello"}
+class RawResponse(BaseModel):
+    # used to store a received response
+    response: Any
+
+
+class RawInput(BaseModel):
+    # used to pass in anything provider specific (e.g. a mock conversation)
+    input: Any
 
 
 InputItem = (
-    TextInput | FileInput | ToolResult | RawInputItem | RawResponse
-)  # input item can either be a prompt, a file (image or file), a tool call result, raw input, or a previous response
+    TextInput | FileInput | ToolResult | RawInput | RawResponse
+)  # input item can either be a prompt, a file (image or file), a tool call result, a previous response, or raw input
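Replacing the `RawInputItem` dict alias with explicit pydantic wrappers lets downstream `match` statements dispatch on type instead of guessing at dict shape (compare the `get_pretty_input_types` hunk below). A usage sketch:

```python
from model_library.base.input import RawInput, RawResponse  # paths per this diff

# caller-supplied, provider-specific payload, e.g. a mock conversation turn
mock_turn = RawInput(input={"role": "user", "content": "Hello"})

# a previously received provider response carried through history
previous = RawResponse(response={"role": "assistant", "content": "Hi there"})
```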
@@ -24,6 +24,11 @@ class Citation(BaseModel):
     index: int | None = None
     container_id: str | None = None
 
+    @override
+    def __repr__(self):
+        attrs = vars(self).copy()
+        return f"{self.__class__.__name__}(\n{pformat(attrs, indent=2)}\n)"
+
 
 class QueryResultExtras(BaseModel):
     citations: list[Citation] = Field(default_factory=list)
@@ -1,18 +1,34 @@
-from typing import Sequence, TypeVar, cast
+import json
+from typing import Any, Sequence, TypeVar
+
+from pydantic import BaseModel
 
 from model_library.base.input import (
     FileBase,
     InputItem,
-    RawInputItem,
+    RawInput,
+    RawResponse,
     TextInput,
     ToolResult,
 )
 from model_library.utils import truncate_str
-from pydantic import BaseModel
 
 T = TypeVar("T", bound=BaseModel)
 
 
+def serialize_for_tokenizing(content: Any) -> str:
+    """
+    Serialize parsed content into a string for tokenization
+    """
+    parts: list[str] = []
+    if content:
+        if isinstance(content, str):
+            parts.append(content)
+        else:
+            parts.append(json.dumps(content, default=str))
+    return "\n".join(parts)
+
+
 def add_optional(
     a: int | float | T | None, b: int | float | T | None
 ) -> int | float | T | None:
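`serialize_for_tokenizing` passes strings through unchanged and JSON-encodes everything else with `default=str`, so values that are not natively JSON-serializable degrade to their string form instead of raising. The `default=str` behavior in isolation:

```python
import json
from datetime import datetime

# Plain JSON-serializable content round-trips as expected
print(json.dumps([{"role": "user", "content": "hi"}]))
# [{"role": "user", "content": "hi"}]

# Non-serializable values fall back to str() rather than raising TypeError
print(json.dumps({"ts": datetime(2024, 1, 1)}, default=str))
# {"ts": "2024-01-01 00:00:00"}
```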
@@ -54,11 +70,9 @@ def get_pretty_input_types(input: Sequence["InputItem"], verbose: bool = False)
                 return repr(item)
             case ToolResult():
                 return repr(item)
-            case dict():
-                item = cast(RawInputItem, item)
+            case RawInput():
                 return repr(item)
-            case _:
-                # RawResponse
+            case RawResponse():
                 return repr(item)
 
     processed_items = [f" {process_item(item)}" for item in input]
@@ -146,6 +146,17 @@ class BadInputError(Exception):
         super().__init__(message or BadInputError.DEFAULT_MESSAGE)
 
 
+class NoMatchingToolCallError(Exception):
+    """
+    Raised when a tool call result is provided with no matching tool call
+    """
+
+    DEFAULT_MESSAGE: str = "Tool call result provided with no matching tool call"
+
+    def __init__(self, message: str | None = None):
+        super().__init__(message or NoMatchingToolCallError.DEFAULT_MESSAGE)
+
+
 # Add more retriable exceptions as needed
 # Providers that don't have an explicit rate limit error are handled manually
 # by wrapping errored Http/gRPC requests with a BackoffRetryException
model_library/logging.py CHANGED
@@ -6,7 +6,11 @@ from rich.logging import RichHandler
 _llm_logger = logging.getLogger("llm")
 
 
-def set_logging(enable: bool = True, handler: logging.Handler | None = None):
+def set_logging(
+    enable: bool = True,
+    level: int = logging.INFO,
+    handler: logging.Handler | None = None,
+):
     """
     Sets up logging for the model library
 
@@ -15,7 +19,7 @@ def set_logging(enable: bool = True, handler: logging.Handler | None = None):
         handler (logging.Handler, optional): A custom logging handler. Defaults to RichHandler.
     """
     if enable:
-        _llm_logger.setLevel(logging.INFO)
+        _llm_logger.setLevel(level)
     else:
         _llm_logger.setLevel(logging.CRITICAL)
 
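With the new `level` parameter, callers can surface the DEBUG-level diagnostics added in this release (e.g. the token-count lines emitted by `count_tokens`):

```python
import logging

from model_library.logging import set_logging  # module path per this diff

# INFO remains the default; DEBUG surfaces the new token-count logging
set_logging(enable=True, level=logging.DEBUG)
```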
@@ -22,6 +22,7 @@ from model_library.base import (
     ToolDefinition,
     ToolResult,
 )
+from model_library.base.input import RawResponse
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
@@ -65,8 +66,6 @@ class AI21LabsModel(LLM):
             match item:
                 case TextInput():
                     new_input.append(ChatMessage(role="user", content=item.text))
-                case AssistantMessage():
-                    new_input.append(item)
                 case ToolResult():
                     new_input.append(
                         ToolMessage(
@@ -74,7 +73,9 @@ class AI21LabsModel(LLM):
                             content=item.result,
                             tool_call_id=item.tool_call.id,
                         )
-                    )
+                    )  # TODO: tool calling metadata and test
+                case RawResponse():
+                    new_input.append(item.response)
                 case _:
                     raise BadInputError("Unsupported input type")
         return new_input
@@ -133,14 +134,13 @@ class AI21LabsModel(LLM):
         raise NotImplementedError()
 
     @override
-    async def _query_impl(
+    async def build_body(
         self,
         input: Sequence[InputItem],
         *,
         tools: list[ToolDefinition],
-        query_logger: logging.Logger,
         **kwargs: object,
-    ) -> QueryResult:
+    ) -> dict[str, Any]:
         messages: list[ChatMessage] = []
         if "system_prompt" in kwargs:
             messages.append(
@@ -162,6 +162,18 @@ class AI21LabsModel(LLM):
             body["top_p"] = self.top_p
 
         body.update(kwargs)
+        return body
+
+    @override
+    async def _query_impl(
+        self,
+        input: Sequence[InputItem],
+        *,
+        tools: list[ToolDefinition],
+        query_logger: logging.Logger,
+        **kwargs: object,
+    ) -> QueryResult:
+        body = await self.build_body(input, tools=tools, **kwargs)
 
         response: ChatCompletionResponse = (
             await self.get_client().chat.completions.create(**body, stream=False)  # pyright: ignore[reportAny, reportUnknownMemberType]
@@ -186,7 +198,7 @@ class AI21LabsModel(LLM):
 
         output = QueryResult(
             output_text=choice.message.content,
-            history=[*input, choice.message],
+            history=[*input, RawResponse(response=choice.message)],
             metadata=QueryResultMetadata(
                 in_tokens=response.usage.prompt_tokens,
                 out_tokens=response.usage.completion_tokens,
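The AI21 hunks above show the pattern this release applies provider by provider: `_query_impl` is split so that payload assembly lives in `build_body`, a coroutine with no network side effects, and the query path shrinks to build-then-send. In miniature (class and method names here are illustrative stand-ins, not the package's API):

```python
from typing import Any

class Provider:  # illustrative stand-in, not model-library's class
    async def build_body(self, input: list[Any], **kwargs: Any) -> dict[str, Any]:
        # pure assembly: no I/O, so it can be inspected in tests or reused
        # for token estimation without issuing a request
        return {"messages": list(input), **kwargs}

    async def _query_impl(self, input: list[Any], **kwargs: Any) -> Any:
        body = await self.build_body(input, **kwargs)
        return await self._send(body)  # the only place a request goes out

    async def _send(self, body: dict[str, Any]) -> Any:
        ...  # network call elided in this sketch
```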
@@ -13,24 +13,26 @@ from typing_extensions import override
 
 from model_library.base import (
     LLM,
+    FileBase,
     FileInput,
     FileWithBase64,
     FileWithId,
-    FileWithUrl,
     InputItem,
     LLMConfig,
     QueryResult,
     QueryResultMetadata,
+    RawInput,
+    RawResponse,
     TextInput,
     ToolBody,
     ToolCall,
     ToolDefinition,
     ToolResult,
 )
-from model_library.base.input import FileBase
 from model_library.exceptions import (
     BadInputError,
     MaxOutputTokensExceededError,
+    NoMatchingToolCallError,
 )
 from model_library.model_utils import get_default_budget_tokens
 from model_library.register_models import register_provider
@@ -70,6 +72,20 @@ class AmazonModel(LLM):
 
     cache_control = {"type": "default"}
 
+    async def get_tool_call_ids(self, input: Sequence[InputItem]) -> list[str]:
+        raw_responses = [x for x in input if isinstance(x, RawResponse)]
+        tool_call_ids: list[str] = []
+
+        calls = [
+            y["toolUse"]
+            for x in raw_responses
+            if "content" in x.response
+            for y in x.response["content"]
+            if "toolUse" in y
+        ]
+        tool_call_ids.extend([x["toolUseId"] for x in calls])
+        return tool_call_ids
+
     @override
     async def parse_input(
         self,
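`get_tool_call_ids` walks prior `RawResponse` items for Converse-style assistant messages and collects every `toolUseId`, so that `parse_input` below can reject orphaned tool results. The message shape it traverses, in isolation (the id value is illustrative):

```python
# A Converse-style assistant message as carried in a RawResponse
raw_response = {
    "role": "assistant",
    "content": [
        {"text": "Checking that for you."},
        {"toolUse": {"toolUseId": "tooluse_example123"}},
    ],
}

# Equivalent to the nested comprehension above, for a single response:
ids = [
    block["toolUse"]["toolUseId"]
    for block in raw_response.get("content", [])
    if "toolUse" in block
]
print(ids)  # ['tooluse_example123']
```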
@@ -77,58 +93,63 @@ class AmazonModel(LLM):
         **kwargs: Any,
     ) -> list[dict[str, Any]]:
         new_input: list[dict[str, Any] | Any] = []
+
         content_user: list[dict[str, Any]] = []
 
+        def flush_content_user():
+            if content_user:
+                # NOTE: must make new object as we clear()
+                new_input.append({"role": "user", "content": content_user.copy()})
+                content_user.clear()
+
+        tool_call_ids = await self.get_tool_call_ids(input)
+
         for item in input:
+            if isinstance(item, TextInput):
+                content_user.append({"text": item.text})
+                continue
+
+            if isinstance(item, FileBase):
+                match item.type:
+                    case "image":
+                        parsed = await self.parse_image(item)
+                    case "file":
+                        parsed = await self.parse_file(item)
+                content_user.append(parsed)
+                continue
+
+            # non content user item
+            flush_content_user()
+
             match item:
-                case TextInput():
-                    content_user.append({"text": item.text})
-                case FileWithBase64() | FileWithUrl() | FileWithId():
-                    match item.type:
-                        case "image":
-                            content_user.append(await self.parse_image(item))
-                        case "file":
-                            content_user.append(await self.parse_file(item))
-                case _:
-                    if content_user:
-                        new_input.append({"role": "user", "content": content_user})
-                        content_user = []
-                    match item:
-                        case ToolResult():
-                            if not (
-                                isinstance(x, dict)
-                                and "toolUse" in x
-                                and x["toolUse"].get("toolUseId")
-                                == item.tool_call.call_id
-                                for x in new_input
-                            ):
-                                raise Exception(
-                                    "Tool call result provided with no matching tool call"
-                                )
-                            new_input.append(
+                case ToolResult():
+                    if item.tool_call.id not in tool_call_ids:
+                        raise NoMatchingToolCallError()
+
+                    new_input.append(
+                        {
+                            "role": "user",
+                            "content": [
                                 {
-                                    "role": "user",
-                                    "content": [
-                                        {
-                                            "toolResult": {
-                                                "toolUseId": item.tool_call.id,
-                                                "content": [
-                                                    {"json": {"result": item.result}}
-                                                ],
-                                            }
-                                        }
-                                    ],
+                                    "toolResult": {
+                                        "toolUseId": item.tool_call.id,
+                                        "content": [{"json": {"result": item.result}}],
+                                    }
                                 }
-                            )
-                        case dict():  # RawInputItem and RawResponse
-                            new_input.append(item)
+                            ],
+                        }
+                    )
+                case RawResponse():
+                    new_input.append(item.response)
+                case RawInput():
+                    new_input.append(item.input)
 
-        if content_user:
-            if self.supports_cache:
-                if not isinstance(input[-1], FileBase):
-                    # last item cannot be file
-                    content_user.append({"cachePoint": self.cache_control})
-            new_input.append({"role": "user", "content": content_user})
+        if content_user and self.supports_cache:
+            if not isinstance(input[-1], FileBase):
+                # last item cannot be file
+                content_user.append({"cachePoint": self.cache_control})
+
+        flush_content_user()
 
         return new_input
 
@@ -196,6 +217,7 @@ class AmazonModel(LLM):
     ) -> FileWithId:
         raise NotImplementedError()
 
+    @override
     async def build_body(
         self,
         input: Sequence[InputItem],
@@ -383,5 +405,5 @@ class AmazonModel(LLM):
             reasoning=reasoning,
             metadata=metadata,
             tool_calls=tool_calls,
-            history=[*input, messages],
+            history=[*input, RawResponse(response=messages)],
         )