model-library 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their public registry.
@@ -1,12 +1,16 @@
  import base64
  import io
+ import logging
+ import uuid
  from typing import Any, Literal, Sequence, cast

  from google.genai import Client
  from google.genai import errors as genai_errors
  from google.genai.types import (
      Content,
+     CountTokensConfig,
      File,
+     FinishReason,
      FunctionDeclaration,
      GenerateContentConfig,
      GenerateContentResponse,
@@ -20,13 +24,13 @@ from google.genai.types import (
      Tool,
      ToolListUnion,
      UploadFileConfig,
-     FinishReason,
  )
  from typing_extensions import override

  from model_library import model_library_settings
  from model_library.base import (
      LLM,
+     FileBase,
      FileInput,
      FileWithBase64,
      FileWithId,
@@ -39,6 +43,8 @@ from model_library.base import (
      QueryResult,
      QueryResultCost,
      QueryResultMetadata,
+     RawInput,
+     RawResponse,
      TextInput,
      ToolBody,
      ToolCall,
@@ -53,7 +59,10 @@ from model_library.exceptions import (
  )
  from model_library.providers.google.batch import GoogleBatchMixin
  from model_library.register_models import register_provider
- from model_library.utils import normalize_tool_result
+
+
+ def generate_tool_call_id(tool_name: str) -> str:
+     return str(tool_name + "_" + str(uuid.uuid4()))


  class GoogleConfig(ProviderConfig):
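Note on the new helper: generate_tool_call_id suffixes the tool name with a random UUID, so synthesized IDs stay traceable to the tool that produced them. A quick illustration (the UUID shown is made up; every call returns a fresh one):

    import uuid

    def generate_tool_call_id(tool_name: str) -> str:
        return str(tool_name + "_" + str(uuid.uuid4()))

    generate_tool_call_id("get_weather")
    # e.g. 'get_weather_4f1c2b9e-73a5-4f0e-9a61-8c2d5e7b1a30'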
@@ -140,63 +149,52 @@ class GoogleModel(LLM):
          input: Sequence[InputItem],
          **kwargs: Any,
      ) -> list[Content]:
-         parsed_input: list[Content] = []
-         parts: list[Part] = []
+         new_input: list[Content] = []

-         def flush_parts():
-             nonlocal parts
+         content_user: list[Part] = []

-             if parts:
-                 parsed_input.append(Content(parts=parts, role="user"))
-                 parts = []
+         def flush_content_user():
+             if content_user:
+                 new_input.append(Content(parts=content_user, role="user"))
+                 content_user.clear()

          for item in input:
-             match item:
-                 case TextInput():
-                     if item.text.strip():
-                         parts.append(Part.from_text(text=item.text))
+             if isinstance(item, TextInput):
+                 content_user.append(Part.from_text(text=item.text))
+                 continue
+
+             if isinstance(item, FileBase):
+                 parsed = await self.parse_file(item)
+                 content_user.append(parsed)
+                 continue

-                 case FileWithBase64() | FileWithUrl() | FileWithId():
-                     part = await self.parse_file(item)
-                     parts.append(part)
+             # non content user item
+             flush_content_user()

+             match item:
                  case ToolResult():
-                     flush_parts()
-                     result_str = normalize_tool_result(item.result)
-                     parsed_input.append(
+                     # id check
+                     new_input.append(
                          Content(
                              role="function",
                              parts=[
                                  Part.from_function_response(
                                      name=item.tool_call.name,
-                                     response={"result": result_str},
+                                     response={"result": item.result},
                                  )
                              ],
                          )
                      )

-                 case GenerateContentResponse():
-                     flush_parts()
-                     candidates = item.candidates
-                     if candidates and candidates[0]:
-                         content0 = candidates[0].content
-                         if content0 is not None:
-                             parsed_input.append(content0)
-                     else:
-                         self.logger.debug(
-                             "GenerateContentResponse missing candidates; skipping"
-                         )
-
-                 case Content():
-                     flush_parts()
-                     parsed_input.append(item)
+                 case RawResponse():
+                     new_input.extend(item.response)
+                 case RawInput():
+                     new_input.append(item.input)

-                 case _:
-                     raise BadInputError(f"Unsupported input type: {type(item)}")
+         # in case content user item is the last item
+         flush_content_user()

-         flush_parts()
-
-         return parsed_input
+         return new_input

      @override
      async def parse_file(self, file: FileInput) -> Part:
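Note on the reworked parse_input: consecutive user-facing items (text and files) are buffered in content_user and flushed into a single user Content, while any other item (tool result, raw input/response) first closes the pending user turn. A self-contained sketch of that buffering pattern, using plain dicts instead of the google.genai types (names below are illustrative only):

    items: list = ["first question", "second question", ("tool_result", "42"), "follow-up"]

    turns: list[dict] = []
    buffer: list[str] = []

    def flush() -> None:
        # emit the pending user turn, if any
        if buffer:
            turns.append({"role": "user", "parts": buffer.copy()})
            buffer.clear()

    for item in items:
        if isinstance(item, str):
            buffer.append(item)          # user content keeps accumulating
            continue
        flush()                          # a non-user item closes the user turn
        turns.append({"role": "function", "result": item[1]})
    flush()                              # in case user content is the last item

    # turns ->
    #   {"role": "user", "parts": ["first question", "second question"]}
    #   {"role": "function", "result": "42"}
    #   {"role": "user", "parts": ["follow-up"]}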
@@ -278,7 +276,8 @@ class GoogleModel(LLM):
              mime=mime,
          )

-     async def create_body(
+     @override
+     async def build_body(
          self,
          input: Sequence[InputItem],
          *,
@@ -328,9 +327,10 @@ class GoogleModel(LLM):
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
+         query_logger: logging.Logger,
          **kwargs: object,
      ) -> QueryResult:
-         body: dict[str, Any] = await self.create_body(input, tools=tools, **kwargs)
+         body: dict[str, Any] = await self.build_body(input, tools=tools, **kwargs)

          text: str = ""
          reasoning: str = ""
@@ -357,9 +357,10 @@ class GoogleModel(LLM):

                      call_args = part.function_call.args or {}
                      tool_calls.append(
-                         # weirdly, id is not required
+                         # Weirdly, id is not required. If not provided, we generate one.
                          ToolCall(
-                             id=part.function_call.id or "",
+                             id=part.function_call.id
+                             or generate_tool_call_id(part.function_call.name),
                              name=part.function_call.name,
                              args=call_args,
                          )
@@ -387,7 +388,7 @@ class GoogleModel(LLM):
          result = QueryResult(
              output_text=text,
              reasoning=reasoning,
-             history=[*input, *contents],
+             history=[*input, RawResponse(response=contents)],
              tool_calls=tool_calls,
          )

@@ -402,6 +403,51 @@ class GoogleModel(LLM):
          )
          return result

+     @override
+     async def count_tokens(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> int:
+         """
+         Count the number of tokens using Google's native token counting API.
+         https://ai.google.dev/gemini-api/docs/tokens
+
+         Only Vertex AI supports system_instruction and tools in count_tokens.
+         For Gemini API, fall back to the base implementation.
+         TODO: implement token counting for non-Vertex models.
+         """
+         if not self.provider_config.use_vertex:
+             return await super().count_tokens(
+                 input, history=history, tools=tools, **kwargs
+             )
+
+         input = [*history, *input]
+         if not input:
+             return 0
+
+         system_prompt = kwargs.pop("system_prompt", None)
+         contents = await self.parse_input(input, **kwargs)
+         parsed_tools = await self.parse_tools(tools) if tools else None
+         config = CountTokensConfig(
+             system_instruction=str(system_prompt) if system_prompt else None,
+             tools=parsed_tools,
+         )
+
+         response = await self.client.aio.models.count_tokens(
+             model=self.model_name,
+             contents=cast(Any, contents),
+             config=config,
+         )
+
+         if response.total_tokens is None:
+             raise ValueError("count_tokens returned None")
+
+         return response.total_tokens
+
      @override
      async def _calculate_cost(
          self,
@@ -438,7 +484,7 @@ class GoogleModel(LLM):
          **kwargs: object,
      ) -> PydanticT:
          # Create the request body with JSON schema
-         body: dict[str, Any] = await self.create_body(input, tools=[], **kwargs)
+         body: dict[str, Any] = await self.build_body(input, tools=[], **kwargs)

          # Get the JSON schema from the Pydantic model
          json_schema = pydantic_model.model_json_schema()
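Per the docstring of the new count_tokens, only Vertex AI accepts system_instruction and tools, so non-Vertex configurations fall back to the inherited estimate. A minimal, self-contained sketch of that dispatch (the two helper functions below are stand-ins, not part of the package):

    import asyncio

    async def base_estimate(texts: list[str]) -> int:
        # stand-in for the base-class fallback (hypothetical heuristic)
        return sum(len(t) // 4 for t in texts)

    async def native_count(texts: list[str]) -> int:
        # stand-in for client.aio.models.count_tokens on Vertex
        return sum(len(t.split()) for t in texts)

    async def count_tokens(texts: list[str], *, use_vertex: bool) -> int:
        if not use_vertex:
            return await base_estimate(texts)
        if not texts:
            return 0
        return await native_count(texts)

    print(asyncio.run(count_tokens(["hello there, world"], use_vertex=True)))  # 3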
@@ -1,13 +1,16 @@
  from typing import Literal

  from model_library import model_library_settings
- from model_library.base import (
-     DelegateOnly,
-     LLMConfig,
- )
- from model_library.providers.openai import OpenAIModel
+ from model_library.base import DelegateOnly, LLMConfig
+ from model_library.base.input import InputItem, ToolDefinition
+ from model_library.providers.anthropic import AnthropicModel
  from model_library.register_models import register_provider
- from model_library.utils import create_openai_client_with_defaults
+ from model_library.utils import default_httpx_client
+
+ from anthropic import AsyncAnthropic
+
+ from typing import Sequence
+ from typing_extensions import override


  @register_provider("minimax")
@@ -21,13 +24,29 @@ class MinimaxModel(DelegateOnly):
      ):
          super().__init__(model_name, provider, config=config)

-         self.delegate = OpenAIModel(
+         self.delegate = AnthropicModel(
              model_name=self.model_name,
              provider=self.provider,
              config=config,
-             custom_client=create_openai_client_with_defaults(
+             custom_client=AsyncAnthropic(
                  api_key=model_library_settings.MINIMAX_API_KEY,
-                 base_url="https://api.minimax.io/v1",
+                 base_url="https://api.minimax.io/anthropic",
+                 http_client=default_httpx_client(),
+                 max_retries=1,
              ),
-             use_completions=True,
+         )
+
+     # minimax client shares anthropic's syntax
+     @override
+     async def count_tokens(
+         self,
+         input: Sequence[InputItem],
+         *,
+         history: Sequence[InputItem] = [],
+         tools: list[ToolDefinition] = [],
+         **kwargs: object,
+     ) -> int:
+         assert self.delegate
+         return await self.delegate.count_tokens(
+             input, history=history, tools=tools, **kwargs
          )
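The MiniMax delegate moves from the OpenAI-compatible endpoint (https://api.minimax.io/v1) to the Anthropic-compatible one (https://api.minimax.io/anthropic), and count_tokens is simply forwarded to the delegate. A minimal sketch of that forwarding pattern with stand-in classes (not the package's real base classes):

    import asyncio

    class StubDelegate:
        async def count_tokens(self, texts: list[str]) -> int:
            return sum(len(t) // 4 for t in texts)  # stand-in estimate

    class DelegatingModel:
        def __init__(self) -> None:
            self.delegate = StubDelegate()

        async def count_tokens(self, texts: list[str]) -> int:
            # mirrors the new MinimaxModel.count_tokens: defer entirely to the delegate
            assert self.delegate
            return await self.delegate.count_tokens(texts)

    print(asyncio.run(DelegatingModel().count_tokens(["hello world"])))  # 2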
@@ -1,5 +1,5 @@
  import io
- import time
+ import logging
  from collections.abc import Sequence
  from typing import Any, Literal

@@ -12,14 +12,16 @@ from typing_extensions import override
  from model_library import model_library_settings
  from model_library.base import (
      LLM,
+     FileBase,
      FileInput,
      FileWithBase64,
      FileWithId,
-     FileWithUrl,
      InputItem,
      LLMConfig,
      QueryResult,
      QueryResultMetadata,
+     RawInput,
+     RawResponse,
      TextInput,
      ToolBody,
      ToolCall,
@@ -68,27 +70,30 @@ class MistralModel(LLM):
          content_user: list[dict[str, Any]] = []

          def flush_content_user():
-             nonlocal content_user
-
              if content_user:
-                 new_input.append({"role": "user", "content": content_user})
-                 content_user = []
+                 # NOTE: must make new object as we clear()
+                 new_input.append({"role": "user", "content": content_user.copy()})
+                 content_user.clear()

          for item in input:
+             if isinstance(item, TextInput):
+                 content_user.append({"type": "text", "text": item.text})
+                 continue
+
+             if isinstance(item, FileBase):
+                 match item.type:
+                     case "image":
+                         parsed = await self.parse_image(item)
+                     case "file":
+                         parsed = await self.parse_file(item)
+                 content_user.append(parsed)
+                 continue
+
+             # non content user item
+             flush_content_user()
+
              match item:
-                 case TextInput():
-                     content_user.append({"type": "text", "text": item.text})
-                 case FileWithBase64() | FileWithUrl() | FileWithId():
-                     match item.type:
-                         case "image":
-                             content_user.append(await self.parse_image(item))
-                         case "file":
-                             content_user.append(await self.parse_file(item))
-                 case AssistantMessage():
-                     flush_content_user()
-                     new_input.append(item)
                  case ToolResult():
-                     flush_content_user()
                      new_input.append(
                          {
                              "role": "tool",
@@ -97,9 +102,12 @@ class MistralModel(LLM):
                              "tool_call_id": item.tool_call.id,
                          }
                      )
-                 case _:
-                     raise BadInputError("Unsupported input type")
+                 case RawResponse():
+                     new_input.append(item.response)
+                 case RawInput():
+                     new_input.append(item.input)

+         # in case content user item is the last item
          flush_content_user()

          return new_input
@@ -166,13 +174,13 @@ class MistralModel(LLM):
          raise NotImplementedError()

      @override
-     async def _query_impl(
+     async def build_body(
          self,
          input: Sequence[InputItem],
          *,
          tools: list[ToolDefinition],
          **kwargs: object,
-     ) -> QueryResult:
+     ) -> dict[str, Any]:
          # mistral supports max 8 images, merge extra images into the 8th image
          input = trim_images(input, max_images=8)

@@ -203,8 +211,18 @@ class MistralModel(LLM):
              body["top_p"] = self.top_p

          body.update(kwargs)
+         return body

-         start = time.time()
+     @override
+     async def _query_impl(
+         self,
+         input: Sequence[InputItem],
+         *,
+         tools: list[ToolDefinition],
+         query_logger: logging.Logger,
+         **kwargs: object,
+     ) -> QueryResult:
+         body = await self.build_body(input, tools=tools, **kwargs)

          response: EventStreamAsync[
              CompletionEvent
@@ -245,8 +263,6 @@ class MistralModel(LLM):
                      in_tokens += data.usage.prompt_tokens or 0
                      out_tokens += data.usage.completion_tokens or 0

-             self.logger.info(f"Finished in: {time.time() - start}")
-
          except Exception as e:
              self.logger.error(f"Error: {e}", exc_info=True)
              raise e
@@ -300,7 +316,7 @@ class MistralModel(LLM):
          return QueryResult(
              output_text=text,
              reasoning=reasoning or None,
-             history=[*input, message],
+             history=[*input, RawResponse(response=message)],
              tool_calls=tool_calls,
              metadata=QueryResultMetadata(
                  in_tokens=in_tokens,
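Across both the Google and Mistral providers, the assistant turn returned by the provider is now stored in history wrapped in RawResponse (with RawInput as the counterpart for pre-formed provider messages) instead of being spliced into the item list directly; parse_input later unwraps it. A rough sketch of that round trip with a stand-in dataclass (the real RawResponse/RawInput live in model_library.base):

    from dataclasses import dataclass
    from typing import Any

    @dataclass
    class RawResponse:  # stand-in for model_library.base.RawResponse
        response: Any

    # turn 1: keep the provider-native assistant message wrapped in the history
    assistant_message = {"role": "assistant", "content": "Hi, how can I help?"}
    history = ["Hello!", RawResponse(response=assistant_message)]

    # turn 2: parse_input unwraps the marker back into a provider-shaped message
    new_input: list[dict[str, Any]] = []
    for item in history:
        if isinstance(item, RawResponse):
            new_input.append(item.response)
        else:
            new_input.append({"role": "user", "content": item})

    # new_input ->
    #   {"role": "user", "content": "Hello!"}
    #   {"role": "assistant", "content": "Hi, how can I help?"}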