model-library 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. model_library/base/base.py +237 -62
  2. model_library/base/delegate_only.py +86 -9
  3. model_library/base/input.py +10 -7
  4. model_library/base/output.py +48 -0
  5. model_library/base/utils.py +56 -7
  6. model_library/config/alibaba_models.yaml +44 -57
  7. model_library/config/all_models.json +253 -126
  8. model_library/config/kimi_models.yaml +30 -3
  9. model_library/config/openai_models.yaml +15 -23
  10. model_library/config/zai_models.yaml +24 -3
  11. model_library/exceptions.py +14 -77
  12. model_library/logging.py +6 -2
  13. model_library/providers/ai21labs.py +30 -14
  14. model_library/providers/alibaba.py +17 -8
  15. model_library/providers/amazon.py +119 -64
  16. model_library/providers/anthropic.py +184 -104
  17. model_library/providers/azure.py +22 -10
  18. model_library/providers/cohere.py +7 -7
  19. model_library/providers/deepseek.py +8 -8
  20. model_library/providers/fireworks.py +7 -8
  21. model_library/providers/google/batch.py +17 -13
  22. model_library/providers/google/google.py +130 -73
  23. model_library/providers/inception.py +7 -7
  24. model_library/providers/kimi.py +18 -8
  25. model_library/providers/minimax.py +30 -13
  26. model_library/providers/mistral.py +61 -35
  27. model_library/providers/openai.py +219 -93
  28. model_library/providers/openrouter.py +34 -0
  29. model_library/providers/perplexity.py +7 -7
  30. model_library/providers/together.py +7 -8
  31. model_library/providers/vals.py +16 -9
  32. model_library/providers/xai.py +157 -144
  33. model_library/providers/zai.py +38 -8
  34. model_library/register_models.py +4 -2
  35. model_library/registry_utils.py +39 -15
  36. model_library/retriers/__init__.py +0 -0
  37. model_library/retriers/backoff.py +73 -0
  38. model_library/retriers/base.py +225 -0
  39. model_library/retriers/token.py +427 -0
  40. model_library/retriers/utils.py +11 -0
  41. model_library/settings.py +1 -1
  42. model_library/utils.py +13 -35
  43. {model_library-0.1.6.dist-info → model_library-0.1.8.dist-info}/METADATA +4 -3
  44. model_library-0.1.8.dist-info/RECORD +70 -0
  45. {model_library-0.1.6.dist-info → model_library-0.1.8.dist-info}/WHEEL +1 -1
  46. model_library-0.1.6.dist-info/RECORD +0 -64
  47. {model_library-0.1.6.dist-info → model_library-0.1.8.dist-info}/licenses/LICENSE +0 -0
  48. {model_library-0.1.6.dist-info → model_library-0.1.8.dist-info}/top_level.txt +0 -0
model_library/providers/google/batch.py

@@ -2,8 +2,6 @@ import io
 import json
 from typing import TYPE_CHECKING, Any, Final, Sequence, cast

-from typing_extensions import override
-
 from google.genai.types import (
     BatchJob,
     Content,
@@ -11,6 +9,8 @@ from google.genai.types import (
     JobState,
     UploadFileConfig,
 )
+from typing_extensions import override
+
 from model_library.base import BatchResult, InputItem, LLMBatchMixin

 if TYPE_CHECKING:
@@ -24,16 +24,19 @@ from google.genai.types import (
 )


-def extract_text_from_json_response(response: dict[str, Any]) -> str:
+def extract_text_from_json_response(response: dict[str, Any]) -> tuple[str, str]:
     """Extract concatenated non-thought text from a JSON response structure."""
     # TODO: fix the typing we always ignore
     text = ""
+    reasoning = ""
     for candidate in response.get("candidates", []) or []:  # type: ignore
         content = (candidate or {}).get("content") or {}  # type: ignore
         for part in content.get("parts", []) or []:  # type: ignore
-            if not part.get("thought", False):  # type: ignore
+            if part.get("thought", False):  # type: ignore
+                reasoning += part.get("text", "")  # type: ignore
+            else:
                 text += part.get("text", "")  # type: ignore
-    return text  # type: ignore
+    return text, reasoning  # type: ignore


 def parse_predictions_jsonl(jsonl: str) -> list[BatchResult]:
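
The behavior change above is easiest to see in isolation: `thought` parts, previously discarded, now accumulate into a separate reasoning string. A minimal sketch against a hand-built dict that mimics Gemini's JSON response shape (not taken from the package's tests):

# Sketch: the new tuple return of extract_text_from_json_response.
response = {
    "candidates": [
        {
            "content": {
                "parts": [
                    {"text": "Let me think...", "thought": True},
                    {"text": "The answer is 42."},
                ]
            }
        }
    ]
}

text, reasoning = extract_text_from_json_response(response)
assert text == "The answer is 42."
assert reasoning == "Let me think..."
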
@@ -48,9 +51,10 @@ def parse_predictions_jsonl(jsonl: str) -> list[BatchResult]:
         custom_id = data.get("key", "unknown")
         if "response" in data:
             response = data["response"]
-            text = extract_text_from_json_response(response)
+            text, reasoning = extract_text_from_json_response(response)
             output = QueryResult()
             output.output_text = text
+            output.reasoning = reasoning
             if "usageMetadata" in response:
                 output.metadata.in_tokens = response["usageMetadata"].get(
                     "promptTokenCount", 0
@@ -144,7 +148,7 @@ class GoogleBatchMixin(LLMBatchMixin):
         **kwargs: object,
     ) -> dict[str, Any]:
         self._root.logger.debug(f"Creating batch request for custom_id: {custom_id}")
-        body = await self._root.create_body(input, tools=[], **kwargs)
+        body = await self._root.build_body(input, tools=[], **kwargs)

         contents_any = body["contents"]
         serialized_contents: list[dict[str, Any]] = [
@@ -196,7 +200,7 @@ class GoogleBatchMixin(LLMBatchMixin):
             custom_id = labels.get("qa_pair_id", f"request-{i}")
             jsonl_lines.append(json.dumps({"key": custom_id, "request": request_data}))

-        batch_request_file = self._root.client.files.upload(
+        batch_request_file = self._root.get_client().files.upload(
             file=io.StringIO("\n".join(jsonl_lines)),
             config=UploadFileConfig(mime_type="application/jsonl"),
         )
@@ -205,7 +209,7 @@ class GoogleBatchMixin(LLMBatchMixin):
             raise Exception("Failed to upload batch jsonl")

         try:
-            job: BatchJob = await self._root.client.aio.batches.create(
+            job: BatchJob = await self._root.get_client().aio.batches.create(
                 model=self._root.model_name,
                 src=batch_request_file.name,
                 config={"display_name": batch_name},
@@ -224,14 +228,14 @@ class GoogleBatchMixin(LLMBatchMixin):
     async def get_batch_results(self, batch_id: str) -> list[BatchResult]:
         self._root.logger.info(f"Retrieving batch results for {batch_id}")

-        job = await self._root.client.aio.batches.get(name=batch_id)
+        job = await self._root.get_client().aio.batches.get(name=batch_id)

         results: list[BatchResult] = []

         if job.state == JobState.JOB_STATE_SUCCEEDED:
             if job.dest and job.dest.file_name:
                 results_file_name = job.dest.file_name
-                file_content = await self._root.client.aio.files.download(
+                file_content = await self._root.get_client().aio.files.download(
                     file=results_file_name
                 )
                 decoded = file_content.decode("utf-8")
@@ -250,7 +254,7 @@ class GoogleBatchMixin(LLMBatchMixin):
     @override
     async def cancel_batch_request(self, batch_id: str):
         self._root.logger.info(f"Cancelling batch {batch_id}")
-        await self._root.client.aio.batches.cancel(name=batch_id)
+        await self._root.get_client().aio.batches.cancel(name=batch_id)

     @override
     async def get_batch_progress(self, batch_id: str) -> int:
@@ -262,7 +266,7 @@ class GoogleBatchMixin(LLMBatchMixin):

         try:
             self._root.logger.debug(f"Checking batch status for {batch_id}")
-            job: BatchJob = await self._root.client.aio.batches.get(name=batch_id)
+            job: BatchJob = await self._root.get_client().aio.batches.get(name=batch_id)
             state = job.state

             if not state:
model_library/providers/google/google.py

@@ -1,13 +1,17 @@
 import base64
 import io
+import json
 import logging
+import uuid
 from typing import Any, Literal, Sequence, cast

 from google.genai import Client
 from google.genai import errors as genai_errors
 from google.genai.types import (
     Content,
+    CountTokensConfig,
     File,
+    FinishReason,
     FunctionDeclaration,
     GenerateContentConfig,
     GenerateContentResponse,
@@ -21,13 +25,14 @@ from google.genai.types import (
     Tool,
     ToolListUnion,
     UploadFileConfig,
-    FinishReason,
 )
+from google.oauth2 import service_account
 from typing_extensions import override

 from model_library import model_library_settings
 from model_library.base import (
     LLM,
+    FileBase,
     FileInput,
     FileWithBase64,
     FileWithId,
@@ -40,6 +45,8 @@ from model_library.base import (
     QueryResult,
     QueryResultCost,
     QueryResultMetadata,
+    RawInput,
+    RawResponse,
     TextInput,
     ToolBody,
     ToolCall,
@@ -54,8 +61,6 @@ from model_library.exceptions import (
 )
 from model_library.providers.google.batch import GoogleBatchMixin
 from model_library.register_models import register_provider
-from model_library.utils import normalize_tool_result
-import uuid


 def generate_tool_call_id(tool_name: str) -> str:
@@ -92,31 +97,50 @@ class GoogleModel(LLM):
         ),
     ]

-    @override
-    def get_client(self) -> Client:
-        if self.provider_config.use_vertex:
-            # Preview Gemini releases from September 2025 are only served from the global
-            # Vertex region. The public docs for these SKUs list `global` as the sole
-            # availability region (see September 25, 2025 release notes), so we override
-            # the default `us-central1` when we detect them.
-            # https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash
-            MODEL_REGION_OVERRIDES: dict[str, str] = {
-                "gemini-2.5-flash-preview-09-2025": "global",
-                "gemini-2.5-flash-lite-preview-09-2025": "global",
+    def _get_default_api_key(self) -> str:
+        if not self.provider_config.use_vertex:
+            return model_library_settings.GOOGLE_API_KEY
+
+        return json.dumps(
+            {
+                "GCP_REGION": model_library_settings.GCP_REGION,
+                "GCP_PROJECT_ID": model_library_settings.GCP_PROJECT_ID,
+                "GCP_CREDS": model_library_settings.GCP_CREDS,
             }
-            region = model_library_settings.GCP_REGION
-            if self.model_name in MODEL_REGION_OVERRIDES:
-                region = MODEL_REGION_OVERRIDES[self.model_name]
-
-            return Client(
-                vertexai=True,
-                project=model_library_settings.GCP_PROJECT_ID,
-                location=region,
-                # Credentials object is not typed, so we have to ignore the error
-                credentials=model_library_settings.GCP_CREDS,
-            )
+        )

-        return Client(api_key=model_library_settings.GOOGLE_API_KEY)
+    @override
+    def get_client(self, api_key: str | None = None) -> Client:
+        if not self.has_client():
+            assert api_key
+            if self.provider_config.use_vertex:
+                # Gemini preview releases are only served from the global Vertex region after September 2025.
+                MODEL_REGION_OVERRIDES: dict[str, str] = {
+                    "gemini-2.5-flash-preview-09-2025": "global",
+                    "gemini-2.5-flash-lite-preview-09-2025": "global",
+                    "gemini-3-flash-preview": "global",
+                    "gemini-3-pro-preview": "global",
+                }
+
+                creds = json.loads(api_key)
+
+                region = creds["GCP_REGION"]
+                if self.model_name in MODEL_REGION_OVERRIDES:
+                    region = MODEL_REGION_OVERRIDES[self.model_name]
+
+                client = Client(
+                    vertexai=True,
+                    project=creds["GCP_PROJECT_ID"],
+                    location=region,
+                    credentials=service_account.Credentials.from_service_account_info(  # type: ignore
+                        json.loads(creds["GCP_CREDS"]),
+                        scopes=["https://www.googleapis.com/auth/cloud-platform"],
+                    ),
+                )
+            else:
+                client = Client(api_key=api_key)
+            self.assign_client(client)
+        return super().get_client()

     def __init__(
         self,
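
Worth noting in 0.1.8: when use_vertex is set, the `api_key` passed to get_client is itself a JSON envelope produced by _get_default_api_key. A rough sketch of both sides of that round trip, with placeholder values standing in for the real settings:

import json

# Sketch: packing (what _get_default_api_key does) and unpacking (what
# get_client does). All values below are placeholders.
packed = json.dumps(
    {
        "GCP_REGION": "us-central1",
        "GCP_PROJECT_ID": "my-project",
        "GCP_CREDS": '{"type": "service_account"}',  # service-account JSON as a string
    }
)

creds = json.loads(packed)
sa_info = json.loads(creds["GCP_CREDS"])  # note the nested parse
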
@@ -138,71 +162,58 @@ class GoogleModel(LLM):
             GoogleBatchMixin(self) if self.supports_batch else None
         )

-        self.client = self.get_client()
-
     @override
     async def parse_input(
         self,
         input: Sequence[InputItem],
         **kwargs: Any,
     ) -> list[Content]:
-        parsed_input: list[Content] = []
-        parts: list[Part] = []
+        new_input: list[Content] = []

-        def flush_parts():
-            nonlocal parts
+        content_user: list[Part] = []

-            if parts:
-                parsed_input.append(Content(parts=parts, role="user"))
-                parts = []
+        def flush_content_user():
+            if content_user:
+                new_input.append(Content(parts=content_user, role="user"))
+                content_user.clear()

         for item in input:
-            match item:
-                case TextInput():
-                    if item.text.strip():
-                        parts.append(Part.from_text(text=item.text))
+            if isinstance(item, TextInput):
+                content_user.append(Part.from_text(text=item.text))
+                continue
+
+            if isinstance(item, FileBase):
+                parsed = await self.parse_file(item)
+                content_user.append(parsed)
+                continue

-                case FileWithBase64() | FileWithUrl() | FileWithId():
-                    part = await self.parse_file(item)
-                    parts.append(part)
+            # non content user item
+            flush_content_user()

+            match item:
                 case ToolResult():
-                    flush_parts()
-                    result_str = normalize_tool_result(item.result)
-                    parsed_input.append(
+                    # id check
+                    new_input.append(
                         Content(
                             role="function",
                             parts=[
                                 Part.from_function_response(
                                     name=item.tool_call.name,
-                                    response={"result": result_str},
+                                    response={"result": item.result},
                                 )
                             ],
                         )
                     )

-                case GenerateContentResponse():
-                    flush_parts()
-                    candidates = item.candidates
-                    if candidates and candidates[0]:
-                        content0 = candidates[0].content
-                        if content0 is not None:
-                            parsed_input.append(content0)
-                    else:
-                        self.logger.debug(
-                            "GenerateContentResponse missing candidates; skipping"
-                        )
-
-                case Content():
-                    flush_parts()
-                    parsed_input.append(item)
+                case RawResponse():
+                    new_input.extend(item.response)
+                case RawInput():
+                    new_input.append(item.input)

-                case _:
-                    raise BadInputError(f"Unsupported input type: {type(item)}")
+        # in case content user item is the last item
+        flush_content_user()

-        flush_parts()
-
-        return parsed_input
+        return new_input

     @override
     async def parse_file(self, file: FileInput) -> Part:
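
The rewritten parse_input buffers consecutive user-content items (text and files) into one user Content, and flushes the buffer whenever a non-user item (tool result, raw input/response) appears. The grouping logic in isolation, a sketch with plain strings standing in for Part objects:

# Sketch of the buffer-and-flush grouping used by parse_input above.
# Strings stand in for Part objects; "TOOL:" marks a non-user item.
def group_user_items(items: list[str]) -> list[list[str] | str]:
    grouped: list[list[str] | str] = []
    buffer: list[str] = []

    def flush() -> None:
        if buffer:
            grouped.append(list(buffer))  # one user "Content" per run
            buffer.clear()

    for item in items:
        if item.startswith("TOOL:"):
            flush()  # a non-user item closes the current run
            grouped.append(item)
        else:
            buffer.append(item)
    flush()  # trailing user run, if any
    return grouped

assert group_user_items(["a", "b", "TOOL:x", "c"]) == [["a", "b"], "TOOL:x", ["c"]]
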
@@ -268,7 +279,7 @@ class GoogleModel(LLM):
             )

         mime = f"image/{mime}" if type == "image" else mime  # TODO:
-        response: File = self.client.files.upload(
+        response: File = self.get_client().files.upload(
             file=bytes, config=UploadFileConfig(mime_type=mime)
         )
         if not response.name:
@@ -284,7 +295,8 @@ class GoogleModel(LLM):
             mime=mime,
         )

-    async def create_body(
+    @override
+    async def build_body(
         self,
         input: Sequence[InputItem],
         *,
@@ -337,7 +349,7 @@ class GoogleModel(LLM):
         query_logger: logging.Logger,
         **kwargs: object,
     ) -> QueryResult:
-        body: dict[str, Any] = await self.create_body(input, tools=tools, **kwargs)
+        body: dict[str, Any] = await self.build_body(input, tools=tools, **kwargs)

         text: str = ""
         reasoning: str = ""
@@ -345,7 +357,7 @@ class GoogleModel(LLM):

         metadata: GenerateContentResponseUsageMetadata | None = None

-        stream = await self.client.aio.models.generate_content_stream(**body)
+        stream = await self.get_client().aio.models.generate_content_stream(**body)
         contents: list[Content | None] = []
         finish_reason: FinishReason | None = None

@@ -395,7 +407,7 @@ class GoogleModel(LLM):
         result = QueryResult(
             output_text=text,
             reasoning=reasoning,
-            history=[*input, *contents],
+            history=[*input, RawResponse(response=contents)],
             tool_calls=tool_calls,
         )

@@ -410,6 +422,51 @@ class GoogleModel(LLM):
         )
         return result

+    @override
+    async def count_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> int:
+        """
+        Count the number of tokens using Google's native token counting API.
+        https://ai.google.dev/gemini-api/docs/tokens
+
+        Only Vertex AI supports system_instruction and tools in count_tokens.
+        For the Gemini API, fall back to the base implementation.
+        TODO: implement token counting for non-Vertex models.
+        """
+        if not self.provider_config.use_vertex:
+            return await super().count_tokens(
+                input, history=history, tools=tools, **kwargs
+            )
+
+        input = [*history, *input]
+        if not input:
+            return 0
+
+        system_prompt = kwargs.pop("system_prompt", None)
+        contents = await self.parse_input(input, **kwargs)
+        parsed_tools = await self.parse_tools(tools) if tools else None
+        config = CountTokensConfig(
+            system_instruction=str(system_prompt) if system_prompt else None,
+            tools=parsed_tools,
+        )
+
+        response = await self.get_client().aio.models.count_tokens(
+            model=self.model_name,
+            contents=cast(Any, contents),
+            config=config,
+        )
+
+        if response.total_tokens is None:
+            raise ValueError("count_tokens returned None")
+
+        return response.total_tokens
+
     @override
     async def _calculate_cost(
         self,
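
For orientation, the underlying google-genai call that the new count_tokens wraps looks roughly like this (placeholder model name and key; per the docstring above, system_instruction and tools via CountTokensConfig are honored only on Vertex):

# Sketch: direct google-genai token counting, outside model_library.
from google import genai

client = genai.Client(api_key="...")  # placeholder key
resp = client.models.count_tokens(
    model="gemini-2.5-flash",
    contents="How many tokens is this?",
)
print(resp.total_tokens)
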
@@ -446,7 +503,7 @@ class GoogleModel(LLM):
         **kwargs: object,
     ) -> PydanticT:
         # Create the request body with JSON schema
-        body: dict[str, Any] = await self.create_body(input, tools=[], **kwargs)
+        body: dict[str, Any] = await self.build_body(input, tools=[], **kwargs)

         # Get the JSON schema from the Pydantic model
         json_schema = pydantic_model.model_json_schema()
@@ -465,7 +522,7 @@ class GoogleModel(LLM):
         # Make the request with retry wrapper
         async def _query():
             try:
-                return await self.client.aio.models.generate_content(**body)
+                return await self.get_client().aio.models.generate_content(**body)
             except (genai_errors.ServerError, genai_errors.UnknownApiResponseError):
                 raise ImmediateRetryException("Failed to connect to Google API")

model_library/providers/inception.py

@@ -1,13 +1,14 @@
 from typing import Literal

+from pydantic import SecretStr
+
 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults


 @register_provider("inception")
@@ -22,13 +23,12 @@ class MercuryModel(DelegateOnly):
         super().__init__(model_name, provider, config=config)

         # https://docs.inceptionlabs.ai/get-started/get-started#external-libraries-compatibility
-        self.delegate = OpenAIModel(
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-            custom_client=create_openai_client_with_defaults(
-                api_key=model_library_settings.MERCURY_API_KEY,
+            delegate_config=DelegateConfig(
                 base_url="https://api.inceptionlabs.ai/v1/",
+                api_key=SecretStr(model_library_settings.MERCURY_API_KEY),
             ),
             use_completions=True,
+            delegate_provider="openai",
         )
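
The switch from a hand-built OpenAI client to DelegateConfig also moves keys into pydantic's SecretStr, which masks the value in reprs and logs. Standard pydantic behavior, shown with a throwaway key:

from pydantic import SecretStr

key = SecretStr("sk-example")   # throwaway value
print(key)                      # prints **********
print(key.get_secret_value())   # prints sk-example
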
model_library/providers/kimi.py

@@ -1,13 +1,16 @@
-from typing import Literal
+from typing import Any, Literal
+
+from typing_extensions import override
+
+from pydantic import SecretStr

 from model_library import model_library_settings
 from model_library.base import (
+    DelegateConfig,
     DelegateOnly,
     LLMConfig,
 )
-from model_library.providers.openai import OpenAIModel
 from model_library.register_models import register_provider
-from model_library.utils import create_openai_client_with_defaults


 @register_provider("kimi")
@@ -22,13 +25,20 @@ class KimiModel(DelegateOnly):
         super().__init__(model_name, provider, config=config)

         # https://platform.moonshot.ai/docs/guide/migrating-from-openai-to-kimi#about-api-compatibility
-        self.delegate = OpenAIModel(
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-            custom_client=create_openai_client_with_defaults(
-                api_key=model_library_settings.KIMI_API_KEY,
+            delegate_config=DelegateConfig(
                 base_url="https://api.moonshot.ai/v1/",
+                api_key=SecretStr(model_library_settings.KIMI_API_KEY),
             ),
             use_completions=True,
+            delegate_provider="openai",
         )
+
+    @override
+    def _get_extra_body(self) -> dict[str, Any]:
+        """
+        Build extra body parameters for Kimi-specific features.
+        see https://platform.moonshot.ai/docs/guide/kimi-k2-5-quickstart#parameters-differences-in-request-body
+        """
+        return {"thinking": {"type": "enabled" if self.reasoning else "disabled"}}
model_library/providers/minimax.py

@@ -1,12 +1,17 @@
-from typing import Literal
+from typing import Literal, Sequence
+
+from pydantic import SecretStr
+from typing_extensions import override

 from model_library import model_library_settings
-from model_library.base import DelegateOnly, LLMConfig
-from model_library.providers.anthropic import AnthropicModel
+from model_library.base import (
+    DelegateConfig,
+    DelegateOnly,
+    InputItem,
+    LLMConfig,
+    ToolDefinition,
+)
 from model_library.register_models import register_provider
-from model_library.utils import default_httpx_client
-
-from anthropic import AsyncAnthropic


 @register_provider("minimax")
@@ -20,14 +25,26 @@ class MinimaxModel(DelegateOnly):
     ):
         super().__init__(model_name, provider, config=config)

-        self.delegate = AnthropicModel(
-            model_name=self.model_name,
-            provider=self.provider,
+        self.init_delegate(
             config=config,
-            custom_client=AsyncAnthropic(
-                api_key=model_library_settings.MINIMAX_API_KEY,
+            delegate_config=DelegateConfig(
                 base_url="https://api.minimax.io/anthropic",
-                http_client=default_httpx_client(),
-                max_retries=1,
+                api_key=SecretStr(model_library_settings.MINIMAX_API_KEY),
             ),
+            delegate_provider="anthropic",
+        )
+
+    # minimax client shares anthropic's syntax
+    @override
+    async def count_tokens(
+        self,
+        input: Sequence[InputItem],
+        *,
+        history: Sequence[InputItem] = [],
+        tools: list[ToolDefinition] = [],
+        **kwargs: object,
+    ) -> int:
+        assert self.delegate
+        return await self.delegate.count_tokens(
+            input, history=history, tools=tools, **kwargs
         )
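
MinimaxModel overrides count_tokens only to forward it to the Anthropic-compatible delegate. Reduced to its essentials, the forwarding pattern looks like this (names are illustrative, not model_library's actual API):

import asyncio

# Sketch of the DelegateOnly forwarding pattern; illustrative names only.
class Delegate:
    async def count_tokens(self, text: str) -> int:
        return len(text.split())  # stand-in for a real token counter

class Wrapper:
    def __init__(self) -> None:
        self.delegate: Delegate | None = Delegate()

    async def count_tokens(self, text: str) -> int:
        assert self.delegate
        return await self.delegate.count_tokens(text)

print(asyncio.run(Wrapper().count_tokens("four words right here")))  # 4
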