huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

This release of huggingface-hub has been flagged as potentially problematic.

Files changed (125)
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +167 -10
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +13 -14
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/_cli_utils.py +2 -2
  15. huggingface_hub/cli/auth.py +5 -6
  16. huggingface_hub/cli/cache.py +14 -20
  17. huggingface_hub/cli/download.py +4 -4
  18. huggingface_hub/cli/jobs.py +560 -11
  19. huggingface_hub/cli/lfs.py +4 -4
  20. huggingface_hub/cli/repo.py +7 -7
  21. huggingface_hub/cli/repo_files.py +2 -2
  22. huggingface_hub/cli/upload.py +4 -4
  23. huggingface_hub/cli/upload_large_folder.py +3 -3
  24. huggingface_hub/commands/_cli_utils.py +2 -2
  25. huggingface_hub/commands/delete_cache.py +13 -13
  26. huggingface_hub/commands/download.py +4 -13
  27. huggingface_hub/commands/lfs.py +4 -4
  28. huggingface_hub/commands/repo_files.py +2 -2
  29. huggingface_hub/commands/scan_cache.py +1 -1
  30. huggingface_hub/commands/tag.py +1 -3
  31. huggingface_hub/commands/upload.py +4 -4
  32. huggingface_hub/commands/upload_large_folder.py +3 -3
  33. huggingface_hub/commands/user.py +5 -6
  34. huggingface_hub/community.py +5 -5
  35. huggingface_hub/constants.py +3 -41
  36. huggingface_hub/dataclasses.py +16 -19
  37. huggingface_hub/errors.py +42 -29
  38. huggingface_hub/fastai_utils.py +8 -9
  39. huggingface_hub/file_download.py +153 -252
  40. huggingface_hub/hf_api.py +815 -600
  41. huggingface_hub/hf_file_system.py +98 -62
  42. huggingface_hub/hub_mixin.py +37 -57
  43. huggingface_hub/inference/_client.py +177 -325
  44. huggingface_hub/inference/_common.py +110 -124
  45. huggingface_hub/inference/_generated/_async_client.py +226 -432
  46. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  47. huggingface_hub/inference/_generated/types/base.py +10 -7
  48. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  49. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  50. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  51. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  52. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  53. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  54. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  55. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  56. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  57. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  58. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  59. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/translation.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  64. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  65. huggingface_hub/inference/_mcp/agent.py +3 -3
  66. huggingface_hub/inference/_mcp/cli.py +1 -1
  67. huggingface_hub/inference/_mcp/constants.py +2 -3
  68. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  69. huggingface_hub/inference/_mcp/types.py +10 -7
  70. huggingface_hub/inference/_mcp/utils.py +11 -7
  71. huggingface_hub/inference/_providers/__init__.py +2 -2
  72. huggingface_hub/inference/_providers/_common.py +49 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +20 -31
  87. huggingface_hub/repocard.py +18 -18
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +4 -4
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +366 -208
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +25 -5
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  115. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  116. huggingface_hub/inference_api.py +0 -217
  117. huggingface_hub/keras_mixin.py +0 -500
  118. huggingface_hub/repository.py +0 -1477
  119. huggingface_hub/serialization/_tensorflow.py +0 -95
  120. huggingface_hub/utils/_hf_folder.py +0 -68
  121. huggingface_hub-0.34.4.dist-info/RECORD +0 -166
  122. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  123. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  124. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  125. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py

@@ -34,18 +34,17 @@
  # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
  import base64
  import logging
+ import os
  import re
  import warnings
- from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
-
- from requests import HTTPError
+ from contextlib import ExitStack
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

  from huggingface_hub import constants
- from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
  from huggingface_hub.inference._common import (
      TASKS_EXPECTING_IMAGES,
      ContentT,
-     ModelStatus,
      RequestParameters,
      _b64_encode,
      _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
      _bytes_to_list,
      _get_unsupported_text_generation_kwargs,
      _import_numpy,
-     _open_as_binary,
      _set_unsupported_text_generation_kwargs,
      _stream_chat_completion_response,
      _stream_text_generation_response,
@@ -103,9 +101,13 @@ from huggingface_hub.inference._generated.types import (
      ZeroShotImageClassificationOutputElement,
  )
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+ from huggingface_hub.utils import (
+     build_hf_headers,
+     get_session,
+     hf_raise_for_status,
+     validate_hf_hub_args,
+ )
  from huggingface_hub.utils._auth import get_token
- from huggingface_hub.utils._deprecation import _deprecate_method


  if TYPE_CHECKING:
@@ -142,16 +144,14 @@ class InferenceClient:
          arguments are mutually exclusive and have the exact same behavior.
      timeout (`float`, `optional`):
          The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-     headers (`Dict[str, str]`, `optional`):
+     headers (`dict[str, str]`, `optional`):
          Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
          Values in this dictionary will override the default values.
      bill_to (`str`, `optional`):
          The billing account to use for the requests. By default the requests are billed on the user's account.
          Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-     cookies (`Dict[str, str]`, `optional`):
+     cookies (`dict[str, str]`, `optional`):
          Additional cookies to send to the server.
-     proxies (`Any`, `optional`):
-         Proxies to use for the request.
      base_url (`str`, `optional`):
          Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
          follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -160,6 +160,7 @@ class InferenceClient:
          follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
      """

+     @validate_hf_hub_args
      def __init__(
          self,
          model: Optional[str] = None,
@@ -167,9 +168,8 @@
          provider: Optional[PROVIDER_OR_POLICY_T] = None,
          token: Optional[str] = None,
          timeout: Optional[float] = None,
-         headers: Optional[Dict[str, str]] = None,
-         cookies: Optional[Dict[str, str]] = None,
-         proxies: Optional[Any] = None,
+         headers: Optional[dict[str, str]] = None,
+         cookies: Optional[dict[str, str]] = None,
          bill_to: Optional[str] = None,
          # OpenAI compatibility
          base_url: Optional[str] = None,
@@ -231,11 +231,21 @@

          self.cookies = cookies
          self.timeout = timeout
-         self.proxies = proxies
+
+         self.exit_stack = ExitStack()

      def __repr__(self):
          return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.exit_stack.close()
+
+     def close(self):
+         self.exit_stack.close()
+
      @overload
      def _inner_post( # type: ignore[misc]
          self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
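Note: the hunk above is the headline behavioral change in this file. `InferenceClient` now owns a `contextlib.ExitStack`, works as a context manager, and exposes `close()` to release any streamed HTTP responses it still holds. A minimal usage sketch (the task call is illustrative, not taken from this diff):

```python
from huggingface_hub import InferenceClient

# New in 1.0: the client is a context manager; leaving the block closes the
# internal ExitStack along with any streamed responses registered on it.
with InferenceClient() as client:
    scores = client.text_classification("I love this movie!")
    print(scores)

# Long-lived clients can call close() explicitly instead:
client = InferenceClient()
try:
    scores = client.text_classification("I love this movie!")
finally:
    client.close()
```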
@@ -244,45 +254,46 @@
      @overload
      def _inner_post( # type: ignore[misc]
          self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-     ) -> Iterable[bytes]: ...
+     ) -> Iterable[str]: ...

      @overload
      def _inner_post(
          self, request_parameters: RequestParameters, *, stream: bool = False
-     ) -> Union[bytes, Iterable[bytes]]: ...
+     ) -> Union[bytes, Iterable[str]]: ...

      def _inner_post(
          self, request_parameters: RequestParameters, *, stream: bool = False
-     ) -> Union[bytes, Iterable[bytes]]:
+     ) -> Union[bytes, Iterable[str]]:
          """Make a request to the inference server."""
          # TODO: this should be handled in provider helpers directly
          if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
              request_parameters.headers["Accept"] = "image/png"

-         with _open_as_binary(request_parameters.data) as data_as_binary:
-             try:
-                 response = get_session().post(
+         try:
+             response = self.exit_stack.enter_context(
+                 get_session().stream(
+                     "POST",
                      request_parameters.url,
                      json=request_parameters.json,
-                     data=data_as_binary,
+                     content=request_parameters.data,
                      headers=request_parameters.headers,
                      cookies=self.cookies,
                      timeout=self.timeout,
-                     stream=stream,
-                     proxies=self.proxies,
                  )
-             except TimeoutError as error:
-                 # Convert any `TimeoutError` to a `InferenceTimeoutError`
-                 raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
-
-             try:
+             )
              hf_raise_for_status(response)
-             return response.iter_lines() if stream else response.content
-         except HTTPError as error:
+             if stream:
+                 return response.iter_lines()
+             else:
+                 return response.read()
+         except TimeoutError as error:
+             # Convert any `TimeoutError` to a `InferenceTimeoutError`
+             raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
+         except HfHubHTTPError as error:
              if error.response.status_code == 422 and request_parameters.task != "unknown":
                  msg = str(error.args[0])
                  if len(error.response.text) > 0:
-                     msg += f"\n{error.response.text}\n"
+                     msg += f"{os.linesep}{error.response.text}{os.linesep}"
                  error.args = (msg,) + error.args[1:]
              raise
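With `requests` gone from the import list, callers that previously caught `requests.HTTPError` should now catch [`HfHubHTTPError`] (or a subclass such as `BadRequestError`). A migration sketch, with a hypothetical local audio file:

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

client = InferenceClient(timeout=30)
try:
    labels = client.audio_classification("sample.flac")  # hypothetical file path
except InferenceTimeoutError:
    print("model unavailable or request timed out")
except HfHubHTTPError as err:
    # 1.0 raises hub-specific errors; the HTTP response is still attached.
    print(f"request failed: HTTP {err.response.status_code}")
```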

@@ -293,7 +304,7 @@
          model: Optional[str] = None,
          top_k: Optional[int] = None,
          function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-     ) -> List[AudioClassificationOutputElement]:
+     ) -> list[AudioClassificationOutputElement]:
          """
          Perform audio classification on the provided audio content.

@@ -311,12 +322,12 @@
                  The function to apply to the model outputs in order to retrieve the scores.

          Returns:
-             `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+             `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -348,7 +359,7 @@
          audio: ContentT,
          *,
          model: Optional[str] = None,
-     ) -> List[AudioToAudioOutputElement]:
+     ) -> list[AudioToAudioOutputElement]:
          """
          Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -362,12 +373,12 @@
              audio_to_audio will be used.

          Returns:
-             `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+             `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

          Raises:
              `InferenceTimeoutError`:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -400,7 +411,7 @@
          audio: ContentT,
          *,
          model: Optional[str] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> AutomaticSpeechRecognitionOutput:
          """
          Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -411,7 +422,7 @@
              model (`str`, *optional*):
                  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-             extra_body (`Dict`, *optional*):
+             extra_body (`dict`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -420,7 +431,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
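The `List`/`Dict` → `list`/`dict` substitutions seen above repeat through the rest of the file: 1.0 annotates with PEP 585 builtin generics, which are subscriptable at runtime on Python 3.9 and newer. The pattern in isolation, using a hypothetical helper rather than code from this diff:

```python
# 0.34 style required typing imports:
#   from typing import Dict, List
#   def top_labels(scores: Dict[str, float], k: int) -> List[str]: ...

# 1.0 style uses the builtin generics directly:
def top_labels(scores: dict[str, float], k: int) -> list[str]:
    """Return the k highest-scoring labels (illustrative only)."""
    return sorted(scores, key=scores.__getitem__, reverse=True)[:k]

print(top_labels({"cat": 0.9, "dog": 0.7, "bird": 0.2}, k=2))  # ['cat', 'dog']
```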
@@ -446,105 +457,105 @@
      @overload
      def chat_completion( # type: ignore
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: Literal[False] = False,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> ChatCompletionOutput: ...

      @overload
      def chat_completion( # type: ignore
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: Literal[True] = True,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Iterable[ChatCompletionStreamOutput]: ...

      @overload
      def chat_completion(
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: bool = False,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

      def chat_completion(
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: bool = False,
          # Parameters from ChatCompletionInput (handled manually)
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
          """
          A method for completing conversations using a specified language model.
@@ -574,7 +585,7 @@
              frequency_penalty (`float`, *optional*):
                  Penalizes new tokens based on their existing frequency
                  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-             logit_bias (`List[float]`, *optional*):
+             logit_bias (`list[float]`, *optional*):
                  Adjusts the likelihood of specific tokens appearing in the generated output.
              logprobs (`bool`, *optional*):
                  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -590,7 +601,7 @@
                  Grammar constraints. Can be either a JSONSchema or a regex.
              seed (Optional[`int`], *optional*):
                  Seed for reproducible control flow. Defaults to None.
-             stop (`List[str]`, *optional*):
+             stop (`list[str]`, *optional*):
                  Up to four strings which trigger the end of the response.
                  Defaults to None.
              stream (`bool`, *optional*):
@@ -614,7 +625,7 @@
              tools (List of [`ChatCompletionInputTool`], *optional*):
                  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                  provide a list of functions the model may generate JSON inputs for.
-             extra_body (`Dict`, *optional*):
+             extra_body (`dict`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -626,7 +637,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
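Tying the overloads back to `_inner_post`: with `stream=True` the raw SSE body is now consumed as an iterator of `str` lines, but the public API still yields parsed chunks. A sketch, assuming the default recommended chat model:

```python
from huggingface_hub import InferenceClient

with InferenceClient() as client:
    # stream=True returns Iterable[ChatCompletionStreamOutput]; each chunk is
    # decoded from one SSE line (Iterable[str] internally as of 1.0).
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": "Write one haiku about diffs."}],
        max_tokens=64,
        stream=True,
    ):
        print(chunk.choices[0].delta.content or "", end="")
```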
@@ -940,8 +951,8 @@
          max_question_len: Optional[int] = None,
          max_seq_len: Optional[int] = None,
          top_k: Optional[int] = None,
-         word_boxes: Optional[List[Union[List[float], str]]] = None,
-     ) -> List[DocumentQuestionAnsweringOutputElement]:
+         word_boxes: Optional[list[Union[list[float], str]]] = None,
+     ) -> list[DocumentQuestionAnsweringOutputElement]:
          """
          Answer questions on document images.

@@ -971,16 +982,16 @@
              top_k (`int`, *optional*):
                  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                  answers if there are not enough options available within the context.
-             word_boxes (`List[Union[List[float], str`, *optional*):
+             word_boxes (`list[Union[list[float], str`, *optional*):
                  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                  step and use the provided bounding boxes instead.
          Returns:
-             `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+             `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.


@@ -994,7 +1005,7 @@
          """
          model_id = model or self.model
          provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-         inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+         inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
          request_parameters = provider_helper.prepare_request(
              inputs=inputs,
              parameters={
@@ -1055,7 +1066,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1092,9 +1103,9 @@
          text: str,
          *,
          model: Optional[str] = None,
-         targets: Optional[List[str]] = None,
+         targets: Optional[list[str]] = None,
          top_k: Optional[int] = None,
-     ) -> List[FillMaskOutputElement]:
+     ) -> list[FillMaskOutputElement]:
          """
          Fill in a hole with a missing word (token to be precise).

@@ -1104,20 +1115,20 @@
              model (`str`, *optional*):
                  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-             targets (`List[str`, *optional*):
+             targets (`list[str`, *optional*):
                  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                  resulting token will be used (with a warning, and that might be slower).
              top_k (`int`, *optional*):
                  When passed, overrides the number of predictions to return.
          Returns:
-             `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+             `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
                  probability, token reference, and completed text.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1150,7 +1161,7 @@
          model: Optional[str] = None,
          function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
          top_k: Optional[int] = None,
-     ) -> List[ImageClassificationOutputElement]:
+     ) -> list[ImageClassificationOutputElement]:
          """
          Perform image classification on the given image using the specified model.

@@ -1165,12 +1176,12 @@
              top_k (`int`, *optional*):
                  When specified, limits the output to the top K most probable classes.
          Returns:
-             `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+             `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1202,7 +1213,7 @@
          overlap_mask_area_threshold: Optional[float] = None,
          subtask: Optional["ImageSegmentationSubtask"] = None,
          threshold: Optional[float] = None,
-     ) -> List[ImageSegmentationOutputElement]:
+     ) -> list[ImageSegmentationOutputElement]:
          """
          Perform image segmentation on the given image using the specified model.

@@ -1227,12 +1238,12 @@
              threshold (`float`, *optional*):
                  Probability threshold to filter out predicted masks.
          Returns:
-             `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+             `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1309,7 +1320,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1439,7 +1450,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1462,12 +1473,12 @@
              api_key=self.token,
          )
          response = self._inner_post(request_parameters)
-         output = ImageToTextOutput.parse_obj(response)
-         return output[0] if isinstance(output, list) else output
+         output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+         return output_list[0]

      def object_detection(
          self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-     ) -> List[ObjectDetectionOutputElement]:
+     ) -> list[ObjectDetectionOutputElement]:
          """
          Perform object detection on the given image using the specified model.
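The `image_to_text` hunk above also tightens response parsing: instead of branching on whether `parse_obj()` returned a single object or a list, the payload is always parsed as a list and its first element is returned. Caller code is unchanged (the image path below is hypothetical):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
caption = client.image_to_text("cat.png")  # hypothetical local image
print(caption.generated_text)  # reliably a single ImageToTextOutput in 1.0
client.close()
```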

@@ -1486,12 +1497,12 @@
              threshold (`float`, *optional*):
                  The probability necessary to make a prediction.
          Returns:
-             `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+             `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.
              `ValueError`:
                  If the request output is not a List.
@@ -1529,7 +1540,7 @@
          max_question_len: Optional[int] = None,
          max_seq_len: Optional[int] = None,
          top_k: Optional[int] = None,
-     ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+     ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
          """
          Retrieve the answer to a question from a given text.

@@ -1561,13 +1572,13 @@
                  topk answers if there are not enough options available within the context.

          Returns:
-             Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+             Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1601,15 +1612,15 @@
          return output

      def sentence_similarity(
-         self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
-     ) -> List[float]:
+         self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+     ) -> list[float]:
          """
          Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

          Args:
              sentence (`str`):
                  The main sentence to compare to others.
-             other_sentences (`List[str]`):
+             other_sentences (`list[str]`):
                  The list of sentences to compare to.
              model (`str`, *optional*):
                  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1617,12 +1628,12 @@
                  Defaults to None.

          Returns:
-             `List[float]`: The embedding representing the input text.
+             `list[float]`: The embedding representing the input text.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1659,7 +1670,7 @@
          *,
          model: Optional[str] = None,
          clean_up_tokenization_spaces: Optional[bool] = None,
-         generate_parameters: Optional[Dict[str, Any]] = None,
+         generate_parameters: Optional[dict[str, Any]] = None,
          truncation: Optional["SummarizationTruncationStrategy"] = None,
      ) -> SummarizationOutput:
          """
@@ -1673,7 +1684,7 @@
                  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
              clean_up_tokenization_spaces (`bool`, *optional*):
                  Whether to clean up the potential extra spaces in the text output.
-             generate_parameters (`Dict[str, Any]`, *optional*):
+             generate_parameters (`dict[str, Any]`, *optional*):
                  Additional parametrization of the text generation algorithm.
              truncation (`"SummarizationTruncationStrategy"`, *optional*):
                  The truncation strategy to use.
@@ -1683,7 +1694,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1713,7 +1724,7 @@

      def table_question_answering(
          self,
-         table: Dict[str, Any],
+         table: dict[str, Any],
          query: str,
          *,
          model: Optional[str] = None,
@@ -1748,7 +1759,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1773,12 +1784,12 @@
          response = self._inner_post(request_parameters)
          return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-     def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+     def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
          """
          Classifying a target category (a group) based on a set of attributes.

          Args:
-             table (`Dict[str, Any]`):
+             table (`dict[str, Any]`):
                  Set of attributes to classify.
              model (`str`, *optional*):
                  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1791,7 +1802,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1828,12 +1839,12 @@
          response = self._inner_post(request_parameters)
          return _bytes_to_list(response)

-     def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+     def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
          """
          Predicting a numerical target value given a set of attributes/features in a table.

          Args:
-             table (`Dict[str, Any]`):
+             table (`dict[str, Any]`):
                  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
              model (`str`, *optional*):
                  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1846,7 +1857,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1885,7 +1896,7 @@
          model: Optional[str] = None,
          top_k: Optional[int] = None,
          function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-     ) -> List[TextClassificationOutputElement]:
+     ) -> list[TextClassificationOutputElement]:
          """
          Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1902,12 +1913,12 @@
                  The function to apply to the model outputs in order to retrieve the scores.

          Returns:
-             `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+             `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1955,8 +1966,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -1985,8 +1996,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2015,8 +2026,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None, # Manual default value
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2045,8 +2056,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2075,8 +2086,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2104,8 +2115,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2161,9 +2172,9 @@
                  Whether to prepend the prompt to the generated text
              seed (`int`, *optional*):
                  Random sampling seed
-             stop (`List[str]`, *optional*):
+             stop (`list[str]`, *optional*):
                  Stop generating tokens if a member of `stop` is generated.
-             stop_sequences (`List[str]`, *optional*):
+             stop_sequences (`list[str]`, *optional*):
                  Deprecated argument. Use `stop` instead.
              temperature (`float`, *optional*):
                  The value used to module the logits distribution.
@@ -2196,7 +2207,7 @@
                  If input values are not valid. No HTTP call is made to the server.
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2385,7 +2396,7 @@
          # Handle errors separately for more precise error messages
          try:
              bytes_output = self._inner_post(request_parameters, stream=stream or False)
-         except HTTPError as e:
+         except HfHubHTTPError as e:
              match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
              if isinstance(e, BadRequestError) and match:
                  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
@@ -2440,7 +2451,7 @@
          model: Optional[str] = None,
          scheduler: Optional[str] = None,
          seed: Optional[int] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> "Image":
          """
          Generate an image based on a given text using a specified model.
@@ -2478,7 +2489,7 @@
                  Override the scheduler with a compatible one.
              seed (`int`, *optional*):
                  Seed for the random number generator.
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.

@@ -2488,7 +2499,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2577,11 +2588,11 @@
          *,
          model: Optional[str] = None,
          guidance_scale: Optional[float] = None,
-         negative_prompt: Optional[List[str]] = None,
+         negative_prompt: Optional[list[str]] = None,
          num_frames: Optional[float] = None,
          num_inference_steps: Optional[int] = None,
          seed: Optional[int] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> bytes:
          """
          Generate a video based on a given text.
@@ -2600,7 +2611,7 @@
              guidance_scale (`float`, *optional*):
                  A higher guidance scale value encourages the model to generate videos closely linked to the text
                  prompt, but values too high may cause saturation and other artifacts.
-             negative_prompt (`List[str]`, *optional*):
+             negative_prompt (`list[str]`, *optional*):
                  One or several prompt to guide what NOT to include in video generation.
              num_frames (`float`, *optional*):
                  The num_frames parameter determines how many video frames are generated.
@@ -2609,7 +2620,7 @@
                  expense of slower inference.
              seed (`int`, *optional*):
                  Seed for the random number generator.
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.

@@ -2689,7 +2700,7 @@
          top_p: Optional[float] = None,
          typical_p: Optional[float] = None,
          use_cache: Optional[bool] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> bytes:
          """
          Synthesize an audio of a voice pronouncing a given text.
@@ -2751,7 +2762,7 @@
                  paper](https://hf.co/papers/2202.00666) for more details.
              use_cache (`bool`, *optional*):
                  Whether the model should use the past last key/values attentions to speed up decoding
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -2760,7 +2771,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2883,9 +2894,9 @@
          *,
          model: Optional[str] = None,
          aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-         ignore_labels: Optional[List[str]] = None,
+         ignore_labels: Optional[list[str]] = None,
          stride: Optional[int] = None,
-     ) -> List[TokenClassificationOutputElement]:
+     ) -> list[TokenClassificationOutputElement]:
          """
          Perform token classification on the given text.
          Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2899,18 +2910,18 @@ class InferenceClient:
2899
2910
  Defaults to None.
2900
2911
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
2901
2912
  The strategy used to fuse tokens based on model predictions
2902
- ignore_labels (`List[str`, *optional*):
2913
+ ignore_labels (`list[str`, *optional*):
2903
2914
  A list of labels to ignore
2904
2915
  stride (`int`, *optional*):
2905
2916
  The number of overlapping tokens between chunks when splitting the input text.
2906
2917
 
2907
2918
  Returns:
2908
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2919
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2909
2920
 
2910
2921
  Raises:
2911
2922
  [`InferenceTimeoutError`]:
2912
2923
  If the model is unavailable or the request times out.
2913
- `HTTPError`:
2924
+ [`HfHubHTTPError`]:
2914
2925
  If the request fails with an HTTP error status code other than HTTP 503.
2915
2926
 
2916
2927
  Example:
@@ -2961,7 +2972,7 @@ class InferenceClient:
2961
2972
  tgt_lang: Optional[str] = None,
2962
2973
  clean_up_tokenization_spaces: Optional[bool] = None,
2963
2974
  truncation: Optional["TranslationTruncationStrategy"] = None,
2964
- generate_parameters: Optional[Dict[str, Any]] = None,
2975
+ generate_parameters: Optional[dict[str, Any]] = None,
2965
2976
  ) -> TranslationOutput:
2966
2977
  """
2967
2978
  Convert text from one language to another.
@@ -2986,7 +2997,7 @@ class InferenceClient:
2986
2997
  Whether to clean up the potential extra spaces in the text output.
2987
2998
  truncation (`"TranslationTruncationStrategy"`, *optional*):
2988
2999
  The truncation strategy to use.
2989
- generate_parameters (`Dict[str, Any]`, *optional*):
3000
+ generate_parameters (`dict[str, Any]`, *optional*):
2990
3001
  Additional parametrization of the text generation algorithm.
2991
3002
 
2992
3003
  Returns:
@@ -2995,7 +3006,7 @@ class InferenceClient:
2995
3006
  Raises:
2996
3007
  [`InferenceTimeoutError`]:
2997
3008
  If the model is unavailable or the request times out.
2998
- `HTTPError`:
3009
+ [`HfHubHTTPError`]:
2999
3010
  If the request fails with an HTTP error status code other than HTTP 503.
3000
3011
  `ValueError`:
3001
3012
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
@@ -3048,7 +3059,7 @@ class InferenceClient:
3048
3059
  *,
3049
3060
  model: Optional[str] = None,
3050
3061
  top_k: Optional[int] = None,
3051
- ) -> List[VisualQuestionAnsweringOutputElement]:
3062
+ ) -> list[VisualQuestionAnsweringOutputElement]:
3052
3063
  """
3053
3064
  Answering open-ended questions based on an image.
3054
3065
 
@@ -3065,12 +3076,12 @@ class InferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that fewer than
  `top_k` answers are returned if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
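For completeness, a sketch of `visual_question_answering` under the 1.0 return type; the model ID and image URL are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Model ID and image URL are illustrative; any VQA model and image work.
answers = client.visual_question_answering(
    image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
    question="What animal is this?",
    model="dandelin/vilt-b32-finetuned-vqa",
)
for answer in answers:
    # Each element carries the predicted answer and its probability.
    print(answer.answer, answer.score)
```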
@@ -3103,21 +3114,21 @@ class InferenceClient:
  def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3132,12 +3143,12 @@ class InferenceClient:


  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
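A short sketch of `zero_shot_classification` with the new `list` annotations; the NLI checkpoint is an illustrative assumption:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Model ID is an illustrative choice of NLI-based zero-shot classifier.
results = client.zero_shot_classification(
    "I really enjoyed this film, the acting was superb.",
    candidate_labels=["positive", "negative", "neutral"],
    multi_label=False,
    model="facebook/bart-large-mnli",
)
# With multi_label=False the scores are normalized to sum to 1 across labels.
for item in results:
    print(item.label, round(item.score, 3))
```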
@@ -3209,22 +3220,22 @@ class InferenceClient:
  def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3234,12 +3245,12 @@ class InferenceClient:
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
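And the image variant; the CLIP checkpoint and image URL are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# CLIP checkpoint and image URL are illustrative choices.
results = client.zero_shot_image_classification(
    "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
    candidate_labels=["tiger", "lion", "house cat"],
    model="openai/clip-vit-base-patch32",
)
for item in results:
    print(item.label, item.score)
```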
@@ -3273,102 +3284,7 @@ class InferenceClient:
  response = self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
- ),
- )
- def list_deployed_models(
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
- ) -> Dict[str, List[str]]:
- """
- List models deployed on the HF Serverless Inference API service.
-
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
- frameworks are checked, the more time it will take.
-
- <Tip warning={true}>
-
- This endpoint method does not return a live list of all models available for the HF Inference API service.
- It searches over a cached list of models that were recently available and the list may not be up to date.
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- <Tip>
-
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- Args:
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
- custom set of frameworks to check.
-
- Returns:
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
- Example:
- ```python
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
-
- # Discover zero-shot-classification models currently deployed
- >>> models = client.list_deployed_models()
- >>> models["zero-shot-classification"]
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
- # List from only 1 framework
- >>> client.list_deployed_models("text-generation-inference")
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
- # Resolve which frameworks to check
- if frameworks is None:
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
- elif frameworks == "all":
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
- elif isinstance(frameworks, str):
- frameworks = [frameworks]
- frameworks = list(set(frameworks))
-
- # Fetch them iteratively
- models_by_task: Dict[str, List[str]] = {}
-
- def _unpack_response(framework: str, items: List[Dict]) -> None:
- for model in items:
- if framework == "sentence-transformers":
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
- # branded as such in the API response
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
- else:
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
- for framework in frameworks:
- response = get_session().get(
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
- )
- hf_raise_for_status(response)
- _unpack_response(framework, response.json())
-
- # Sort alphabetically for discoverability and return
- for task, models in models_by_task.items():
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
- return models_by_task
-
- def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.

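`list_deployed_models` is deleted outright in 1.0 (it had been deprecated since 0.35.0). Per the deprecation message above, discovery moves to `HfApi.list_models` with the `inference_provider` filter. A hedged migration sketch; the provider and task values are illustrative:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement for `client.list_deployed_models()`: list models that are
# warm on a given provider, optionally narrowed by task.
models = api.list_models(
    inference_provider="hf-inference",  # a provider name; "all" should also work
    task="zero-shot-classification",
    limit=10,
)
for model in models:
    print(model.id)
```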
@@ -3381,7 +3297,7 @@ class InferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py
@@ -3431,7 +3347,6 @@ class InferenceClient:
  Check the health of the deployed endpoint.

  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
- For Inference API, please use [`InferenceClient.get_model_status`] instead.

  Args:
  model (`str`, *optional*):
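With the serverless fallback gone, `health_check` now only accepts Inference Endpoint URLs, as the tightened `ValueError` in the next hunk shows. A minimal sketch under that assumption; the endpoint URL is a placeholder:

```python
from huggingface_hub import InferenceClient

# The URL must point at a TGI- or TEI-powered Inference Endpoint;
# this one is a placeholder, not a real deployment.
client = InferenceClient(model="https://my-endpoint.us-east-1.aws.endpoints.huggingface.cloud")

if client.health_check():
    print("Endpoint is up and ready.")
else:
    print("Endpoint is not healthy (or still scaling up).")
```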
@@ -3455,75 +3370,12 @@ class InferenceClient:
  if model is None:
  raise ValueError("Model id not provided.")
  if not model.startswith(("http://", "https://")):
- raise ValueError(
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
- )
+ raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"

  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  return response.status_code == 200

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
- ),
- )
- def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
- """
- Get the status of a model hosted on the HF Inference API.
-
- <Tip>
-
- This endpoint is mostly useful when you already know which model you want to use and want to check its
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
- </Tip>
-
- Args:
- model (`str`, *optional*):
- Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
- identifier cannot be a URL.
-
-
- Returns:
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
- about the state of the model: load, state, compute type and framework.
-
- Example:
- ```py
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
- >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
- model = model or self.model
- if model is None:
- raise ValueError("Model id not provided.")
- if model.startswith("https://"):
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
- response = get_session().get(url, headers=build_hf_headers(token=self.token))
- hf_raise_for_status(response)
- response_data = response.json()
-
- if "error" in response_data:
- raise ValueError(response_data["error"])
-
- return ModelStatus(
- loaded=response_data["loaded"],
- state=response_data["state"],
- compute_type=response_data["compute_type"],
- framework=response_data["framework"],
- )
-
  @property
  def chat(self) -> "ProxyClientChat":
  return ProxyClientChat(self)
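`get_model_status` (and with it the `ModelStatus` return type) is removed on the same grounds. Per the deprecation message above, the replacement is `HfApi.model_info`; the sketch below assumes the `expand=["inference"]` key reports warm/cold status, which matches current `HfApi` docs but is not spelled out in this diff:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement for `client.get_model_status(...)`: fetch model metadata with
# the inference status expanded. The "inference" expand key is an assumption
# based on current HfApi docs; it reports whether the model is warm.
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm"
```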