huggingface-hub 0.35.0rc0__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +19 -1
- huggingface_hub/_jobs_api.py +168 -12
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_oauth.py +5 -9
- huggingface_hub/_tensorboard_logger.py +9 -10
- huggingface_hub/_upload_large_folder.py +108 -1
- huggingface_hub/cli/auth.py +4 -1
- huggingface_hub/cli/cache.py +7 -9
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +591 -13
- huggingface_hub/cli/repo.py +10 -4
- huggingface_hub/commands/delete_cache.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/dataclasses.py +3 -0
- huggingface_hub/file_download.py +12 -10
- huggingface_hub/hf_api.py +549 -95
- huggingface_hub/hf_file_system.py +4 -10
- huggingface_hub/hub_mixin.py +5 -3
- huggingface_hub/inference/_client.py +98 -181
- huggingface_hub/inference/_common.py +72 -70
- huggingface_hub/inference/_generated/_async_client.py +116 -201
- huggingface_hub/inference/_generated/types/chat_completion.py +2 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +1 -1
- huggingface_hub/inference/_mcp/mcp_client.py +28 -11
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_mcp/utils.py +7 -3
- huggingface_hub/inference/_providers/__init__.py +13 -0
- huggingface_hub/inference/_providers/_common.py +29 -4
- huggingface_hub/inference/_providers/black_forest_labs.py +1 -1
- huggingface_hub/inference/_providers/fal_ai.py +33 -2
- huggingface_hub/inference/_providers/hf_inference.py +15 -7
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +1 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/lfs.py +2 -4
- huggingface_hub/repocard.py +2 -1
- huggingface_hub/utils/_dotenv.py +24 -20
- huggingface_hub/utils/_git_credential.py +1 -1
- huggingface_hub/utils/_http.py +3 -5
- huggingface_hub/utils/_runtime.py +1 -0
- huggingface_hub/utils/_typing.py +24 -4
- huggingface_hub/utils/_xet_progress_reporting.py +31 -10
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/METADATA +7 -4
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/RECORD +50 -48
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_file_system.py
CHANGED

@@ -896,7 +896,7 @@ class HfFileSystem(fsspec.AbstractFileSystem):
                    repo_type=resolve_remote_path.repo_type,
                    endpoint=self.endpoint,
                ),
-                temp_file=outfile,
+                temp_file=outfile,  # type: ignore[arg-type]
                displayed_filename=rpath,
                expected_size=expected_size,
                resume_size=0,
@@ -958,13 +958,7 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
            repo_type=self.resolved_path.repo_type,
            endpoint=self.fs.endpoint,
        )
-        r = http_backoff(
-            "GET",
-            url,
-            headers=headers,
-            retry_on_status_codes=(500, 502, 503, 504),
-            timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
-        )
+        r = http_backoff("GET", url, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT)
        hf_raise_for_status(r)
        return r.content

@@ -1063,12 +1057,12 @@ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
                "GET",
                url,
                headers=self.fs._api._build_hf_headers(),
-                retry_on_status_codes=(500, 502, 503, 504),
                stream=True,
                timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
            )
            hf_raise_for_status(self.response)
        try:
+            self.response.raw.decode_content = True
            out = self.response.raw.read(*read_args)
        except Exception:
            self.response.close()
@@ -1085,12 +1079,12 @@ class HfFileSystemStreamFile(fsspec.spec.AbstractBufferedFile):
                "GET",
                url,
                headers={"Range": "bytes=%d-" % self.loc, **self.fs._api._build_hf_headers()},
-                retry_on_status_codes=(500, 502, 503, 504),
                stream=True,
                timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
            )
            hf_raise_for_status(self.response)
        try:
+            self.response.raw.decode_content = True
            out = self.response.raw.read(*read_args)
        except Exception:
            self.response.close()
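The two stream-read hunks above set `decode_content = True` on the underlying urllib3 response before calling `raw.read()`: reading `response.raw` directly returns the bytes exactly as sent on the wire, so gzip/deflate-encoded payloads stay compressed unless urllib3 is told to decode them. A minimal sketch of that idiom outside huggingface_hub (the URL is illustrative):

```python
# Minimal sketch of the `decode_content` idiom; not taken from huggingface_hub.
import requests

response = requests.get("https://example.com/some-file.json", stream=True)
response.raise_for_status()

# `response.raw` is the underlying urllib3 response. Without the flag below,
# raw.read() returns the wire bytes, which may still be Content-Encoding compressed.
response.raw.decode_content = True  # ask urllib3 to undo gzip/deflate on read
chunk = response.raw.read(1024)     # now yields decoded payload bytes
```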
huggingface_hub/hub_mixin.py
CHANGED
@@ -266,12 +266,14 @@ class ModelHubMixin:
        if pipeline_tag is not None:
            info.model_card_data.pipeline_tag = pipeline_tag
        if tags is not None:
+            normalized_tags = list(tags)
            if info.model_card_data.tags is not None:
-                info.model_card_data.tags.extend(
+                info.model_card_data.tags.extend(normalized_tags)
            else:
-                info.model_card_data.tags =
+                info.model_card_data.tags = normalized_tags

-        info.model_card_data.tags
+        if info.model_card_data.tags is not None:
+            info.model_card_data.tags = sorted(set(info.model_card_data.tags))

        # Handle encoders/decoders for args
        cls._hub_mixin_coders = coders or {}
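The hub_mixin change copies `tags` into a list before extending or assigning the model card tags, then de-duplicates and sorts them only when some tags are present. A hedged sketch of the resulting behaviour (the class name and tags are illustrative, and `_hub_mixin_info` is a private attribute that could change):

```python
# Illustrative sketch only; `_hub_mixin_info` is private API.
from huggingface_hub import ModelHubMixin


class MyModel(
    ModelHubMixin,
    tags=("tag-b", "tag-a", "tag-a"),  # any iterable is copied via list(tags) first
):
    def __init__(self, hidden_size: int = 8):
        self.hidden_size = hidden_size


# Tags end up de-duplicated and sorted, and the sort only runs when tags exist.
print(MyModel._hub_mixin_info.model_card_data.tags)
# e.g. ['tag-a', 'tag-b'] (plus any default tags the mixin defines)
```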
huggingface_hub/inference/_client.py
CHANGED

@@ -45,7 +45,6 @@ from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
-    ModelStatus,
     RequestParameters,
     _b64_encode,
     _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
     _bytes_to_list,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _open_as_binary,
     _set_unsupported_text_generation_kwargs,
     _stream_chat_completion_response,
     _stream_text_generation_response,
@@ -81,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
     ImageSegmentationSubtask,
     ImageToImageTargetSize,
     ImageToTextOutput,
+    ImageToVideoTargetSize,
     ObjectDetectionOutputElement,
     Padding,
     QuestionAnsweringOutputElement,
@@ -104,7 +103,6 @@ from huggingface_hub.inference._generated.types import (
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
 from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
 from huggingface_hub.utils._auth import get_token
-from huggingface_hub.utils._deprecation import _deprecate_method


 if TYPE_CHECKING:
@@ -132,7 +130,7 @@ class InferenceClient:
        Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
        arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
        provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"` or `"together"`.
            Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
            If model is a URL or `base_url` is passed, then `provider` is not used.
        token (`str`, *optional*):
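The updated docstring lists the `publicai` and `"scaleway"` providers that ship in this release (see `inference/_providers/publicai.py` and `inference/_providers/scaleway.py` in the file list above). A hedged example of selecting one of them; the model ID is illustrative and must be served by that provider:

```python
# Illustrative example; pick any model the chosen provider actually serves.
from huggingface_hub import InferenceClient

client = InferenceClient(provider="scaleway")  # or provider="publicai"
completion = client.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(completion.choices[0].message.content)
```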
@@ -258,21 +256,20 @@ class InferenceClient:
        if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
            request_parameters.headers["Accept"] = "image/png"

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
+        try:
+            response = get_session().post(
+                request_parameters.url,
+                json=request_parameters.json,
+                data=request_parameters.data,
+                headers=request_parameters.headers,
+                cookies=self.cookies,
+                timeout=self.timeout,
+                stream=stream,
+                proxies=self.proxies,
+            )
+        except TimeoutError as error:
+            # Convert any `TimeoutError` to a `InferenceTimeoutError`
+            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore

        try:
            hf_raise_for_status(response)
@@ -1339,6 +1336,85 @@ class InferenceClient:
        response = provider_helper.get_response(response, request_parameters)
        return _bytes_to_image(response)

+    def image_to_video(
+        self,
+        image: ContentT,
+        *,
+        model: Optional[str] = None,
+        prompt: Optional[str] = None,
+        negative_prompt: Optional[str] = None,
+        num_frames: Optional[float] = None,
+        num_inference_steps: Optional[int] = None,
+        guidance_scale: Optional[float] = None,
+        seed: Optional[int] = None,
+        target_size: Optional[ImageToVideoTargetSize] = None,
+        **kwargs,
+    ) -> bytes:
+        """
+        Generate a video from an input image.
+
+        Args:
+            image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
+                The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
+            model (`str`, *optional*):
+                The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
+                Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
+            prompt (`str`, *optional*):
+                The text prompt to guide the video generation.
+            negative_prompt (`str`, *optional*):
+                One prompt to guide what NOT to include in video generation.
+            num_frames (`float`, *optional*):
+                The num_frames parameter determines how many video frames are generated.
+            num_inference_steps (`int`, *optional*):
+                For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
+                quality image at the expense of slower inference.
+            guidance_scale (`float`, *optional*):
+                For diffusion models. A higher guidance scale value encourages the model to generate videos closely
+                linked to the text prompt at the expense of lower image quality.
+            seed (`int`, *optional*):
+                The seed to use for the video generation.
+            target_size (`ImageToVideoTargetSize`, *optional*):
+                The size in pixel of the output video frames.
+            num_inference_steps (`int`, *optional*):
+                The number of denoising steps. More denoising steps usually lead to a higher quality video at the
+                expense of slower inference.
+            seed (`int`, *optional*):
+                Seed for the random number generator.
+
+        Returns:
+            `bytes`: The generated video.
+
+        Examples:
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient()
+        >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
+        >>> with open("tiger.mp4", "wb") as f:
+        ...     f.write(video)
+        ```
+        """
+        model_id = model or self.model
+        provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
+        request_parameters = provider_helper.prepare_request(
+            inputs=image,
+            parameters={
+                "prompt": prompt,
+                "negative_prompt": negative_prompt,
+                "num_frames": num_frames,
+                "num_inference_steps": num_inference_steps,
+                "guidance_scale": guidance_scale,
+                "seed": seed,
+                "target_size": target_size,
+                **kwargs,
+            },
+            headers=self.headers,
+            model=model_id,
+            api_key=self.token,
+        )
+        response = self._inner_post(request_parameters)
+        response = provider_helper.get_response(response, request_parameters)
+        return response
+
    def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
        """
        Takes an input image and return text.
@@ -1382,8 +1458,8 @@ class InferenceClient:
            api_key=self.token,
        )
        response = self._inner_post(request_parameters)
-
-        return
+        output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        return output_list[0]

    def object_detection(
        self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
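With this fix, `image_to_text` parses the provider response via `ImageToTextOutput.parse_obj_as_list(...)` and returns the first element, so callers receive a typed `ImageToTextOutput`. A hedged usage sketch (the image path is illustrative; `generated_text` is the field defined on the generated output type):

```python
# Illustrative sketch of the fixed return value.
from huggingface_hub import InferenceClient

client = InferenceClient()
output = client.image_to_text("cat.jpg")  # now a single ImageToTextOutput
print(type(output).__name__)              # "ImageToTextOutput"
print(output.generated_text)              # the caption produced by the model
```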
@@ -3193,101 +3269,6 @@ class InferenceClient:
        response = self._inner_post(request_parameters)
        return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
-        ),
-    )
-    def list_deployed_models(
-        self, frameworks: Union[None, str, Literal["all"], List[str]] = None
-    ) -> Dict[str, List[str]]:
-        """
-        List models deployed on the HF Serverless Inference API service.
-
-        This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
-        are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
-        specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
-        in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
-        frameworks are checked, the more time it will take.
-
-        <Tip warning={true}>
-
-        This endpoint method does not return a live list of all models available for the HF Inference API service.
-        It searches over a cached list of models that were recently available and the list may not be up to date.
-        If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        <Tip>
-
-        This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
-        check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
-        </Tip>
-
-        Args:
-            frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
-                The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
-                "all", all available frameworks will be tested. It is also possible to provide a single framework or a
-                custom set of frameworks to check.
-
-        Returns:
-            `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
-        Example:
-        ```python
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-
-        # Discover zero-shot-classification models currently deployed
-        >>> models = client.list_deployed_models()
-        >>> models["zero-shot-classification"]
-        ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
-        # List from only 1 framework
-        >>> client.list_deployed_models("text-generation-inference")
-        {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
-        # Resolve which frameworks to check
-        if frameworks is None:
-            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
-        elif frameworks == "all":
-            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
-        elif isinstance(frameworks, str):
-            frameworks = [frameworks]
-        frameworks = list(set(frameworks))
-
-        # Fetch them iteratively
-        models_by_task: Dict[str, List[str]] = {}
-
-        def _unpack_response(framework: str, items: List[Dict]) -> None:
-            for model in items:
-                if framework == "sentence-transformers":
-                    # Model running with the `sentence-transformers` framework can work with both tasks even if not
-                    # branded as such in the API response
-                    models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
-                    models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
-                else:
-                    models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
-        for framework in frameworks:
-            response = get_session().get(
-                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
-            )
-            hf_raise_for_status(response)
-            _unpack_response(framework, response.json())
-
-        # Sort alphabetically for discoverability and return
-        for task, models in models_by_task.items():
-            models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
-        return models_by_task
-
    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
        """
        Get information about the deployed endpoint.
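The deprecation message of the removed `list_deployed_models` points to `HfApi.list_models(..., inference_provider=...)` as the replacement for discovering warm models. A hedged sketch of that replacement; the filter values are illustrative:

```python
# Illustrative sketch of the replacement suggested by the removed method's own
# deprecation message; adjust the filters to your use case.
from huggingface_hub import HfApi

api = HfApi()
warm_models = api.list_models(
    inference_provider="hf-inference",  # models warm on a given provider
    pipeline_tag="text-generation",     # optional task filter
    limit=10,
)
for model in warm_models:
    print(model.id)
```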
@@ -3351,7 +3332,6 @@ class InferenceClient:
        Check the health of the deployed endpoint.

        Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
-        For Inference API, please use [`InferenceClient.get_model_status`] instead.

        Args:
            model (`str`, *optional*):
@@ -3375,75 +3355,12 @@ class InferenceClient:
        if model is None:
            raise ValueError("Model id not provided.")
        if not model.startswith(("http://", "https://")):
-            raise ValueError(
-                "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
-            )
+            raise ValueError("Model must be an Inference Endpoint URL.")
        url = model.rstrip("/") + "/health"

        response = get_session().get(url, headers=build_hf_headers(token=self.token))
        return response.status_code == 200

-    @_deprecate_method(
-        version="0.35.0",
-        message=(
-            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
-            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
-        ),
-    )
-    def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
-        """
-        Get the status of a model hosted on the HF Inference API.
-
-        <Tip>
-
-        This endpoint is mostly useful when you already know which model you want to use and want to check its
-        availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
-        </Tip>
-
-        Args:
-            model (`str`, *optional*):
-                Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
-                identifier cannot be a URL.
-
-
-        Returns:
-            [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
-            about the state of the model: load, state, compute type and framework.
-
-        Example:
-        ```py
-        >>> from huggingface_hub import InferenceClient
-        >>> client = InferenceClient()
-        >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
-        ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
-        ```
-        """
-        if self.provider != "hf-inference":
-            raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
-        model = model or self.model
-        if model is None:
-            raise ValueError("Model id not provided.")
-        if model.startswith("https://"):
-            raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
-        response = get_session().get(url, headers=build_hf_headers(token=self.token))
-        hf_raise_for_status(response)
-        response_data = response.json()
-
-        if "error" in response_data:
-            raise ValueError(response_data["error"])
-
-        return ModelStatus(
-            loaded=response_data["loaded"],
-            state=response_data["state"],
-            compute_type=response_data["compute_type"],
-            framework=response_data["framework"],
-        )
-
    @property
    def chat(self) -> "ProxyClientChat":
        return ProxyClientChat(self)
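With `get_model_status` removed, its deprecation message recommends `HfApi.model_info` for model status, while `health_check` now only accepts Inference Endpoint URLs. A hedged sketch of both replacements; the `expand=["inference"]` argument and the endpoint URL are assumptions for illustration, not taken from this diff:

```python
# Illustrative sketch; `expand=["inference"]` and the endpoint URL are assumptions.
from huggingface_hub import HfApi, InferenceClient

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm" when the model is deployed by some provider

# health_check() is now restricted to deployed Inference Endpoint URLs:
client = InferenceClient()
is_up = client.health_check(model="https://my-endpoint.endpoints.huggingface.cloud")
print(is_up)  # True if the endpoint answers /health with HTTP 200
```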
huggingface_hub/inference/_common.py
CHANGED

@@ -19,7 +19,6 @@ import io
 import json
 import logging
 import mimetypes
-from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
 from typing import (
@@ -27,9 +26,7 @@ from typing import (
     Any,
     AsyncIterable,
     BinaryIO,
-    ContextManager,
     Dict,
-    Generator,
     Iterable,
     List,
     Literal,
@@ -61,8 +58,7 @@ if TYPE_CHECKING:
 # TYPES
 UrlT = str
 PathT = Union[str, Path]
-
-ContentT = Union[BinaryT, PathT, UrlT, "Image"]
+ContentT = Union[bytes, BinaryIO, PathT, UrlT, "Image", bytearray, memoryview]

 # Use to set a Accept: image/png header
 TASKS_EXPECTING_IMAGES = {"text-to-image", "image-to-image"}
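`ContentT` now spells out every accepted payload type, adding `bytearray` and `memoryview` alongside `bytes`, binary file objects, paths, URLs and PIL images. A hedged sketch of the inputs a client method can take after this change (the file and task are illustrative):

```python
# Illustrative sketch of the payload types covered by ContentT.
from huggingface_hub import InferenceClient

client = InferenceClient()
raw = open("cat.jpg", "rb").read()

client.image_to_text(raw)                             # bytes
client.image_to_text(bytearray(raw))                  # bytearray (newly listed)
client.image_to_text(memoryview(raw))                 # memoryview (newly listed)
client.image_to_text("cat.jpg")                       # local path
client.image_to_text("https://example.com/cat.jpg")   # URL
```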
@@ -76,39 +72,33 @@ class RequestParameters:
    task: str
    model: Optional[str]
    json: Optional[Union[str, Dict, List]]
-    data: Optional[
+    data: Optional[bytes]
    headers: Dict[str, Any]


-
-@dataclass
-class ModelStatus:
+class MimeBytes(bytes):
    """
-
-
-
-
-
-
-
-
-
-
-
-        requests inference on the endpoint. This means it is transparent for the
-        user to load a model, except that the first call takes longer to complete.
-        compute_type (`Dict`):
-            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
-            replicas.
-        framework (`str`):
-            The name of the framework that the model was built with, such as 'transformers'
-            or 'text-generation-inference'.
+    A bytes object with a mime type.
+    To be returned by `_prepare_payload_open_as_mime_bytes` in subclasses.
+
+    Example:
+    ```python
+    >>> b = MimeBytes(b"hello", "text/plain")
+    >>> isinstance(b, bytes)
+    True
+    >>> b.mime_type
+    'text/plain'
+    ```
    """

-
-
-
-
+    mime_type: Optional[str]
+
+    def __new__(cls, data: bytes, mime_type: Optional[str] = None):
+        obj = super().__new__(cls, data)
+        obj.mime_type = mime_type
+        if isinstance(data, MimeBytes) and mime_type is None:
+            obj.mime_type = data.mime_type
+        return obj


 ## IMPORT UTILS
@@ -148,31 +138,49 @@ def _import_pil_image():


 @overload
-def
-    content: ContentT,
-) -> ContextManager[BinaryT]: ...  # means "if input is not None, output is not None"
+def _open_as_mime_bytes(content: ContentT) -> MimeBytes: ...  # means "if input is not None, output is not None"


 @overload
-def
-    content: Literal[None],
-) -> ContextManager[Literal[None]]: ...  # means "if input is None, output is None"
+def _open_as_mime_bytes(content: Literal[None]) -> Literal[None]: ...  # means "if input is None, output is None"


-
-def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
+def _open_as_mime_bytes(content: Optional[ContentT]) -> Optional[MimeBytes]:
    """Open `content` as a binary file, either from a URL, a local path, raw bytes, or a PIL Image.

    Do nothing if `content` is None.
-
-    TODO: handle base64 as input
    """
+    # If content is None, yield None
+    if content is None:
+        return None
+
+    # If content is bytes, return it
+    if isinstance(content, bytes):
+        return MimeBytes(content)
+
+    # If content is raw binary data (bytearray, memoryview)
+    if isinstance(content, (bytearray, memoryview)):
+        return MimeBytes(bytes(content))
+
+    # If content is a binary file-like object
+    if hasattr(content, "read"):  # duck-typing instead of isinstance(content, BinaryIO)
+        logger.debug("Reading content from BinaryIO")
+        data = content.read()
+        mime_type = mimetypes.guess_type(content.name)[0] if hasattr(content, "name") else None
+        if isinstance(data, str):
+            raise TypeError("Expected binary stream (bytes), but got text stream")
+        return MimeBytes(data, mime_type=mime_type)
+
    # If content is a string => must be either a URL or a path
    if isinstance(content, str):
        if content.startswith("https://") or content.startswith("http://"):
            logger.debug(f"Downloading content from {content}")
-
-
+            response = get_session().get(content)
+            mime_type = response.headers.get("Content-Type")
+            if mime_type is None:
+                mime_type = mimetypes.guess_type(content)[0]
+            return MimeBytes(response.content, mime_type=mime_type)
+
        content = Path(content)
        if not content.exists():
            raise FileNotFoundError(
@@ -183,9 +191,7 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
    # If content is a Path => open it
    if isinstance(content, Path):
        logger.debug(f"Opening content from {content}")
-
-            yield f
-        return
+        return MimeBytes(content.read_bytes(), mime_type=mimetypes.guess_type(content)[0])

    # If content is a PIL Image => convert to bytes
    if is_pillow_available():
@@ -194,38 +200,37 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
        if isinstance(content, Image.Image):
            logger.debug("Converting PIL Image to bytes")
            buffer = io.BytesIO()
-
-
-            return
+            format = content.format or "PNG"
+            content.save(buffer, format=format)
+            return MimeBytes(buffer.getvalue(), mime_type=f"image/{format.lower()}")

-    #
-
+    # If nothing matched, raise error
+    raise TypeError(
+        f"Unsupported content type: {type(content)}. "
+        "Expected one of: bytes, bytearray, BinaryIO, memoryview, Path, str (URL or file path), or PIL.Image.Image."
+    )


 def _b64_encode(content: ContentT) -> str:
    """Encode a raw file (image, audio) into base64. Can be bytes, an opened file, a path or a URL."""
-
-
-    return base64.b64encode(data_as_bytes).decode()
+    raw_bytes = _open_as_mime_bytes(content)
+    return base64.b64encode(raw_bytes).decode()


 def _as_url(content: ContentT, default_mime_type: str) -> str:
-    if isinstance(content, str) and
+    if isinstance(content, str) and content.startswith(("http://", "https://", "data:")):
        return content

-    #
-
-    if isinstance(content, (str, Path)):
-        mime_type = mimetypes.guess_type(content, strict=False)[0]
-    elif is_pillow_available():
-        from PIL import Image
+    # Convert content to bytes
+    raw_bytes = _open_as_mime_bytes(content)

-
-
-        mime_type = f"image/{(content.format or 'PNG').lower()}"
+    # Get MIME type
+    mime_type = raw_bytes.mime_type or default_mime_type

-
-        encoded_data =
+    # Encode content to base64
+    encoded_data = base64.b64encode(raw_bytes).decode()
+
+    # Build data URL
    return f"data:{mime_type};base64,{encoded_data}"


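The `_open_as_binary` context manager is replaced by `_open_as_mime_bytes`, which eagerly reads any supported input into a `MimeBytes` (a `bytes` subclass carrying an optional `mime_type`), and `_as_url` now builds its data URL from that object. A hedged sketch of the new flow using these private helpers as reconstructed above (private API; the file path is illustrative):

```python
# Illustrative sketch; these helpers are private and may change without notice.
from pathlib import Path

from huggingface_hub.inference._common import MimeBytes, _as_url, _open_as_mime_bytes

payload = _open_as_mime_bytes(Path("cat.png"))
assert isinstance(payload, bytes)   # MimeBytes subclasses bytes, so it posts as-is
print(payload.mime_type)            # e.g. "image/png", guessed from the file suffix

data_url = _as_url("cat.png", default_mime_type="image/png")
print(data_url[:30])                # "data:image/png;base64,..."
```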
@@ -270,9 +275,6 @@ def _as_dict(response: Union[bytes, Dict]) -> Dict:
    return json.loads(response) if isinstance(response, bytes) else response


-## PAYLOAD UTILS
-
-
 ## STREAMING UTILS

