huggingface-hub 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.


Files changed (35)
  1. huggingface_hub/__init__.py +19 -1
  2. huggingface_hub/_commit_api.py +49 -20
  3. huggingface_hub/_inference_endpoints.py +10 -0
  4. huggingface_hub/_login.py +2 -2
  5. huggingface_hub/commands/download.py +1 -1
  6. huggingface_hub/file_download.py +57 -21
  7. huggingface_hub/hf_api.py +269 -54
  8. huggingface_hub/hf_file_system.py +131 -8
  9. huggingface_hub/hub_mixin.py +204 -42
  10. huggingface_hub/inference/_client.py +56 -9
  11. huggingface_hub/inference/_common.py +4 -3
  12. huggingface_hub/inference/_generated/_async_client.py +57 -9
  13. huggingface_hub/inference/_text_generation.py +5 -0
  14. huggingface_hub/inference/_types.py +17 -0
  15. huggingface_hub/lfs.py +6 -3
  16. huggingface_hub/repocard.py +5 -3
  17. huggingface_hub/repocard_data.py +11 -3
  18. huggingface_hub/serialization/__init__.py +19 -0
  19. huggingface_hub/serialization/_base.py +168 -0
  20. huggingface_hub/serialization/_numpy.py +67 -0
  21. huggingface_hub/serialization/_tensorflow.py +93 -0
  22. huggingface_hub/serialization/_torch.py +199 -0
  23. huggingface_hub/templates/datasetcard_template.md +1 -1
  24. huggingface_hub/templates/modelcard_template.md +1 -4
  25. huggingface_hub/utils/__init__.py +14 -10
  26. huggingface_hub/utils/_datetime.py +4 -11
  27. huggingface_hub/utils/_errors.py +29 -0
  28. huggingface_hub/utils/_runtime.py +21 -15
  29. huggingface_hub/utils/endpoint_helpers.py +27 -1
  30. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/METADATA +7 -3
  31. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/RECORD +35 -30
  32. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/LICENSE +0 -0
  33. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/WHEEL +0 -0
  34. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/entry_points.txt +0 -0
  35. {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py CHANGED
@@ -33,6 +33,7 @@
 # - Images are parsed as PIL.Image for easier manipulation.
 # - Provides a "recommended model" for each task => suboptimal but user-wise quicker to get a first script running.
 # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
+import base64
 import logging
 import time
 import warnings
@@ -78,6 +79,7 @@ from huggingface_hub.inference._text_generation import (
     raise_text_generation_error,
 )
 from huggingface_hub.inference._types import (
+    AudioToAudioOutput,
     ClassificationOutput,
     ConversationalOutput,
     FillMaskOutput,
@@ -299,6 +301,49 @@ class InferenceClient:
         response = self.post(data=audio, model=model, task="audio-classification")
         return _bytes_to_list(response)
 
+    def audio_to_audio(
+        self,
+        audio: ContentT,
+        *,
+        model: Optional[str] = None,
+    ) -> List[AudioToAudioOutput]:
+        """
+        Performs multiple tasks related to audio-to-audio depending on the model (e.g. speech enhancement, source separation).
+
+        Args:
+            audio (Union[str, Path, bytes, BinaryIO]):
+                The audio content for the model. It can be raw audio bytes, a local audio file, or a URL pointing to an
+                audio file.
+            model (`str`, *optional*):
+                The model can be any model which takes an audio file and returns another audio file. Can be a model ID hosted on the Hugging Face Hub
+                or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for
+                audio_to_audio will be used.
+
+        Returns:
+            `List[Dict]`: A list of dictionaries, one per output audio, each containing the audio's label, content type, and the audio content as a blob.
+
+        Raises:
+            `InferenceTimeoutError`:
+                If the model is unavailable or the request times out.
+            `HTTPError`:
+                If the request fails with an HTTP error status code other than HTTP 503.
+
+        Example:
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient()
+        >>> audio_output = client.audio_to_audio("audio.flac")
+        >>> for i, item in enumerate(audio_output):
+        >>>     with open(f"output_{i}.flac", "wb") as f:
+        >>>         f.write(item["blob"])
+        ```
+        """
+        response = self.post(data=audio, model=model, task="audio-to-audio")
+        audio_output = _bytes_to_list(response)
+        for item in audio_output:
+            item["blob"] = base64.b64decode(item["blob"])
+        return audio_output
+
     def automatic_speech_recognition(
         self,
         audio: ContentT,
@@ -1063,16 +1108,17 @@ class InferenceClient:
         )
         return _bytes_to_dict(response)  # type: ignore
 
-    def tabular_classification(self, table: Dict[str, Any], *, model: str) -> List[str]:
+    def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
         """
         Classifying a target category (a group) based on a set of attributes.
 
         Args:
             table (`Dict[str, Any]`):
                 Set of attributes to classify.
-            model (`str`):
-                The model to use for the tabular-classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
-                a deployed Inference Endpoint.
+            model (`str`, *optional*):
+                The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+                a deployed Inference Endpoint. If not provided, the default recommended tabular classification model will be used.
+                Defaults to None.
 
         Returns:
             `List`: a list of labels, one per row in the initial table.
@@ -1107,16 +1153,17 @@ class InferenceClient:
         response = self.post(json={"table": table}, model=model, task="tabular-classification")
         return _bytes_to_list(response)
 
-    def tabular_regression(self, table: Dict[str, Any], *, model: str) -> List[float]:
+    def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.
 
         Args:
             table (`Dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
-            model (`str`):
-                The model to use for the tabular-regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
-                a deployed Inference Endpoint.
+            model (`str`, *optional*):
+                The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+                a deployed Inference Endpoint. If not provided, the default recommended tabular regression model will be used.
+                Defaults to None.
 
         Returns:
             `List`: a list of predicted numerical target values.
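Since `model` is now optional for both tabular tasks, the minimal call no longer needs an explicit model ID. A short sketch, assuming the column names and values below are placeholder data and that the recommended default model accepts them:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Placeholder table: keys are column names, values are per-row entries as strings.
table = {
    "Height": ["11.52", "12.48", "12.38"],
    "Length": ["23.2", "24.0", "23.9"],
}

# Both calls fall back to the recommended model for the task when `model` is omitted.
labels = client.tabular_classification(table)  # one label per row
values = client.tabular_regression(table)      # one float per row
print(labels, values)
```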
@@ -1483,7 +1530,7 @@ class InferenceClient:
         # Remove some parameters if not a TGI server
         if not _is_tgi_server(model):
             ignored_parameters = []
-            for key in "watermark", "stop", "details", "decoder_input_details":
+            for key in "watermark", "stop", "details", "decoder_input_details", "best_of":
                 if payload["parameters"][key] is not None:
                     ignored_parameters.append(key)
                 del payload["parameters"][key]
huggingface_hub/inference/_common.py CHANGED
@@ -84,8 +84,9 @@ class ModelStatus:
             backend. Loadable models are automatically loaded when the user first
             requests inference on the endpoint. This means it is transparent for the
             user to load a model, except that the first call takes longer to complete.
-        compute_type (`str`):
-            The type of compute resource the model is using or will use, such as 'gpu' or 'cpu'.
+        compute_type (`Dict`):
+            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
+            replicas.
         framework (`str`):
             The name of the framework that the model was built with, such as 'transformers'
             or 'text-generation-inference'.
@@ -93,7 +94,7 @@ class ModelStatus:
 
     loaded: bool
     state: str
-    compute_type: str
+    compute_type: Dict
     framework: str
 
 
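Because `compute_type` is now a dict rather than a plain string, code that printed it keeps working, but code that compared it to `"gpu"` or `"cpu"` needs updating. A hedged sketch (the model ID is illustrative and the exact keys inside `compute_type` depend on the serverless backend, not on the client):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
status = client.get_model_status("meta-llama/Llama-2-7b-chat-hf")  # illustrative model ID

print(status.loaded, status.state, status.framework)
# Previously a string such as "gpu"; now a dict describing the compute resource,
# e.g. the accelerator type and number of replicas.
print(status.compute_type)
```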
huggingface_hub/inference/_generated/_async_client.py CHANGED
@@ -19,6 +19,7 @@
 # To re-generate the code, run `make style` or `python ./utils/generate_async_inference_client.py --update`.
 # WARNING
 import asyncio
+import base64
 import logging
 import time
 import warnings
@@ -63,6 +64,7 @@ from huggingface_hub.inference._text_generation import (
     raise_text_generation_error,
 )
 from huggingface_hub.inference._types import (
+    AudioToAudioOutput,
     ClassificationOutput,
     ConversationalOutput,
     FillMaskOutput,
@@ -295,6 +297,50 @@ class AsyncInferenceClient:
         response = await self.post(data=audio, model=model, task="audio-classification")
         return _bytes_to_list(response)
 
+    async def audio_to_audio(
+        self,
+        audio: ContentT,
+        *,
+        model: Optional[str] = None,
+    ) -> List[AudioToAudioOutput]:
+        """
+        Performs multiple tasks related to audio-to-audio depending on the model (e.g. speech enhancement, source separation).
+
+        Args:
+            audio (Union[str, Path, bytes, BinaryIO]):
+                The audio content for the model. It can be raw audio bytes, a local audio file, or a URL pointing to an
+                audio file.
+            model (`str`, *optional*):
+                The model can be any model which takes an audio file and returns another audio file. Can be a model ID hosted on the Hugging Face Hub
+                or a URL to a deployed Inference Endpoint. If not provided, the default recommended model for
+                audio_to_audio will be used.
+
+        Returns:
+            `List[Dict]`: A list of dictionaries, one per output audio, each containing the audio's label, content type, and the audio content as a blob.
+
+        Raises:
+            `InferenceTimeoutError`:
+                If the model is unavailable or the request times out.
+            `aiohttp.ClientResponseError`:
+                If the request fails with an HTTP error status code other than HTTP 503.
+
+        Example:
+        ```py
+        # Must be run in an async context
+        >>> from huggingface_hub import AsyncInferenceClient
+        >>> client = AsyncInferenceClient()
+        >>> audio_output = await client.audio_to_audio("audio.flac")
+        >>> for i, item in enumerate(audio_output):
+        >>>     with open(f"output_{i}.flac", "wb") as f:
+        >>>         f.write(item["blob"])
+        ```
+        """
+        response = await self.post(data=audio, model=model, task="audio-to-audio")
+        audio_output = _bytes_to_list(response)
+        for item in audio_output:
+            item["blob"] = base64.b64decode(item["blob"])
+        return audio_output
+
     async def automatic_speech_recognition(
         self,
         audio: ContentT,
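The async variant mirrors the sync one; the only extra step is running the coroutine. A minimal sketch of driving it with `asyncio` (file names are placeholders):

```python
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    outputs = await client.audio_to_audio("audio.flac")  # placeholder input file
    for i, item in enumerate(outputs):
        with open(f"output_{i}.flac", "wb") as f:
            f.write(item["blob"])  # already base64-decoded by the client


asyncio.run(main())
```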
@@ -1080,16 +1126,17 @@ class AsyncInferenceClient:
         )
         return _bytes_to_dict(response)  # type: ignore
 
-    async def tabular_classification(self, table: Dict[str, Any], *, model: str) -> List[str]:
+    async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
         """
         Classifying a target category (a group) based on a set of attributes.
 
         Args:
             table (`Dict[str, Any]`):
                 Set of attributes to classify.
-            model (`str`):
-                The model to use for the tabular-classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
-                a deployed Inference Endpoint.
+            model (`str`, *optional*):
+                The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+                a deployed Inference Endpoint. If not provided, the default recommended tabular classification model will be used.
+                Defaults to None.
 
         Returns:
             `List`: a list of labels, one per row in the initial table.
@@ -1125,16 +1172,17 @@ class AsyncInferenceClient:
         response = await self.post(json={"table": table}, model=model, task="tabular-classification")
         return _bytes_to_list(response)
 
-    async def tabular_regression(self, table: Dict[str, Any], *, model: str) -> List[float]:
+    async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.
 
         Args:
             table (`Dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
-            model (`str`):
-                The model to use for the tabular-regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
-                a deployed Inference Endpoint.
+            model (`str`, *optional*):
+                The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
+                a deployed Inference Endpoint. If not provided, the default recommended tabular regression model will be used.
+                Defaults to None.
 
         Returns:
             `List`: a list of predicted numerical target values.
@@ -1504,7 +1552,7 @@ class AsyncInferenceClient:
         # Remove some parameters if not a TGI server
         if not _is_tgi_server(model):
             ignored_parameters = []
-            for key in "watermark", "stop", "details", "decoder_input_details":
+            for key in "watermark", "stop", "details", "decoder_input_details", "best_of":
                 if payload["parameters"][key] is not None:
                     ignored_parameters.append(key)
                 del payload["parameters"][key]
huggingface_hub/inference/_text_generation.py CHANGED
@@ -451,6 +451,8 @@ class TextGenerationStreamResponse:
     Args:
         token (`Token`):
             The generated token.
+        index (`Optional[int]`, *optional*):
+            The token index within the stream. Optional to support older clients that omit it.
         generated_text (`Optional[str]`, *optional*):
             The complete generated text. Only available when the generation is finished.
         details (`Optional[StreamDetails]`, *optional*):
@@ -459,6 +461,9 @@ class TextGenerationStreamResponse:
 
     # Generated token
     token: Token
+    # The token index within the stream
+    # Optional to support older clients that omit it.
+    index: Optional[int] = None
     # Complete generated text
     # Only available when the generation is finished
     generated_text: Optional[str] = None
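The new `index` field shows up when streaming; servers that do not send it simply leave it as `None`. A hedged sketch of reading it (the model ID is only an illustrative TGI-served model):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Each yielded item is a TextGenerationStreamResponse.
for response in client.text_generation(
    "The huggingface_hub library is ",
    model="HuggingFaceH4/zephyr-7b-beta",  # illustrative choice
    max_new_tokens=12,
    stream=True,
    details=True,
):
    # `index` may be None when the server omits it.
    print(response.index, response.token.text)
```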
huggingface_hub/inference/_types.py CHANGED
@@ -19,6 +19,23 @@ if TYPE_CHECKING:
     from PIL import Image
 
 
+class AudioToAudioOutput(TypedDict):
+    """Dictionary containing the output of a [`~InferenceClient.audio_to_audio`] task.
+
+    Args:
+        label (`str`):
+            The label of the audio file.
+        content-type (`str`):
+            The content type of the audio file.
+        blob (`bytes`):
+            The audio file in byte format.
+    """
+
+    label: str
+    content_type: str
+    blob: bytes
+
+
 class ClassificationOutput(TypedDict):
     """Dictionary containing the output of a [`~InferenceClient.audio_classification`] and [`~InferenceClient.image_classification`] task.
 
huggingface_hub/lfs.py CHANGED
@@ -295,7 +295,7 @@ def _upload_single_part(operation: "CommitOperationAdd", upload_url: str) -> Non
     """
     with operation.as_file(with_tqdm=True) as fileobj:
         # S3 might raise a transient 500 error -> let's retry if that happens
-        response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 503))
+        response = http_backoff("PUT", upload_url, data=fileobj, retry_on_status_codes=(500, 502, 503, 504))
     hf_raise_for_status(response)
 
 
@@ -380,7 +380,7 @@ def _upload_parts_iteratively(
         ) as fileobj_slice:
             # S3 might raise a transient 500 error -> let's retry if that happens
             part_upload_res = http_backoff(
-                "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 503)
+                "PUT", part_upload_url, data=fileobj_slice, retry_on_status_codes=(500, 502, 503, 504)
             )
             hf_raise_for_status(part_upload_res)
             headers.append(part_upload_res.headers)
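`http_backoff` is the retry helper used in both hunks; the change simply widens the set of transient S3/gateway status codes it retries on. A small sketch of the same pattern outside of `lfs.py` (the URL and payload are placeholders):

```python
from huggingface_hub.utils import hf_raise_for_status, http_backoff

# Retry the PUT on transient 5xx errors (500/502/503/504), with exponential backoff.
response = http_backoff(
    "PUT",
    "https://example.com/presigned-upload-url",  # placeholder URL
    data=b"some payload",
    retry_on_status_codes=(500, 502, 503, 504),
)
hf_raise_for_status(response)
```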
@@ -409,7 +409,10 @@ def _upload_parts_hf_transfer(
     desc = operation.path_in_repo
     if len(desc) > 40:
         desc = f"(…){desc[-40:]}"
-    disable = bool(logger.getEffectiveLevel() == logging.NOTSET)
+
+    # set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
+    # see https://github.com/huggingface/huggingface_hub/pull/2000
+    disable = True if (logger.getEffectiveLevel() == logging.NOTSET) else None
 
     with tqdm(unit="B", unit_scale=True, total=total, initial=0, desc=desc, disable=disable) as progress:
         try:
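The `disable=None` trick relies on tqdm's documented behavior: `None` keeps the bar only when the output stream is attached to a TTY, whereas `False` always shows it. A standalone illustration of that behavior using tqdm directly:

```python
from tqdm.auto import tqdm

# With disable=None, tqdm shows the bar on an interactive terminal
# and silently disables it when stderr is not a TTY (e.g. in CI logs).
for _ in tqdm(range(3), desc="demo", disable=None):
    pass
```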
huggingface_hub/repocard.py CHANGED
@@ -1,6 +1,5 @@
 import os
 import re
-import warnings
 from pathlib import Path
 from typing import Any, Dict, Literal, Optional, Type, Union
 
@@ -21,7 +20,10 @@ from huggingface_hub.repocard_data import (
 from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
 
 from .constants import REPOCARD_NAME
-from .utils import EntryNotFoundError, SoftTemporaryDirectory, validate_hf_hub_args
+from .utils import EntryNotFoundError, SoftTemporaryDirectory, logging, validate_hf_hub_args
+
+
+logger = logging.get_logger(__name__)
 
 
 TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
@@ -102,7 +104,7 @@ class RepoCard:
                 raise ValueError("repo card metadata block should be a dict")
         else:
             # Model card without metadata... create empty metadata
-            warnings.warn("Repo card metadata block was not found. Setting CardData to empty.")
+            logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
             data_dict = {}
         self.text = content
 
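Switching from `warnings.warn` to `logger.warning` means the message is now controlled by `huggingface_hub`'s logging verbosity instead of the `warnings` filters. A short sketch of silencing it, assuming a README without a YAML metadata block:

```python
from huggingface_hub import RepoCard
from huggingface_hub.utils import logging

# Raise the verbosity threshold so the "metadata block was not found" warning is not emitted.
logging.set_verbosity_error()

card = RepoCard("# My model\n\nNo YAML metadata block here.")
print(card.data.to_dict())  # empty CardData, as described in the hunk above
```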
huggingface_hub/repocard_data.py CHANGED
@@ -1,10 +1,12 @@
 import copy
-import warnings
 from collections import defaultdict
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-from huggingface_hub.utils import yaml_dump
+from huggingface_hub.utils import logging, yaml_dump
+
+
+logger = logging.get_logger(__name__)
 
 
 @dataclass
@@ -253,6 +255,10 @@ class ModelCardData(CardData):
         tags (`List[str]`, *optional*):
             List of tags to add to your model that can be used when filtering on the Hugging
             Face Hub. Defaults to None.
+        base_model (`str` or `List[str]`, *optional*):
+            The identifier of the base model from which the model derives. This is applicable for example if your model is a
+            fine-tune or adapter of an existing model. The value must be the ID of a model on the Hub (or a list of IDs
+            if your model derives from multiple models). Defaults to None.
         datasets (`List[str]`, *optional*):
             List of datasets that were used to train this model. Should be a dataset ID
             found on https://hf.co/datasets. Defaults to None.
@@ -295,6 +301,7 @@ class ModelCardData(CardData):
         license: Optional[str] = None,
         library_name: Optional[str] = None,
         tags: Optional[List[str]] = None,
+        base_model: Optional[Union[str, List[str]]] = None,
         datasets: Optional[List[str]] = None,
         metrics: Optional[List[str]] = None,
         eval_results: Optional[List[EvalResult]] = None,
@@ -306,6 +313,7 @@ class ModelCardData(CardData):
         self.license = license
         self.library_name = library_name
         self.tags = tags
+        self.base_model = base_model
         self.datasets = datasets
         self.metrics = metrics
         self.eval_results = eval_results
@@ -319,7 +327,7 @@ class ModelCardData(CardData):
                 self.eval_results = eval_results
             except (KeyError, TypeError) as error:
                 if ignore_metadata_errors:
-                    warnings.warn("Invalid model-index. Not loading eval results into CardData.")
+                    logger.warning("Invalid model-index. Not loading eval results into CardData.")
                 else:
                     raise ValueError(
                         f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass"
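The new `base_model` attribute ends up in the card's YAML metadata like any other field. A minimal sketch (model IDs and tags are illustrative):

```python
from huggingface_hub import ModelCardData

data = ModelCardData(
    language="en",
    license="apache-2.0",
    base_model="mistralai/Mistral-7B-v0.1",  # or a list of IDs for models derived from several bases
    tags=["text-generation", "lora"],
)

# The serialized metadata now contains a `base_model: mistralai/Mistral-7B-v0.1` entry.
print(data.to_yaml())
```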
huggingface_hub/serialization/__init__.py ADDED
@@ -0,0 +1,19 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ruff: noqa: F401
+"""Contains helpers to serialize tensors."""
+from ._base import StateDictSplit, split_state_dict_into_shards_factory
+from ._numpy import split_numpy_state_dict_into_shards
+from ._tensorflow import split_tf_state_dict_into_shards
+from ._torch import split_torch_state_dict_into_shards
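The public entry points re-exported here all share the same shape: pass a state dict, get back a `StateDictSplit` describing which tensors go into which file. A hedged sketch with the torch variant (requires `torch`; the tensor names and shard size are toy values, and actually writing the shards, e.g. with `safetensors`, is left as a comment):

```python
import torch

from huggingface_hub.serialization import split_torch_state_dict_into_shards

state_dict = {
    "embedding.weight": torch.zeros(1024, 1024),  # ~4 MB each in float32
    "lm_head.weight": torch.zeros(1024, 1024),
}

# Tiny max_shard_size so the toy example actually produces several shards.
split = split_torch_state_dict_into_shards(state_dict, max_shard_size=3_000_000)

print(split.is_sharded)           # True
print(split.filename_to_tensors)  # shard filename -> list of tensor names
# Each shard could then be written with safetensors.torch.save_file(...).
```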
huggingface_hub/serialization/_base.py ADDED
@@ -0,0 +1,168 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains helpers to split tensors into shards."""
+from dataclasses import dataclass, field
+from typing import Any, Callable, Dict, List, Optional, TypeVar
+
+from .. import logging
+
+
+TensorT = TypeVar("TensorT")
+TensorSizeFn_T = Callable[[TensorT], int]
+StorageIDFn_T = Callable[[TensorT], Optional[Any]]
+
+MAX_SHARD_SIZE = 5_000_000_000  # 5GB
+FILENAME_PATTERN = "model{suffix}.safetensors"
+
+logger = logging.get_logger(__file__)
+
+
+@dataclass
+class StateDictSplit:
+    is_sharded: bool = field(init=False)
+    metadata: Dict[str, Any]
+    filename_to_tensors: Dict[str, List[str]]
+    tensor_to_filename: Dict[str, str]
+
+    def __post_init__(self):
+        self.is_sharded = len(self.filename_to_tensors) > 1
+
+
+def split_state_dict_into_shards_factory(
+    state_dict: Dict[str, TensorT],
+    *,
+    get_tensor_size: TensorSizeFn_T,
+    get_storage_id: StorageIDFn_T = lambda tensor: None,
+    filename_pattern: str = FILENAME_PATTERN,
+    max_shard_size: int = MAX_SHARD_SIZE,
+) -> StateDictSplit:
+    """
+    Split a model state dictionary in shards so that each shard is smaller than a given size.
+
+    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
+    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
+    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
+    [6+2+2GB], [6+2GB], [6GB].
+
+    <Tip warning={true}>
+
+    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    size greater than `max_shard_size`.
+
+    </Tip>
+
+    Args:
+        state_dict (`Dict[str, Tensor]`):
+            The state dictionary to save.
+        get_tensor_size (`Callable[[Tensor], int]`):
+            A function that returns the size of a tensor in bytes.
+        get_storage_id (`Callable[[Tensor], Optional[Any]]`, *optional*):
+            A function that returns a unique identifier to a tensor storage. Multiple different tensors can share the
+            same underlying storage. This identifier is guaranteed to be unique and constant for this tensor's storage
+            during its lifetime. Two tensor storages with non-overlapping lifetimes may have the same id.
+        filename_pattern (`str`, *optional*):
+            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"model{suffix}.safetensors"`.
+        max_shard_size (`int` or `str`, *optional*):
+            The maximum size of each shard, in bytes. Defaults to 5GB.
+
+    Returns:
+        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
+    """
+    storage_id_to_tensors: Dict[Any, List[str]] = {}
+
+    shard_list: List[Dict[str, TensorT]] = []
+    current_shard: Dict[str, TensorT] = {}
+    current_shard_size = 0
+    total_size = 0
+
+    for key, tensor in state_dict.items():
+        # when bnb serialization is used the weights in the state dict can be strings
+        # check: https://github.com/huggingface/transformers/pull/24416 for more details
+        if isinstance(tensor, str):
+            logger.info("Skipping tensor %s as it is a string (bnb serialization)", key)
+            continue
+
+        # If a `tensor` shares the same underlying storage as another tensor, we put `tensor` in the same `block`
+        storage_id = get_storage_id(tensor)
+        if storage_id is not None:
+            if storage_id in storage_id_to_tensors:
+                # We skip this tensor for now and will reassign to correct shard later
+                storage_id_to_tensors[storage_id].append(key)
+                continue
+            else:
+                # This is the first tensor with this storage_id, we create a new entry
+                # in the storage_id_to_tensors dict => we will assign the shard id later
+                storage_id_to_tensors[storage_id] = [key]
+
+        # Compute tensor size
+        tensor_size = get_tensor_size(tensor)
+
+        # If this tensor is bigger than the maximal size, we put it in its own shard
+        if tensor_size > max_shard_size:
+            total_size += tensor_size
+            shard_list.append({key: tensor})
+            continue
+
+        # If this tensor is going to tip up over the maximal size, we split.
+        # Current shard already has some tensors, we add it to the list of shards and create a new one.
+        if current_shard_size + tensor_size > max_shard_size:
+            shard_list.append(current_shard)
+            current_shard = {}
+            current_shard_size = 0
+
+        # Add the tensor to the current shard
+        current_shard[key] = tensor
+        current_shard_size += tensor_size
+        total_size += tensor_size
+
+    # Add the last shard
+    if len(current_shard) > 0:
+        shard_list.append(current_shard)
+    nb_shards = len(shard_list)
+
+    # Loop over the tensors that share the same storage and assign them together
+    for storage_id, keys in storage_id_to_tensors.items():
+        # Let's try to find the shard where the first tensor of this storage is and put all tensors in the same shard
+        for shard in shard_list:
+            if keys[0] in shard:
+                for key in keys:
+                    shard[key] = state_dict[key]
+                break
+
+    # If we only have one shard, we return it => no need to build the index
+    if nb_shards == 1:
+        filename = filename_pattern.format(suffix="")
+        return StateDictSplit(
+            metadata={"total_size": total_size},
+            filename_to_tensors={filename: list(state_dict.keys())},
+            tensor_to_filename={key: filename for key in state_dict.keys()},
+        )
+
+    # Now that each tensor is assigned to a shard, let's assign a filename to each shard
+    tensor_name_to_filename = {}
+    filename_to_tensors = {}
+    for idx, shard in enumerate(shard_list):
+        filename = filename_pattern.format(suffix=f"-{idx+1:05d}-of-{nb_shards:05d}")
+        for key in shard:
+            tensor_name_to_filename[key] = filename
+        filename_to_tensors[filename] = list(shard.keys())
+
+    # Build the index and return
+    return StateDictSplit(
+        metadata={"total_size": total_size},
+        filename_to_tensors=filename_to_tensors,
+        tensor_to_filename=tensor_name_to_filename,
+    )
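Because the factory is generic over the tensor type, it can be exercised without any ML framework: all it needs is a size callback (and optionally a storage-id callback). A toy sketch using raw `bytes` objects as "tensors", with made-up layer names:

```python
from huggingface_hub.serialization import split_state_dict_into_shards_factory

# Toy "state dict" where each tensor is just a bytes buffer.
state_dict = {
    "layer1.weight": b"\x00" * 600,
    "layer1.bias": b"\x00" * 600,
    "layer2.weight": b"\x00" * 600,
}

split = split_state_dict_into_shards_factory(
    state_dict,
    get_tensor_size=len,               # size in bytes of each "tensor"
    max_shard_size=1_000,              # force sharding for the example
    filename_pattern="toy{suffix}.bin",
)

print(split.is_sharded)          # True: 600 + 600 > 1_000 starts a new shard
print(split.tensor_to_filename)  # e.g. {"layer1.weight": "toy-00001-of-00003.bin", ...}
```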
huggingface_hub/serialization/_numpy.py ADDED
@@ -0,0 +1,67 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains numpy-specific helpers."""
+from typing import TYPE_CHECKING, Dict
+
+from ._base import FILENAME_PATTERN, MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
+
+
+if TYPE_CHECKING:
+    import numpy as np
+
+
+def split_numpy_state_dict_into_shards(
+    state_dict: Dict[str, "np.ndarray"],
+    *,
+    filename_pattern: str = FILENAME_PATTERN,
+    max_shard_size: int = MAX_SHARD_SIZE,
+) -> StateDictSplit:
+    """
+    Split a model state dictionary in shards so that each shard is smaller than a given size.
+
+    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
+    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
+    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
+    [6+2+2GB], [6+2GB], [6GB].
+
+    <Tip warning={true}>
+
+    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    size greater than `max_shard_size`.
+
+    </Tip>
+
+    Args:
+        state_dict (`Dict[str, np.ndarray]`):
+            The state dictionary to save.
+        filename_pattern (`str`, *optional*):
+            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"model{suffix}.safetensors"`.
+        max_shard_size (`int` or `str`, *optional*):
+            The maximum size of each shard, in bytes. Defaults to 5GB.
+
+    Returns:
+        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
+    """
+    return split_state_dict_into_shards_factory(
+        state_dict,
+        max_shard_size=max_shard_size,
+        filename_pattern=filename_pattern,
+        get_tensor_size=get_tensor_size,
+    )
+
+
+def get_tensor_size(tensor: "np.ndarray") -> int:
+    return tensor.nbytes
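And the same through the numpy helper, which simply plugs `ndarray.nbytes` into the factory (array names and shapes below are arbitrary):

```python
import numpy as np

from huggingface_hub.serialization import split_numpy_state_dict_into_shards

state_dict = {
    "weight": np.zeros((1000, 1000), dtype=np.float32),  # ~4 MB
    "bias": np.zeros((1000,), dtype=np.float32),         # ~4 KB
}

split = split_numpy_state_dict_into_shards(state_dict, max_shard_size=1_000_000)

print(split.is_sharded)  # True: the 4 MB array exceeds the 1 MB limit and gets its own shard
print(split.metadata)    # {"total_size": 4004000}
for filename, tensors in split.filename_to_tensors.items():
    print(filename, tensors)
```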