huggingface-hub 0.26.3__py3-none-any.whl → 0.27.0rc1__py3-none-any.whl
This diff shows the changes between these two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release.
- huggingface_hub/__init__.py +49 -23
- huggingface_hub/_commit_scheduler.py +30 -4
- huggingface_hub/_local_folder.py +0 -4
- huggingface_hub/_login.py +38 -54
- huggingface_hub/_snapshot_download.py +6 -3
- huggingface_hub/_tensorboard_logger.py +2 -3
- huggingface_hub/_upload_large_folder.py +1 -1
- huggingface_hub/errors.py +19 -0
- huggingface_hub/fastai_utils.py +3 -2
- huggingface_hub/file_download.py +10 -12
- huggingface_hub/hf_api.py +102 -498
- huggingface_hub/hf_file_system.py +274 -35
- huggingface_hub/hub_mixin.py +5 -25
- huggingface_hub/inference/_client.py +185 -136
- huggingface_hub/inference/_common.py +2 -2
- huggingface_hub/inference/_generated/_async_client.py +186 -137
- huggingface_hub/inference/_generated/types/__init__.py +31 -10
- huggingface_hub/inference/_generated/types/audio_classification.py +3 -5
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +6 -9
- huggingface_hub/inference/_generated/types/chat_completion.py +8 -5
- huggingface_hub/inference/_generated/types/depth_estimation.py +1 -1
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -6
- huggingface_hub/inference/_generated/types/feature_extraction.py +1 -1
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -4
- huggingface_hub/inference/_generated/types/image_classification.py +3 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/image_to_text.py +6 -9
- huggingface_hub/inference/_generated/types/object_detection.py +2 -4
- huggingface_hub/inference/_generated/types/question_answering.py +2 -4
- huggingface_hub/inference/_generated/types/sentence_similarity.py +1 -1
- huggingface_hub/inference/_generated/types/summarization.py +2 -4
- huggingface_hub/inference/_generated/types/table_question_answering.py +21 -3
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -4
- huggingface_hub/inference/_generated/types/text_classification.py +4 -10
- huggingface_hub/inference/_generated/types/text_to_audio.py +7 -10
- huggingface_hub/inference/_generated/types/text_to_image.py +2 -4
- huggingface_hub/inference/_generated/types/text_to_speech.py +7 -10
- huggingface_hub/inference/_generated/types/token_classification.py +11 -12
- huggingface_hub/inference/_generated/types/translation.py +2 -4
- huggingface_hub/inference/_generated/types/video_classification.py +3 -4
- huggingface_hub/inference/_generated/types/visual_question_answering.py +2 -5
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +8 -18
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +9 -19
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +7 -9
- huggingface_hub/keras_mixin.py +3 -2
- huggingface_hub/lfs.py +2 -5
- huggingface_hub/repocard_data.py +4 -4
- huggingface_hub/serialization/__init__.py +2 -0
- huggingface_hub/serialization/_dduf.py +387 -0
- huggingface_hub/serialization/_torch.py +407 -25
- huggingface_hub/utils/_cache_manager.py +1 -1
- huggingface_hub/utils/_headers.py +9 -25
- huggingface_hub/utils/tqdm.py +15 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/METADATA +8 -3
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/RECORD +60 -60
- huggingface_hub/_multi_commits.py +0 -306
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.26.3.dist-info → huggingface_hub-0.27.0rc1.dist-info}/top_level.txt +0 -0
huggingface_hub/serialization/_torch.py:

```diff
@@ -17,10 +17,12 @@ import importlib
 import json
 import os
 import re
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 from functools import lru_cache
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
+
+from packaging import version
 
 from .. import constants, logging
 from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
@@ -31,6 +33,8 @@ logger = logging.get_logger(__file__)
 if TYPE_CHECKING:
     import torch
 
+# SAVING
+
 
 def save_torch_model(
     model: "torch.nn.Module",
@@ -41,6 +45,8 @@ def save_torch_model(
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
     metadata: Optional[Dict[str, str]] = None,
     safe_serialization: bool = True,
+    is_main_process: bool = True,
+    shared_tensors_to_discard: Optional[List[str]] = None,
 ):
     """
     Saves a given torch model to disk, handling sharding and shared tensors issues.
@@ -64,6 +70,12 @@ def save_torch_model(
 
     </Tip>
 
+    <Tip warning={true}>
+
+    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
+
+    </Tip>
+
     Args:
         model (`torch.nn.Module`):
             The model to save on disk.
@@ -88,6 +100,13 @@ def save_torch_model(
             Whether to save as safetensors, which is the default behavior. If `False`, the shards are saved as pickle.
             Safe serialization is recommended for security reasons. Saving as pickle is deprecated and will be removed
             in a future version.
+        is_main_process (`bool`, *optional*):
+            Whether the process calling this is the main process or not. Useful when in distributed training like
+            TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
+            the main process to avoid race conditions. Defaults to True.
+        shared_tensors_to_discard (`List[str]`, *optional*):
+            List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
+            detected, it will drop the first name alphabetically.
 
     Example:
 
@@ -112,6 +131,8 @@ def save_torch_model(
         metadata=metadata,
         safe_serialization=safe_serialization,
         save_directory=save_directory,
+        is_main_process=is_main_process,
+        shared_tensors_to_discard=shared_tensors_to_discard,
     )
 
 
@@ -124,6 +145,8 @@ def save_torch_state_dict(
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
     metadata: Optional[Dict[str, str]] = None,
     safe_serialization: bool = True,
+    is_main_process: bool = True,
+    shared_tensors_to_discard: Optional[List[str]] = None,
 ) -> None:
     """
     Save a model state dictionary to the disk, handling sharding and shared tensors issues.
@@ -147,6 +170,12 @@ def save_torch_state_dict(
 
     </Tip>
 
+    <Tip warning={true}>
+
+    If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
+
+    </Tip>
+
     Args:
         state_dict (`Dict[str, torch.Tensor]`):
             The state dictionary to save.
@@ -171,6 +200,13 @@ def save_torch_state_dict(
             Whether to save as safetensors, which is the default behavior. If `False`, the shards are saved as pickle.
             Safe serialization is recommended for security reasons. Saving as pickle is deprecated and will be removed
             in a future version.
+        is_main_process (`bool`, *optional*):
+            Whether the process calling this is the main process or not. Useful when in distributed training like
+            TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
+            the main process to avoid race conditions. Defaults to True.
+        shared_tensors_to_discard (`List[str]`, *optional*):
+            List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
+            detected, it will drop the first name alphabetically.
 
     Example:
 
@@ -192,7 +228,8 @@ def save_torch_state_dict(
         else constants.PYTORCH_WEIGHTS_FILE_PATTERN
     )
 
-
+    if metadata is None:
+        metadata = {}
     if safe_serialization:
         try:
             from safetensors.torch import save_file as save_file_fn
@@ -201,7 +238,13 @@ def save_torch_state_dict(
                 "Please install `safetensors` to use safe serialization. "
                 "You can install it with `pip install safetensors`."
             ) from e
-
+        # Clean state dict for safetensors
+        state_dict = _clean_state_dict_for_safetensors(
+            state_dict,
+            metadata,
+            force_contiguous=force_contiguous,
+            shared_tensors_to_discard=shared_tensors_to_discard,
+        )
     else:
         from torch import save as save_file_fn  # type: ignore[assignment]
 
@@ -210,27 +253,23 @@ def save_torch_state_dict(
             "pickled models from untrusted sources. If you intend to share your model, we strongly recommend "
             "using safe serialization by installing `safetensors` with `pip install safetensors`."
         )
-
-    # Clean state dict for safetensors
-    if metadata is None:
-        metadata = {}
-    if safe_serialization:
-        state_dict = _clean_state_dict_for_safetensors(state_dict, metadata, force_contiguous=force_contiguous)
-
 
     # Split dict
     state_dict_split = split_torch_state_dict_into_shards(
         state_dict, filename_pattern=filename_pattern, max_shard_size=max_shard_size
    )
 
-    # [9 removed lines collapsed in the original diff view: the previous cleanup of existing checkpoint files, which ran on every process]
+    # Only main process should clean up existing files to avoid race conditions in distributed environment
+    if is_main_process:
+        existing_files_regex = re.compile(filename_pattern.format(suffix=r"(-\d{5}-of-\d{5})?") + r"(\.index\.json)?")
+        for filename in os.listdir(save_directory):
+            if existing_files_regex.match(filename):
+                try:
+                    logger.debug(f"Removing existing file '{filename}' from folder.")
+                    os.remove(os.path.join(save_directory, filename))
+                except Exception as e:
+                    logger.warning(
+                        f"Error when trying to remove existing '{filename}' from folder: {e}. Continuing..."
+                    )
 
     # Save each shard
     per_file_metadata = {"format": "pt"}
```
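Both saving entry points gain the same two keyword arguments, and `save_torch_model` simply forwards them to `save_torch_state_dict`. A minimal sketch of how they might be used (the `TiedModel` module and tensor names are illustrative, not from the diff; the default `safe_serialization=True` path requires `safetensors` to be installed):

```python
import torch

from huggingface_hub import save_torch_state_dict


class TiedModel(torch.nn.Module):
    """Toy model whose output head shares its weight tensor with the embedding."""

    def __init__(self) -> None:
        super().__init__()
        self.embed = torch.nn.Embedding(10, 4)
        self.lm_head = torch.nn.Linear(4, 10, bias=False)
        self.lm_head.weight = self.embed.weight  # tied: both keys point at one storage


model = TiedModel()
save_torch_state_dict(
    model.state_dict(),
    save_directory="checkpoint",
    # Name the duplicate to drop explicitly instead of relying on the
    # "first name alphabetically" default documented above:
    shared_tensors_to_discard=["lm_head.weight"],
    # In distributed training, pass True only on the main rank so a single
    # process deletes stale shard files and writes the new ones:
    is_main_process=True,
)
```

For a `transformers.PreTrainedModel`, the new docstring tip recommends passing `model._tied_weights_keys` as `shared_tensors_to_discard`.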
huggingface_hub/serialization/_torch.py (continued):

````diff
@@ -336,6 +375,331 @@ def split_torch_state_dict_into_shards(
     )
 
 
+# LOADING
+
+
+def load_torch_model(
+    model: "torch.nn.Module",
+    checkpoint_path: Union[str, os.PathLike],
+    *,
+    strict: bool = False,
+    safe: bool = True,
+    weights_only: bool = False,
+    map_location: Optional[Union[str, "torch.device"]] = None,
+    mmap: bool = False,
+    filename_pattern: Optional[str] = None,
+) -> NamedTuple:
+    """
+    Load a checkpoint into a model, handling both sharded and non-sharded checkpoints.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model in which to load the checkpoint.
+        checkpoint_path (`str` or `os.PathLike`):
+            Path to either the checkpoint file or directory containing the checkpoint(s).
+        strict (`bool`, *optional*, defaults to `False`):
+            Whether to strictly enforce that the keys in the model state dict match the keys in the checkpoint.
+        safe (`bool`, *optional*, defaults to `True`):
+            If `safe` is True, the safetensors files will be loaded. If `safe` is False, the function
+            will first attempt to load safetensors files if they are available, otherwise it will fall back to loading
+            pickle files. `filename_pattern` parameter takes precedence over `safe` parameter.
+        weights_only (`bool`, *optional*, defaults to `False`):
+            If True, only loads the model weights without optimizer states and other metadata.
+            Only supported in PyTorch >= 1.13.
+        map_location (`str` or `torch.device`, *optional*):
+            A `torch.device` object, string or a dict specifying how to remap storage locations. It
+            indicates the location where all tensors should be loaded.
+        mmap (`bool`, *optional*, defaults to `False`):
+            Whether to use memory-mapped file loading. Memory mapping can improve loading performance
+            for large models in PyTorch >= 2.1.0 with zipfile-based checkpoints.
+        filename_pattern (`str`, *optional*):
+            The pattern to look for the index file. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"model{suffix}.safetensors"`.
+    Returns:
+        `NamedTuple`: A named tuple with `missing_keys` and `unexpected_keys` fields.
+            - `missing_keys` is a list of str containing the missing keys, i.e. keys that are in the model but not in the checkpoint.
+            - `unexpected_keys` is a list of str containing the unexpected keys, i.e. keys that are in the checkpoint but not in the model.
+
+    Raises:
+        [`FileNotFoundError`](https://docs.python.org/3/library/exceptions.html#FileNotFoundError)
+            If the checkpoint file or directory does not exist.
+        [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+            If safetensors or torch is not installed when trying to load a .safetensors file or a PyTorch checkpoint respectively.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the checkpoint path is invalid or if the checkpoint format cannot be determined.
+
+    Example:
+    ```python
+    >>> from huggingface_hub import load_torch_model
+    >>> model = ... # A PyTorch model
+    >>> load_torch_model(model, "path/to/checkpoint")
+    ```
+    """
+    checkpoint_path = Path(checkpoint_path)
+
+    if not checkpoint_path.exists():
+        raise ValueError(f"Checkpoint path {checkpoint_path} does not exist")
+    # 1. Check if checkpoint is a single file
+    if checkpoint_path.is_file():
+        state_dict = load_state_dict_from_file(
+            checkpoint_file=checkpoint_path,
+            map_location=map_location,
+            weights_only=weights_only,
+        )
+        return model.load_state_dict(state_dict, strict=strict)
+
+    # 2. If not, checkpoint_path is a directory
+    if filename_pattern is None:
+        filename_pattern = constants.SAFETENSORS_WEIGHTS_FILE_PATTERN
+        index_path = checkpoint_path / (filename_pattern.format(suffix="") + ".index.json")
+        # Only fallback to pickle format if safetensors index is not found and safe is False.
+        if not index_path.is_file() and not safe:
+            filename_pattern = constants.PYTORCH_WEIGHTS_FILE_PATTERN
+
+    index_path = checkpoint_path / (filename_pattern.format(suffix="") + ".index.json")
+
+    if index_path.is_file():
+        return _load_sharded_checkpoint(
+            model=model,
+            save_directory=checkpoint_path,
+            strict=strict,
+            weights_only=weights_only,
+            filename_pattern=filename_pattern,
+        )
+
+    # Look for single model file
+    model_files = list(checkpoint_path.glob("*.safetensors" if safe else "*.bin"))
+    if len(model_files) == 1:
+        state_dict = load_state_dict_from_file(
+            checkpoint_file=model_files[0],
+            map_location=map_location,
+            weights_only=weights_only,
+            mmap=mmap,
+        )
+        return model.load_state_dict(state_dict, strict=strict)
+
+    raise ValueError(
+        f"Directory '{checkpoint_path}' does not contain a valid checkpoint. "
+        "Expected either a sharded checkpoint with an index file, or a single model file."
+    )
+
+
+def _load_sharded_checkpoint(
+    model: "torch.nn.Module",
+    save_directory: os.PathLike,
+    *,
+    strict: bool = False,
+    weights_only: bool = False,
+    filename_pattern: str = constants.SAFETENSORS_WEIGHTS_FILE_PATTERN,
+) -> NamedTuple:
+    """
+    Loads a sharded checkpoint into a model. This is the same as
+    [`torch.nn.Module.load_state_dict`](https://pytorch.org/docs/stable/generated/torch.nn.Module.html?highlight=load_state_dict#torch.nn.Module.load_state_dict)
+    but for a sharded checkpoint. Each shard is loaded one by one and removed from memory after being loaded into the model.
+
+    Args:
+        model (`torch.nn.Module`):
+            The model in which to load the checkpoint.
+        save_directory (`str` or `os.PathLike`):
+            A path to a folder containing the sharded checkpoint.
+        strict (`bool`, *optional*, defaults to `False`):
+            Whether to strictly enforce that the keys in the model state dict match the keys in the sharded checkpoint.
+        weights_only (`bool`, *optional*, defaults to `False`):
+            If True, only loads the model weights without optimizer states and other metadata.
+            Only supported in PyTorch >= 1.13.
+        filename_pattern (`str`, *optional*, defaults to `"model{suffix}.safetensors"`):
+            The pattern to look for the index file. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"model{suffix}.safetensors"`.
+
+    Returns:
+        `NamedTuple`: A named tuple with `missing_keys` and `unexpected_keys` fields,
+            - `missing_keys` is a list of str containing the missing keys
+            - `unexpected_keys` is a list of str containing the unexpected keys
+    """
+
+    # 1. Load and validate index file
+    # The index file contains mapping of parameter names to shard files
+    index_path = filename_pattern.format(suffix="") + ".index.json"
+    index_file = os.path.join(save_directory, index_path)
+    with open(index_file, "r", encoding="utf-8") as f:
+        index = json.load(f)
+
+    # 2. Validate keys if in strict mode
+    # This is done before loading any shards to fail fast
+    if strict:
+        _validate_keys_for_strict_loading(model, index["weight_map"].keys())
+
+    # 3. Load each shard using `load_state_dict`
+    # Get unique shard files (multiple parameters can be in same shard)
+    shard_files = list(set(index["weight_map"].values()))
+    for shard_file in shard_files:
+        # Load shard into memory
+        shard_path = os.path.join(save_directory, shard_file)
+        state_dict = load_state_dict_from_file(
+            shard_path,
+            map_location="cpu",
+            weights_only=weights_only,
+        )
+        # Update model with parameters from this shard
+        model.load_state_dict(state_dict, strict=strict)
+        # Explicitly remove the state dict from memory
+        del state_dict
+
+    # 4. Return compatibility info
+    loaded_keys = set(index["weight_map"].keys())
+    model_keys = set(model.state_dict().keys())
+    return _IncompatibleKeys(
+        missing_keys=list(model_keys - loaded_keys), unexpected_keys=list(loaded_keys - model_keys)
+    )
+
+
+def load_state_dict_from_file(
+    checkpoint_file: Union[str, os.PathLike],
+    map_location: Optional[Union[str, "torch.device"]] = None,
+    weights_only: bool = False,
+    mmap: bool = False,
+) -> Union[Dict[str, "torch.Tensor"], Any]:
+    """
+    Loads a checkpoint file, handling both safetensors and pickle checkpoint formats.
+
+    Args:
+        checkpoint_file (`str` or `os.PathLike`):
+            Path to the checkpoint file to load. Can be either a safetensors or pickle (`.bin`) checkpoint.
+        map_location (`str` or `torch.device`, *optional*):
+            A `torch.device` object, string or a dict specifying how to remap storage locations. It
+            indicates the location where all tensors should be loaded.
+        weights_only (`bool`, *optional*, defaults to `False`):
+            If True, only loads the model weights without optimizer states and other metadata.
+            Only supported for pickle (`.bin`) checkpoints with PyTorch >= 1.13. Has no effect when
+            loading safetensors files.
+        mmap (`bool`, *optional*, defaults to `False`):
+            Whether to use memory-mapped file loading. Memory mapping can improve loading performance
+            for large models in PyTorch >= 2.1.0 with zipfile-based checkpoints. Has no effect when
+            loading safetensors files, as the `safetensors` library uses memory mapping by default.
+
+    Returns:
+        `Union[Dict[str, "torch.Tensor"], Any]`: The loaded checkpoint.
+            - For safetensors files: always returns a dictionary mapping parameter names to tensors.
+            - For pickle files: returns any Python object that was pickled (commonly a state dict, but could be
+              an entire model, optimizer state, or any other Python object).
+
+    Raises:
+        [`FileNotFoundError`](https://docs.python.org/3/library/exceptions.html#FileNotFoundError)
+            If the checkpoint file does not exist.
+        [`ImportError`](https://docs.python.org/3/library/exceptions.html#ImportError)
+            If safetensors or torch is not installed when trying to load a .safetensors file or a PyTorch checkpoint respectively.
+        [`OSError`](https://docs.python.org/3/library/exceptions.html#OSError)
+            If the checkpoint file format is invalid or if git-lfs files are not properly downloaded.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the checkpoint file path is empty or invalid.
+
+    Example:
+    ```python
+    >>> from huggingface_hub import load_state_dict_from_file
+
+    # Load a PyTorch checkpoint
+    >>> state_dict = load_state_dict_from_file("path/to/model.bin", map_location="cpu")
+    >>> model.load_state_dict(state_dict)
+
+    # Load a safetensors checkpoint
+    >>> state_dict = load_state_dict_from_file("path/to/model.safetensors")
+    >>> model.load_state_dict(state_dict)
+    ```
+    """
+    checkpoint_path = Path(checkpoint_file)
+
+    # Check if file exists and is a regular file (not a directory)
+    if not checkpoint_path.is_file():
+        raise FileNotFoundError(
+            f"No checkpoint file found at '{checkpoint_path}'. Please verify the path is correct and "
+            "the file has been properly downloaded."
+        )
+
+    # Load safetensors checkpoint
+    if checkpoint_path.suffix == ".safetensors":
+        try:
+            from safetensors import safe_open
+            from safetensors.torch import load_file
+        except ImportError as e:
+            raise ImportError(
+                "Please install `safetensors` to load safetensors checkpoint. "
+                "You can install it with `pip install safetensors`."
+            ) from e
+
+        # Check format of the archive
+        with safe_open(checkpoint_file, framework="pt") as f:  # type: ignore[attr-defined]
+            metadata = f.metadata()
+        # see comment: https://github.com/huggingface/transformers/blob/3d213b57fe74302e5902d68ed9478c3ad1aaa713/src/transformers/modeling_utils.py#L3966
+        if metadata is not None and metadata.get("format") not in ["pt", "mlx"]:
+            raise OSError(
+                f"The safetensors archive passed at {checkpoint_file} does not contain the valid metadata. Make sure "
+                "you save your model with the `save_torch_model` method."
+            )
+        device = str(map_location.type) if map_location is not None and hasattr(map_location, "type") else map_location
+        # meta device is not supported with safetensors, falling back to CPU
+        if device == "meta":
+            logger.warning("Meta device is not supported with safetensors. Falling back to CPU device.")
+            device = "cpu"
+        return load_file(checkpoint_file, device=device)  # type: ignore[arg-type]
+    # Otherwise, load from pickle
+    try:
+        import torch
+        from torch import load
+    except ImportError as e:
+        raise ImportError(
+            "Please install `torch` to load torch tensors. " "You can install it with `pip install torch`."
+        ) from e
+    # Add additional kwargs, mmap is only supported in torch >= 2.1.0
+    additional_kwargs = {}
+    if version.parse(torch.__version__) >= version.parse("2.1.0"):
+        additional_kwargs["mmap"] = mmap
+
+    # weights_only is only supported in torch >= 1.13.0
+    if version.parse(torch.__version__) >= version.parse("1.13.0"):
+        additional_kwargs["weights_only"] = weights_only
+
+    return load(
+        checkpoint_file,
+        map_location=map_location,
+        **additional_kwargs,
+    )
+
+
+# HELPERS
+
+
+def _validate_keys_for_strict_loading(
+    model: "torch.nn.Module",
+    loaded_keys: Iterable[str],
+) -> None:
+    """
+    Validate that model keys match loaded keys when strict loading is enabled.
+
+    Args:
+        model: The PyTorch model being loaded
+        loaded_keys: The keys present in the checkpoint
+
+    Raises:
+        RuntimeError: If there are missing or unexpected keys in strict mode
+    """
+    loaded_keys_set = set(loaded_keys)
+    model_keys = set(model.state_dict().keys())
+    missing_keys = model_keys - loaded_keys_set  # Keys in model but not in checkpoint
+    unexpected_keys = loaded_keys_set - model_keys  # Keys in checkpoint but not in model
+
+    if missing_keys or unexpected_keys:
+        error_message = f"Error(s) in loading state_dict for {model.__class__.__name__}"
+        if missing_keys:
+            str_missing_keys = ",".join([f'"{k}"' for k in sorted(missing_keys)])
+            error_message += f"\nMissing key(s): {str_missing_keys}."
+        if unexpected_keys:
+            str_unexpected_keys = ",".join([f'"{k}"' for k in sorted(unexpected_keys)])
+            error_message += f"\nUnexpected key(s): {str_unexpected_keys}."
+        raise RuntimeError(error_message)
+
+
 def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
     """Returns a unique id for plain tensor
     or a (potentially nested) Tuple of unique id for the flattened Tensor
````
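Read as a whole, `load_torch_model` dispatches on what `checkpoint_path` points to: a single file is loaded via `load_state_dict_from_file`, a directory containing an index file goes through `_load_sharded_checkpoint`, and a directory containing exactly one weights file is loaded directly. A usage sketch (the model and paths are placeholders; both functions are exported at the top level per the docstring examples above):

```python
import torch

from huggingface_hub import load_state_dict_from_file, load_torch_model

model = torch.nn.Linear(4, 10)  # placeholder: any torch.nn.Module

# Accepts a single checkpoint file, a sharded directory, or a directory with
# one weights file; returns the missing/unexpected key report.
result = load_torch_model(model, "checkpoint", strict=False)
print(result.missing_keys, result.unexpected_keys)

# Lower-level: load a raw state dict from one file (safetensors or pickle).
state_dict = load_state_dict_from_file("checkpoint/model.safetensors", map_location="cpu")
model.load_state_dict(state_dict, strict=False)
```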
huggingface_hub/serialization/_torch.py (continued):

```diff
@@ -359,7 +723,7 @@ def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
         # use some other unique id to distinguish.
         # this is a XLA tensor, it must be created using torch_xla's
         # device. So the following import is safe:
-        import torch_xla
+        import torch_xla  # type: ignore[import]
 
         unique_id = torch_xla._XLAC._xla_get_tensor_id(tensor)
     else:
@@ -423,7 +787,7 @@ def is_torch_tpu_available(check_device=True):
     if check_device:
         # We need to check if `xla_device` can be found, will raise a RuntimeError if not
         try:
-            import torch_xla.core.xla_model as xm
+            import torch_xla.core.xla_model as xm  # type: ignore[import]
 
             _ = xm.xla_device()
             return True
@@ -442,7 +806,7 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
         from torch.utils._python_dispatch import is_traceable_wrapper_subclass
 
         if is_traceable_wrapper_subclass(tensor):
-            return _get_unique_id(tensor)
+            return _get_unique_id(tensor)  # type: ignore
     except ImportError:
         # for torch version less than 2.1, we can fallback to original implementation
         pass
@@ -459,7 +823,10 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
 
 
 def _clean_state_dict_for_safetensors(
-    state_dict: Dict[str, "torch.Tensor"], metadata: Dict[str, str], force_contiguous: bool = True
+    state_dict: Dict[str, "torch.Tensor"],
+    metadata: Dict[str, str],
+    force_contiguous: bool = True,
+    shared_tensors_to_discard: Optional[List[str]] = None,
 ):
     """Remove shared tensors from state_dict and update metadata accordingly (for reloading).
 
@@ -467,7 +834,7 @@ def _clean_state_dict_for_safetensors(
 
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L155.
     """
-    to_removes = _remove_duplicate_names(state_dict)
+    to_removes = _remove_duplicate_names(state_dict, discard_names=shared_tensors_to_discard)
     for kept_name, to_remove_group in to_removes.items():
         for to_remove in to_remove_group:
             if metadata is None:
@@ -631,3 +998,18 @@ def _get_dtype_size(dtype: "torch.dtype") -> int:
         _float8_e5m2: 1,
     }
     return _SIZE[dtype]
+
+
+class _IncompatibleKeys(namedtuple("IncompatibleKeys", ["missing_keys", "unexpected_keys"])):
+    """
+    This is used to report missing and unexpected keys in the state dict.
+    Taken from https://github.com/pytorch/pytorch/blob/main/torch/nn/modules/module.py#L52.
+
+    """
+
+    def __repr__(self) -> str:
+        if not self.missing_keys and not self.unexpected_keys:
+            return "<All keys matched successfully>"
+        return super().__repr__()
+
+    __str__ = __repr__
```
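`_IncompatibleKeys` only customizes the repr of the underlying named tuple. A quick sketch of what callers see (the class is private; it is constructed here directly just for illustration):

```python
# Assuming the _IncompatibleKeys class defined in the hunk above:
report = _IncompatibleKeys(missing_keys=[], unexpected_keys=[])
print(report)  # <All keys matched successfully>

report = _IncompatibleKeys(missing_keys=["lm_head.weight"], unexpected_keys=[])
print(report)  # namedtuple repr: _IncompatibleKeys(missing_keys=['lm_head.weight'], unexpected_keys=[])
```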
huggingface_hub/utils/_cache_manager.py:

```diff
@@ -742,7 +742,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
 
         for ref_path in refs_path.glob("**/*"):
             # glob("**/*") iterates over all files and directories -> skip directories
-            if ref_path.is_dir():
+            if ref_path.is_dir() or ref_path.name in FILES_TO_IGNORE:
                 continue
 
             ref_name = str(ref_path.relative_to(refs_path))
```
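The one-line change in the cache scanner stops OS metadata files inside `refs/` from being read as git refs. A small sketch of the guard, assuming `FILES_TO_IGNORE` is the module-level ignore list in `_cache_manager.py` (it contains `.DS_Store`; the cache path below is illustrative):

```python
from pathlib import Path

FILES_TO_IGNORE = [".DS_Store"]  # assumption: mirrors the constant in _cache_manager.py

refs_path = Path.home() / ".cache/huggingface/hub/models--gpt2/refs"
for ref_path in refs_path.glob("**/*"):
    # Skip directories and OS metadata files instead of parsing them as refs
    if ref_path.is_dir() or ref_path.name in FILES_TO_IGNORE:
        continue
    print(ref_path.relative_to(refs_path), "->", ref_path.read_text().strip())
```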
huggingface_hub/utils/_headers.py:

````diff
@@ -20,6 +20,7 @@ from huggingface_hub.errors import LocalTokenNotFoundError
 
 from .. import constants
 from ._auth import get_token
+from ._deprecation import _deprecate_arguments
 from ._runtime import (
     get_fastai_version,
     get_fastcore_version,
@@ -35,15 +36,20 @@ from ._runtime import (
 from ._validators import validate_hf_hub_args
 
 
+@_deprecate_arguments(
+    version="1.0",
+    deprecated_args="is_write_action",
+    custom_message="This argument is ignored and we let the server handle the permission error instead (if any).",
+)
 @validate_hf_hub_args
 def build_hf_headers(
     *,
     token: Optional[Union[bool, str]] = None,
-    is_write_action: bool = False,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
     user_agent: Union[Dict, str, None] = None,
     headers: Optional[Dict[str, str]] = None,
+    is_write_action: bool = False,
 ) -> Dict[str, str]:
     """
     Build headers dictionary to send in a HF Hub call.
@@ -68,9 +74,6 @@ def build_hf_headers(
             - if `False`, authorization header is not set
             - if `None`, the token is read from the machine only except if
               `HF_HUB_DISABLE_IMPLICIT_TOKEN` env variable is set.
-        is_write_action (`bool`, default to `False`):
-            Set to True if the API call requires a write access. If `True`, the token
-            will be validated (cannot be `None`, cannot start by `"api_org***"`).
         library_name (`str`, *optional*):
             The name of the library that is making the HTTP request. Will be added to
             the user-agent header.
@@ -83,6 +86,8 @@ def build_hf_headers(
         headers (`dict`, *optional*):
             Additional headers to include in the request. Those headers take precedence
             over the ones generated by this function.
+        is_write_action (`bool`):
+            Ignored and deprecated argument.
 
     Returns:
         A `Dict` of headers to pass in your API call.
@@ -105,9 +110,6 @@ def build_hf_headers(
     >>> build_hf_headers() # token is not sent
     {"user-agent": ...}
 
-    >>> build_hf_headers(token="api_org_***", is_write_action=True)
-    ValueError: You must use your personal account token for write-access methods.
-
     >>> build_hf_headers(library_name="transformers", library_version="1.2.3")
     {"authorization": ..., "user-agent": "transformers/1.2.3; hf_hub/0.10.2; python/3.10.4; tensorflow/1.55"}
     ```
@@ -122,7 +124,6 @@ def build_hf_headers(
     """
     # Get auth token to send
     token_to_send = get_token_to_send(token)
-    _validate_token_to_send(token_to_send, is_write_action=is_write_action)
 
     # Combine headers
     hf_headers = {
@@ -171,23 +172,6 @@ def get_token_to_send(token: Optional[Union[bool, str]]) -> Optional[str]:
         return cached_token
 
 
-def _validate_token_to_send(token: Optional[str], is_write_action: bool) -> None:
-    if is_write_action:
-        if token is None:
-            raise ValueError(
-                "Token is required (write-access action) but no token found. You need"
-                " to provide a token or be logged in to Hugging Face with"
-                " `huggingface-cli login` or `huggingface_hub.login`. See"
-                " https://huggingface.co/settings/tokens."
-            )
-        if token.startswith("api_org"):
-            raise ValueError(
-                "You must use your personal account token for write-access methods. To"
-                " generate a write-access token, go to"
-                " https://huggingface.co/settings/tokens"
-            )
-
-
 def _http_user_agent(
     *,
     library_name: Optional[str] = None,
````