huggingface-hub 0.24.7__py3-none-any.whl → 0.25.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of huggingface-hub as possibly problematic.
- huggingface_hub/__init__.py +21 -1
- huggingface_hub/_commit_api.py +4 -4
- huggingface_hub/_inference_endpoints.py +13 -1
- huggingface_hub/_local_folder.py +191 -4
- huggingface_hub/_login.py +6 -6
- huggingface_hub/_snapshot_download.py +8 -17
- huggingface_hub/_space_api.py +5 -0
- huggingface_hub/_tensorboard_logger.py +29 -13
- huggingface_hub/_upload_large_folder.py +573 -0
- huggingface_hub/_webhooks_server.py +1 -1
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/download.py +8 -0
- huggingface_hub/commands/huggingface_cli.py +6 -1
- huggingface_hub/commands/lfs.py +2 -1
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +99 -57
- huggingface_hub/commands/tag.py +1 -1
- huggingface_hub/commands/upload.py +2 -1
- huggingface_hub/commands/upload_large_folder.py +129 -0
- huggingface_hub/commands/version.py +37 -0
- huggingface_hub/community.py +2 -2
- huggingface_hub/errors.py +218 -1
- huggingface_hub/fastai_utils.py +2 -3
- huggingface_hub/file_download.py +61 -62
- huggingface_hub/hf_api.py +758 -314
- huggingface_hub/hf_file_system.py +15 -23
- huggingface_hub/hub_mixin.py +27 -25
- huggingface_hub/inference/_client.py +78 -127
- huggingface_hub/inference/_generated/_async_client.py +169 -144
- huggingface_hub/inference/_generated/types/base.py +0 -9
- huggingface_hub/inference/_templating.py +2 -3
- huggingface_hub/inference_api.py +2 -2
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/lfs.py +7 -98
- huggingface_hub/repocard.py +6 -5
- huggingface_hub/repository.py +5 -5
- huggingface_hub/serialization/_torch.py +64 -11
- huggingface_hub/utils/__init__.py +13 -14
- huggingface_hub/utils/_cache_manager.py +97 -14
- huggingface_hub/utils/_fixes.py +18 -2
- huggingface_hub/utils/_http.py +228 -2
- huggingface_hub/utils/_lfs.py +110 -0
- huggingface_hub/utils/_runtime.py +7 -1
- huggingface_hub/utils/_token.py +3 -2
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
- huggingface_hub/inference/_types.py +0 -52
- huggingface_hub/utils/_errors.py +0 -397
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/lfs.py
CHANGED
@@ -16,10 +16,8 @@
 
 import inspect
 import io
-import os
 import re
 import warnings
-from contextlib import AbstractContextManager
 from dataclasses import dataclass
 from math import ceil
 from os.path import getsize
@@ -27,7 +25,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict
 from urllib.parse import unquote
 
-from huggingface_hub
+from huggingface_hub import constants
 
 from .utils import (
     build_hf_headers,
@@ -39,6 +37,7 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
+from .utils._lfs import SliceFileObj
 from .utils.sha import sha256, sha_fileobj
 
 
@@ -139,10 +138,10 @@ def post_lfs_batch_info(
         [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
             If the server returned an error.
     """
-    endpoint = endpoint if endpoint is not None else ENDPOINT
+    endpoint = endpoint if endpoint is not None else constants.ENDPOINT
     url_prefix = ""
-    if repo_type in REPO_TYPES_URL_PREFIXES:
-        url_prefix = REPO_TYPES_URL_PREFIXES[repo_type]
+    if repo_type in constants.REPO_TYPES_URL_PREFIXES:
+        url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type]
     batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
     payload: Dict = {
         "operation": "upload",
@@ -328,9 +327,9 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size
     sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size)
 
     # 2. Upload parts (either with hf_transfer or in pure Python)
-    use_hf_transfer = HF_HUB_ENABLE_HF_TRANSFER
+    use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER
     if (
-        HF_HUB_ENABLE_HF_TRANSFER
+        constants.HF_HUB_ENABLE_HF_TRANSFER
         and not isinstance(operation.path_or_fileobj, str)
         and not isinstance(operation.path_or_fileobj, Path)
     ):
@@ -462,93 +461,3 @@ def _upload_parts_hf_transfer(
     if not supports_callback:
         progress.update(total)
     return output
-
-
-class SliceFileObj(AbstractContextManager):
-    """
-    Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.
-
-    This is NOT thread safe
-
-    Inspired by stackoverflow.com/a/29838711/593036
-
-    Credits to @julien-c
-
-    Args:
-        fileobj (`BinaryIO`):
-            A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
-            `fileobj` will be reset to its original position when exiting the context manager.
-        seek_from (`int`):
-            The start of the slice (offset from position 0 in bytes).
-        read_limit (`int`):
-            The maximum number of bytes to read from the slice.
-
-    Attributes:
-        previous_position (`int`):
-            The previous position
-
-    Examples:
-
-    Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
-    ```python
-    >>> with open("path/to/file", "rb") as file:
-    ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
-    ...         fslice.read(...)
-    ```
-
-    Reading a file in chunks of 512 bytes
-    ```python
-    >>> import os
-    >>> chunk_size = 512
-    >>> file_size = os.getsize("path/to/file")
-    >>> with open("path/to/file", "rb") as file:
-    ...     for chunk_idx in range(ceil(file_size / chunk_size)):
-    ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
-    ...             chunk = fslice.read(...)
-
-    ```
-    """
-
-    def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int):
-        self.fileobj = fileobj
-        self.seek_from = seek_from
-        self.read_limit = read_limit
-
-    def __enter__(self):
-        self._previous_position = self.fileobj.tell()
-        end_of_stream = self.fileobj.seek(0, os.SEEK_END)
-        self._len = min(self.read_limit, end_of_stream - self.seek_from)
-        # ^^ The actual number of bytes that can be read from the slice
-        self.fileobj.seek(self.seek_from, io.SEEK_SET)
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.fileobj.seek(self._previous_position, io.SEEK_SET)
-
-    def read(self, n: int = -1):
-        pos = self.tell()
-        if pos >= self._len:
-            return b""
-        remaining_amount = self._len - pos
-        data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount))
-        return data
-
-    def tell(self) -> int:
-        return self.fileobj.tell() - self.seek_from
-
-    def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-        start = self.seek_from
-        end = start + self._len
-        if whence in (os.SEEK_SET, os.SEEK_END):
-            offset = start + offset if whence == os.SEEK_SET else end + offset
-            offset = max(start, min(offset, end))
-            whence = os.SEEK_SET
-        elif whence == os.SEEK_CUR:
-            cur_pos = self.fileobj.tell()
-            offset = max(start - cur_pos, min(offset, end - cur_pos))
-        else:
-            raise ValueError(f"whence value {whence} is not supported")
-        return self.fileobj.seek(offset, whence) - self.seek_from
-
-    def __iter__(self):
-        yield self.read(n=4 * 1024 * 1024)
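The notable change in this file is that `SliceFileObj` moves out of `lfs.py` into `huggingface_hub/utils/_lfs.py` (imported above as `from .utils._lfs import SliceFileObj`), while module-level constants are now read through the `constants` module. A minimal usage sketch of the relocated helper, assuming a local placeholder file `example.bin`:

```python
# Sketch only: read a single 512-byte slice of a local file through SliceFileObj,
# imported from its new location. "example.bin" is a placeholder path.
from huggingface_hub.utils._lfs import SliceFileObj

with open("example.bin", "rb") as f:
    with SliceFileObj(f, seek_from=0, read_limit=512) as fslice:
        chunk = fslice.read()  # at most 512 bytes, starting at offset 0

print(len(chunk))
```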
huggingface_hub/repocard.py
CHANGED
@@ -19,8 +19,9 @@ from huggingface_hub.repocard_data import (
 )
 from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
 
-from .
-from .
+from . import constants
+from .errors import EntryNotFoundError
+from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
 
 
 logger = logging.get_logger(__name__)
@@ -175,7 +176,7 @@ class RepoCard:
         card_path = Path(
             hf_hub_download(
                 repo_id_or_path,
-                REPOCARD_NAME,
+                constants.REPOCARD_NAME,
                 repo_type=repo_type or cls.repo_type,
                 token=token,
             )
@@ -273,11 +274,11 @@ class RepoCard:
         self.validate(repo_type=repo_type)
 
         with SoftTemporaryDirectory() as tmpdir:
-            tmp_path = Path(tmpdir) / REPOCARD_NAME
+            tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
             tmp_path.write_text(str(self))
             url = upload_file(
                 path_or_fileobj=str(tmp_path),
-                path_in_repo=REPOCARD_NAME,
+                path_in_repo=constants.REPOCARD_NAME,
                 repo_id=repo_id,
                 token=token,
                 repo_type=repo_type,
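As in `lfs.py`, the import style switches from binding names like `REPOCARD_NAME` at import time to going through the `constants` module object, so a value patched on `huggingface_hub.constants` at runtime is seen by this code. A minimal sketch of the pattern (the printed value is the library's usual default, `"README.md"`):

```python
# Sketch of the module-object import pattern adopted throughout this release.
from huggingface_hub import constants

# The attribute lookup happens at use time, so monkeypatching
# huggingface_hub.constants.REPOCARD_NAME would be visible here.
print(constants.REPOCARD_NAME)  # "README.md" by default
```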
huggingface_hub/repository.py
CHANGED
@@ -9,7 +9,7 @@ from pathlib import Path
 from typing import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, Union
 from urllib.parse import urlparse
 
-from huggingface_hub
+from huggingface_hub import constants
 from huggingface_hub.repocard import metadata_load, metadata_save
 
 from .hf_api import HfApi, repo_type_and_id_from_hf_id
@@ -659,8 +659,8 @@
 
         repo_url = hub_url + "/"
 
-        if self._repo_type in REPO_TYPES_URL_PREFIXES:
-            repo_url += REPO_TYPES_URL_PREFIXES[self._repo_type]
+        if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
+            repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]
 
         if token is not None:
             # Add token in git url when provided
@@ -1434,13 +1434,13 @@
             os.chdir(current_working_directory)
 
     def repocard_metadata_load(self) -> Optional[Dict]:
-        filepath = os.path.join(self.local_dir, REPOCARD_NAME)
+        filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME)
         if os.path.isfile(filepath):
             return metadata_load(filepath)
         return None
 
     def repocard_metadata_save(self, data: Dict) -> None:
-        return metadata_save(os.path.join(self.local_dir, REPOCARD_NAME), data)
+        return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data)
 
     @property
     def commands_failed(self):
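`Repository.clone_from` builds the clone URL with the same prefix table used in `lfs.py`. A short sketch of what the lookup yields, assuming the usual contents of `REPO_TYPES_URL_PREFIXES`:

```python
# Sketch: dataset and space repos carry a URL prefix, model repos do not.
from huggingface_hub import constants

print(constants.REPO_TYPES_URL_PREFIXES.get("dataset"))  # "datasets/"
print(constants.REPO_TYPES_URL_PREFIXES.get("space"))    # "spaces/"
print(constants.REPO_TYPES_URL_PREFIXES.get("model"))    # None -> no prefix for models
```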
huggingface_hub/serialization/_torch.py
CHANGED
@@ -20,7 +20,7 @@ import re
 from collections import defaultdict
 from functools import lru_cache
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
 
 from .. import constants, logging
 from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
@@ -336,17 +336,24 @@ def split_torch_state_dict_into_shards(
     )
 
 
-def
+def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+    """Returns a unique id for plain tensor
+    or a (potentially nested) Tuple of unique id for the flattened Tensor
+    if the input is a wrapper tensor subclass Tensor
     """
-    Return unique identifier to a tensor storage.
 
-
-
-
-
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return tuple(_get_unique_id(getattr(tensor, attr)) for attr in attrs)
+
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
 
-    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
-    """
     if tensor.device.type == "xla" and is_torch_tpu_available():
         # NOTE: xla tensors dont have storage
         # use some other unique id to distinguish.
@@ -358,13 +365,38 @@ def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, i
     else:
         unique_id = storage_ptr(tensor)
 
-    return
+    return unique_id
+
+
+def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", Union[int, Tuple[Any, ...]], int]:
+    """
+    Return unique identifier to a tensor storage.
+
+    Multiple different tensors can share the same underlying storage. For
+    example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
+    guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
+    non-overlapping lifetimes may have the same id.
+
+    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
+    """
+    return tensor.device, _get_unique_id(tensor), get_torch_storage_size(tensor)
 
 
 def get_torch_storage_size(tensor: "torch.Tensor") -> int:
     """
     Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L31C1-L41C59
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return sum(get_torch_storage_size(getattr(tensor, attr)) for attr in attrs)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     try:
         return tensor.untyped_storage().nbytes()
     except AttributeError:
@@ -398,10 +430,20 @@ def is_torch_tpu_available(check_device=True):
     return False
 
 
-def storage_ptr(tensor: "torch.Tensor") -> int:
+def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            return _get_unique_id(tensor)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     try:
         return tensor.untyped_storage().data_ptr()
     except Exception:
@@ -496,6 +538,17 @@ def _is_complete(tensor: "torch.Tensor") -> bool:
     """
    Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
     """
+    try:
+        # for torch 2.1 and above we can also handle tensor subclasses
+        from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+
+        if is_traceable_wrapper_subclass(tensor):
+            attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+            return all(_is_complete(getattr(tensor, attr)) for attr in attrs)
+    except ImportError:
+        # for torch version less than 2.1, we can fallback to original implementation
+        pass
+
     return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _get_dtype_size(
         tensor.dtype
     ) == get_torch_storage_size(tensor)
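The new `_get_unique_id` helper lets the sharding code recognize tensors that share storage, including traceable wrapper tensor subclasses on torch >= 2.1. A hedged sketch of what `get_torch_storage_id` distinguishes (import path taken from this file; requires torch to be installed):

```python
# Sketch: two views of the same storage get the same storage id; an unrelated tensor does not.
import torch

from huggingface_hub.serialization._torch import get_torch_storage_id

base = torch.zeros(4, 4)
view = base[:2]            # shares the underlying storage with `base`
other = torch.zeros(4, 4)  # separate storage

assert get_torch_storage_id(base) == get_torch_storage_id(view)
assert get_torch_storage_id(base) != get_torch_storage_id(other)
```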
huggingface_hub/utils/__init__.py
CHANGED
@@ -16,10 +16,21 @@
 # ruff: noqa: F401
 
 from huggingface_hub.errors import (
+    BadRequestError,
+    CacheNotFound,
+    CorruptedCacheException,
+    DisabledRepoError,
+    EntryNotFoundError,
+    FileMetadataError,
+    GatedRepoError,
+    HfHubHTTPError,
     HFValidationError,
+    LocalEntryNotFoundError,
     LocalTokenNotFoundError,
     NotASafetensorsRepoError,
     OfflineModeIsEnabled,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
     SafetensorsParsingError,
 )
 
@@ -29,26 +40,12 @@ from ._cache_manager import (
     CachedFileInfo,
     CachedRepoInfo,
     CachedRevisionInfo,
-    CacheNotFound,
-    CorruptedCacheException,
     DeleteCacheStrategy,
     HFCacheInfo,
     scan_cache_dir,
 )
 from ._chunk_utils import chunk_iterable
 from ._datetime import parse_datetime
-from ._errors import (
-    BadRequestError,
-    DisabledRepoError,
-    EntryNotFoundError,
-    FileMetadataError,
-    GatedRepoError,
-    HfHubHTTPError,
-    LocalEntryNotFoundError,
-    RepositoryNotFoundError,
-    RevisionNotFoundError,
-    hf_raise_for_status,
-)
 from ._experimental import experimental
 from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
 from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
@@ -58,6 +55,7 @@ from ._http import (
     configure_http_backend,
     fix_hf_endpoint_in_url,
     get_session,
+    hf_raise_for_status,
     http_backoff,
     reset_sessions,
 )
@@ -84,6 +82,7 @@ from ._runtime import (
     get_tf_version,
     get_torch_version,
     is_aiohttp_available,
+    is_colab_enterprise,
     is_fastai_available,
     is_fastapi_available,
     is_fastcore_available,
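The net effect of these hunks is that the exception classes are defined once in `huggingface_hub.errors` and only re-exported by `huggingface_hub.utils`, while `hf_raise_for_status` now comes from `._http` instead of the removed `._errors` module. A small sketch of the resulting equivalence:

```python
# Sketch: after the consolidation, both import paths resolve to the same class.
from huggingface_hub.errors import EntryNotFoundError as from_errors
from huggingface_hub.utils import EntryNotFoundError as from_utils

assert from_errors is from_utils
```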
huggingface_hub/utils/_cache_manager.py
CHANGED
@@ -22,6 +22,9 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
 
+from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
+
+from ..commands._cli_utils import tabulate
 from ..constants import HF_HUB_CACHE
 from . import logging
 
@@ -34,20 +37,6 @@ REPO_TYPE_T = Literal["model", "dataset", "space"]
 FILES_TO_IGNORE = [".DS_Store"]
 
 
-class CacheNotFound(Exception):
-    """Exception thrown when the Huggingface cache is not found."""
-
-    cache_dir: Union[str, Path]
-
-    def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
-        super().__init__(msg, *args, **kwargs)
-        self.cache_dir = cache_dir
-
-
-class CorruptedCacheException(Exception):
-    """Exception for any unexpected structure in the Huggingface cache-system."""
-
-
 @dataclass(frozen=True)
 class CachedFileInfo:
     """Frozen data structure holding information about a single cached file.
@@ -496,6 +485,100 @@ class HFCacheInfo:
             expected_freed_size=delete_strategy_expected_freed_size,
         )
 
+    def export_as_table(self, *, verbosity: int = 0) -> str:
+        """Generate a table from the [`HFCacheInfo`] object.
+
+        Pass `verbosity=0` to get a table with a single row per repo, with columns
+        "repo_id", "repo_type", "size_on_disk", "nb_files", "last_accessed", "last_modified", "refs", "local_path".
+
+        Pass `verbosity=1` to get a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
+        "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "refs", "local_path".
+
+        Example:
+        ```py
+        >>> from huggingface_hub.utils import scan_cache_dir
+
+        >>> hf_cache_info = scan_cache_dir()
+        HFCacheInfo(...)
+
+        >>> print(hf_cache_info.export_as_table())
+        REPO ID REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
+        --------------------------------------------------- --------- ------------ -------- ------------- ------------- ---- --------------------------------------------------------------------------------------------------
+        roberta-base model 2.7M 5 1 day ago 1 week ago main ~/.cache/huggingface/hub/models--roberta-base
+        suno/bark model 8.8K 1 1 week ago 1 week ago main ~/.cache/huggingface/hub/models--suno--bark
+        t5-base model 893.8M 4 4 days ago 7 months ago main ~/.cache/huggingface/hub/models--t5-base
+        t5-large model 3.0G 4 5 weeks ago 5 months ago main ~/.cache/huggingface/hub/models--t5-large
+
+        >>> print(hf_cache_info.export_as_table(verbosity=1))
+        REPO ID REPO TYPE REVISION SIZE ON DISK NB FILES LAST_MODIFIED REFS LOCAL PATH
+        --------------------------------------------------- --------- ---------------------------------------- ------------ -------- ------------- ---- -----------------------------------------------------------------------------------------------------------------------------------------------------
+        roberta-base model e2da8e2f811d1448a5b465c236feacd80ffbac7b 2.7M 5 1 week ago main ~/.cache/huggingface/hub/models--roberta-base/snapshots/e2da8e2f811d1448a5b465c236feacd80ffbac7b
+        suno/bark model 70a8a7d34168586dc5d028fa9666aceade177992 8.8K 1 1 week ago main ~/.cache/huggingface/hub/models--suno--bark/snapshots/70a8a7d34168586dc5d028fa9666aceade177992
+        t5-base model a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1 893.8M 4 7 months ago main ~/.cache/huggingface/hub/models--t5-base/snapshots/a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1
+        t5-large model 150ebc2c4b72291e770f58e6057481c8d2ed331a 3.0G 4 5 months ago main ~/.cache/huggingface/hub/models--t5-large/snapshots/150ebc2c4b72291e770f58e6057481c8d2ed331a
+        ```
+
+        Args:
+            verbosity (`int`, *optional*):
+                The verbosity level. Defaults to 0.
+
+        Returns:
+            `str`: The table as a string.
+        """
+        if verbosity == 0:
+            return tabulate(
+                rows=[
+                    [
+                        repo.repo_id,
+                        repo.repo_type,
+                        "{:>12}".format(repo.size_on_disk_str),
+                        repo.nb_files,
+                        repo.last_accessed_str,
+                        repo.last_modified_str,
+                        ", ".join(sorted(repo.refs)),
+                        str(repo.repo_path),
+                    ]
+                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                ],
+                headers=[
+                    "REPO ID",
+                    "REPO TYPE",
+                    "SIZE ON DISK",
+                    "NB FILES",
+                    "LAST_ACCESSED",
+                    "LAST_MODIFIED",
+                    "REFS",
+                    "LOCAL PATH",
+                ],
+            )
+        else:
+            return tabulate(
+                rows=[
+                    [
+                        repo.repo_id,
+                        repo.repo_type,
+                        revision.commit_hash,
+                        "{:>12}".format(revision.size_on_disk_str),
+                        revision.nb_files,
+                        revision.last_modified_str,
+                        ", ".join(sorted(revision.refs)),
+                        str(revision.snapshot_path),
+                    ]
+                    for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                    for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
+                ],
+                headers=[
+                    "REPO ID",
+                    "REPO TYPE",
+                    "REVISION",
+                    "SIZE ON DISK",
+                    "NB FILES",
+                    "LAST_MODIFIED",
+                    "REFS",
+                    "LOCAL PATH",
+                ],
+            )
+
 
 def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
     """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.
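Two things change here: `CacheNotFound` and `CorruptedCacheException` now live in `huggingface_hub.errors`, and `HFCacheInfo` gains `export_as_table()`, presumably backing the reworked `huggingface-cli scan-cache` listing (see `commands/scan_cache.py` above). A short sketch combining both; the scanned path is whatever the default `HF_HUB_CACHE` points to:

```python
# Sketch: print the per-revision cache table, handling a missing cache directory.
from huggingface_hub.errors import CacheNotFound
from huggingface_hub.utils import scan_cache_dir

try:
    print(scan_cache_dir().export_as_table(verbosity=1))
except CacheNotFound as e:
    print(f"No cache found at {e.cache_dir}")
```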
huggingface_hub/utils/_fixes.py
CHANGED
@@ -18,7 +18,7 @@ from pathlib import Path
 from typing import Callable, Generator, Optional, Union
 
 import yaml
-from filelock import BaseFileLock, FileLock, Timeout
+from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout
 
 from .. import constants
 from . import logging
@@ -84,13 +84,29 @@ def _set_write_permission_and_retry(func, path, excinfo):
 
 @contextlib.contextmanager
 def WeakFileLock(lock_file: Union[str, Path]) -> Generator[BaseFileLock, None, None]:
-    """A filelock
+    """A filelock with some custom logic.
+
+    This filelock is weaker than the default filelock in that:
+    1. It won't raise an exception if release fails.
+    2. It will default to a SoftFileLock if the filesystem does not support flock.
+
+    An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
+    """
     lock = FileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
     while True:
         try:
             lock.acquire()
         except Timeout:
             logger.info("still waiting to acquire lock on %s", lock_file)
+        except NotImplementedError as e:
+            if "use SoftFileLock instead" in str(e):
+                # It's possible that the system does support flock, expect for one partition or filesystem.
+                # In this case, let's default to a SoftFileLock.
+                logger.warning(
+                    "FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
+                )
+                lock = SoftFileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
+                continue
         else:
             break
 
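`WeakFileLock` keeps its context-manager interface; the new branch only swaps in `filelock.SoftFileLock` when the filesystem rejects `flock`-based locking with `NotImplementedError`. A minimal usage sketch (`example.lock` is a placeholder path):

```python
# Sketch: callers keep using WeakFileLock unchanged; the SoftFileLock fallback is internal,
# and per the docstring above a failed release does not raise.
from huggingface_hub.utils import WeakFileLock

with WeakFileLock("example.lock"):
    pass  # critical section: the lock is held here
```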