huggingface-hub 0.24.6__py3-none-any.whl → 0.25.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (52)
  1. huggingface_hub/__init__.py +21 -1
  2. huggingface_hub/_commit_api.py +4 -4
  3. huggingface_hub/_inference_endpoints.py +13 -1
  4. huggingface_hub/_local_folder.py +191 -4
  5. huggingface_hub/_login.py +6 -6
  6. huggingface_hub/_snapshot_download.py +8 -17
  7. huggingface_hub/_space_api.py +5 -0
  8. huggingface_hub/_tensorboard_logger.py +29 -13
  9. huggingface_hub/_upload_large_folder.py +573 -0
  10. huggingface_hub/_webhooks_server.py +1 -1
  11. huggingface_hub/commands/_cli_utils.py +5 -0
  12. huggingface_hub/commands/download.py +8 -0
  13. huggingface_hub/commands/huggingface_cli.py +6 -1
  14. huggingface_hub/commands/lfs.py +2 -1
  15. huggingface_hub/commands/repo_files.py +2 -2
  16. huggingface_hub/commands/scan_cache.py +99 -57
  17. huggingface_hub/commands/tag.py +1 -1
  18. huggingface_hub/commands/upload.py +2 -1
  19. huggingface_hub/commands/upload_large_folder.py +129 -0
  20. huggingface_hub/commands/version.py +37 -0
  21. huggingface_hub/community.py +2 -2
  22. huggingface_hub/errors.py +218 -1
  23. huggingface_hub/fastai_utils.py +2 -3
  24. huggingface_hub/file_download.py +63 -63
  25. huggingface_hub/hf_api.py +758 -314
  26. huggingface_hub/hf_file_system.py +15 -23
  27. huggingface_hub/hub_mixin.py +27 -25
  28. huggingface_hub/inference/_client.py +78 -127
  29. huggingface_hub/inference/_generated/_async_client.py +169 -144
  30. huggingface_hub/inference/_generated/types/base.py +0 -9
  31. huggingface_hub/inference/_templating.py +2 -3
  32. huggingface_hub/inference_api.py +2 -2
  33. huggingface_hub/keras_mixin.py +2 -2
  34. huggingface_hub/lfs.py +7 -98
  35. huggingface_hub/repocard.py +6 -5
  36. huggingface_hub/repository.py +5 -5
  37. huggingface_hub/serialization/_torch.py +64 -11
  38. huggingface_hub/utils/__init__.py +13 -14
  39. huggingface_hub/utils/_cache_manager.py +97 -14
  40. huggingface_hub/utils/_fixes.py +18 -2
  41. huggingface_hub/utils/_http.py +228 -2
  42. huggingface_hub/utils/_lfs.py +110 -0
  43. huggingface_hub/utils/_runtime.py +7 -1
  44. huggingface_hub/utils/_token.py +3 -2
  45. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/METADATA +2 -2
  46. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/RECORD +50 -48
  47. huggingface_hub/inference/_types.py +0 -52
  48. huggingface_hub/utils/_errors.py +0 -397
  49. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/LICENSE +0 -0
  50. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/WHEEL +0 -0
  51. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/entry_points.txt +0 -0
  52. {huggingface_hub-0.24.6.dist-info → huggingface_hub-0.25.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/lfs.py CHANGED
@@ -16,10 +16,8 @@
  
  import inspect
  import io
- import os
  import re
  import warnings
- from contextlib import AbstractContextManager
  from dataclasses import dataclass
  from math import ceil
  from os.path import getsize
@@ -27,7 +25,7 @@ from pathlib import Path
  from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional, Tuple, TypedDict
  from urllib.parse import unquote
  
- from huggingface_hub.constants import ENDPOINT, HF_HUB_ENABLE_HF_TRANSFER, REPO_TYPES_URL_PREFIXES
+ from huggingface_hub import constants
  
  from .utils import (
      build_hf_headers,
@@ -39,6 +37,7 @@ from .utils import (
      tqdm,
      validate_hf_hub_args,
  )
+ from .utils._lfs import SliceFileObj
  from .utils.sha import sha256, sha_fileobj
  
  
@@ -139,10 +138,10 @@ def post_lfs_batch_info(
          [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
              If the server returned an error.
      """
-     endpoint = endpoint if endpoint is not None else ENDPOINT
+     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
      url_prefix = ""
-     if repo_type in REPO_TYPES_URL_PREFIXES:
-         url_prefix = REPO_TYPES_URL_PREFIXES[repo_type]
+     if repo_type in constants.REPO_TYPES_URL_PREFIXES:
+         url_prefix = constants.REPO_TYPES_URL_PREFIXES[repo_type]
      batch_url = f"{endpoint}/{url_prefix}{repo_id}.git/info/lfs/objects/batch"
      payload: Dict = {
          "operation": "upload",
@@ -328,9 +327,9 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: Dict, chunk_size
      sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size)
  
      # 2. Upload parts (either with hf_transfer or in pure Python)
-     use_hf_transfer = HF_HUB_ENABLE_HF_TRANSFER
+     use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER
      if (
-         HF_HUB_ENABLE_HF_TRANSFER
+         constants.HF_HUB_ENABLE_HF_TRANSFER
          and not isinstance(operation.path_or_fileobj, str)
          and not isinstance(operation.path_or_fileobj, Path)
      ):
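For context, `constants.HF_HUB_ENABLE_HF_TRANSFER` is driven by the `HF_HUB_ENABLE_HF_TRANSFER` environment variable, which is read when `huggingface_hub` is imported. A sketch of opting in (assumes the optional `hf_transfer` package is installed):

```python
import os

# Must be set before importing huggingface_hub: the constant is read at import time.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import HfApi

# Large file transfers now take the Rust-based hf_transfer path when possible.
api = HfApi()
```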
@@ -462,93 +461,3 @@ def _upload_parts_hf_transfer(
      if not supports_callback:
          progress.update(total)
      return output
- 
- 
- class SliceFileObj(AbstractContextManager):
-     """
-     Utility context manager to read a *slice* of a seekable file-like object as a seekable, file-like object.
- 
-     This is NOT thread safe
- 
-     Inspired by stackoverflow.com/a/29838711/593036
- 
-     Credits to @julien-c
- 
-     Args:
-         fileobj (`BinaryIO`):
-             A file-like object to slice. MUST implement `tell()` and `seek()` (and `read()` of course).
-             `fileobj` will be reset to its original position when exiting the context manager.
-         seek_from (`int`):
-             The start of the slice (offset from position 0 in bytes).
-         read_limit (`int`):
-             The maximum number of bytes to read from the slice.
- 
-     Attributes:
-         previous_position (`int`):
-             The previous position
- 
-     Examples:
- 
-     Reading 200 bytes with an offset of 128 bytes from a file (ie bytes 128 to 327):
-     ```python
-     >>> with open("path/to/file", "rb") as file:
-     ...     with SliceFileObj(file, seek_from=128, read_limit=200) as fslice:
-     ...         fslice.read(...)
-     ```
- 
-     Reading a file in chunks of 512 bytes
-     ```python
-     >>> import os
-     >>> chunk_size = 512
-     >>> file_size = os.getsize("path/to/file")
-     >>> with open("path/to/file", "rb") as file:
-     ...     for chunk_idx in range(ceil(file_size / chunk_size)):
-     ...         with SliceFileObj(file, seek_from=chunk_idx * chunk_size, read_limit=chunk_size) as fslice:
-     ...             chunk = fslice.read(...)
- 
-     ```
-     """
- 
-     def __init__(self, fileobj: BinaryIO, seek_from: int, read_limit: int):
-         self.fileobj = fileobj
-         self.seek_from = seek_from
-         self.read_limit = read_limit
- 
-     def __enter__(self):
-         self._previous_position = self.fileobj.tell()
-         end_of_stream = self.fileobj.seek(0, os.SEEK_END)
-         self._len = min(self.read_limit, end_of_stream - self.seek_from)
-         # ^^ The actual number of bytes that can be read from the slice
-         self.fileobj.seek(self.seek_from, io.SEEK_SET)
-         return self
- 
-     def __exit__(self, exc_type, exc_value, traceback):
-         self.fileobj.seek(self._previous_position, io.SEEK_SET)
- 
-     def read(self, n: int = -1):
-         pos = self.tell()
-         if pos >= self._len:
-             return b""
-         remaining_amount = self._len - pos
-         data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount))
-         return data
- 
-     def tell(self) -> int:
-         return self.fileobj.tell() - self.seek_from
- 
-     def seek(self, offset: int, whence: int = os.SEEK_SET) -> int:
-         start = self.seek_from
-         end = start + self._len
-         if whence in (os.SEEK_SET, os.SEEK_END):
-             offset = start + offset if whence == os.SEEK_SET else end + offset
-             offset = max(start, min(offset, end))
-             whence = os.SEEK_SET
-         elif whence == os.SEEK_CUR:
-             cur_pos = self.fileobj.tell()
-             offset = max(start - cur_pos, min(offset, end - cur_pos))
-         else:
-             raise ValueError(f"whence value {whence} is not supported")
-         return self.fileobj.seek(offset, whence) - self.seek_from
- 
-     def __iter__(self):
-         yield self.read(n=4 * 1024 * 1024)
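`SliceFileObj` is not gone: per the new import near the top of the file, it moved to `huggingface_hub.utils._lfs` (see the `utils/_lfs.py +110` entry in the file list above). A small usage sketch against the new (private-module) location, assuming a local file `data.bin` exists:

```python
from huggingface_hub.utils._lfs import SliceFileObj  # new home in 0.25.0rc0

# Read bytes 128..327 of a file through a seekable slice view.
with open("data.bin", "rb") as f:
    with SliceFileObj(f, seek_from=128, read_limit=200) as fslice:
        chunk = fslice.read()  # reads at most 200 bytes
        print(len(chunk), fslice.tell())
```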
huggingface_hub/repocard.py CHANGED
@@ -19,8 +19,9 @@ from huggingface_hub.repocard_data import (
  )
  from huggingface_hub.utils import get_session, is_jinja_available, yaml_dump
  
- from .constants import REPOCARD_NAME
- from .utils import EntryNotFoundError, SoftTemporaryDirectory, logging, validate_hf_hub_args
+ from . import constants
+ from .errors import EntryNotFoundError
+ from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
  
  
  logger = logging.get_logger(__name__)
@@ -175,7 +176,7 @@ class RepoCard:
          card_path = Path(
              hf_hub_download(
                  repo_id_or_path,
-                 REPOCARD_NAME,
+                 constants.REPOCARD_NAME,
                  repo_type=repo_type or cls.repo_type,
                  token=token,
              )
@@ -273,11 +274,11 @@ class RepoCard:
          self.validate(repo_type=repo_type)
  
          with SoftTemporaryDirectory() as tmpdir:
-             tmp_path = Path(tmpdir) / REPOCARD_NAME
+             tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
              tmp_path.write_text(str(self))
              url = upload_file(
                  path_or_fileobj=str(tmp_path),
-                 path_in_repo=REPOCARD_NAME,
+                 path_in_repo=constants.REPOCARD_NAME,
                  repo_id=repo_id,
                  token=token,
                  repo_type=repo_type,
huggingface_hub/repository.py CHANGED
@@ -9,7 +9,7 @@ from pathlib import Path
  from typing import Callable, Dict, Iterator, List, Optional, Tuple, TypedDict, Union
  from urllib.parse import urlparse
  
- from huggingface_hub.constants import REPO_TYPES_URL_PREFIXES, REPOCARD_NAME
+ from huggingface_hub import constants
  from huggingface_hub.repocard import metadata_load, metadata_save
  
  from .hf_api import HfApi, repo_type_and_id_from_hf_id
@@ -659,8 +659,8 @@ class Repository:
  
          repo_url = hub_url + "/"
  
-         if self._repo_type in REPO_TYPES_URL_PREFIXES:
-             repo_url += REPO_TYPES_URL_PREFIXES[self._repo_type]
+         if self._repo_type in constants.REPO_TYPES_URL_PREFIXES:
+             repo_url += constants.REPO_TYPES_URL_PREFIXES[self._repo_type]
  
          if token is not None:
              # Add token in git url when provided
@@ -1434,13 +1434,13 @@ class Repository:
          os.chdir(current_working_directory)
  
      def repocard_metadata_load(self) -> Optional[Dict]:
-         filepath = os.path.join(self.local_dir, REPOCARD_NAME)
+         filepath = os.path.join(self.local_dir, constants.REPOCARD_NAME)
          if os.path.isfile(filepath):
              return metadata_load(filepath)
          return None
  
      def repocard_metadata_save(self, data: Dict) -> None:
-         return metadata_save(os.path.join(self.local_dir, REPOCARD_NAME), data)
+         return metadata_save(os.path.join(self.local_dir, constants.REPOCARD_NAME), data)
  
      @property
      def commands_failed(self):
huggingface_hub/serialization/_torch.py CHANGED
@@ -20,7 +20,7 @@ import re
  from collections import defaultdict
  from functools import lru_cache
  from pathlib import Path
- from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, Union
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
  
  from .. import constants, logging
  from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
@@ -336,17 +336,24 @@ def split_torch_state_dict_into_shards(
      )
  
  
- def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, int]:
+ def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+     """Returns a unique id for plain tensor
+     or a (potentially nested) Tuple of unique id for the flattened Tensor
+     if the input is a wrapper tensor subclass Tensor
      """
-     Return unique identifier to a tensor storage.
  
-     Multiple different tensors can share the same underlying storage. For
-     example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
-     guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
-     non-overlapping lifetimes may have the same id.
+     try:
+         # for torch 2.1 and above we can also handle tensor subclasses
+         from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+ 
+         if is_traceable_wrapper_subclass(tensor):
+             attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+             return tuple(_get_unique_id(getattr(tensor, attr)) for attr in attrs)
+ 
+     except ImportError:
+         # for torch version less than 2.1, we can fallback to original implementation
+         pass
  
-     Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
-     """
      if tensor.device.type == "xla" and is_torch_tpu_available():
          # NOTE: xla tensors dont have storage
          # use some other unique id to distinguish.
@@ -358,13 +365,38 @@ def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, i
      else:
          unique_id = storage_ptr(tensor)
  
-     return tensor.device, unique_id, get_torch_storage_size(tensor)
+     return unique_id
+ 
+ 
+ def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", Union[int, Tuple[Any, ...]], int]:
+     """
+     Return unique identifier to a tensor storage.
+ 
+     Multiple different tensors can share the same underlying storage. For
+     example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
+     guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
+     non-overlapping lifetimes may have the same id.
+ 
+     Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
+     """
+     return tensor.device, _get_unique_id(tensor), get_torch_storage_size(tensor)
  
  
  def get_torch_storage_size(tensor: "torch.Tensor") -> int:
      """
      Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L31C1-L41C59
      """
+     try:
+         # for torch 2.1 and above we can also handle tensor subclasses
+         from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+ 
+         if is_traceable_wrapper_subclass(tensor):
+             attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+             return sum(get_torch_storage_size(getattr(tensor, attr)) for attr in attrs)
+     except ImportError:
+         # for torch version less than 2.1, we can fallback to original implementation
+         pass
+ 
      try:
          return tensor.untyped_storage().nbytes()
      except AttributeError:
@@ -398,10 +430,20 @@ def is_torch_tpu_available(check_device=True):
          return False
  
  
- def storage_ptr(tensor: "torch.Tensor") -> int:
+ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
      """
      Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
      """
+     try:
+         # for torch 2.1 and above we can also handle tensor subclasses
+         from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+ 
+         if is_traceable_wrapper_subclass(tensor):
+             return _get_unique_id(tensor)
+     except ImportError:
+         # for torch version less than 2.1, we can fallback to original implementation
+         pass
+ 
      try:
          return tensor.untyped_storage().data_ptr()
      except Exception:
@@ -496,6 +538,17 @@ def _is_complete(tensor: "torch.Tensor") -> bool:
      """
      Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
      """
+     try:
+         # for torch 2.1 and above we can also handle tensor subclasses
+         from torch.utils._python_dispatch import is_traceable_wrapper_subclass
+ 
+         if is_traceable_wrapper_subclass(tensor):
+             attrs, _ = tensor.__tensor_flatten__()  # type: ignore[attr-defined]
+             return all(_is_complete(getattr(tensor, attr)) for attr in attrs)
+     except ImportError:
+         # for torch version less than 2.1, we can fallback to original implementation
+         pass
+ 
      return tensor.data_ptr() == storage_ptr(tensor) and tensor.nelement() * _get_dtype_size(
          tensor.dtype
      ) == get_torch_storage_size(tensor)
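The net effect of these helpers is storage-level deduplication when sharding a state dict: tensors that alias the same storage get the same id and are saved once. A quick sketch of that behavior (requires torch; imports from the private module where the helper is defined):

```python
import torch

from huggingface_hub.serialization._torch import get_torch_storage_id

base = torch.zeros(1024)
view = base[:128]          # a view: shares the underlying storage of `base`
other = torch.zeros(1024)  # an independent allocation

# Aliasing tensors share a storage id; distinct allocations do not.
assert get_torch_storage_id(base) == get_torch_storage_id(view)
assert get_torch_storage_id(base) != get_torch_storage_id(other)
```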
huggingface_hub/utils/__init__.py CHANGED
@@ -16,10 +16,21 @@
  # ruff: noqa: F401
  
  from huggingface_hub.errors import (
+     BadRequestError,
+     CacheNotFound,
+     CorruptedCacheException,
+     DisabledRepoError,
+     EntryNotFoundError,
+     FileMetadataError,
+     GatedRepoError,
+     HfHubHTTPError,
      HFValidationError,
+     LocalEntryNotFoundError,
      LocalTokenNotFoundError,
      NotASafetensorsRepoError,
      OfflineModeIsEnabled,
+     RepositoryNotFoundError,
+     RevisionNotFoundError,
      SafetensorsParsingError,
  )
  
@@ -29,26 +40,12 @@ from ._cache_manager import (
      CachedFileInfo,
      CachedRepoInfo,
      CachedRevisionInfo,
-     CacheNotFound,
-     CorruptedCacheException,
      DeleteCacheStrategy,
      HFCacheInfo,
      scan_cache_dir,
  )
  from ._chunk_utils import chunk_iterable
  from ._datetime import parse_datetime
- from ._errors import (
-     BadRequestError,
-     DisabledRepoError,
-     EntryNotFoundError,
-     FileMetadataError,
-     GatedRepoError,
-     HfHubHTTPError,
-     LocalEntryNotFoundError,
-     RepositoryNotFoundError,
-     RevisionNotFoundError,
-     hf_raise_for_status,
- )
  from ._experimental import experimental
  from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
  from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
@@ -58,6 +55,7 @@ from ._http import (
      configure_http_backend,
      fix_hf_endpoint_in_url,
      get_session,
+     hf_raise_for_status,
      http_backoff,
      reset_sessions,
  )
@@ -84,6 +82,7 @@ from ._runtime import (
      get_tf_version,
      get_torch_version,
      is_aiohttp_available,
+     is_colab_enterprise,
      is_fastai_available,
      is_fastapi_available,
      is_fastcore_available,
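With this consolidation (see the `errors.py +218 -1` entry above), `huggingface_hub.errors` becomes the single canonical home for exceptions, while the `huggingface_hub.utils` names remain as re-exports for backward compatibility. A minimal sketch of the preferred import path:

```python
from huggingface_hub import hf_hub_download
from huggingface_hub.errors import EntryNotFoundError, RepositoryNotFoundError

try:
    hf_hub_download(repo_id="gpt2", filename="does-not-exist.bin")
except RepositoryNotFoundError:
    print("repo does not exist (or is private/gated)")
except EntryNotFoundError:
    print("repo exists but the file is missing")
```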
huggingface_hub/utils/_cache_manager.py CHANGED
@@ -22,6 +22,9 @@ from dataclasses import dataclass
  from pathlib import Path
  from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
  
+ from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
+ 
+ from ..commands._cli_utils import tabulate
  from ..constants import HF_HUB_CACHE
  from . import logging
  
@@ -34,20 +37,6 @@ REPO_TYPE_T = Literal["model", "dataset", "space"]
  FILES_TO_IGNORE = [".DS_Store"]
  
  
- class CacheNotFound(Exception):
-     """Exception thrown when the Huggingface cache is not found."""
- 
-     cache_dir: Union[str, Path]
- 
-     def __init__(self, msg: str, cache_dir: Union[str, Path], *args, **kwargs):
-         super().__init__(msg, *args, **kwargs)
-         self.cache_dir = cache_dir
- 
- 
- class CorruptedCacheException(Exception):
-     """Exception for any unexpected structure in the Huggingface cache-system."""
- 
- 
  @dataclass(frozen=True)
  class CachedFileInfo:
      """Frozen data structure holding information about a single cached file.
@@ -496,6 +485,100 @@ class HFCacheInfo:
              expected_freed_size=delete_strategy_expected_freed_size,
          )
  
+     def export_as_table(self, *, verbosity: int = 0) -> str:
+         """Generate a table from the [`HFCacheInfo`] object.
+ 
+         Pass `verbosity=0` to get a table with a single row per repo, with columns
+         "repo_id", "repo_type", "size_on_disk", "nb_files", "last_accessed", "last_modified", "refs", "local_path".
+ 
+         Pass `verbosity=1` to get a table with a row per repo and revision (thus multiple rows can appear for a single repo), with columns
+         "repo_id", "repo_type", "revision", "size_on_disk", "nb_files", "last_modified", "refs", "local_path".
+ 
+         Example:
+         ```py
+         >>> from huggingface_hub.utils import scan_cache_dir
+ 
+         >>> hf_cache_info = scan_cache_dir()
+         HFCacheInfo(...)
+ 
+         >>> print(hf_cache_info.export_as_table())
+         REPO ID                                             REPO TYPE SIZE ON DISK NB FILES LAST_ACCESSED LAST_MODIFIED REFS LOCAL PATH
+         --------------------------------------------------- --------- ------------ -------- ------------- ------------- ---- --------------------------------------------------------------------------------------------------
+         roberta-base                                        model             2.7M        5 1 day ago     1 week ago    main ~/.cache/huggingface/hub/models--roberta-base
+         suno/bark                                           model             8.8K        1 1 week ago    1 week ago    main ~/.cache/huggingface/hub/models--suno--bark
+         t5-base                                             model           893.8M        4 4 days ago    7 months ago  main ~/.cache/huggingface/hub/models--t5-base
+         t5-large                                            model             3.0G        4 5 weeks ago   5 months ago  main ~/.cache/huggingface/hub/models--t5-large
+ 
+         >>> print(hf_cache_info.export_as_table(verbosity=1))
+         REPO ID                                             REPO TYPE REVISION                                 SIZE ON DISK NB FILES LAST_MODIFIED REFS LOCAL PATH
+         --------------------------------------------------- --------- ---------------------------------------- ------------ -------- ------------- ---- -----------------------------------------------------------------------------------------------------------------------------------------------------
+         roberta-base                                        model     e2da8e2f811d1448a5b465c236feacd80ffbac7b         2.7M        5 1 week ago    main ~/.cache/huggingface/hub/models--roberta-base/snapshots/e2da8e2f811d1448a5b465c236feacd80ffbac7b
+         suno/bark                                           model     70a8a7d34168586dc5d028fa9666aceade177992         8.8K        1 1 week ago    main ~/.cache/huggingface/hub/models--suno--bark/snapshots/70a8a7d34168586dc5d028fa9666aceade177992
+         t5-base                                             model     a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1       893.8M        4 7 months ago  main ~/.cache/huggingface/hub/models--t5-base/snapshots/a9723ea7f1b39c1eae772870f3b547bf6ef7e6c1
+         t5-large                                            model     150ebc2c4b72291e770f58e6057481c8d2ed331a         3.0G        4 5 months ago  main ~/.cache/huggingface/hub/models--t5-large/snapshots/150ebc2c4b72291e770f58e6057481c8d2ed331a
+         ```
+ 
+         Args:
+             verbosity (`int`, *optional*):
+                 The verbosity level. Defaults to 0.
+ 
+         Returns:
+             `str`: The table as a string.
+         """
+         if verbosity == 0:
+             return tabulate(
+                 rows=[
+                     [
+                         repo.repo_id,
+                         repo.repo_type,
+                         "{:>12}".format(repo.size_on_disk_str),
+                         repo.nb_files,
+                         repo.last_accessed_str,
+                         repo.last_modified_str,
+                         ", ".join(sorted(repo.refs)),
+                         str(repo.repo_path),
+                     ]
+                     for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                 ],
+                 headers=[
+                     "REPO ID",
+                     "REPO TYPE",
+                     "SIZE ON DISK",
+                     "NB FILES",
+                     "LAST_ACCESSED",
+                     "LAST_MODIFIED",
+                     "REFS",
+                     "LOCAL PATH",
+                 ],
+             )
+         else:
+             return tabulate(
+                 rows=[
+                     [
+                         repo.repo_id,
+                         repo.repo_type,
+                         revision.commit_hash,
+                         "{:>12}".format(revision.size_on_disk_str),
+                         revision.nb_files,
+                         revision.last_modified_str,
+                         ", ".join(sorted(revision.refs)),
+                         str(revision.snapshot_path),
+                     ]
+                     for repo in sorted(self.repos, key=lambda repo: repo.repo_path)
+                     for revision in sorted(repo.revisions, key=lambda revision: revision.commit_hash)
+                 ],
+                 headers=[
+                     "REPO ID",
+                     "REPO TYPE",
+                     "REVISION",
+                     "SIZE ON DISK",
+                     "NB FILES",
+                     "LAST_MODIFIED",
+                     "REFS",
+                     "LOCAL PATH",
+                 ],
+             )
+ 
  
  
  def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
      """Scan the entire HF cache-system and return a [`~HFCacheInfo`] structure.
huggingface_hub/utils/_fixes.py CHANGED
@@ -18,7 +18,7 @@ from pathlib import Path
  from typing import Callable, Generator, Optional, Union
  
  import yaml
- from filelock import BaseFileLock, FileLock, Timeout
+ from filelock import BaseFileLock, FileLock, SoftFileLock, Timeout
  
  from .. import constants
  from . import logging
@@ -84,13 +84,29 @@ def _set_write_permission_and_retry(func, path, excinfo):
  
  @contextlib.contextmanager
  def WeakFileLock(lock_file: Union[str, Path]) -> Generator[BaseFileLock, None, None]:
-     """A filelock that won't raise an exception if release fails."""
+     """A filelock with some custom logic.
+ 
+     This filelock is weaker than the default filelock in that:
+     1. It won't raise an exception if release fails.
+     2. It will default to a SoftFileLock if the filesystem does not support flock.
+ 
+     An INFO log message is emitted every 10 seconds if the lock is not acquired immediately.
+     """
      lock = FileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
      while True:
          try:
              lock.acquire()
          except Timeout:
              logger.info("still waiting to acquire lock on %s", lock_file)
+         except NotImplementedError as e:
+             if "use SoftFileLock instead" in str(e):
+                 # It's possible that the system does support flock, except for one partition or filesystem.
+                 # In this case, let's default to a SoftFileLock.
+                 logger.warning(
+                     "FileSystem does not appear to support flock. Falling back to SoftFileLock for %s", lock_file
+                 )
+                 lock = SoftFileLock(lock_file, timeout=constants.FILELOCK_LOG_EVERY_SECONDS)
+                 continue
          else:
              break
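A short usage sketch of the hardened lock (the lock-file path here is illustrative):

```python
from huggingface_hub.utils import WeakFileLock

# Serializes writers across processes; if the filesystem rejects flock
# (e.g. some network mounts), the context manager retries with a SoftFileLock.
with WeakFileLock("/tmp/example.lock"):
    with open("/tmp/example.txt", "a") as f:
        f.write("one writer at a time\n")
```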