huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.

Files changed (132)
  1. huggingface_hub/__init__.py +33 -45
  2. huggingface_hub/_commit_api.py +39 -43
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +17 -43
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +135 -50
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +18 -32
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +143 -39
  16. huggingface_hub/cli/auth.py +105 -171
  17. huggingface_hub/cli/cache.py +594 -361
  18. huggingface_hub/cli/download.py +120 -112
  19. huggingface_hub/cli/hf.py +38 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +282 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -220
  26. huggingface_hub/cli/upload_large_folder.py +91 -106
  27. huggingface_hub/community.py +5 -5
  28. huggingface_hub/constants.py +17 -52
  29. huggingface_hub/dataclasses.py +135 -21
  30. huggingface_hub/errors.py +47 -30
  31. huggingface_hub/fastai_utils.py +8 -9
  32. huggingface_hub/file_download.py +351 -303
  33. huggingface_hub/hf_api.py +398 -570
  34. huggingface_hub/hf_file_system.py +101 -66
  35. huggingface_hub/hub_mixin.py +32 -54
  36. huggingface_hub/inference/_client.py +177 -162
  37. huggingface_hub/inference/_common.py +38 -54
  38. huggingface_hub/inference/_generated/_async_client.py +218 -258
  39. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  40. huggingface_hub/inference/_generated/types/base.py +10 -7
  41. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  42. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  43. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  44. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  45. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  46. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  47. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  48. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  49. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  50. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  51. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  52. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  53. huggingface_hub/inference/_generated/types/translation.py +2 -2
  54. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  55. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  56. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  57. huggingface_hub/inference/_mcp/agent.py +3 -3
  58. huggingface_hub/inference/_mcp/constants.py +1 -2
  59. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  60. huggingface_hub/inference/_mcp/types.py +10 -10
  61. huggingface_hub/inference/_mcp/utils.py +4 -4
  62. huggingface_hub/inference/_providers/__init__.py +12 -4
  63. huggingface_hub/inference/_providers/_common.py +62 -24
  64. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  65. huggingface_hub/inference/_providers/cohere.py +3 -3
  66. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  67. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  68. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  69. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  70. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  71. huggingface_hub/inference/_providers/nebius.py +10 -10
  72. huggingface_hub/inference/_providers/novita.py +5 -5
  73. huggingface_hub/inference/_providers/nscale.py +4 -4
  74. huggingface_hub/inference/_providers/replicate.py +15 -15
  75. huggingface_hub/inference/_providers/sambanova.py +6 -6
  76. huggingface_hub/inference/_providers/together.py +7 -7
  77. huggingface_hub/lfs.py +21 -94
  78. huggingface_hub/repocard.py +15 -16
  79. huggingface_hub/repocard_data.py +57 -57
  80. huggingface_hub/serialization/__init__.py +0 -1
  81. huggingface_hub/serialization/_base.py +9 -9
  82. huggingface_hub/serialization/_dduf.py +7 -7
  83. huggingface_hub/serialization/_torch.py +28 -28
  84. huggingface_hub/utils/__init__.py +11 -6
  85. huggingface_hub/utils/_auth.py +5 -5
  86. huggingface_hub/utils/_cache_manager.py +49 -74
  87. huggingface_hub/utils/_deprecation.py +1 -1
  88. huggingface_hub/utils/_dotenv.py +3 -3
  89. huggingface_hub/utils/_fixes.py +0 -10
  90. huggingface_hub/utils/_git_credential.py +3 -3
  91. huggingface_hub/utils/_headers.py +7 -29
  92. huggingface_hub/utils/_http.py +371 -208
  93. huggingface_hub/utils/_pagination.py +4 -4
  94. huggingface_hub/utils/_parsing.py +98 -0
  95. huggingface_hub/utils/_paths.py +5 -5
  96. huggingface_hub/utils/_runtime.py +59 -23
  97. huggingface_hub/utils/_safetensors.py +21 -21
  98. huggingface_hub/utils/_subprocess.py +9 -9
  99. huggingface_hub/utils/_telemetry.py +3 -3
  100. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
  101. huggingface_hub/utils/_typing.py +3 -3
  102. huggingface_hub/utils/_validators.py +53 -72
  103. huggingface_hub/utils/_xet.py +16 -16
  104. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  105. huggingface_hub/utils/insecure_hashlib.py +3 -9
  106. huggingface_hub/utils/tqdm.py +3 -3
  107. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
  108. huggingface_hub-1.0.0.dist-info/RECORD +152 -0
  109. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
  110. huggingface_hub/commands/__init__.py +0 -27
  111. huggingface_hub/commands/delete_cache.py +0 -476
  112. huggingface_hub/commands/download.py +0 -204
  113. huggingface_hub/commands/env.py +0 -39
  114. huggingface_hub/commands/huggingface_cli.py +0 -65
  115. huggingface_hub/commands/lfs.py +0 -200
  116. huggingface_hub/commands/repo.py +0 -151
  117. huggingface_hub/commands/repo_files.py +0 -132
  118. huggingface_hub/commands/scan_cache.py +0 -183
  119. huggingface_hub/commands/tag.py +0 -161
  120. huggingface_hub/commands/upload.py +0 -318
  121. huggingface_hub/commands/upload_large_folder.py +0 -131
  122. huggingface_hub/commands/user.py +0 -208
  123. huggingface_hub/commands/version.py +0 -40
  124. huggingface_hub/inference_api.py +0 -217
  125. huggingface_hub/keras_mixin.py +0 -497
  126. huggingface_hub/repository.py +0 -1471
  127. huggingface_hub/serialization/_tensorflow.py +0 -92
  128. huggingface_hub/utils/_hf_folder.py +0 -68
  129. huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
  130. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
  131. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
  132. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
huggingface_hub/serialization/_torch.py

@@ -20,7 +20,7 @@ import re
 from collections import defaultdict, namedtuple
 from functools import lru_cache
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, NamedTuple, Optional, Set, Tuple, Union
+from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Optional, Union

 from packaging import version

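This first hunk sets the tone for most of the release: the `typing`-module aliases (`Dict`, `List`, `Set`, `Tuple`, `FrozenSet`) are dropped in favor of the built-in generics standardized by PEP 585, available since Python 3.9. A minimal before/after sketch (the `lookup` function is hypothetical, purely illustrative):

```python
from typing import Optional

# Pre-1.0 style: aliases imported from `typing`.
# from typing import Dict, List
# def lookup(index: Dict[str, List[int]], key: str) -> Optional[List[int]]: ...

# 1.0 style: built-in types are subscriptable directly (PEP 585, Python >= 3.9).
def lookup(index: dict[str, list[int]], key: str) -> Optional[list[int]]:
    """Return the entry for `key`, or None if absent."""
    return index.get(key)
```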
@@ -43,10 +43,10 @@ def save_torch_model(
     filename_pattern: Optional[str] = None,
     force_contiguous: bool = True,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
-    metadata: Optional[Dict[str, str]] = None,
+    metadata: Optional[dict[str, str]] = None,
     safe_serialization: bool = True,
     is_main_process: bool = True,
-    shared_tensors_to_discard: Optional[List[str]] = None,
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ):
     """
     Saves a given torch model to disk, handling sharding and shared tensors issues.
@@ -86,7 +86,7 @@ def save_torch_model(
             that reason. Defaults to `True`.
         max_shard_size (`int` or `str`, *optional*):
             The maximum size of each shard, in bytes. Defaults to 5GB.
-        metadata (`Dict[str, str]`, *optional*):
+        metadata (`dict[str, str]`, *optional*):
             Extra information to save along with the model. Some metadata will be added for each dropped tensors.
             This information will not be enough to recover the entire shared structure but might help understanding
             things.
@@ -98,7 +98,7 @@ def save_torch_state_dict(
             Whether the process calling this is the main process or not. Useful when in distributed training like
             TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
             the main process to avoid race conditions. Defaults to True.
-        shared_tensors_to_discard (`List[str]`, *optional*):
+        shared_tensors_to_discard (`list[str]`, *optional*):
             List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
             detected, it will drop the first name alphabetically.

@@ -131,16 +131,16 @@ def save_torch_model(


 def save_torch_state_dict(
-    state_dict: Dict[str, "torch.Tensor"],
+    state_dict: dict[str, "torch.Tensor"],
     save_directory: Union[str, Path],
     *,
     filename_pattern: Optional[str] = None,
     force_contiguous: bool = True,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
-    metadata: Optional[Dict[str, str]] = None,
+    metadata: Optional[dict[str, str]] = None,
     safe_serialization: bool = True,
     is_main_process: bool = True,
-    shared_tensors_to_discard: Optional[List[str]] = None,
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ) -> None:
     """
     Save a model state dictionary to the disk, handling sharding and shared tensors issues.
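For orientation, here is roughly how the updated `save_torch_state_dict` is called; the model and target directory below are placeholders:

```python
import torch
from huggingface_hub import save_torch_state_dict

model = torch.nn.Linear(4, 2)  # any nn.Module works the same way; placeholder

# Writes sharded safetensors files into ./checkpoint, deduplicating
# shared tensors and splitting according to max_shard_size.
save_torch_state_dict(
    state_dict=model.state_dict(),
    save_directory="./checkpoint",
    max_shard_size="5GB",
)
```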
@@ -165,7 +165,7 @@ def save_torch_state_dict(
     > If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.

     Args:
-        state_dict (`Dict[str, torch.Tensor]`):
+        state_dict (`dict[str, torch.Tensor]`):
             The state dictionary to save.
         save_directory (`str` or `Path`):
             The directory in which the model will be saved.
@@ -180,7 +180,7 @@ def save_torch_state_dict(
             that reason. Defaults to `True`.
         max_shard_size (`int` or `str`, *optional*):
             The maximum size of each shard, in bytes. Defaults to 5GB.
-        metadata (`Dict[str, str]`, *optional*):
+        metadata (`dict[str, str]`, *optional*):
             Extra information to save along with the model. Some metadata will be added for each dropped tensors.
             This information will not be enough to recover the entire shared structure but might help understanding
             things.
@@ -192,7 +192,7 @@ def save_torch_state_dict(
             Whether the process calling this is the main process or not. Useful when in distributed training like
             TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
             the main process to avoid race conditions. Defaults to True.
-        shared_tensors_to_discard (`List[str]`, *optional*):
+        shared_tensors_to_discard (`list[str]`, *optional*):
             List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
             detected, it will drop the first name alphabetically.

@@ -288,7 +288,7 @@ def save_torch_state_dict(


 def split_torch_state_dict_into_shards(
-    state_dict: Dict[str, "torch.Tensor"],
+    state_dict: dict[str, "torch.Tensor"],
     *,
     filename_pattern: str = constants.SAFETENSORS_WEIGHTS_FILE_PATTERN,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
@@ -311,7 +311,7 @@ def split_torch_state_dict_into_shards(
     > size greater than `max_shard_size`.

     Args:
-        state_dict (`Dict[str, torch.Tensor]`):
+        state_dict (`dict[str, torch.Tensor]`):
             The state dictionary to save.
         filename_pattern (`str`, *optional*):
             The pattern to generate the files names in which the model will be saved. Pattern must be a string that
@@ -330,7 +330,7 @@ def split_torch_state_dict_into_shards(
     >>> from safetensors.torch import save_file as safe_save_file
     >>> from huggingface_hub import split_torch_state_dict_into_shards

-    >>> def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
+    >>> def save_state_dict(state_dict: dict[str, torch.Tensor], save_directory: str):
     ...     state_dict_split = split_torch_state_dict_into_shards(state_dict)
     ...     for filename, tensors in state_dict_split.filename_to_tensors.items():
     ...         shard = {tensor: state_dict[tensor] for tensor in tensors}
@@ -542,7 +542,7 @@ def load_state_dict_from_file(
     map_location: Optional[Union[str, "torch.device"]] = None,
     weights_only: bool = False,
     mmap: bool = False,
-) -> Union[Dict[str, "torch.Tensor"], Any]:
+) -> Union[dict[str, "torch.Tensor"], Any]:
     """
     Loads a checkpoint file, handling both safetensors and pickle checkpoint formats.

@@ -562,7 +562,7 @@ def load_state_dict_from_file(
         loading safetensors files, as the `safetensors` library uses memory mapping by default.

     Returns:
-        `Union[Dict[str, "torch.Tensor"], Any]`: The loaded checkpoint.
+        `Union[dict[str, "torch.Tensor"], Any]`: The loaded checkpoint.
         - For safetensors files: always returns a dictionary mapping parameter names to tensors.
         - For pickle files: returns any Python object that was pickled (commonly a state dict, but could be
           an entire model, optimizer state, or any other Python object).
@@ -682,7 +682,7 @@ def _validate_keys_for_strict_loading(
         raise RuntimeError(error_message)


-def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+def _get_unique_id(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
     """Returns a unique id for plain tensor
     or a (potentially nested) Tuple of unique id for the flattened Tensor
     if the input is a wrapper tensor subclass Tensor
@@ -723,7 +723,7 @@ def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
     return unique_id


-def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[Tuple["torch.device", Union[int, Tuple[Any, ...]], int]]:
+def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[tuple["torch.device", Union[int, tuple[Any, ...]], int]]:
     """
     Return unique identifier to a tensor storage.

@@ -797,7 +797,7 @@ def is_torch_tpu_available(check_device=True):
     return False


-def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+def storage_ptr(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
     """
@@ -823,10 +823,10 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:


 def _clean_state_dict_for_safetensors(
-    state_dict: Dict[str, "torch.Tensor"],
-    metadata: Dict[str, str],
+    state_dict: dict[str, "torch.Tensor"],
+    metadata: dict[str, str],
     force_contiguous: bool = True,
-    shared_tensors_to_discard: Optional[List[str]] = None,
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ):
     """Remove shared tensors from state_dict and update metadata accordingly (for reloading).

@@ -860,7 +860,7 @@ def _end_ptr(tensor: "torch.Tensor") -> int:
     return stop


-def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "torch.Tensor"]) -> List[Set[str]]:
+def _filter_shared_not_shared(tensors: list[set[str]], state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L44
     """
@@ -888,7 +888,7 @@ def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "to
     return filtered_tensors


-def _find_shared_tensors(state_dict: Dict[str, "torch.Tensor"]) -> List[Set[str]]:
+def _find_shared_tensors(state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L69.
     """
@@ -925,11 +925,11 @@ def _is_complete(tensor: "torch.Tensor") -> bool:


 def _remove_duplicate_names(
-    state_dict: Dict[str, "torch.Tensor"],
+    state_dict: dict[str, "torch.Tensor"],
     *,
-    preferred_names: Optional[List[str]] = None,
-    discard_names: Optional[List[str]] = None,
-) -> Dict[str, List[str]]:
+    preferred_names: Optional[list[str]] = None,
+    discard_names: Optional[list[str]] = None,
+) -> dict[str, list[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
     """
huggingface_hub/utils/__init__.py

@@ -42,6 +42,7 @@ from ._cache_manager import (
     CachedRevisionInfo,
     DeleteCacheStrategy,
     HFCacheInfo,
+    _format_size,
     scan_cache_dir,
 )
 from ._chunk_utils import chunk_iterable
@@ -50,14 +51,18 @@ from ._experimental import experimental
 from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
 from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
 from ._headers import build_hf_headers, get_token_to_send
-from ._hf_folder import HfFolder
 from ._http import (
-    configure_http_backend,
+    ASYNC_CLIENT_FACTORY_T,
+    CLIENT_FACTORY_T,
+    close_session,
     fix_hf_endpoint_in_url,
+    get_async_session,
     get_session,
     hf_raise_for_status,
     http_backoff,
-    reset_sessions,
+    http_stream_backoff,
+    set_async_client_factory,
+    set_client_factory,
 )
 from ._pagination import paginate
 from ._paths import DEFAULT_IGNORE_PATTERNS, FORBIDDEN_FOLDERS, filter_repo_objects
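This import block captures one of the larger 1.0 API breaks: the `requests`-era `configure_http_backend`/`reset_sessions` pair is gone, replaced by sync and async client factories as the HTTP stack moves to `httpx`. A plausible usage sketch, assuming the factory is a zero-argument callable returning an `httpx.Client` as the `CLIENT_FACTORY_T` name suggests (the timeout value is illustrative):

```python
import httpx
from huggingface_hub.utils import get_session, set_client_factory

# Install a custom client, e.g. with a longer timeout or a proxy.
set_client_factory(lambda: httpx.Client(timeout=30.0))

session = get_session()  # the client produced by the factory above
```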
@@ -70,7 +75,6 @@ from ._runtime import (
     get_gradio_version,
     get_graphviz_version,
     get_hf_hub_version,
-    get_hf_transfer_version,
     get_jinja_version,
     get_numpy_version,
     get_pillow_version,
@@ -80,6 +84,7 @@ from ._runtime import (
     get_tensorboard_version,
     get_tf_version,
     get_torch_version,
+    installation_method,
     is_aiohttp_available,
     is_colab_enterprise,
     is_fastai_available,
@@ -88,7 +93,6 @@ from ._runtime import (
     is_google_colab,
     is_gradio_available,
     is_graphviz_available,
-    is_hf_transfer_available,
     is_jinja_available,
     is_notebook,
     is_numpy_available,
@@ -104,8 +108,9 @@ from ._runtime import (
 from ._safetensors import SafetensorsFileMetadata, SafetensorsRepoMetadata, TensorInfo
 from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
 from ._telemetry import send_telemetry
+from ._terminal import ANSI, tabulate
 from ._typing import is_jsonable, is_simple_optional_type, unwrap_simple_optional_type
-from ._validators import smoothly_deprecate_use_auth_token, validate_hf_hub_args, validate_repo_id
+from ._validators import validate_hf_hub_args, validate_repo_id
 from ._xet import (
     XetConnectionInfo,
     XetFileData,
huggingface_hub/utils/_auth.py

@@ -19,7 +19,7 @@ import os
 import warnings
 from pathlib import Path
 from threading import Lock
-from typing import Dict, Optional
+from typing import Optional

 from .. import constants
 from ._runtime import is_colab_enterprise, is_google_colab
@@ -125,13 +125,13 @@ def _get_token_from_file() -> Optional[str]:
     return None


-def get_stored_tokens() -> Dict[str, str]:
+def get_stored_tokens() -> dict[str, str]:
     """
     Returns the parsed INI file containing the access tokens.
     The file is located at `HF_STORED_TOKENS_PATH`, defaulting to `~/.cache/huggingface/stored_tokens`.
     If the file does not exist, an empty dictionary is returned.

-    Returns: `Dict[str, str]`
+    Returns: `dict[str, str]`
         Key is the token name and value is the token.
     """
     tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
@@ -147,12 +147,12 @@ def get_stored_tokens() -> Dict[str, str]:
     return stored_tokens


-def _save_stored_tokens(stored_tokens: Dict[str, str]) -> None:
+def _save_stored_tokens(stored_tokens: dict[str, str]) -> None:
     """
     Saves the given configuration to the stored tokens file.

     Args:
-        stored_tokens (`Dict[str, str]`):
+        stored_tokens (`dict[str, str]`):
             The stored tokens to save. Key is the token name and value is the token.
     """
     stored_tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
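A brief behavior sketch for these helpers, following the docstrings above (token names below are fabricated placeholders; note `_auth` is a private module, so the import path may change without notice):

```python
from huggingface_hub.utils._auth import get_stored_tokens

# Parses the INI file at HF_STORED_TOKENS_PATH
# (default: ~/.cache/huggingface/stored_tokens); returns {} if missing.
tokens = get_stored_tokens()  # e.g. {"work": "hf_...", "personal": "hf_..."}
print(sorted(tokens))  # print names only, never the token values
```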
huggingface_hub/utils/_cache_manager.py

@@ -16,17 +16,17 @@

 import os
 import shutil
-import time
 from collections import defaultdict
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
+from typing import Literal, Optional, Union

 from huggingface_hub.errors import CacheNotFound, CorruptedCacheException

-from ..commands._cli_utils import tabulate
 from ..constants import HF_HUB_CACHE
 from . import logging
+from ._parsing import format_timesince
+from ._terminal import tabulate


 logger = logging.get_logger(__name__)
@@ -79,7 +79,7 @@ class CachedFileInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.blob_last_accessed)
+        return format_timesince(self.blob_last_accessed)

     @property
     def blob_last_modified_str(self) -> str:
@@ -89,7 +89,7 @@ class CachedFileInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.blob_last_modified)
+        return format_timesince(self.blob_last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -116,9 +116,9 @@ class CachedRevisionInfo:
         snapshot_path (`Path`):
             Path to the revision directory in the `snapshots` folder. It contains the
             exact tree structure as the repo on the Hub.
-        files: (`FrozenSet[CachedFileInfo]`):
+        files: (`frozenset[CachedFileInfo]`):
             Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
-        refs (`FrozenSet[str]`):
+        refs (`frozenset[str]`):
             Set of `refs` pointing to this revision. If the revision has no `refs`, it
             is considered detached.
             Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
@@ -140,8 +140,8 @@ class CachedRevisionInfo:
     commit_hash: str
     snapshot_path: Path
     size_on_disk: int
-    files: FrozenSet[CachedFileInfo]
-    refs: FrozenSet[str]
+    files: frozenset[CachedFileInfo]
+    refs: frozenset[str]

     last_modified: float

@@ -153,7 +153,7 @@ class CachedRevisionInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_modified)
+        return format_timesince(self.last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -187,7 +187,7 @@ class CachedRepoInfo:
             Sum of the blob file sizes in the cached repo.
         nb_files (`int`):
             Total number of blob files in the cached repo.
-        revisions (`FrozenSet[CachedRevisionInfo]`):
+        revisions (`frozenset[CachedRevisionInfo]`):
             Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
         last_accessed (`float`):
             Timestamp of the last time a blob file of the repo has been accessed.
@@ -210,7 +210,7 @@ class CachedRepoInfo:
     repo_path: Path
     size_on_disk: int
     nb_files: int
-    revisions: FrozenSet[CachedRevisionInfo]
+    revisions: frozenset[CachedRevisionInfo]

     last_accessed: float
     last_modified: float
@@ -223,7 +223,7 @@ class CachedRepoInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_accessed)
+        return format_timesince(self.last_accessed)

     @property
     def last_modified_str(self) -> str:
@@ -233,7 +233,7 @@ class CachedRepoInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_modified)
+        return format_timesince(self.last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -245,7 +245,12 @@ class CachedRepoInfo:
         return _format_size(self.size_on_disk)

     @property
-    def refs(self) -> Dict[str, CachedRevisionInfo]:
+    def cache_id(self) -> str:
+        """Canonical `type/id` identifier used across cache tooling."""
+        return f"{self.repo_type}/{self.repo_id}"
+
+    @property
+    def refs(self) -> dict[str, CachedRevisionInfo]:
         """
         (property) Mapping between `refs` and revision data structures.
         """
@@ -262,21 +267,21 @@ class DeleteCacheStrategy:
     Args:
         expected_freed_size (`float`):
             Expected freed size once strategy is executed.
-        blobs (`FrozenSet[Path]`):
+        blobs (`frozenset[Path]`):
             Set of blob file paths to be deleted.
-        refs (`FrozenSet[Path]`):
+        refs (`frozenset[Path]`):
             Set of reference file paths to be deleted.
-        repos (`FrozenSet[Path]`):
+        repos (`frozenset[Path]`):
             Set of entire repo paths to be deleted.
-        snapshots (`FrozenSet[Path]`):
+        snapshots (`frozenset[Path]`):
             Set of snapshots to be deleted (directory of symlinks).
     """

     expected_freed_size: int
-    blobs: FrozenSet[Path]
-    refs: FrozenSet[Path]
-    repos: FrozenSet[Path]
-    snapshots: FrozenSet[Path]
+    blobs: frozenset[Path]
+    refs: frozenset[Path]
+    repos: frozenset[Path]
+    snapshots: frozenset[Path]

     @property
     def expected_freed_size_str(self) -> str:
@@ -331,10 +336,10 @@ class HFCacheInfo:
     Args:
         size_on_disk (`int`):
             Sum of all valid repo sizes in the cache-system.
-        repos (`FrozenSet[CachedRepoInfo]`):
+        repos (`frozenset[CachedRepoInfo]`):
             Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
             cache-system while scanning.
-        warnings (`List[CorruptedCacheException]`):
+        warnings (`list[CorruptedCacheException]`):
             List of [`~CorruptedCacheException`] that occurred while scanning the cache.
             Those exceptions are captured so that the scan can continue. Corrupted repos
             are skipped from the scan.
@@ -345,8 +350,8 @@ class HFCacheInfo:
     """

     size_on_disk: int
-    repos: FrozenSet[CachedRepoInfo]
-    warnings: List[CorruptedCacheException]
+    repos: frozenset[CachedRepoInfo]
+    warnings: list[CorruptedCacheException]

     @property
     def size_on_disk_str(self) -> str:
@@ -393,9 +398,9 @@ class HFCacheInfo:
         > be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
         > allows having a dry run before actually executing the deletion.
         """
-        hashes_to_delete: Set[str] = set(revisions)
+        hashes_to_delete: set[str] = set(revisions)

-        repos_with_revisions: Dict[CachedRepoInfo, Set[CachedRevisionInfo]] = defaultdict(set)
+        repos_with_revisions: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)

         for repo in self.repos:
             for revision in repo.revisions:
@@ -406,10 +411,10 @@ class HFCacheInfo:
         if len(hashes_to_delete) > 0:
             logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

-        delete_strategy_blobs: Set[Path] = set()
-        delete_strategy_refs: Set[Path] = set()
-        delete_strategy_repos: Set[Path] = set()
-        delete_strategy_snapshots: Set[Path] = set()
+        delete_strategy_blobs: set[Path] = set()
+        delete_strategy_refs: set[Path] = set()
+        delete_strategy_repos: set[Path] = set()
+        delete_strategy_snapshots: set[Path] = set()
         delete_strategy_expected_freed_size = 0

         for affected_repo, revisions_to_delete in repos_with_revisions.items():
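The two hunks above only modernize annotations inside `delete_revisions`, but they sit in the dry-run deletion flow the docstring describes; a short sketch with a placeholder revision hash:

```python
from huggingface_hub import scan_cache_dir

cache_info = scan_cache_dir()

# Build the strategy first: nothing is deleted yet (dry run).
strategy = cache_info.delete_revisions("abcdef123456")  # placeholder hash
print(f"Would free {strategy.expected_freed_size_str}.")

strategy.execute()  # only this call removes blobs/refs/snapshots
```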
@@ -607,15 +612,12 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:

     You can also print a detailed report directly from the `hf` command line using:
     ```text
-    > hf cache scan
-    REPO ID                     REPO TYPE SIZE ON DISK NB FILES REFS                LOCAL PATH
-    --------------------------- --------- ------------ -------- ------------------- -------------------------------------------------------------------------
-    glue                        dataset         116.3K       15 1.17.0, main, 2.4.0 /Users/lucain/.cache/huggingface/hub/datasets--glue
-    google/fleurs               dataset          64.9M        6 main, refs/pr/1     /Users/lucain/.cache/huggingface/hub/datasets--google--fleurs
-    Jean-Baptiste/camembert-ner model          441.0M        7 main                /Users/lucain/.cache/huggingface/hub/models--Jean-Baptiste--camembert-ner
-    bert-base-cased             model            1.9G       13 main                /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
-    t5-base                     model           10.1K        3 main                /Users/lucain/.cache/huggingface/hub/models--t5-base
-    t5-small                    model          970.7M       11 refs/pr/1, main     /Users/lucain/.cache/huggingface/hub/models--t5-small
+    > hf cache ls
+    ID                          SIZE     LAST_ACCESSED LAST_MODIFIED REFS
+    --------------------------- -------- ------------- ------------- -----------
+    dataset/nyu-mll/glue        157.4M   2 days ago    2 days ago    main script
+    model/LiquidAI/LFM2-VL-1.6B 3.2G     4 days ago    4 days ago    main
+    model/microsoft/UserLM-8b   32.1G    4 days ago    4 days ago    main

     Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
     Got 1 warning(s) while scanning. Use -vvv to print details.
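The docstring now advertises `hf cache ls`, keyed by the new `type/id` identifiers, instead of the removed `hf cache scan`. The programmatic equivalent remains `scan_cache_dir`:

```python
from huggingface_hub import scan_cache_dir

report = scan_cache_dir()
print(f"{len(report.repos)} repo(s), {report.size_on_disk_str} on disk")
for warning in report.warnings:  # corrupted repos are skipped, not fatal
    print("warning:", warning)
```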
@@ -651,8 +653,8 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
             f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
         )

-    repos: Set[CachedRepoInfo] = set()
-    warnings: List[CorruptedCacheException] = []
+    repos: set[CachedRepoInfo] = set()
+    warnings: list[CorruptedCacheException] = []
     for repo_path in cache_dir.iterdir():
         if repo_path.name == ".locks":  # skip './.locks/' folder
             continue
@@ -688,7 +690,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
             f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
         )

-    blob_stats: Dict[Path, os.stat_result] = {}  # Key is blob_path, value is blob stats
+    blob_stats: dict[Path, os.stat_result] = {}  # Key is blob_path, value is blob stats

     snapshots_path = repo_path / "snapshots"
     refs_path = repo_path / "refs"
@@ -699,7 +701,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
     # Scan over `refs` directory

     # key is revision hash, value is set of refs
-    refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
+    refs_by_hash: dict[str, set[str]] = defaultdict(set)
     if refs_path.exists():
         # Example of `refs` directory
         # ── refs
@@ -722,7 +724,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
             refs_by_hash[commit_hash].add(ref_name)

     # Scan snapshots directory
-    cached_revisions: Set[CachedRevisionInfo] = set()
+    cached_revisions: set[CachedRevisionInfo] = set()
     for revision_path in snapshots_path.iterdir():
         # Ignore OS-created helper files
         if revision_path.name in FILES_TO_IGNORE:
@@ -816,33 +818,6 @@ def _format_size(num: int) -> str:
     return f"{num_f:.1f}Y"


-_TIMESINCE_CHUNKS = (
-    # Label, divider, max value
-    ("second", 1, 60),
-    ("minute", 60, 60),
-    ("hour", 60 * 60, 24),
-    ("day", 60 * 60 * 24, 6),
-    ("week", 60 * 60 * 24 * 7, 6),
-    ("month", 60 * 60 * 24 * 30, 11),
-    ("year", 60 * 60 * 24 * 365, None),
-)
-
-
-def _format_timesince(ts: float) -> str:
-    """Format timestamp in seconds into a human-readable string, relative to now.
-
-    Vaguely inspired by Django's `timesince` formatter.
-    """
-    delta = time.time() - ts
-    if delta < 20:
-        return "a few seconds ago"
-    for label, divider, max_value in _TIMESINCE_CHUNKS:  # noqa: B007
-        value = round(delta / divider)
-        if max_value is not None and value <= max_value:
-            break
-    return f"{value} {label}{'s' if value > 1 else ''} ago"
-
-
 def _try_delete_path(path: Path, path_type: str) -> None:
     """Try to delete a local file or folder.

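The timesince helper is not dropped, only relocated: as the import hunk for this file shows, it now lives in the new `huggingface_hub/utils/_parsing.py` (+98 lines) as `format_timesince`. A usage sketch against that private module:

```python
import time

from huggingface_hub.utils._parsing import format_timesince

two_weeks_ago = time.time() - 14 * 24 * 3600
print(format_timesince(two_weeks_ago))  # "2 weeks ago"
```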
huggingface_hub/utils/_deprecation.py

@@ -62,7 +62,7 @@ def _deprecate_arguments(
     Args:
         version (`str`):
             The version when deprecated arguments will result in error.
-        deprecated_args (`List[str]`):
+        deprecated_args (`list[str]`):
             List of the arguments to be deprecated.
         custom_message (`str`, *optional*):
             Warning message that is raised. If not passed, a default warning message
huggingface_hub/utils/_dotenv.py

@@ -1,14 +1,14 @@
 # AI-generated module (ChatGPT)
 import re
-from typing import Dict, Optional
+from typing import Optional


-def load_dotenv(dotenv_str: str, environ: Optional[Dict[str, str]] = None) -> Dict[str, str]:
+def load_dotenv(dotenv_str: str, environ: Optional[dict[str, str]] = None) -> dict[str, str]:
     """
     Parse a DOTENV-format string and return a dictionary of key-value pairs.
     Handles quoted values, comments, export keyword, and blank lines.
     """
-    env: Dict[str, str] = {}
+    env: dict[str, str] = {}
     line_pattern = re.compile(
         r"""
         ^\s*
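A quick behavior sketch of `load_dotenv` as documented (quoted values, comments, the `export` keyword, blank lines); edge-case handling follows the regex above, so treat the expected output as approximate:

```python
from huggingface_hub.utils._dotenv import load_dotenv

dotenv_str = """
# a comment
export HF_HOME="/data/hf"
TOKEN=abc123
"""
print(load_dotenv(dotenv_str))  # expected: {'HF_HOME': '/data/hf', 'TOKEN': 'abc123'}
```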
huggingface_hub/utils/_fixes.py

@@ -1,13 +1,3 @@
-# JSONDecodeError was introduced in requests=2.27 released in 2022.
-# This allows us to support older requests for users
-# More information: https://github.com/psf/requests/pull/5856
-try:
-    from requests import JSONDecodeError  # type: ignore # noqa: F401
-except ImportError:
-    try:
-        from simplejson import JSONDecodeError  # type: ignore # noqa: F401
-    except ImportError:
-        from json import JSONDecodeError  # type: ignore # noqa: F401
 import contextlib
 import os
 import shutil
huggingface_hub/utils/_git_credential.py

@@ -16,7 +16,7 @@

 import re
 import subprocess
-from typing import List, Optional
+from typing import Optional

 from ..constants import ENDPOINT
 from ._subprocess import run_interactive_subprocess, run_subprocess
@@ -34,7 +34,7 @@ GIT_CREDENTIAL_REGEX = re.compile(
 )


-def list_credential_helpers(folder: Optional[str] = None) -> List[str]:
+def list_credential_helpers(folder: Optional[str] = None) -> list[str]:
     """Return the list of git credential helpers configured.

     See https://git-scm.com/docs/gitcredentials.
@@ -104,7 +104,7 @@ def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None
     stdin.flush()


-def _parse_credential_output(output: str) -> List[str]:
+def _parse_credential_output(output: str) -> list[str]:
     """Parse the output of `git credential fill` to extract the password.

     Args:
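These helpers shell out to `git credential`; a short usage sketch (output depends on your local git configuration):

```python
from huggingface_hub.utils import list_credential_helpers

# Helpers configured via `git config credential.helper`,
# e.g. ["store"] or ["osxkeychain"].
print(list_credential_helpers())
```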