huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
|
@@ -20,7 +20,7 @@ import re
|
|
|
20
20
|
from collections import defaultdict, namedtuple
|
|
21
21
|
from functools import lru_cache
|
|
22
22
|
from pathlib import Path
|
|
23
|
-
from typing import TYPE_CHECKING, Any,
|
|
23
|
+
from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Optional, Union
|
|
24
24
|
|
|
25
25
|
from packaging import version
|
|
26
26
|
|
|
@@ -43,10 +43,10 @@ def save_torch_model(
|
|
|
43
43
|
filename_pattern: Optional[str] = None,
|
|
44
44
|
force_contiguous: bool = True,
|
|
45
45
|
max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
|
|
46
|
-
metadata: Optional[
|
|
46
|
+
metadata: Optional[dict[str, str]] = None,
|
|
47
47
|
safe_serialization: bool = True,
|
|
48
48
|
is_main_process: bool = True,
|
|
49
|
-
shared_tensors_to_discard: Optional[
|
|
49
|
+
shared_tensors_to_discard: Optional[list[str]] = None,
|
|
50
50
|
):
|
|
51
51
|
"""
|
|
52
52
|
Saves a given torch model to disk, handling sharding and shared tensors issues.
|
|
@@ -86,7 +86,7 @@ def save_torch_model(
|
|
|
86
86
|
that reason. Defaults to `True`.
|
|
87
87
|
max_shard_size (`int` or `str`, *optional*):
|
|
88
88
|
The maximum size of each shard, in bytes. Defaults to 5GB.
|
|
89
|
-
metadata (`
|
|
89
|
+
metadata (`dict[str, str]`, *optional*):
|
|
90
90
|
Extra information to save along with the model. Some metadata will be added for each dropped tensors.
|
|
91
91
|
This information will not be enough to recover the entire shared structure but might help understanding
|
|
92
92
|
things.
|
|
@@ -98,7 +98,7 @@ def save_torch_model(
|
|
|
98
98
|
Whether the process calling this is the main process or not. Useful when in distributed training like
|
|
99
99
|
TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
|
|
100
100
|
the main process to avoid race conditions. Defaults to True.
|
|
101
|
-
shared_tensors_to_discard (`
|
|
101
|
+
shared_tensors_to_discard (`list[str]`, *optional*):
|
|
102
102
|
List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
|
|
103
103
|
detected, it will drop the first name alphabetically.
|
|
104
104
|
|
|
@@ -131,16 +131,16 @@ def save_torch_model(
|
|
|
131
131
|
|
|
132
132
|
|
|
133
133
|
def save_torch_state_dict(
|
|
134
|
-
state_dict:
|
|
134
|
+
state_dict: dict[str, "torch.Tensor"],
|
|
135
135
|
save_directory: Union[str, Path],
|
|
136
136
|
*,
|
|
137
137
|
filename_pattern: Optional[str] = None,
|
|
138
138
|
force_contiguous: bool = True,
|
|
139
139
|
max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
|
|
140
|
-
metadata: Optional[
|
|
140
|
+
metadata: Optional[dict[str, str]] = None,
|
|
141
141
|
safe_serialization: bool = True,
|
|
142
142
|
is_main_process: bool = True,
|
|
143
|
-
shared_tensors_to_discard: Optional[
|
|
143
|
+
shared_tensors_to_discard: Optional[list[str]] = None,
|
|
144
144
|
) -> None:
|
|
145
145
|
"""
|
|
146
146
|
Save a model state dictionary to the disk, handling sharding and shared tensors issues.
|
|
@@ -165,7 +165,7 @@ def save_torch_state_dict(
|
|
|
165
165
|
> If your model is a `transformers.PreTrainedModel`, you should pass `model._tied_weights_keys` as `shared_tensors_to_discard` to properly handle shared tensors saving. This ensures the correct duplicate tensors are discarded during saving.
|
|
166
166
|
|
|
167
167
|
Args:
|
|
168
|
-
state_dict (`
|
|
168
|
+
state_dict (`dict[str, torch.Tensor]`):
|
|
169
169
|
The state dictionary to save.
|
|
170
170
|
save_directory (`str` or `Path`):
|
|
171
171
|
The directory in which the model will be saved.
|
|
@@ -180,7 +180,7 @@ def save_torch_state_dict(
|
|
|
180
180
|
that reason. Defaults to `True`.
|
|
181
181
|
max_shard_size (`int` or `str`, *optional*):
|
|
182
182
|
The maximum size of each shard, in bytes. Defaults to 5GB.
|
|
183
|
-
metadata (`
|
|
183
|
+
metadata (`dict[str, str]`, *optional*):
|
|
184
184
|
Extra information to save along with the model. Some metadata will be added for each dropped tensors.
|
|
185
185
|
This information will not be enough to recover the entire shared structure but might help understanding
|
|
186
186
|
things.
|
|
@@ -192,7 +192,7 @@ def save_torch_state_dict(
|
|
|
192
192
|
Whether the process calling this is the main process or not. Useful when in distributed training like
|
|
193
193
|
TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
|
|
194
194
|
the main process to avoid race conditions. Defaults to True.
|
|
195
|
-
shared_tensors_to_discard (`
|
|
195
|
+
shared_tensors_to_discard (`list[str]`, *optional*):
|
|
196
196
|
List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
|
|
197
197
|
detected, it will drop the first name alphabetically.
|
|
198
198
|
|
|
@@ -288,7 +288,7 @@ def save_torch_state_dict(
|
|
|
288
288
|
|
|
289
289
|
|
|
290
290
|
def split_torch_state_dict_into_shards(
|
|
291
|
-
state_dict:
|
|
291
|
+
state_dict: dict[str, "torch.Tensor"],
|
|
292
292
|
*,
|
|
293
293
|
filename_pattern: str = constants.SAFETENSORS_WEIGHTS_FILE_PATTERN,
|
|
294
294
|
max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
|
|
@@ -311,7 +311,7 @@ def split_torch_state_dict_into_shards(
|
|
|
311
311
|
> size greater than `max_shard_size`.
|
|
312
312
|
|
|
313
313
|
Args:
|
|
314
|
-
state_dict (`
|
|
314
|
+
state_dict (`dict[str, torch.Tensor]`):
|
|
315
315
|
The state dictionary to save.
|
|
316
316
|
filename_pattern (`str`, *optional*):
|
|
317
317
|
The pattern to generate the files names in which the model will be saved. Pattern must be a string that
|
|
@@ -330,7 +330,7 @@ def split_torch_state_dict_into_shards(
|
|
|
330
330
|
>>> from safetensors.torch import save_file as safe_save_file
|
|
331
331
|
>>> from huggingface_hub import split_torch_state_dict_into_shards
|
|
332
332
|
|
|
333
|
-
>>> def save_state_dict(state_dict:
|
|
333
|
+
>>> def save_state_dict(state_dict: dict[str, torch.Tensor], save_directory: str):
|
|
334
334
|
... state_dict_split = split_torch_state_dict_into_shards(state_dict)
|
|
335
335
|
... for filename, tensors in state_dict_split.filename_to_tensors.items():
|
|
336
336
|
... shard = {tensor: state_dict[tensor] for tensor in tensors}
|
|
@@ -542,7 +542,7 @@ def load_state_dict_from_file(
|
|
|
542
542
|
map_location: Optional[Union[str, "torch.device"]] = None,
|
|
543
543
|
weights_only: bool = False,
|
|
544
544
|
mmap: bool = False,
|
|
545
|
-
) -> Union[
|
|
545
|
+
) -> Union[dict[str, "torch.Tensor"], Any]:
|
|
546
546
|
"""
|
|
547
547
|
Loads a checkpoint file, handling both safetensors and pickle checkpoint formats.
|
|
548
548
|
|
|
@@ -562,7 +562,7 @@ def load_state_dict_from_file(
|
|
|
562
562
|
loading safetensors files, as the `safetensors` library uses memory mapping by default.
|
|
563
563
|
|
|
564
564
|
Returns:
|
|
565
|
-
`Union[
|
|
565
|
+
`Union[dict[str, "torch.Tensor"], Any]`: The loaded checkpoint.
|
|
566
566
|
- For safetensors files: always returns a dictionary mapping parameter names to tensors.
|
|
567
567
|
- For pickle files: returns any Python object that was pickled (commonly a state dict, but could be
|
|
568
568
|
an entire model, optimizer state, or any other Python object).
|
|
@@ -682,7 +682,7 @@ def _validate_keys_for_strict_loading(
|
|
|
682
682
|
raise RuntimeError(error_message)
|
|
683
683
|
|
|
684
684
|
|
|
685
|
-
def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
|
|
685
|
+
def _get_unique_id(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
|
|
686
686
|
"""Returns a unique id for plain tensor
|
|
687
687
|
or a (potentially nested) Tuple of unique id for the flattened Tensor
|
|
688
688
|
if the input is a wrapper tensor subclass Tensor
|
|
@@ -723,7 +723,7 @@ def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
|
|
|
723
723
|
return unique_id
|
|
724
724
|
|
|
725
725
|
|
|
726
|
-
def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[
|
|
726
|
+
def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[tuple["torch.device", Union[int, tuple[Any, ...]], int]]:
|
|
727
727
|
"""
|
|
728
728
|
Return unique identifier to a tensor storage.
|
|
729
729
|
|
|
@@ -797,7 +797,7 @@ def is_torch_tpu_available(check_device=True):
|
|
|
797
797
|
return False
|
|
798
798
|
|
|
799
799
|
|
|
800
|
-
def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
|
|
800
|
+
def storage_ptr(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
|
|
801
801
|
"""
|
|
802
802
|
Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
|
|
803
803
|
"""
|
|
@@ -823,10 +823,10 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
|
|
|
823
823
|
|
|
824
824
|
|
|
825
825
|
def _clean_state_dict_for_safetensors(
|
|
826
|
-
state_dict:
|
|
827
|
-
metadata:
|
|
826
|
+
state_dict: dict[str, "torch.Tensor"],
|
|
827
|
+
metadata: dict[str, str],
|
|
828
828
|
force_contiguous: bool = True,
|
|
829
|
-
shared_tensors_to_discard: Optional[
|
|
829
|
+
shared_tensors_to_discard: Optional[list[str]] = None,
|
|
830
830
|
):
|
|
831
831
|
"""Remove shared tensors from state_dict and update metadata accordingly (for reloading).
|
|
832
832
|
|
|
@@ -860,7 +860,7 @@ def _end_ptr(tensor: "torch.Tensor") -> int:
|
|
|
860
860
|
return stop
|
|
861
861
|
|
|
862
862
|
|
|
863
|
-
def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "torch.Tensor"]) -> List[Set[str]]:
|
|
863
|
+
def _filter_shared_not_shared(tensors: list[set[str]], state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
|
|
864
864
|
"""
|
|
865
865
|
Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L44
|
|
866
866
|
"""
|
|
@@ -888,7 +888,7 @@ def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "to
|
|
|
888
888
|
return filtered_tensors
|
|
889
889
|
|
|
890
890
|
|
|
891
|
-
def _find_shared_tensors(state_dict:
|
|
891
|
+
def _find_shared_tensors(state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
|
|
892
892
|
"""
|
|
893
893
|
Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L69.
|
|
894
894
|
"""
|
|
@@ -925,11 +925,11 @@ def _is_complete(tensor: "torch.Tensor") -> bool:
|
|
|
925
925
|
|
|
926
926
|
|
|
927
927
|
def _remove_duplicate_names(
|
|
928
|
-
state_dict:
|
|
928
|
+
state_dict: dict[str, "torch.Tensor"],
|
|
929
929
|
*,
|
|
930
|
-
preferred_names: Optional[
|
|
931
|
-
discard_names: Optional[
|
|
932
|
-
) ->
|
|
930
|
+
preferred_names: Optional[list[str]] = None,
|
|
931
|
+
discard_names: Optional[list[str]] = None,
|
|
932
|
+
) -> dict[str, list[str]]:
|
|
933
933
|
"""
|
|
934
934
|
Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
|
|
935
935
|
"""
|
|
@@ -42,6 +42,7 @@ from ._cache_manager import (
|
|
|
42
42
|
CachedRevisionInfo,
|
|
43
43
|
DeleteCacheStrategy,
|
|
44
44
|
HFCacheInfo,
|
|
45
|
+
_format_size,
|
|
45
46
|
scan_cache_dir,
|
|
46
47
|
)
|
|
47
48
|
from ._chunk_utils import chunk_iterable
|
|
@@ -50,14 +51,18 @@ from ._experimental import experimental
|
|
|
50
51
|
from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
|
|
51
52
|
from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
|
|
52
53
|
from ._headers import build_hf_headers, get_token_to_send
|
|
53
|
-
from ._hf_folder import HfFolder
|
|
54
54
|
from ._http import (
|
|
55
|
-
|
|
55
|
+
ASYNC_CLIENT_FACTORY_T,
|
|
56
|
+
CLIENT_FACTORY_T,
|
|
57
|
+
close_session,
|
|
56
58
|
fix_hf_endpoint_in_url,
|
|
59
|
+
get_async_session,
|
|
57
60
|
get_session,
|
|
58
61
|
hf_raise_for_status,
|
|
59
62
|
http_backoff,
|
|
60
|
-
|
|
63
|
+
http_stream_backoff,
|
|
64
|
+
set_async_client_factory,
|
|
65
|
+
set_client_factory,
|
|
61
66
|
)
|
|
62
67
|
from ._pagination import paginate
|
|
63
68
|
from ._paths import DEFAULT_IGNORE_PATTERNS, FORBIDDEN_FOLDERS, filter_repo_objects
|
|
@@ -70,7 +75,6 @@ from ._runtime import (
|
|
|
70
75
|
get_gradio_version,
|
|
71
76
|
get_graphviz_version,
|
|
72
77
|
get_hf_hub_version,
|
|
73
|
-
get_hf_transfer_version,
|
|
74
78
|
get_jinja_version,
|
|
75
79
|
get_numpy_version,
|
|
76
80
|
get_pillow_version,
|
|
@@ -80,6 +84,7 @@ from ._runtime import (
|
|
|
80
84
|
get_tensorboard_version,
|
|
81
85
|
get_tf_version,
|
|
82
86
|
get_torch_version,
|
|
87
|
+
installation_method,
|
|
83
88
|
is_aiohttp_available,
|
|
84
89
|
is_colab_enterprise,
|
|
85
90
|
is_fastai_available,
|
|
@@ -88,7 +93,6 @@ from ._runtime import (
|
|
|
88
93
|
is_google_colab,
|
|
89
94
|
is_gradio_available,
|
|
90
95
|
is_graphviz_available,
|
|
91
|
-
is_hf_transfer_available,
|
|
92
96
|
is_jinja_available,
|
|
93
97
|
is_notebook,
|
|
94
98
|
is_numpy_available,
|
|
@@ -104,8 +108,9 @@ from ._runtime import (
|
|
|
104
108
|
from ._safetensors import SafetensorsFileMetadata, SafetensorsRepoMetadata, TensorInfo
|
|
105
109
|
from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
|
|
106
110
|
from ._telemetry import send_telemetry
|
|
111
|
+
from ._terminal import ANSI, tabulate
|
|
107
112
|
from ._typing import is_jsonable, is_simple_optional_type, unwrap_simple_optional_type
|
|
108
|
-
from ._validators import
|
|
113
|
+
from ._validators import validate_hf_hub_args, validate_repo_id
|
|
109
114
|
from ._xet import (
|
|
110
115
|
XetConnectionInfo,
|
|
111
116
|
XetFileData,
|
huggingface_hub/utils/_auth.py
CHANGED
|
@@ -19,7 +19,7 @@ import os
|
|
|
19
19
|
import warnings
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
from threading import Lock
|
|
22
|
-
from typing import
|
|
22
|
+
from typing import Optional
|
|
23
23
|
|
|
24
24
|
from .. import constants
|
|
25
25
|
from ._runtime import is_colab_enterprise, is_google_colab
|
|
@@ -125,13 +125,13 @@ def _get_token_from_file() -> Optional[str]:
|
|
|
125
125
|
return None
|
|
126
126
|
|
|
127
127
|
|
|
128
|
-
def get_stored_tokens() -> Dict[str, str]:
|
|
128
|
+
def get_stored_tokens() -> dict[str, str]:
|
|
129
129
|
"""
|
|
130
130
|
Returns the parsed INI file containing the access tokens.
|
|
131
131
|
The file is located at `HF_STORED_TOKENS_PATH`, defaulting to `~/.cache/huggingface/stored_tokens`.
|
|
132
132
|
If the file does not exist, an empty dictionary is returned.
|
|
133
133
|
|
|
134
|
-
Returns: `
|
|
134
|
+
Returns: `dict[str, str]`
|
|
135
135
|
Key is the token name and value is the token.
|
|
136
136
|
"""
|
|
137
137
|
tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
|
|
@@ -147,12 +147,12 @@ def get_stored_tokens() -> Dict[str, str]:
|
|
|
147
147
|
return stored_tokens
|
|
148
148
|
|
|
149
149
|
|
|
150
|
-
def _save_stored_tokens(stored_tokens:
|
|
150
|
+
def _save_stored_tokens(stored_tokens: dict[str, str]) -> None:
|
|
151
151
|
"""
|
|
152
152
|
Saves the given configuration to the stored tokens file.
|
|
153
153
|
|
|
154
154
|
Args:
|
|
155
|
-
stored_tokens (`
|
|
155
|
+
stored_tokens (`dict[str, str]`):
|
|
156
156
|
The stored tokens to save. Key is the token name and value is the token.
|
|
157
157
|
"""
|
|
158
158
|
stored_tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
|
|
@@ -16,17 +16,17 @@
|
|
|
16
16
|
|
|
17
17
|
import os
|
|
18
18
|
import shutil
|
|
19
|
-
import time
|
|
20
19
|
from collections import defaultdict
|
|
21
20
|
from dataclasses import dataclass
|
|
22
21
|
from pathlib import Path
|
|
23
|
-
from typing import
|
|
22
|
+
from typing import Literal, Optional, Union
|
|
24
23
|
|
|
25
24
|
from huggingface_hub.errors import CacheNotFound, CorruptedCacheException
|
|
26
25
|
|
|
27
|
-
from ..commands._cli_utils import tabulate
|
|
28
26
|
from ..constants import HF_HUB_CACHE
|
|
29
27
|
from . import logging
|
|
28
|
+
from ._parsing import format_timesince
|
|
29
|
+
from ._terminal import tabulate
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
logger = logging.get_logger(__name__)
|
|
@@ -79,7 +79,7 @@ class CachedFileInfo:
|
|
|
79
79
|
|
|
80
80
|
Example: "2 weeks ago".
|
|
81
81
|
"""
|
|
82
|
-
return
|
|
82
|
+
return format_timesince(self.blob_last_accessed)
|
|
83
83
|
|
|
84
84
|
@property
|
|
85
85
|
def blob_last_modified_str(self) -> str:
|
|
@@ -89,7 +89,7 @@ class CachedFileInfo:
|
|
|
89
89
|
|
|
90
90
|
Example: "2 weeks ago".
|
|
91
91
|
"""
|
|
92
|
-
return
|
|
92
|
+
return format_timesince(self.blob_last_modified)
|
|
93
93
|
|
|
94
94
|
@property
|
|
95
95
|
def size_on_disk_str(self) -> str:
|
|
@@ -116,9 +116,9 @@ class CachedRevisionInfo:
|
|
|
116
116
|
snapshot_path (`Path`):
|
|
117
117
|
Path to the revision directory in the `snapshots` folder. It contains the
|
|
118
118
|
exact tree structure as the repo on the Hub.
|
|
119
|
-
files: (`
|
|
119
|
+
files: (`frozenset[CachedFileInfo]`):
|
|
120
120
|
Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
|
|
121
|
-
refs (`
|
|
121
|
+
refs (`frozenset[str]`):
|
|
122
122
|
Set of `refs` pointing to this revision. If the revision has no `refs`, it
|
|
123
123
|
is considered detached.
|
|
124
124
|
Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
|
|
@@ -140,8 +140,8 @@ class CachedRevisionInfo:
|
|
|
140
140
|
commit_hash: str
|
|
141
141
|
snapshot_path: Path
|
|
142
142
|
size_on_disk: int
|
|
143
|
-
files:
|
|
144
|
-
refs:
|
|
143
|
+
files: frozenset[CachedFileInfo]
|
|
144
|
+
refs: frozenset[str]
|
|
145
145
|
|
|
146
146
|
last_modified: float
|
|
147
147
|
|
|
@@ -153,7 +153,7 @@ class CachedRevisionInfo:
|
|
|
153
153
|
|
|
154
154
|
Example: "2 weeks ago".
|
|
155
155
|
"""
|
|
156
|
-
return
|
|
156
|
+
return format_timesince(self.last_modified)
|
|
157
157
|
|
|
158
158
|
@property
|
|
159
159
|
def size_on_disk_str(self) -> str:
|
|
@@ -187,7 +187,7 @@ class CachedRepoInfo:
|
|
|
187
187
|
Sum of the blob file sizes in the cached repo.
|
|
188
188
|
nb_files (`int`):
|
|
189
189
|
Total number of blob files in the cached repo.
|
|
190
|
-
revisions (`
|
|
190
|
+
revisions (`frozenset[CachedRevisionInfo]`):
|
|
191
191
|
Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
|
|
192
192
|
last_accessed (`float`):
|
|
193
193
|
Timestamp of the last time a blob file of the repo has been accessed.
|
|
@@ -210,7 +210,7 @@ class CachedRepoInfo:
|
|
|
210
210
|
repo_path: Path
|
|
211
211
|
size_on_disk: int
|
|
212
212
|
nb_files: int
|
|
213
|
-
revisions:
|
|
213
|
+
revisions: frozenset[CachedRevisionInfo]
|
|
214
214
|
|
|
215
215
|
last_accessed: float
|
|
216
216
|
last_modified: float
|
|
@@ -223,7 +223,7 @@ class CachedRepoInfo:
|
|
|
223
223
|
|
|
224
224
|
Example: "2 weeks ago".
|
|
225
225
|
"""
|
|
226
|
-
return
|
|
226
|
+
return format_timesince(self.last_accessed)
|
|
227
227
|
|
|
228
228
|
@property
|
|
229
229
|
def last_modified_str(self) -> str:
|
|
@@ -233,7 +233,7 @@ class CachedRepoInfo:
|
|
|
233
233
|
|
|
234
234
|
Example: "2 weeks ago".
|
|
235
235
|
"""
|
|
236
|
-
return
|
|
236
|
+
return format_timesince(self.last_modified)
|
|
237
237
|
|
|
238
238
|
@property
|
|
239
239
|
def size_on_disk_str(self) -> str:
|
|
@@ -245,7 +245,12 @@ class CachedRepoInfo:
|
|
|
245
245
|
return _format_size(self.size_on_disk)
|
|
246
246
|
|
|
247
247
|
@property
|
|
248
|
-
def
|
|
248
|
+
def cache_id(self) -> str:
|
|
249
|
+
"""Canonical `type/id` identifier used across cache tooling."""
|
|
250
|
+
return f"{self.repo_type}/{self.repo_id}"
|
|
251
|
+
|
|
252
|
+
@property
|
|
253
|
+
def refs(self) -> dict[str, CachedRevisionInfo]:
|
|
249
254
|
"""
|
|
250
255
|
(property) Mapping between `refs` and revision data structures.
|
|
251
256
|
"""
|
|
@@ -262,21 +267,21 @@ class DeleteCacheStrategy:
|
|
|
262
267
|
Args:
|
|
263
268
|
expected_freed_size (`float`):
|
|
264
269
|
Expected freed size once strategy is executed.
|
|
265
|
-
blobs (`
|
|
270
|
+
blobs (`frozenset[Path]`):
|
|
266
271
|
Set of blob file paths to be deleted.
|
|
267
|
-
refs (`
|
|
272
|
+
refs (`frozenset[Path]`):
|
|
268
273
|
Set of reference file paths to be deleted.
|
|
269
|
-
repos (`
|
|
274
|
+
repos (`frozenset[Path]`):
|
|
270
275
|
Set of entire repo paths to be deleted.
|
|
271
|
-
snapshots (`
|
|
276
|
+
snapshots (`frozenset[Path]`):
|
|
272
277
|
Set of snapshots to be deleted (directory of symlinks).
|
|
273
278
|
"""
|
|
274
279
|
|
|
275
280
|
expected_freed_size: int
|
|
276
|
-
blobs:
|
|
277
|
-
refs:
|
|
278
|
-
repos:
|
|
279
|
-
snapshots:
|
|
281
|
+
blobs: frozenset[Path]
|
|
282
|
+
refs: frozenset[Path]
|
|
283
|
+
repos: frozenset[Path]
|
|
284
|
+
snapshots: frozenset[Path]
|
|
280
285
|
|
|
281
286
|
@property
|
|
282
287
|
def expected_freed_size_str(self) -> str:
|
|
@@ -331,10 +336,10 @@ class HFCacheInfo:
|
|
|
331
336
|
Args:
|
|
332
337
|
size_on_disk (`int`):
|
|
333
338
|
Sum of all valid repo sizes in the cache-system.
|
|
334
|
-
repos (`
|
|
339
|
+
repos (`frozenset[CachedRepoInfo]`):
|
|
335
340
|
Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
|
|
336
341
|
cache-system while scanning.
|
|
337
|
-
warnings (`
|
|
342
|
+
warnings (`list[CorruptedCacheException]`):
|
|
338
343
|
List of [`~CorruptedCacheException`] that occurred while scanning the cache.
|
|
339
344
|
Those exceptions are captured so that the scan can continue. Corrupted repos
|
|
340
345
|
are skipped from the scan.
|
|
@@ -345,8 +350,8 @@ class HFCacheInfo:
|
|
|
345
350
|
"""
|
|
346
351
|
|
|
347
352
|
size_on_disk: int
|
|
348
|
-
repos:
|
|
349
|
-
warnings:
|
|
353
|
+
repos: frozenset[CachedRepoInfo]
|
|
354
|
+
warnings: list[CorruptedCacheException]
|
|
350
355
|
|
|
351
356
|
@property
|
|
352
357
|
def size_on_disk_str(self) -> str:
|
|
@@ -393,9 +398,9 @@ class HFCacheInfo:
|
|
|
393
398
|
> be executed. The [`~utils.DeleteCacheStrategy`] is not meant to be modified but
|
|
394
399
|
> allows having a dry run before actually executing the deletion.
|
|
395
400
|
"""
|
|
396
|
-
hashes_to_delete:
|
|
401
|
+
hashes_to_delete: set[str] = set(revisions)
|
|
397
402
|
|
|
398
|
-
repos_with_revisions:
|
|
403
|
+
repos_with_revisions: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)
|
|
399
404
|
|
|
400
405
|
for repo in self.repos:
|
|
401
406
|
for revision in repo.revisions:
|
|
@@ -406,10 +411,10 @@ class HFCacheInfo:
|
|
|
406
411
|
if len(hashes_to_delete) > 0:
|
|
407
412
|
logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")
|
|
408
413
|
|
|
409
|
-
delete_strategy_blobs:
|
|
410
|
-
delete_strategy_refs:
|
|
411
|
-
delete_strategy_repos:
|
|
412
|
-
delete_strategy_snapshots:
|
|
414
|
+
delete_strategy_blobs: set[Path] = set()
|
|
415
|
+
delete_strategy_refs: set[Path] = set()
|
|
416
|
+
delete_strategy_repos: set[Path] = set()
|
|
417
|
+
delete_strategy_snapshots: set[Path] = set()
|
|
413
418
|
delete_strategy_expected_freed_size = 0
|
|
414
419
|
|
|
415
420
|
for affected_repo, revisions_to_delete in repos_with_revisions.items():
|
|
@@ -607,15 +612,12 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
607
612
|
|
|
608
613
|
You can also print a detailed report directly from the `hf` command line using:
|
|
609
614
|
```text
|
|
610
|
-
> hf cache
|
|
611
|
-
|
|
612
|
-
---------------------------
|
|
613
|
-
glue
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
bert-base-cased model 1.9G 13 main /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
|
|
617
|
-
t5-base model 10.1K 3 main /Users/lucain/.cache/huggingface/hub/models--t5-base
|
|
618
|
-
t5-small model 970.7M 11 refs/pr/1, main /Users/lucain/.cache/huggingface/hub/models--t5-small
|
|
615
|
+
> hf cache ls
|
|
616
|
+
ID SIZE LAST_ACCESSED LAST_MODIFIED REFS
|
|
617
|
+
--------------------------- -------- ------------- ------------- -----------
|
|
618
|
+
dataset/nyu-mll/glue 157.4M 2 days ago 2 days ago main script
|
|
619
|
+
model/LiquidAI/LFM2-VL-1.6B 3.2G 4 days ago 4 days ago main
|
|
620
|
+
model/microsoft/UserLM-8b 32.1G 4 days ago 4 days ago main
|
|
619
621
|
|
|
620
622
|
Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
|
|
621
623
|
Got 1 warning(s) while scanning. Use -vvv to print details.
|
|
@@ -651,8 +653,8 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
651
653
|
f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
|
|
652
654
|
)
|
|
653
655
|
|
|
654
|
-
repos: Set[CachedRepoInfo] = set()
|
|
655
|
-
warnings: List[CorruptedCacheException] = []
|
|
656
|
+
repos: set[CachedRepoInfo] = set()
|
|
657
|
+
warnings: list[CorruptedCacheException] = []
|
|
656
658
|
for repo_path in cache_dir.iterdir():
|
|
657
659
|
if repo_path.name == ".locks": # skip './.locks/' folder
|
|
658
660
|
continue
|
|
@@ -688,7 +690,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
688
690
|
f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
|
|
689
691
|
)
|
|
690
692
|
|
|
691
|
-
blob_stats: Dict[Path, os.stat_result] = {} # Key is blob_path, value is blob stats
|
|
693
|
+
blob_stats: dict[Path, os.stat_result] = {} # Key is blob_path, value is blob stats
|
|
692
694
|
|
|
693
695
|
snapshots_path = repo_path / "snapshots"
|
|
694
696
|
refs_path = repo_path / "refs"
|
|
@@ -699,7 +701,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
699
701
|
# Scan over `refs` directory
|
|
700
702
|
|
|
701
703
|
# key is revision hash, value is set of refs
|
|
702
|
-
refs_by_hash: Dict[str, Set[str]] = defaultdict(set)
|
|
704
|
+
refs_by_hash: dict[str, set[str]] = defaultdict(set)
|
|
703
705
|
if refs_path.exists():
|
|
704
706
|
# Example of `refs` directory
|
|
705
707
|
# ── refs
|
|
@@ -722,7 +724,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
|
|
|
722
724
|
refs_by_hash[commit_hash].add(ref_name)
|
|
723
725
|
|
|
724
726
|
# Scan snapshots directory
|
|
725
|
-
cached_revisions: Set[CachedRevisionInfo] = set()
|
|
727
|
+
cached_revisions: set[CachedRevisionInfo] = set()
|
|
726
728
|
for revision_path in snapshots_path.iterdir():
|
|
727
729
|
# Ignore OS-created helper files
|
|
728
730
|
if revision_path.name in FILES_TO_IGNORE:
|
|
@@ -816,33 +818,6 @@ def _format_size(num: int) -> str:
|
|
|
816
818
|
return f"{num_f:.1f}Y"
|
|
817
819
|
|
|
818
820
|
|
|
819
|
-
_TIMESINCE_CHUNKS = (
|
|
820
|
-
# Label, divider, max value
|
|
821
|
-
("second", 1, 60),
|
|
822
|
-
("minute", 60, 60),
|
|
823
|
-
("hour", 60 * 60, 24),
|
|
824
|
-
("day", 60 * 60 * 24, 6),
|
|
825
|
-
("week", 60 * 60 * 24 * 7, 6),
|
|
826
|
-
("month", 60 * 60 * 24 * 30, 11),
|
|
827
|
-
("year", 60 * 60 * 24 * 365, None),
|
|
828
|
-
)
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
def _format_timesince(ts: float) -> str:
|
|
832
|
-
"""Format timestamp in seconds into a human-readable string, relative to now.
|
|
833
|
-
|
|
834
|
-
Vaguely inspired by Django's `timesince` formatter.
|
|
835
|
-
"""
|
|
836
|
-
delta = time.time() - ts
|
|
837
|
-
if delta < 20:
|
|
838
|
-
return "a few seconds ago"
|
|
839
|
-
for label, divider, max_value in _TIMESINCE_CHUNKS: # noqa: B007
|
|
840
|
-
value = round(delta / divider)
|
|
841
|
-
if max_value is not None and value <= max_value:
|
|
842
|
-
break
|
|
843
|
-
return f"{value} {label}{'s' if value > 1 else ''} ago"
|
|
844
|
-
|
|
845
|
-
|
|
846
821
|
def _try_delete_path(path: Path, path_type: str) -> None:
|
|
847
822
|
"""Try to delete a local file or folder.
|
|
848
823
|
|
|
@@ -62,7 +62,7 @@ def _deprecate_arguments(
|
|
|
62
62
|
Args:
|
|
63
63
|
version (`str`):
|
|
64
64
|
The version when deprecated arguments will result in error.
|
|
65
|
-
deprecated_args (`List[str]`):
|
|
65
|
+
deprecated_args (`list[str]`):
|
|
66
66
|
List of the arguments to be deprecated.
|
|
67
67
|
custom_message (`str`, *optional*):
|
|
68
68
|
Warning message that is raised. If not passed, a default warning message
|
huggingface_hub/utils/_dotenv.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# AI-generated module (ChatGPT)
|
|
2
2
|
import re
|
|
3
|
-
from typing import Dict, Optional
|
|
3
|
+
from typing import Optional
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
def load_dotenv(dotenv_str: str, environ: Optional[Dict[str, str]] = None) -> Dict[str, str]:
|
|
6
|
+
def load_dotenv(dotenv_str: str, environ: Optional[dict[str, str]] = None) -> dict[str, str]:
|
|
7
7
|
"""
|
|
8
8
|
Parse a DOTENV-format string and return a dictionary of key-value pairs.
|
|
9
9
|
Handles quoted values, comments, export keyword, and blank lines.
|
|
10
10
|
"""
|
|
11
|
-
env: Dict[str, str] = {}
|
|
11
|
+
env: dict[str, str] = {}
|
|
12
12
|
line_pattern = re.compile(
|
|
13
13
|
r"""
|
|
14
14
|
^\s*
|
huggingface_hub/utils/_fixes.py
CHANGED
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
# JSONDecodeError was introduced in requests=2.27 released in 2022.
|
|
2
|
-
# This allows us to support older requests for users
|
|
3
|
-
# More information: https://github.com/psf/requests/pull/5856
|
|
4
|
-
try:
|
|
5
|
-
from requests import JSONDecodeError # type: ignore # noqa: F401
|
|
6
|
-
except ImportError:
|
|
7
|
-
try:
|
|
8
|
-
from simplejson import JSONDecodeError # type: ignore # noqa: F401
|
|
9
|
-
except ImportError:
|
|
10
|
-
from json import JSONDecodeError # type: ignore # noqa: F401
|
|
11
1
|
import contextlib
|
|
12
2
|
import os
|
|
13
3
|
import shutil
|
|
@@ -16,7 +16,7 @@
|
|
|
16
16
|
|
|
17
17
|
import re
|
|
18
18
|
import subprocess
|
|
19
|
-
from typing import List, Optional
|
|
19
|
+
from typing import Optional
|
|
20
20
|
|
|
21
21
|
from ..constants import ENDPOINT
|
|
22
22
|
from ._subprocess import run_interactive_subprocess, run_subprocess
|
|
@@ -34,7 +34,7 @@ GIT_CREDENTIAL_REGEX = re.compile(
|
|
|
34
34
|
)
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
def list_credential_helpers(folder: Optional[str] = None) -> List[str]:
|
|
37
|
+
def list_credential_helpers(folder: Optional[str] = None) -> list[str]:
|
|
38
38
|
"""Return the list of git credential helpers configured.
|
|
39
39
|
|
|
40
40
|
See https://git-scm.com/docs/gitcredentials.
|
|
@@ -104,7 +104,7 @@ def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None
|
|
|
104
104
|
stdin.flush()
|
|
105
105
|
|
|
106
106
|
|
|
107
|
-
def _parse_credential_output(output: str) -> List[str]:
|
|
107
|
+
def _parse_credential_output(output: str) -> list[str]:
|
|
108
108
|
"""Parse the output of `git credential fill` to extract the password.
|
|
109
109
|
|
|
110
110
|
Args:
|