huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/serialization/_dduf.py CHANGED

@@ -7,7 +7,7 @@ import zipfile
 from contextlib import contextmanager
 from dataclasses import dataclass, field
 from pathlib import Path
-from typing import Any,
+from typing import Any, Generator, Iterable, Union

 from ..errors import DDUFCorruptedFileError, DDUFExportError, DDUFInvalidEntryNameError

@@ -87,7 +87,7 @@ class DDUFEntry:
         return f.read(self.length).decode(encoding=encoding)


-def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
+def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> dict[str, DDUFEntry]:
     """
     Read a DDUF file and return a dictionary of entries.

@@ -98,7 +98,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:
             The path to the DDUF file to read.

     Returns:
-        `Dict[str, DDUFEntry]`:
+        `dict[str, DDUFEntry]`:
             A dictionary of [`DDUFEntry`] indexed by filename.

     Raises:
@@ -157,7 +157,7 @@ def read_dduf_file(dduf_path: Union[os.PathLike, str]) -> Dict[str, DDUFEntry]:


 def export_entries_as_dduf(
-    dduf_path: Union[str, os.PathLike], entries: Iterable[
+    dduf_path: Union[str, os.PathLike], entries: Iterable[tuple[str, Union[str, Path, bytes]]]
 ) -> None:
     """Write a DDUF file from an iterable of entries.

@@ -167,7 +167,7 @@ def export_entries_as_dduf(
     Args:
         dduf_path (`str` or `os.PathLike`):
             The path to the DDUF file to write.
-        entries (`Iterable[
+        entries (`Iterable[tuple[str, Union[str, Path, bytes]]]`):
             An iterable of entries to write in the DDUF file. Each entry is a tuple with the filename and the content.
             The filename should be the path to the file in the DDUF archive.
             The content can be a string or a pathlib.Path representing a path to a file on the local disk or directly the content as bytes.
@@ -201,7 +201,7 @@ def export_entries_as_dduf(
     >>> pipe = DiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
     ... # ... do some work with the pipeline

-    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[
+    >>> def as_entries(pipe: DiffusionPipeline) -> Generator[tuple[str, bytes], None, None]:
     ...     # Build an generator that yields the entries to add to the DDUF file.
     ...     # The first element of the tuple is the filename in the DDUF archive (must use UNIX separator!). The second element is the content of the file.
     ...     # Entries will be evaluated lazily when the DDUF file is created (only 1 entry is loaded in memory at a time)
@@ -267,7 +267,7 @@ def export_folder_as_dduf(dduf_path: Union[str, os.PathLike], folder_path: Union
     """
     folder_path = Path(folder_path)

-    def _iterate_over_folder() -> Iterable[
+    def _iterate_over_folder() -> Iterable[tuple[str, Path]]:
         for path in Path(folder_path).glob("**/*"):
             if not path.is_file():
                 continue
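The `_dduf.py` hunks above only swap `Dict`/`Tuple` typing aliases for the PEP 585 builtins; behaviour is unchanged. A minimal round-trip sketch with the public helpers touched here (the archive and folder paths are placeholders):

from huggingface_hub import export_folder_as_dduf, read_dduf_file

# Bundle a local pipeline folder into a single DDUF archive (placeholder paths).
export_folder_as_dduf("pipeline.dduf", folder_path="./my_pipeline")

# Read it back: returns a dict[str, DDUFEntry] keyed by archive filename, as retyped above.
entries = read_dduf_file("pipeline.dduf")
for name, entry in entries.items():
    print(name, entry.length)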
huggingface_hub/serialization/_torch.py CHANGED

@@ -20,7 +20,7 @@ import re
 from collections import defaultdict, namedtuple
 from functools import lru_cache
 from pathlib import Path
-from typing import TYPE_CHECKING, Any,
+from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Optional, Union

 from packaging import version

@@ -43,10 +43,10 @@ def save_torch_model(
     filename_pattern: Optional[str] = None,
     force_contiguous: bool = True,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
-    metadata: Optional[
+    metadata: Optional[dict[str, str]] = None,
     safe_serialization: bool = True,
     is_main_process: bool = True,
-    shared_tensors_to_discard: Optional[
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ):
     """
     Saves a given torch model to disk, handling sharding and shared tensors issues.
@@ -92,7 +92,7 @@ def save_torch_model(
             that reason. Defaults to `True`.
         max_shard_size (`int` or `str`, *optional*):
             The maximum size of each shard, in bytes. Defaults to 5GB.
-        metadata (`
+        metadata (`dict[str, str]`, *optional*):
             Extra information to save along with the model. Some metadata will be added for each dropped tensors.
             This information will not be enough to recover the entire shared structure but might help understanding
             things.
@@ -104,7 +104,7 @@ def save_torch_model(
             Whether the process calling this is the main process or not. Useful when in distributed training like
             TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
             the main process to avoid race conditions. Defaults to True.
-        shared_tensors_to_discard (`
+        shared_tensors_to_discard (`list[str]`, *optional*):
             List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
             detected, it will drop the first name alphabetically.

@@ -137,16 +137,16 @@ def save_torch_model(


 def save_torch_state_dict(
-    state_dict:
+    state_dict: dict[str, "torch.Tensor"],
     save_directory: Union[str, Path],
     *,
     filename_pattern: Optional[str] = None,
     force_contiguous: bool = True,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
-    metadata: Optional[
+    metadata: Optional[dict[str, str]] = None,
     safe_serialization: bool = True,
     is_main_process: bool = True,
-    shared_tensors_to_discard: Optional[
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ) -> None:
     """
     Save a model state dictionary to the disk, handling sharding and shared tensors issues.
@@ -177,7 +177,7 @@ def save_torch_state_dict(
     </Tip>

     Args:
-        state_dict (`
+        state_dict (`dict[str, torch.Tensor]`):
             The state dictionary to save.
         save_directory (`str` or `Path`):
             The directory in which the model will be saved.
@@ -192,7 +192,7 @@ def save_torch_state_dict(
             that reason. Defaults to `True`.
         max_shard_size (`int` or `str`, *optional*):
             The maximum size of each shard, in bytes. Defaults to 5GB.
-        metadata (`
+        metadata (`dict[str, str]`, *optional*):
             Extra information to save along with the model. Some metadata will be added for each dropped tensors.
             This information will not be enough to recover the entire shared structure but might help understanding
             things.
@@ -204,7 +204,7 @@ def save_torch_state_dict(
             Whether the process calling this is the main process or not. Useful when in distributed training like
             TPUs and need to call this function from all processes. In this case, set `is_main_process=True` only on
             the main process to avoid race conditions. Defaults to True.
-        shared_tensors_to_discard (`
+        shared_tensors_to_discard (`list[str]`, *optional*):
             List of tensor names to drop when saving shared tensors. If not provided and shared tensors are
             detected, it will drop the first name alphabetically.

@@ -300,7 +300,7 @@ def save_torch_state_dict(


 def split_torch_state_dict_into_shards(
-    state_dict:
+    state_dict: dict[str, "torch.Tensor"],
     *,
     filename_pattern: str = constants.SAFETENSORS_WEIGHTS_FILE_PATTERN,
     max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
@@ -329,7 +329,7 @@ def split_torch_state_dict_into_shards(
     </Tip>

     Args:
-        state_dict (`
+        state_dict (`dict[str, torch.Tensor]`):
             The state dictionary to save.
         filename_pattern (`str`, *optional*):
             The pattern to generate the files names in which the model will be saved. Pattern must be a string that
@@ -348,7 +348,7 @@ def split_torch_state_dict_into_shards(
     >>> from safetensors.torch import save_file as safe_save_file
     >>> from huggingface_hub import split_torch_state_dict_into_shards

-    >>> def save_state_dict(state_dict:
+    >>> def save_state_dict(state_dict: dict[str, torch.Tensor], save_directory: str):
     ...     state_dict_split = split_torch_state_dict_into_shards(state_dict)
     ...     for filename, tensors in state_dict_split.filename_to_tensors.items():
     ...         shard = {tensor: state_dict[tensor] for tensor in tensors}
@@ -560,7 +560,7 @@ def load_state_dict_from_file(
     map_location: Optional[Union[str, "torch.device"]] = None,
     weights_only: bool = False,
     mmap: bool = False,
-) -> Union[
+) -> Union[dict[str, "torch.Tensor"], Any]:
     """
     Loads a checkpoint file, handling both safetensors and pickle checkpoint formats.

@@ -580,7 +580,7 @@ def load_state_dict_from_file(
         loading safetensors files, as the `safetensors` library uses memory mapping by default.

     Returns:
-        `Union[
+        `Union[dict[str, "torch.Tensor"], Any]`: The loaded checkpoint.
             - For safetensors files: always returns a dictionary mapping parameter names to tensors.
             - For pickle files: returns any Python object that was pickled (commonly a state dict, but could be
               an entire model, optimizer state, or any other Python object).
@@ -700,7 +700,7 @@ def _validate_keys_for_strict_loading(
         raise RuntimeError(error_message)


-def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+def _get_unique_id(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
     """Returns a unique id for plain tensor
     or a (potentially nested) Tuple of unique id for the flattened Tensor
     if the input is a wrapper tensor subclass Tensor
@@ -741,7 +741,7 @@ def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
     return unique_id


-def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[
+def get_torch_storage_id(tensor: "torch.Tensor") -> Optional[tuple["torch.device", Union[int, tuple[Any, ...]], int]]:
     """
     Return unique identifier to a tensor storage.

@@ -815,7 +815,7 @@ def is_torch_tpu_available(check_device=True):
     return False


-def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
+def storage_ptr(tensor: "torch.Tensor") -> Union[int, tuple[Any, ...]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
     """
@@ -841,10 +841,10 @@ def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:


 def _clean_state_dict_for_safetensors(
-    state_dict:
-    metadata:
+    state_dict: dict[str, "torch.Tensor"],
+    metadata: dict[str, str],
     force_contiguous: bool = True,
-    shared_tensors_to_discard: Optional[
+    shared_tensors_to_discard: Optional[list[str]] = None,
 ):
     """Remove shared tensors from state_dict and update metadata accordingly (for reloading).

@@ -878,7 +878,7 @@ def _end_ptr(tensor: "torch.Tensor") -> int:
     return stop


-def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "torch.Tensor"]) -> List[Set[str]]:
+def _filter_shared_not_shared(tensors: list[set[str]], state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L44
     """
@@ -906,7 +906,7 @@ def _filter_shared_not_shared(tensors: List[Set[str]], state_dict: Dict[str, "to
     return filtered_tensors


-def _find_shared_tensors(state_dict:
+def _find_shared_tensors(state_dict: dict[str, "torch.Tensor"]) -> list[set[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L69.
     """
@@ -943,11 +943,11 @@ def _is_complete(tensor: "torch.Tensor") -> bool:


 def _remove_duplicate_names(
-    state_dict:
+    state_dict: dict[str, "torch.Tensor"],
     *,
-    preferred_names: Optional[
-    discard_names: Optional[
-) ->
+    preferred_names: Optional[list[str]] = None,
+    discard_names: Optional[list[str]] = None,
+) -> dict[str, list[str]]:
     """
     Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L80
     """
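All of the `_torch.py` hunks are likewise annotation-only (`Dict[str, "torch.Tensor"]` becomes `dict[str, "torch.Tensor"]`, and so on), so existing call sites keep working. For reference, a small sketch of the `save_torch_state_dict` signature shown above, using a toy model (the checkpoint directory and metadata values are illustrative):

from pathlib import Path

import torch
from huggingface_hub import save_torch_state_dict

model = torch.nn.Linear(4, 2)  # toy model, for illustration only
Path("tmp_checkpoint").mkdir(exist_ok=True)

# `state_dict`, `metadata` and `shared_tensors_to_discard` are the parameters retyped in this release.
save_torch_state_dict(
    model.state_dict(),
    save_directory="tmp_checkpoint",
    metadata={"note": "example"},
)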
huggingface_hub/utils/__init__.py CHANGED

@@ -50,14 +50,20 @@ from ._experimental import experimental
 from ._fixes import SoftTemporaryDirectory, WeakFileLock, yaml_dump
 from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
 from ._headers import build_hf_headers, get_token_to_send
-from ._hf_folder import HfFolder
 from ._http import (
-
+    ASYNC_CLIENT_FACTORY_T,
+    CLIENT_FACTORY_T,
+    HfHubAsyncTransport,
+    HfHubTransport,
+    close_client,
     fix_hf_endpoint_in_url,
+    get_async_session,
     get_session,
     hf_raise_for_status,
     http_backoff,
-
+    http_stream_backoff,
+    set_async_client_factory,
+    set_client_factory,
 )
 from ._pagination import paginate
 from ._paths import DEFAULT_IGNORE_PATTERNS, FORBIDDEN_FOLDERS, filter_repo_objects
@@ -105,7 +111,7 @@ from ._safetensors import SafetensorsFileMetadata, SafetensorsRepoMetadata, Tens
 from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
 from ._telemetry import send_telemetry
 from ._typing import is_jsonable, is_simple_optional_type, unwrap_simple_optional_type
-from ._validators import
+from ._validators import validate_hf_hub_args, validate_repo_id
 from ._xet import (
     XetConnectionInfo,
     XetFileData,
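The `huggingface_hub.utils` namespace drops the deprecated `HfFolder` re-export and exposes new HTTP plumbing (`HfHubTransport`, `get_async_session`, `set_client_factory`, `set_async_client_factory`, ...). A rough sketch of how a custom client factory might be wired in; it assumes `set_client_factory` accepts a zero-argument callable returning an `httpx.Client` (the `CLIENT_FACTORY_T` alias suggests as much, but treat the exact signature as an assumption), and the timeout value is purely illustrative:

import httpx
from huggingface_hub.utils import get_session, set_client_factory

def _client_factory() -> httpx.Client:  # hypothetical factory; signature assumed
    # Tune transport settings once, for every call the library makes.
    return httpx.Client(timeout=30.0)

set_client_factory(_client_factory)
client = get_session()  # expected to hand back a client built by the factory above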
huggingface_hub/utils/_auth.py CHANGED

@@ -19,7 +19,7 @@ import os
 import warnings
 from pathlib import Path
 from threading import Lock
-from typing import
+from typing import Optional

 from .. import constants
 from ._runtime import is_colab_enterprise, is_google_colab
@@ -125,13 +125,13 @@ def _get_token_from_file() -> Optional[str]:
         return None


-def get_stored_tokens() ->
+def get_stored_tokens() -> dict[str, str]:
     """
     Returns the parsed INI file containing the access tokens.
     The file is located at `HF_STORED_TOKENS_PATH`, defaulting to `~/.cache/huggingface/stored_tokens`.
     If the file does not exist, an empty dictionary is returned.

-    Returns: `
+    Returns: `dict[str, str]`
         Key is the token name and value is the token.
     """
     tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
@@ -147,12 +147,12 @@ def get_stored_tokens() -> Dict[str, str]:
     return stored_tokens


-def _save_stored_tokens(stored_tokens:
+def _save_stored_tokens(stored_tokens: dict[str, str]) -> None:
     """
     Saves the given configuration to the stored tokens file.

     Args:
-        stored_tokens (`
+        stored_tokens (`dict[str, str]`):
             The stored tokens to save. Key is the token name and value is the token.
     """
     stored_tokens_path = Path(constants.HF_STORED_TOKENS_PATH)
huggingface_hub/utils/_cache_manager.py CHANGED

@@ -20,7 +20,7 @@ import time
 from collections import defaultdict
 from dataclasses import dataclass
 from pathlib import Path
-from typing import
+from typing import Literal, Optional, Union

 from huggingface_hub.errors import CacheNotFound, CorruptedCacheException

@@ -119,9 +119,9 @@ class CachedRevisionInfo:
         snapshot_path (`Path`):
             Path to the revision directory in the `snapshots` folder. It contains the
             exact tree structure as the repo on the Hub.
-        files: (`
+        files: (`frozenset[CachedFileInfo]`):
             Set of [`~CachedFileInfo`] describing all files contained in the snapshot.
-        refs (`
+        refs (`frozenset[str]`):
             Set of `refs` pointing to this revision. If the revision has no `refs`, it
             is considered detached.
             Example: `{"main", "2.4.0"}` or `{"refs/pr/1"}`.
@@ -149,8 +149,8 @@ class CachedRevisionInfo:
     commit_hash: str
     snapshot_path: Path
     size_on_disk: int
-    files:
-    refs:
+    files: frozenset[CachedFileInfo]
+    refs: frozenset[str]

     last_modified: float

@@ -196,7 +196,7 @@ class CachedRepoInfo:
             Sum of the blob file sizes in the cached repo.
         nb_files (`int`):
             Total number of blob files in the cached repo.
-        revisions (`
+        revisions (`frozenset[CachedRevisionInfo]`):
             Set of [`~CachedRevisionInfo`] describing all revisions cached in the repo.
         last_accessed (`float`):
             Timestamp of the last time a blob file of the repo has been accessed.
@@ -225,7 +225,7 @@ class CachedRepoInfo:
     repo_path: Path
     size_on_disk: int
     nb_files: int
-    revisions:
+    revisions: frozenset[CachedRevisionInfo]

     last_accessed: float
     last_modified: float
@@ -260,7 +260,7 @@ class CachedRepoInfo:
         return _format_size(self.size_on_disk)

     @property
-    def refs(self) ->
+    def refs(self) -> dict[str, CachedRevisionInfo]:
         """
         (property) Mapping between `refs` and revision data structures.
         """
@@ -277,21 +277,21 @@ class DeleteCacheStrategy:
     Args:
         expected_freed_size (`float`):
             Expected freed size once strategy is executed.
-        blobs (`
+        blobs (`frozenset[Path]`):
             Set of blob file paths to be deleted.
-        refs (`
+        refs (`frozenset[Path]`):
             Set of reference file paths to be deleted.
-        repos (`
+        repos (`frozenset[Path]`):
             Set of entire repo paths to be deleted.
-        snapshots (`
+        snapshots (`frozenset[Path]`):
             Set of snapshots to be deleted (directory of symlinks).
     """

     expected_freed_size: int
-    blobs:
-    refs:
-    repos:
-    snapshots:
+    blobs: frozenset[Path]
+    refs: frozenset[Path]
+    repos: frozenset[Path]
+    snapshots: frozenset[Path]

     @property
     def expected_freed_size_str(self) -> str:
@@ -352,10 +352,10 @@ class HFCacheInfo:
     Args:
         size_on_disk (`int`):
             Sum of all valid repo sizes in the cache-system.
-        repos (`
+        repos (`frozenset[CachedRepoInfo]`):
             Set of [`~CachedRepoInfo`] describing all valid cached repos found on the
             cache-system while scanning.
-        warnings (`
+        warnings (`list[CorruptedCacheException]`):
             List of [`~CorruptedCacheException`] that occurred while scanning the cache.
             Those exceptions are captured so that the scan can continue. Corrupted repos
             are skipped from the scan.
@@ -369,8 +369,8 @@ class HFCacheInfo:
     """

     size_on_disk: int
-    repos:
-    warnings:
+    repos: frozenset[CachedRepoInfo]
+    warnings: list[CorruptedCacheException]

     @property
     def size_on_disk_str(self) -> str:
@@ -420,9 +420,9 @@ class HFCacheInfo:

         </Tip>
         """
-        hashes_to_delete:
+        hashes_to_delete: set[str] = set(revisions)

-        repos_with_revisions:
+        repos_with_revisions: dict[CachedRepoInfo, set[CachedRevisionInfo]] = defaultdict(set)

         for repo in self.repos:
             for revision in repo.revisions:
@@ -433,10 +433,10 @@ class HFCacheInfo:
         if len(hashes_to_delete) > 0:
             logger.warning(f"Revision(s) not found - cannot delete them: {', '.join(hashes_to_delete)}")

-        delete_strategy_blobs:
-        delete_strategy_refs:
-        delete_strategy_repos:
-        delete_strategy_snapshots:
+        delete_strategy_blobs: set[Path] = set()
+        delete_strategy_refs: set[Path] = set()
+        delete_strategy_repos: set[Path] = set()
+        delete_strategy_snapshots: set[Path] = set()
         delete_strategy_expected_freed_size = 0

         for affected_repo, revisions_to_delete in repos_with_revisions.items():
@@ -681,8 +681,8 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
             f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
         )

-    repos:
-    warnings:
+    repos: set[CachedRepoInfo] = set()
+    warnings: list[CorruptedCacheException] = []
     for repo_path in cache_dir.iterdir():
         if repo_path.name == ".locks":  # skip './.locks/' folder
             continue
@@ -718,7 +718,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
             f"Repo type must be `dataset`, `model` or `space`, found `{repo_type}` ({repo_path})."
         )

-    blob_stats:
+    blob_stats: dict[Path, os.stat_result] = {}  # Key is blob_path, value is blob stats

     snapshots_path = repo_path / "snapshots"
     refs_path = repo_path / "refs"
@@ -729,7 +729,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
     # Scan over `refs` directory

     # key is revision hash, value is set of refs
-    refs_by_hash:
+    refs_by_hash: dict[str, set[str]] = defaultdict(set)
     if refs_path.exists():
         # Example of `refs` directory
         # ── refs
@@ -752,7 +752,7 @@ def _scan_cached_repo(repo_path: Path) -> CachedRepoInfo:
             refs_by_hash[commit_hash].add(ref_name)

     # Scan snapshots directory
-    cached_revisions:
+    cached_revisions: set[CachedRevisionInfo] = set()
     for revision_path in snapshots_path.iterdir():
         # Ignore OS-created helper files
         if revision_path.name in FILES_TO_IGNORE:
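These `_cache_manager.py` changes only modernize the annotations (`FrozenSet`/`Dict`/`List` become `frozenset`/`dict`/`list`); the scanning API itself is untouched. A quick usage reminder for the dataclasses retyped above:

from huggingface_hub import scan_cache_dir

info = scan_cache_dir()           # HFCacheInfo
print(info.size_on_disk_str)      # human-readable total size
for repo in info.repos:           # frozenset[CachedRepoInfo]
    print(repo.repo_id, repo.nb_files, repo.size_on_disk_str)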
huggingface_hub/utils/_deprecation.py CHANGED

@@ -62,7 +62,7 @@ def _deprecate_arguments(
     Args:
         version (`str`):
             The version when deprecated arguments will result in error.
-        deprecated_args (`
+        deprecated_args (`list[str]`):
             List of the arguments to be deprecated.
         custom_message (`str`, *optional*):
             Warning message that is raised. If not passed, a default warning message
huggingface_hub/utils/_dotenv.py CHANGED

@@ -1,28 +1,28 @@
 # AI-generated module (ChatGPT)
 import re
-from typing import
+from typing import Optional


-def load_dotenv(dotenv_str: str) -> Dict[str, str]:
+def load_dotenv(dotenv_str: str, environ: Optional[dict[str, str]] = None) -> dict[str, str]:
     """
     Parse a DOTENV-format string and return a dictionary of key-value pairs.
     Handles quoted values, comments, export keyword, and blank lines.
     """
-    env:
+    env: dict[str, str] = {}
     line_pattern = re.compile(
         r"""
         ^\s*
-        (?:export\
+        (?:export[^\S\n]+)?          # optional export
         ([A-Za-z_][A-Za-z0-9_]*)     # key
-        \
+        [^\S\n]*(=)?[^\S\n]*
         (                            # value group
             (?:
                 '(?:\\'|[^'])*'          # single-quoted value
-                | "(
+                | \"(?:\\\"|[^\"])*\"    # double-quoted value
                 | [^#\n\r]+?             # unquoted value
             )
         )?
-        \
+        [^\S\n]*(?:\#.*)?$           # optional inline comment
         """,
         re.VERBOSE,
     )
@@ -33,19 +33,23 @@ def load_dotenv(dotenv_str: str) -> Dict[str, str]:
             continue  # Skip comments and empty lines

         match = line_pattern.match(line)
-        if
-
-
-
-
-
-
-
-
-
-
-
-
+        if match:
+            key = match.group(1)
+            val = None
+            if match.group(2):  # if there is '='
+                raw_val = match.group(3) or ""
+                val = raw_val.strip()
+                # Remove surrounding quotes if quoted
+                if (val.startswith('"') and val.endswith('"')) or (val.startswith("'") and val.endswith("'")):
+                    val = val[1:-1]
+                    val = val.replace(r"\n", "\n").replace(r"\t", "\t").replace(r"\"", '"').replace(r"\\", "\\")
+                    if raw_val.startswith('"'):
+                        val = val.replace(r"\$", "$")  # only in double quotes
+            elif environ is not None:
+                # Get it from the current environment
+                val = environ.get(key)
+
+            if val is not None:
+                env[key] = val

     return env
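The rewritten `load_dotenv` gains an optional `environ` mapping: a key listed without `=` is now resolved from that mapping instead of being dropped. A small sketch against this internal helper (note that `_dotenv` is a private module, so the import path is not a stable API):

from huggingface_hub.utils._dotenv import load_dotenv

content = "export GREETING='hello'\nHF_TOKEN\n# a comment\n"
# "HF_TOKEN" has no value in the file, so it is looked up in the mapping passed as `environ`.
print(load_dotenv(content, environ={"HF_TOKEN": "hf_xxx"}))
# -> {'GREETING': 'hello', 'HF_TOKEN': 'hf_xxx'}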
huggingface_hub/utils/_fixes.py CHANGED

@@ -1,13 +1,3 @@
-# JSONDecodeError was introduced in requests=2.27 released in 2022.
-# This allows us to support older requests for users
-# More information: https://github.com/psf/requests/pull/5856
-try:
-    from requests import JSONDecodeError  # type: ignore # noqa: F401
-except ImportError:
-    try:
-        from simplejson import JSONDecodeError  # type: ignore # noqa: F401
-    except ImportError:
-        from json import JSONDecodeError  # type: ignore # noqa: F401
 import contextlib
 import os
 import shutil
huggingface_hub/utils/_git_credential.py CHANGED

@@ -16,7 +16,7 @@

 import re
 import subprocess
-from typing import
+from typing import Optional

 from ..constants import ENDPOINT
 from ._subprocess import run_interactive_subprocess, run_subprocess
@@ -27,14 +27,14 @@ GIT_CREDENTIAL_REGEX = re.compile(
     ^\s*                 # start of line
     credential\.helper   # credential.helper value
     \s*=\s*              # separator
-    (\w+)                # the helper name (group 1)
+    ([\w\-\/]+)          # the helper name or absolute path (group 1)
     (\s|$)               # whitespace or end of line
     """,
     flags=re.MULTILINE | re.IGNORECASE | re.VERBOSE,
 )


-def list_credential_helpers(folder: Optional[str] = None) ->
+def list_credential_helpers(folder: Optional[str] = None) -> list[str]:
     """Return the list of git credential helpers configured.

     See https://git-scm.com/docs/gitcredentials.
@@ -104,7 +104,7 @@ def unset_git_credential(username: str = "hf_user", folder: Optional[str] = None
     stdin.flush()


-def _parse_credential_output(output: str) ->
+def _parse_credential_output(output: str) -> list[str]:
     """Parse the output of `git credential fill` to extract the password.

     Args:
|