huggingface-hub 0.20.3__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +19 -1
- huggingface_hub/_commit_api.py +49 -20
- huggingface_hub/_inference_endpoints.py +10 -0
- huggingface_hub/_login.py +2 -2
- huggingface_hub/commands/download.py +1 -1
- huggingface_hub/file_download.py +57 -21
- huggingface_hub/hf_api.py +269 -54
- huggingface_hub/hf_file_system.py +131 -8
- huggingface_hub/hub_mixin.py +204 -42
- huggingface_hub/inference/_client.py +56 -9
- huggingface_hub/inference/_common.py +4 -3
- huggingface_hub/inference/_generated/_async_client.py +57 -9
- huggingface_hub/inference/_text_generation.py +5 -0
- huggingface_hub/inference/_types.py +17 -0
- huggingface_hub/lfs.py +6 -3
- huggingface_hub/repocard.py +5 -3
- huggingface_hub/repocard_data.py +11 -3
- huggingface_hub/serialization/__init__.py +19 -0
- huggingface_hub/serialization/_base.py +168 -0
- huggingface_hub/serialization/_numpy.py +67 -0
- huggingface_hub/serialization/_tensorflow.py +93 -0
- huggingface_hub/serialization/_torch.py +199 -0
- huggingface_hub/templates/datasetcard_template.md +1 -1
- huggingface_hub/templates/modelcard_template.md +1 -4
- huggingface_hub/utils/__init__.py +14 -10
- huggingface_hub/utils/_datetime.py +4 -11
- huggingface_hub/utils/_errors.py +29 -0
- huggingface_hub/utils/_runtime.py +21 -15
- huggingface_hub/utils/endpoint_helpers.py +27 -1
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/METADATA +7 -3
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/RECORD +35 -30
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.20.3.dist-info → huggingface_hub-0.21.0.dist-info}/top_level.txt +0 -0
huggingface_hub/serialization/_tensorflow.py
ADDED

````diff
@@ -0,0 +1,93 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains tensorflow-specific helpers."""
+import math
+import re
+from typing import TYPE_CHECKING, Dict
+
+from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
+
+
+if TYPE_CHECKING:
+    import tensorflow as tf
+
+
+def split_tf_state_dict_into_shards(
+    state_dict: Dict[str, "tf.Tensor"],
+    *,
+    filename_pattern: str = "tf_model{suffix}.h5",
+    max_shard_size: int = MAX_SHARD_SIZE,
+) -> StateDictSplit:
+    """
+    Split a model state dictionary in shards so that each shard is smaller than a given size.
+
+    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
+    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
+    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
+    [6+2+2GB], [6+2GB], [6GB].
+
+    <Tip warning={true}>
+
+    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    size greater than `max_shard_size`.
+
+    </Tip>
+
+    Args:
+        state_dict (`Dict[str, Tensor]`):
+            The state dictionary to save.
+        filename_pattern (`str`, *optional*):
+            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"tf_model{suffix}.h5"`.
+        max_shard_size (`int` or `str`, *optional*):
+            The maximum size of each shard, in bytes. Defaults to 5GB.
+
+    Returns:
+        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
+    """
+    return split_state_dict_into_shards_factory(
+        state_dict,
+        max_shard_size=max_shard_size,
+        filename_pattern=filename_pattern,
+        get_tensor_size=get_tensor_size,
+    )
+
+
+def get_tensor_size(tensor: "tf.Tensor") -> int:
+    # Return `math.ceil` since dtype byte size can be a float (e.g., 0.125 for tf.bool).
+    # Better to overestimate than underestimate.
+    return math.ceil(tensor.numpy().size * _dtype_byte_size_tf(tensor.dtype))
+
+
+def _dtype_byte_size_tf(dtype) -> float:
+    """
+    Returns the size (in bytes) occupied by one parameter of type `dtype`.
+    Taken from https://github.com/huggingface/transformers/blob/74d9d0cebb0263a3f8ab9c280569170cc74651d0/src/transformers/modeling_tf_utils.py#L608.
+    NOTE: why not `tensor.numpy().nbytes`?
+    Example:
+    ```py
+    >>> _dtype_byte_size(tf.float32)
+    4
+    ```
+    """
+    import tensorflow as tf

+    if dtype == tf.bool:
+        return 1 / 8
+    bit_search = re.search(r"[^\d](\d+)$", dtype.name)
+    if bit_search is None:
+        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
+    bit_size = int(bit_search.groups()[0])
+    return bit_size // 8
````
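The greedy, key-order sharding described in the docstring is easy to check by hand. Below is a minimal sketch, not part of the release: the tensor names and sizes are invented, and it assumes `split_tf_state_dict_into_shards` is re-exported from `huggingface_hub.serialization` (consistent with the `serialization/__init__.py` entry in the file list above) and that `tensorflow` is installed.

```python
import tensorflow as tf

from huggingface_hub.serialization import split_tf_state_dict_into_shards

# Two 4 MB float32 kernels cannot share a 5 MB shard, so the greedy split is:
# shard 1 = dense/kernel + dense/bias, shard 2 = head/kernel.
state_dict = {
    "dense/kernel": tf.zeros((1024, 1024), dtype=tf.float32),  # 4 MB
    "dense/bias": tf.zeros((1024,), dtype=tf.float32),         # 4 KB
    "head/kernel": tf.zeros((1024, 1024), dtype=tf.float32),   # 4 MB
}

split = split_tf_state_dict_into_shards(state_dict, max_shard_size=5 * 1024 * 1024)
print(split.is_sharded)                 # True
print(list(split.filename_to_tensors))  # one .h5 filename per shard
```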
huggingface_hub/serialization/_torch.py
ADDED

````diff
@@ -0,0 +1,199 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains pytorch-specific helpers."""
+import importlib
+from functools import lru_cache
+from typing import TYPE_CHECKING, Dict, Tuple
+
+from ._base import FILENAME_PATTERN, MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
+
+
+if TYPE_CHECKING:
+    import torch
+
+
+def split_torch_state_dict_into_shards(
+    state_dict: Dict[str, "torch.Tensor"],
+    *,
+    filename_pattern: str = FILENAME_PATTERN,
+    max_shard_size: int = MAX_SHARD_SIZE,
+) -> StateDictSplit:
+    """
+    Split a model state dictionary in shards so that each shard is smaller than a given size.
+
+    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
+    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
+    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
+    [6+2+2GB], [6+2GB], [6GB].
+
+    <Tip warning={true}>
+
+    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    size greater than `max_shard_size`.
+
+    </Tip>
+
+    Args:
+        state_dict (`Dict[str, torch.Tensor]`):
+            The state dictionary to save.
+        filename_pattern (`str`, *optional*):
+            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"model{suffix}.safetensors"`.
+        max_shard_size (`int` or `str`, *optional*):
+            The maximum size of each shard, in bytes. Defaults to 5GB.
+
+    Returns:
+        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
+
+    Example:
+    ```py
+    >>> import json
+    >>> import os
+    >>> from safetensors.torch import save_file as safe_save_file
+    >>> from huggingface_hub import split_torch_state_dict_into_shards
+
+    >>> def save_state_dict(state_dict: Dict[str, torch.Tensor], save_directory: str):
+    ...     state_dict_split = split_torch_state_dict_into_shards(state_dict)
+    ...     for filename, tensors in state_dict_split.filename_to_tensors.items():
+    ...         shard = {tensor: state_dict[tensor] for tensor in tensors}
+    ...         safe_save_file(
+    ...             shard,
+    ...             os.path.join(save_directory, filename),
+    ...             metadata={"format": "pt"},
+    ...         )
+    ...     if state_dict_split.is_sharded:
+    ...         index = {
+    ...             "metadata": state_dict_split.metadata,
+    ...             "weight_map": state_dict_split.tensor_to_filename,
+    ...         }
+    ...         with open(os.path.join(save_directory, "model.safetensors.index.json"), "w") as f:
+    ...             f.write(json.dumps(index, indent=2))
+    ```
+    """
+    return split_state_dict_into_shards_factory(
+        state_dict,
+        max_shard_size=max_shard_size,
+        filename_pattern=filename_pattern,
+        get_tensor_size=get_tensor_size,
+        get_storage_id=get_storage_id,
+    )
+
+
+def get_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, int]:
+    """
+    Return unique identifier to a tensor storage.
+
+    Multiple different tensors can share the same underlying storage. For
+    example, "meta" tensors all share the same storage, and thus their identifier will all be equal. This identifier is
+    guaranteed to be unique and constant for this tensor's storage during its lifetime. Two tensor storages with
+    non-overlapping lifetimes may have the same id.
+
+    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
+    """
+    if tensor.device.type == "xla" and is_torch_tpu_available():
+        # NOTE: xla tensors dont have storage
+        # use some other unique id to distinguish.
+        # this is a XLA tensor, it must be created using torch_xla's
+        # device. So the following import is safe:
+        import torch_xla
+
+        unique_id = torch_xla._XLAC._xla_get_tensor_id(tensor)
+    else:
+        unique_id = storage_ptr(tensor)
+
+    return tensor.device, unique_id, get_storage_size(tensor)
+
+
+def get_tensor_size(tensor: "torch.Tensor") -> int:
+    return tensor.numel() * tensor.element_size()
+
+
+@lru_cache()
+def is_torch_tpu_available(check_device=True):
+    """
+    Checks if `torch_xla` is installed and potentially if a TPU is in the environment
+
+    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/utils/import_utils.py#L463.
+    """
+    if importlib.util.find_spec("torch_xla") is not None:
+        if check_device:
+            # We need to check if `xla_device` can be found, will raise a RuntimeError if not
+            try:
+                import torch_xla.core.xla_model as xm
+
+                _ = xm.xla_device()
+                return True
+            except RuntimeError:
+                return False
+        return True
+    return False
+
+
+def storage_ptr(tensor: "torch.Tensor") -> int:
+    """
+    Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L11C1-L20C21.
+    """
+    try:
+        return tensor.untyped_storage().data_ptr()
+    except Exception:
+        # Fallback for torch==1.10
+        try:
+            return tensor.storage().data_ptr()
+        except NotImplementedError:
+            # Fallback for meta storage
+            return 0
+
+
+def get_storage_size(tensor: "torch.Tensor") -> int:
+    """
+    Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L31C1-L41C59
+    """
+    try:
+        return tensor.untyped_storage().nbytes()
+    except AttributeError:
+        # Fallback for torch==1.10
+        try:
+            return tensor.storage().size() * _get_dtype_size(tensor.dtype)
+        except NotImplementedError:
+            # Fallback for meta storage
+            # On torch >=2.0 this is the tensor size
+            return tensor.nelement() * _get_dtype_size(tensor.dtype)
+
+
+@lru_cache()
+def _get_dtype_size(dtype: "torch.dtype") -> int:
+    """
+    Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L344
+    """
+    import torch
+
+    # torch.float8 formats require 2.1; we do not support these dtypes on earlier versions
+    _float8_e4m3fn = getattr(torch, "float8_e4m3fn", None)
+    _float8_e5m2 = getattr(torch, "float8_e5m2", None)
+    _SIZE = {
+        torch.int64: 8,
+        torch.float32: 4,
+        torch.int32: 4,
+        torch.bfloat16: 2,
+        torch.float16: 2,
+        torch.int16: 2,
+        torch.uint8: 1,
+        torch.int8: 1,
+        torch.bool: 1,
+        torch.float64: 8,
+        _float8_e4m3fn: 1,
+        _float8_e5m2: 1,
+    }
+    return _SIZE[dtype]
````
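`get_storage_id` is what lets the sharding factory recognize tensors that are views of the same underlying storage, so shared weights are not written twice. A minimal sketch of that behavior (the variable names are illustrative, and it imports from the internal `huggingface_hub.serialization._torch` module shown above; assumes `torch` is installed):

```python
import torch

from huggingface_hub.serialization._torch import get_storage_id, get_tensor_size

base = torch.zeros(16, dtype=torch.float32)
view = base[:8]        # a view: shares base's storage
clone = base.clone()   # a copy: owns its own storage

print(get_storage_id(base) == get_storage_id(view))   # True: same (device, ptr, size)
print(get_storage_id(base) == get_storage_id(clone))  # False: different storage pointer
print(get_tensor_size(view))                          # 32 bytes: 8 elements * 4 bytes each
```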
huggingface_hub/templates/modelcard_template.md
CHANGED

```diff
@@ -82,7 +82,7 @@ Use the code below to get started with the model.
 
 {{ training_data | default("[More Information Needed]", true)}}
 
-### Training Procedure 
+### Training Procedure
 
 <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
 
@@ -198,6 +198,3 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ## Model Card Contact
 
 {{ model_card_contact | default("[More Information Needed]", true)}}
-
-
-
```
huggingface_hub/utils/__init__.py
CHANGED

```diff
@@ -1,4 +1,3 @@
-# flake8: noqa
 #!/usr/bin/env python
 # coding=utf-8
 # Copyright 2021 The HuggingFace Inc. team. All rights reserved.
@@ -15,6 +14,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
+# ruff: noqa: F401
+
 from . import tqdm as _tqdm  # _tqdm is the module
 from ._cache_assets import cached_assets_path
 from ._cache_manager import (
@@ -31,6 +32,7 @@ from ._chunk_utils import chunk_iterable
 from ._datetime import parse_datetime
 from ._errors import (
     BadRequestError,
+    DisabledRepoError,
     EntryNotFoundError,
     FileMetadataError,
     GatedRepoError,
@@ -40,15 +42,14 @@ from ._errors import (
     RevisionNotFoundError,
     hf_raise_for_status,
 )
-from .
+from ._experimental import experimental
 from ._fixes import SoftTemporaryDirectory, yaml_dump
 from ._git_credential import list_credential_helpers, set_git_credential, unset_git_credential
-from ._headers import build_hf_headers, get_token_to_send
+from ._headers import LocalTokenNotFoundError, build_hf_headers, get_token_to_send
 from ._hf_folder import HfFolder
-from ._http import configure_http_backend, get_session, http_backoff, reset_sessions
+from ._http import OfflineModeIsEnabled, configure_http_backend, get_session, http_backoff, reset_sessions
 from ._pagination import paginate
-from ._paths import
-from ._experimental import experimental
+from ._paths import IGNORE_GIT_FOLDER_PATTERNS, filter_repo_objects
 from ._runtime import (
     dump_environment_info,
     get_aiohttp_version,
@@ -70,28 +71,32 @@ from ._runtime import (
     is_aiohttp_available,
     is_fastai_available,
     is_fastcore_available,
-    is_numpy_available,
     is_google_colab,
     is_gradio_available,
     is_graphviz_available,
     is_hf_transfer_available,
     is_jinja_available,
     is_notebook,
+    is_numpy_available,
+    is_package_available,
     is_pillow_available,
     is_pydantic_available,
     is_pydot_available,
+    is_safetensors_available,
     is_tensorboard_available,
     is_tf_available,
     is_torch_available,
 )
 from ._safetensors import (
+    NotASafetensorsRepoError,
     SafetensorsFileMetadata,
+    SafetensorsParsingError,
     SafetensorsRepoMetadata,
     TensorInfo,
-    SafetensorsParsingError,
-    NotASafetensorsRepoError,
 )
 from ._subprocess import capture_output, run_interactive_subprocess, run_subprocess
+from ._telemetry import send_telemetry
+from ._token import get_token
 from ._validators import (
     HFValidationError,
     smoothly_deprecate_use_auth_token,
@@ -105,4 +110,3 @@ from .tqdm import (
     tqdm,
     tqdm_stream_file,
 )
-from ._telemetry import send_telemetry
```
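Net effect of the reshuffle above: besides alphabetizing the import lists, `huggingface_hub.utils` now re-exports several names that previously required deep imports. A quick smoke test; every name below appears in the `+` import lines above:

```python
from huggingface_hub.utils import (
    DisabledRepoError,        # new error class (see _errors.py below)
    LocalTokenNotFoundError,  # newly re-exported from ._headers
    OfflineModeIsEnabled,     # newly re-exported from ._http
    get_token,                # from the new ._token module
    is_package_available,     # new ._runtime helper (see _runtime.py below)
    send_telemetry,
)

print(is_package_available("safetensors"))  # True once safetensors is installed
```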
huggingface_hub/utils/_datetime.py
CHANGED

```diff
@@ -13,13 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains utilities to handle datetimes in Huggingface Hub."""
-from datetime import datetime, timedelta, timezone
-
-
-# Local machine offset compared to UTC.
-# Taken from https://stackoverflow.com/a/3168394.
-# `utcoffset()` returns `None` if no offset -> empty timedelta.
-UTC_OFFSET = datetime.now(timezone.utc).astimezone().utcoffset() or timedelta()
+from datetime import datetime, timezone
 
 
 def parse_datetime(date_string: str) -> datetime:
@@ -51,16 +45,15 @@ def parse_datetime(date_string: str) -> datetime:
             If `date_string` cannot be parsed.
     """
     try:
-        # Datetime ending with a Z means "UTC".
-        #
+        # Datetime ending with a Z means "UTC". We parse the date and then explicitly
+        # set the timezone to UTC.
         # See https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC)
         # Taken from https://stackoverflow.com/a/3168394.
         if len(date_string) == 30:
             # Means timezoned-timestamp with nanoseconds precision. We need to truncate the last 3 digits.
             date_string = date_string[:-4] + "Z"
         dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
-        dt
-        return dt.astimezone(timezone.utc)  # Set explicit timezone
+        return dt.replace(tzinfo=timezone.utc)  # Set explicit timezone
     except ValueError as e:
         raise ValueError(
             f"Cannot parse '{date_string}' as a datetime. Date string is expected to"
```
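This is a genuine bug fix, not a cleanup: `strptime` returns a naive datetime, and the removed `UTC_OFFSET`/`astimezone` dance interpreted that naive value through the machine's local timezone, while `replace(tzinfo=...)` stamps it as UTC without shifting it. A minimal stdlib-only sketch of the difference:

```python
from datetime import datetime, timezone

naive = datetime.strptime("2024-02-21T12:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%fZ")

# New behavior: the parsed wall-clock time is declared to be UTC, unchanged.
print(naive.replace(tzinfo=timezone.utc).isoformat())  # 2024-02-21T12:00:00+00:00

# astimezone() on a naive datetime first assumes it is *local* time, so on a
# UTC+02:00 machine it yields 10:00 UTC instead of the intended 12:00 UTC.
print(naive.astimezone(timezone.utc).isoformat())      # depends on local timezone
```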
huggingface_hub/utils/_errors.py
CHANGED

````diff
@@ -154,6 +154,24 @@ class GatedRepoError(RepositoryNotFoundError):
     """
 
 
+class DisabledRepoError(HfHubHTTPError):
+    """
+    Raised when trying to access a repository that has been disabled by its author.
+
+    Example:
+
+    ```py
+    >>> from huggingface_hub import dataset_info
+    >>> dataset_info("laion/laion-art")
+    (...)
+    huggingface_hub.utils._errors.DisabledRepoError: 403 Client Error. (Request ID: Root=1-659fc3fa-3031673e0f92c71a2260dbe2;bc6f4dfb-b30a-4862-af0a-5cfe827610d8)
+
+    Cannot access repository for url https://huggingface.co/api/datasets/laion/laion-art.
+    Access to this resource is disabled.
+    ```
+    """
+
+
 class RevisionNotFoundError(HfHubHTTPError):
     """
     Raised when trying to access a hf.co URL with a valid repository but an invalid
@@ -286,6 +304,7 @@ def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None)
         response.raise_for_status()
     except HTTPError as e:
         error_code = response.headers.get("X-Error-Code")
+        error_message = response.headers.get("X-Error-Message")
 
         if error_code == "RevisionNotFound":
             message = f"{response.status_code} Client Error." + "\n\n" + f"Revision Not Found for url: {response.url}."
@@ -301,6 +320,16 @@ def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None)
             )
             raise GatedRepoError(message, response) from e
 
+        elif error_message == "Access to this resource is disabled.":
+            message = (
+                f"{response.status_code} Client Error."
+                + "\n\n"
+                + f"Cannot access repository for url {response.url}."
+                + "\n"
+                + "Access to this resource is disabled."
+            )
+            raise DisabledRepoError(message, response) from e
+
         elif error_code == "RepoNotFound" or (
             response.status_code == 401
             and response.request is not None
````
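With `hf_raise_for_status` dispatching on the `X-Error-Message` header, callers can distinguish a disabled repo from a gated or missing one instead of pattern-matching on the 403 body. A short sketch (the repo id is the one from the docstring above; any disabled repo behaves the same, and `GatedRepoError` must be caught before its parent `RepositoryNotFoundError`):

```python
from huggingface_hub import dataset_info
from huggingface_hub.utils import DisabledRepoError, GatedRepoError, RepositoryNotFoundError

try:
    info = dataset_info("laion/laion-art")
except DisabledRepoError:
    print("Repo exists but was disabled by its author.")
except GatedRepoError:
    print("Repo is gated; request access first.")
except RepositoryNotFoundError:
    print("Repo does not exist (or is private).")
```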
huggingface_hub/utils/_runtime.py
CHANGED

```diff
@@ -38,6 +38,7 @@ _CANDIDATES = {
     "pillow": {"Pillow"},
     "pydantic": {"pydantic"},
     "pydot": {"pydot"},
+    "safetensors": {"safetensors"},
     "tensorboard": {"tensorboardX"},
     "tensorflow": (
         "tensorflow",
@@ -69,7 +70,7 @@ def _get_version(package_name: str) -> str:
     return _package_versions.get(package_name, "N/A")
 
 
-def
+def is_package_available(package_name: str) -> bool:
     return _get_version(package_name) != "N/A"
 
 
@@ -85,7 +86,7 @@ def get_hf_hub_version() -> str:
 
 # aiohttp
 def is_aiohttp_available() -> bool:
-    return
+    return is_package_available("aiohttp")
 
 
 def get_aiohttp_version() -> str:
@@ -94,7 +95,7 @@ def get_aiohttp_version() -> str:
 
 # FastAI
 def is_fastai_available() -> bool:
-    return
+    return is_package_available("fastai")
 
 
 def get_fastai_version() -> str:
@@ -103,7 +104,7 @@ def get_fastai_version() -> str:
 
 # Fastcore
 def is_fastcore_available() -> bool:
-    return
+    return is_package_available("fastcore")
 
 
 def get_fastcore_version() -> str:
@@ -112,7 +113,7 @@ def get_fastcore_version() -> str:
 
 # FastAI
 def is_gradio_available() -> bool:
-    return
+    return is_package_available("gradio")
 
 
 def get_gradio_version() -> str:
@@ -121,7 +122,7 @@ def get_gradio_version() -> str:
 
 # Graphviz
 def is_graphviz_available() -> bool:
-    return
+    return is_package_available("graphviz")
 
 
 def get_graphviz_version() -> str:
@@ -130,7 +131,7 @@ def get_graphviz_version() -> str:
 
 # hf_transfer
 def is_hf_transfer_available() -> bool:
-    return
+    return is_package_available("hf_transfer")
 
 
 def get_hf_transfer_version() -> str:
@@ -139,7 +140,7 @@ def get_hf_transfer_version() -> str:
 
 # Numpy
 def is_numpy_available() -> bool:
-    return
+    return is_package_available("numpy")
 
 
 def get_numpy_version() -> str:
@@ -148,7 +149,7 @@ def get_numpy_version() -> str:
 
 # Jinja
 def is_jinja_available() -> bool:
-    return
+    return is_package_available("jinja")
 
 
 def get_jinja_version() -> str:
@@ -157,7 +158,7 @@ def get_jinja_version() -> str:
 
 # Pillow
 def is_pillow_available() -> bool:
-    return
+    return is_package_available("pillow")
 
 
 def get_pillow_version() -> str:
@@ -166,7 +167,7 @@ def get_pillow_version() -> str:
 
 # Pydantic
 def is_pydantic_available() -> bool:
-    if not
+    if not is_package_available("pydantic"):
         return False
     # For Pydantic, we add an extra check to test whether it is correctly installed or not. If both pydantic 2.x and
     # typing_extensions<=4.5.0 are installed, then pydantic will fail at import time. This should not happen when
@@ -195,7 +196,7 @@ def get_pydantic_version() -> str:
 
 # Pydot
 def is_pydot_available() -> bool:
-    return
+    return is_package_available("pydot")
 
 
 def get_pydot_version() -> str:
@@ -204,7 +205,7 @@ def get_pydot_version() -> str:
 
 # Tensorboard
 def is_tensorboard_available() -> bool:
-    return
+    return is_package_available("tensorboard")
 
 
 def get_tensorboard_version() -> str:
@@ -213,7 +214,7 @@ def get_tensorboard_version() -> str:
 
 # Tensorflow
 def is_tf_available() -> bool:
-    return
+    return is_package_available("tensorflow")
 
 
 def get_tf_version() -> str:
@@ -222,13 +223,18 @@ def get_tf_version() -> str:
 
 # Torch
 def is_torch_available() -> bool:
-    return
+    return is_package_available("torch")
 
 
 def get_torch_version() -> str:
     return _get_version("torch")
 
 
+# Safetensors
+def is_safetensors_available() -> bool:
+    return is_package_available("safetensors")
+
+
 # Shell-related helpers
 try:
     # Set to `True` if script is running in a Google Colab notebook.
```