huggingface-hub 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of huggingface-hub might be problematic. Click here for more details.
- huggingface_hub/__init__.py +31 -5
- huggingface_hub/_inference_endpoints.py +348 -0
- huggingface_hub/_login.py +9 -7
- huggingface_hub/_multi_commits.py +1 -1
- huggingface_hub/_snapshot_download.py +6 -7
- huggingface_hub/_space_api.py +7 -4
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +7 -7
- huggingface_hub/commands/lfs.py +3 -6
- huggingface_hub/commands/user.py +1 -4
- huggingface_hub/constants.py +27 -0
- huggingface_hub/file_download.py +142 -134
- huggingface_hub/hf_api.py +1036 -501
- huggingface_hub/hf_file_system.py +57 -12
- huggingface_hub/hub_mixin.py +3 -5
- huggingface_hub/inference/_client.py +43 -8
- huggingface_hub/inference/_common.py +8 -16
- huggingface_hub/inference/_generated/_async_client.py +41 -8
- huggingface_hub/inference/_text_generation.py +43 -0
- huggingface_hub/inference_api.py +1 -1
- huggingface_hub/lfs.py +32 -14
- huggingface_hub/repocard_data.py +7 -0
- huggingface_hub/repository.py +19 -3
- huggingface_hub/templates/modelcard_template.md +1 -1
- huggingface_hub/utils/__init__.py +1 -1
- huggingface_hub/utils/_cache_assets.py +3 -3
- huggingface_hub/utils/_cache_manager.py +6 -7
- huggingface_hub/utils/_datetime.py +3 -1
- huggingface_hub/utils/_errors.py +10 -0
- huggingface_hub/utils/_hf_folder.py +4 -2
- huggingface_hub/utils/_http.py +10 -1
- huggingface_hub/utils/_runtime.py +4 -2
- huggingface_hub/utils/endpoint_helpers.py +27 -175
- huggingface_hub/utils/insecure_hashlib.py +34 -0
- huggingface_hub/utils/logging.py +4 -6
- huggingface_hub/utils/sha.py +2 -1
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/METADATA +16 -15
- huggingface_hub-0.19.0.dist-info/RECORD +74 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/WHEEL +1 -1
- huggingface_hub-0.18.0.dist-info/RECORD +0 -72
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/top_level.txt +0 -0
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
from pathlib import Path
|
|
16
16
|
from typing import Union
|
|
17
17
|
|
|
18
|
-
from ..constants import
|
|
18
|
+
from ..constants import HF_ASSETS_CACHE
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def cached_assets_path(
|
|
@@ -91,7 +91,7 @@ def cached_assets_path(
|
|
|
91
91
|
assets_dir (`str`, `Path`, *optional*):
|
|
92
92
|
Path to the folder where assets are cached. This must not be the same folder
|
|
93
93
|
where Hub files are cached. Defaults to `HF_HOME / "assets"` if not provided.
|
|
94
|
-
Can also be set with `
|
|
94
|
+
Can also be set with `HF_ASSETS_CACHE` environment variable.
|
|
95
95
|
|
|
96
96
|
Returns:
|
|
97
97
|
Path to the cache folder (`Path`).
|
|
@@ -115,7 +115,7 @@ def cached_assets_path(
|
|
|
115
115
|
"""
|
|
116
116
|
# Resolve assets_dir
|
|
117
117
|
if assets_dir is None:
|
|
118
|
-
assets_dir =
|
|
118
|
+
assets_dir = HF_ASSETS_CACHE
|
|
119
119
|
assets_dir = Path(assets_dir).expanduser().resolve()
|
|
120
120
|
|
|
121
121
|
# Avoid names that could create path issues
|
|
@@ -21,7 +21,7 @@ from dataclasses import dataclass
|
|
|
21
21
|
from pathlib import Path
|
|
22
22
|
from typing import Dict, FrozenSet, List, Literal, Optional, Set, Union
|
|
23
23
|
|
|
24
|
-
from ..constants import
|
|
24
|
+
from ..constants import HF_HUB_CACHE
|
|
25
25
|
from . import logging
|
|
26
26
|
|
|
27
27
|
|
|
@@ -580,26 +580,25 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:
|
|
|
580
580
|
Returns: a [`~HFCacheInfo`] object.
|
|
581
581
|
"""
|
|
582
582
|
if cache_dir is None:
|
|
583
|
-
cache_dir =
|
|
583
|
+
cache_dir = HF_HUB_CACHE
|
|
584
584
|
|
|
585
585
|
cache_dir = Path(cache_dir).expanduser().resolve()
|
|
586
586
|
if not cache_dir.exists():
|
|
587
587
|
raise CacheNotFound(
|
|
588
|
-
f"Cache directory not found: {cache_dir}. Please use `cache_dir`"
|
|
589
|
-
" argument or set `HUGGINGFACE_HUB_CACHE` environment variable.",
|
|
588
|
+
f"Cache directory not found: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable.",
|
|
590
589
|
cache_dir=cache_dir,
|
|
591
590
|
)
|
|
592
591
|
|
|
593
592
|
if cache_dir.is_file():
|
|
594
593
|
raise ValueError(
|
|
595
|
-
f"Scan cache expects a directory but found a file: {cache_dir}. Please use"
|
|
596
|
-
" `cache_dir` argument or set `HUGGINGFACE_HUB_CACHE` environment"
|
|
597
|
-
" variable."
|
|
594
|
+
f"Scan cache expects a directory but found a file: {cache_dir}. Please use `cache_dir` argument or set `HF_HUB_CACHE` environment variable."
|
|
598
595
|
)
|
|
599
596
|
|
|
600
597
|
repos: Set[CachedRepoInfo] = set()
|
|
601
598
|
warnings: List[CorruptedCacheException] = []
|
|
602
599
|
for repo_path in cache_dir.iterdir():
|
|
600
|
+
if repo_path.name == ".locks": # skip './.locks/' folder
|
|
601
|
+
continue
|
|
603
602
|
try:
|
|
604
603
|
repos.add(_scan_cached_repo(repo_path))
|
|
605
604
|
except CorruptedCacheException as e:
|
|
@@ -55,7 +55,9 @@ def parse_datetime(date_string: str) -> datetime:
|
|
|
55
55
|
# timezone and then move it to the appropriate UTC timezone.
|
|
56
56
|
# See https://en.wikipedia.org/wiki/ISO_8601#Coordinated_Universal_Time_(UTC)
|
|
57
57
|
# Taken from https://stackoverflow.com/a/3168394.
|
|
58
|
-
|
|
58
|
+
if len(date_string) == 30:
|
|
59
|
+
# Means timezoned-timestamp with nanoseconds precision. We need to truncate the last 3 digits.
|
|
60
|
+
date_string = date_string[:-4] + "Z"
|
|
59
61
|
dt = datetime.strptime(date_string, "%Y-%m-%dT%H:%M:%S.%fZ")
|
|
60
62
|
dt += UTC_OFFSET # By default, datetime is not timezoned -> move to UTC time
|
|
61
63
|
return dt.astimezone(timezone.utc) # Set explicit timezone
|
huggingface_hub/utils/_errors.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import Optional
|
|
|
2
2
|
|
|
3
3
|
from requests import HTTPError, Response
|
|
4
4
|
|
|
5
|
+
from ..constants import INFERENCE_ENDPOINTS_ENDPOINT
|
|
5
6
|
from ._fixes import JSONDecodeError
|
|
6
7
|
|
|
7
8
|
|
|
@@ -293,6 +294,15 @@ def hf_raise_for_status(response: Response, endpoint_name: Optional[str] = None)
|
|
|
293
294
|
# This prevent from raising a misleading `RepositoryNotFoundError` (see below).
|
|
294
295
|
pass
|
|
295
296
|
|
|
297
|
+
elif (
|
|
298
|
+
response.status_code == 401
|
|
299
|
+
and response.request.url is not None
|
|
300
|
+
and INFERENCE_ENDPOINTS_ENDPOINT in response.request.url
|
|
301
|
+
):
|
|
302
|
+
# Not enough permission to list Inference Endpoints from this org. We don't raise a custom error for this.
|
|
303
|
+
# This prevent from raising a misleading `RepositoryNotFoundError` (see below).
|
|
304
|
+
pass
|
|
305
|
+
|
|
296
306
|
elif error_code == "RepoNotFound" or response.status_code == 401:
|
|
297
307
|
# 401 is misleading as it is returned for:
|
|
298
308
|
# - private and gated repos if user is not authenticated
|
|
@@ -46,7 +46,7 @@ class HfFolder:
|
|
|
46
46
|
"""
|
|
47
47
|
Get token or None if not existent.
|
|
48
48
|
|
|
49
|
-
Note that a token can be also provided using the `
|
|
49
|
+
Note that a token can be also provided using the `HF_TOKEN` environment variable.
|
|
50
50
|
|
|
51
51
|
Token is saved in the huggingface home folder. You can configure it by setting
|
|
52
52
|
the `HF_HOME` environment variable. Previous location was `~/.huggingface/token`.
|
|
@@ -63,7 +63,9 @@ class HfFolder:
|
|
|
63
63
|
pass
|
|
64
64
|
|
|
65
65
|
# 1. Is it set by environment variable ?
|
|
66
|
-
token: Optional[str] = os.environ.get("
|
|
66
|
+
token: Optional[str] = os.environ.get("HF_TOKEN")
|
|
67
|
+
if token is None: # Ensure backward compatibility but doesn't have priority
|
|
68
|
+
token = os.environ.get("HUGGING_FACE_HUB_TOKEN")
|
|
67
69
|
if token is not None:
|
|
68
70
|
token = token.replace("\r", "").replace("\n", "").strip()
|
|
69
71
|
return token
|
huggingface_hub/utils/_http.py
CHANGED
|
@@ -113,7 +113,7 @@ def configure_http_backend(backend_factory: BACKEND_FACTORY_T = _default_backend
|
|
|
113
113
|
"""
|
|
114
114
|
global _GLOBAL_BACKEND_FACTORY
|
|
115
115
|
_GLOBAL_BACKEND_FACTORY = backend_factory
|
|
116
|
-
|
|
116
|
+
reset_sessions()
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
def get_session() -> requests.Session:
|
|
@@ -148,6 +148,15 @@ def get_session() -> requests.Session:
|
|
|
148
148
|
return _get_session_from_cache(process_id=os.getpid(), thread_id=threading.get_ident())
|
|
149
149
|
|
|
150
150
|
|
|
151
|
+
def reset_sessions() -> None:
|
|
152
|
+
"""Reset the cache of sessions.
|
|
153
|
+
|
|
154
|
+
Mostly used internally when sessions are reconfigured or an SSLError is raised.
|
|
155
|
+
See [`configure_http_backend`] for more details.
|
|
156
|
+
"""
|
|
157
|
+
_get_session_from_cache.cache_clear()
|
|
158
|
+
|
|
159
|
+
|
|
151
160
|
@lru_cache
|
|
152
161
|
def _get_session_from_cache(process_id: int, thread_id: int) -> requests.Session:
|
|
153
162
|
"""
|
|
@@ -305,8 +305,8 @@ def dump_environment_info() -> Dict[str, Any]:
|
|
|
305
305
|
|
|
306
306
|
# Environment variables
|
|
307
307
|
info["ENDPOINT"] = constants.ENDPOINT
|
|
308
|
-
info["
|
|
309
|
-
info["
|
|
308
|
+
info["HF_HUB_CACHE"] = constants.HF_HUB_CACHE
|
|
309
|
+
info["HF_ASSETS_CACHE"] = constants.HF_ASSETS_CACHE
|
|
310
310
|
info["HF_TOKEN_PATH"] = constants.HF_TOKEN_PATH
|
|
311
311
|
info["HF_HUB_OFFLINE"] = constants.HF_HUB_OFFLINE
|
|
312
312
|
info["HF_HUB_DISABLE_TELEMETRY"] = constants.HF_HUB_DISABLE_TELEMETRY
|
|
@@ -315,6 +315,8 @@ def dump_environment_info() -> Dict[str, Any]:
|
|
|
315
315
|
info["HF_HUB_DISABLE_EXPERIMENTAL_WARNING"] = constants.HF_HUB_DISABLE_EXPERIMENTAL_WARNING
|
|
316
316
|
info["HF_HUB_DISABLE_IMPLICIT_TOKEN"] = constants.HF_HUB_DISABLE_IMPLICIT_TOKEN
|
|
317
317
|
info["HF_HUB_ENABLE_HF_TRANSFER"] = constants.HF_HUB_ENABLE_HF_TRANSFER
|
|
318
|
+
info["HF_HUB_ETAG_TIMEOUT"] = constants.HF_HUB_ETAG_TIMEOUT
|
|
319
|
+
info["HF_HUB_DOWNLOAD_TIMEOUT"] = constants.HF_HUB_DOWNLOAD_TIMEOUT
|
|
318
320
|
|
|
319
321
|
print("\nCopy-and-paste the text below in your GitHub issue.\n")
|
|
320
322
|
print("\n".join([f"- {prop}: {val}" for prop, val in info.items()]) + "\n")
|
|
@@ -16,27 +16,28 @@ with the aim for a user-friendly interface.
|
|
|
16
16
|
import math
|
|
17
17
|
import re
|
|
18
18
|
from dataclasses import dataclass
|
|
19
|
-
from typing import TYPE_CHECKING,
|
|
19
|
+
from typing import TYPE_CHECKING, List, Optional, Union
|
|
20
|
+
|
|
21
|
+
from ..repocard_data import ModelCardData
|
|
20
22
|
|
|
21
23
|
|
|
22
24
|
if TYPE_CHECKING:
|
|
23
25
|
from ..hf_api import ModelInfo
|
|
24
26
|
|
|
25
27
|
|
|
26
|
-
def
|
|
27
|
-
|
|
28
|
-
minimum_threshold: Optional[float] = None,
|
|
29
|
-
maximum_threshold: Optional[float] = None,
|
|
30
|
-
) -> Iterable["ModelInfo"]:
|
|
31
|
-
"""Filters a list of models for those that include an emission tag and limit them to between two thresholds
|
|
28
|
+
def _is_emission_within_treshold(model_info: "ModelInfo", minimum_threshold: float, maximum_threshold: float) -> bool:
|
|
29
|
+
"""Checks if a model's emission is within a given threshold.
|
|
32
30
|
|
|
33
31
|
Args:
|
|
34
|
-
|
|
35
|
-
A
|
|
36
|
-
minimum_threshold (`float
|
|
32
|
+
model_info (`ModelInfo`):
|
|
33
|
+
A model info object containing the model's emission information.
|
|
34
|
+
minimum_threshold (`float`):
|
|
37
35
|
A minimum carbon threshold to filter by, such as 1.
|
|
38
|
-
maximum_threshold (`float
|
|
36
|
+
maximum_threshold (`float`):
|
|
39
37
|
A maximum carbon threshold to filter by, such as 10.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
`bool`: Whether the model's emission is within the given threshold.
|
|
40
41
|
"""
|
|
41
42
|
if minimum_threshold is None and maximum_threshold is None:
|
|
42
43
|
raise ValueError("Both `minimum_threshold` and `maximum_threshold` cannot both be `None`")
|
|
@@ -45,26 +46,24 @@ def _filter_emissions(
|
|
|
45
46
|
if maximum_threshold is None:
|
|
46
47
|
maximum_threshold = math.inf
|
|
47
48
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
continue
|
|
49
|
+
card_data = getattr(model_info, "card_data", None)
|
|
50
|
+
if card_data is None or not isinstance(card_data, (dict, ModelCardData)):
|
|
51
|
+
return False
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
53
|
+
# Get CO2 emission metadata
|
|
54
|
+
emission = card_data.get("co2_eq_emissions", None)
|
|
55
|
+
if isinstance(emission, dict):
|
|
56
|
+
emission = emission["emissions"]
|
|
57
|
+
if not emission:
|
|
58
|
+
return False
|
|
59
59
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
60
|
+
# Filter out if value is missing or out of range
|
|
61
|
+
matched = re.search(r"\d+\.\d+|\d+", str(emission))
|
|
62
|
+
if matched is None:
|
|
63
|
+
return False
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
yield model
|
|
65
|
+
emission_value = float(matched.group(0))
|
|
66
|
+
return minimum_threshold <= emission_value <= maximum_threshold
|
|
68
67
|
|
|
69
68
|
|
|
70
69
|
@dataclass
|
|
@@ -203,16 +202,11 @@ class ModelFilter:
|
|
|
203
202
|
>>> # For the task
|
|
204
203
|
>>> new_filter = ModelFilter(task="text-classification")
|
|
205
204
|
|
|
206
|
-
>>> # Retrieving tags using the `HfApi.get_model_tags` method
|
|
207
205
|
>>> from huggingface_hub import HfApi
|
|
208
206
|
|
|
209
207
|
>>> api = HfApi()
|
|
210
208
|
# To list model tags
|
|
211
209
|
|
|
212
|
-
>>> api.get_model_tags()
|
|
213
|
-
# To list dataset tags
|
|
214
|
-
|
|
215
|
-
>>> api.get_dataset_tags()
|
|
216
210
|
>>> new_filter = ModelFilter(tags="benchmark:raft")
|
|
217
211
|
|
|
218
212
|
>>> # Related to the dataset
|
|
@@ -227,145 +221,3 @@ class ModelFilter:
|
|
|
227
221
|
task: Optional[Union[str, List[str]]] = None
|
|
228
222
|
trained_dataset: Optional[Union[str, List[str]]] = None
|
|
229
223
|
tags: Optional[Union[str, List[str]]] = None
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
class AttributeDictionary(dict):
|
|
233
|
-
"""
|
|
234
|
-
`dict` subclass that also provides access to keys as attributes
|
|
235
|
-
|
|
236
|
-
If a key starts with a number, it will exist in the dictionary but not as an
|
|
237
|
-
attribute
|
|
238
|
-
|
|
239
|
-
Example:
|
|
240
|
-
|
|
241
|
-
```python
|
|
242
|
-
>>> d = AttributeDictionary()
|
|
243
|
-
>>> d["test"] = "a"
|
|
244
|
-
>>> print(d.test) # prints "a"
|
|
245
|
-
```
|
|
246
|
-
|
|
247
|
-
"""
|
|
248
|
-
|
|
249
|
-
def __getattr__(self, k):
|
|
250
|
-
if k in self:
|
|
251
|
-
return self[k]
|
|
252
|
-
else:
|
|
253
|
-
raise AttributeError(k)
|
|
254
|
-
|
|
255
|
-
def __setattr__(self, k, v):
|
|
256
|
-
(self.__setitem__, super().__setattr__)[k[0] == "_"](k, v)
|
|
257
|
-
|
|
258
|
-
def __delattr__(self, k):
|
|
259
|
-
if k in self:
|
|
260
|
-
del self[k]
|
|
261
|
-
else:
|
|
262
|
-
raise AttributeError(k)
|
|
263
|
-
|
|
264
|
-
def __dir__(self):
|
|
265
|
-
keys = sorted(self.keys())
|
|
266
|
-
keys = [key for key in keys if key.replace("_", "").isalpha()]
|
|
267
|
-
return super().__dir__() + keys
|
|
268
|
-
|
|
269
|
-
def __repr__(self):
|
|
270
|
-
repr_str = "Available Attributes or Keys:\n"
|
|
271
|
-
for key in sorted(self.keys()):
|
|
272
|
-
repr_str += f" * {key}"
|
|
273
|
-
if not key.replace("_", "").isalpha():
|
|
274
|
-
repr_str += " (Key only)"
|
|
275
|
-
repr_str += "\n"
|
|
276
|
-
return repr_str
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
class GeneralTags(AttributeDictionary):
|
|
280
|
-
"""
|
|
281
|
-
A namespace object holding all tags, filtered by `keys` If a tag starts with
|
|
282
|
-
a number, it will only exist in the dictionary
|
|
283
|
-
|
|
284
|
-
Example:
|
|
285
|
-
```python
|
|
286
|
-
>>> a.b["1a"] # will work
|
|
287
|
-
>>> a["b"]["1a"] # will work
|
|
288
|
-
>>> # a.b.1a # will not work
|
|
289
|
-
```
|
|
290
|
-
|
|
291
|
-
Args:
|
|
292
|
-
tag_dictionary (`dict`):
|
|
293
|
-
A dictionary of tags returned from the /api/***-tags-by-type api
|
|
294
|
-
endpoint
|
|
295
|
-
keys (`list`):
|
|
296
|
-
A list of keys to unpack the `tag_dictionary` with, such as
|
|
297
|
-
`["library","language"]`
|
|
298
|
-
"""
|
|
299
|
-
|
|
300
|
-
def __init__(self, tag_dictionary: dict, keys: Optional[list] = None):
|
|
301
|
-
self._tag_dictionary = tag_dictionary
|
|
302
|
-
if keys is None:
|
|
303
|
-
keys = list(self._tag_dictionary.keys())
|
|
304
|
-
for key in keys:
|
|
305
|
-
self._unpack_and_assign_dictionary(key)
|
|
306
|
-
|
|
307
|
-
def _unpack_and_assign_dictionary(self, key: str):
|
|
308
|
-
"Assign nested attributes to `self.key` containing information as an `AttributeDictionary`"
|
|
309
|
-
ref = AttributeDictionary()
|
|
310
|
-
setattr(self, key, ref)
|
|
311
|
-
for item in self._tag_dictionary.get(key, []):
|
|
312
|
-
label = item["label"].replace(" ", "").replace("-", "_").replace(".", "_")
|
|
313
|
-
ref[label] = item["id"]
|
|
314
|
-
self[key] = ref
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
class ModelTags(GeneralTags):
|
|
318
|
-
"""
|
|
319
|
-
A namespace object holding all available model tags If a tag starts with a
|
|
320
|
-
number, it will only exist in the dictionary
|
|
321
|
-
|
|
322
|
-
Example:
|
|
323
|
-
|
|
324
|
-
```python
|
|
325
|
-
>>> a.dataset["1_5BArabicCorpus"] # will work
|
|
326
|
-
>>> a["dataset"]["1_5BArabicCorpus"] # will work
|
|
327
|
-
>>> # o.dataset.1_5BArabicCorpus # will not work
|
|
328
|
-
```
|
|
329
|
-
|
|
330
|
-
Args:
|
|
331
|
-
model_tag_dictionary (`dict`):
|
|
332
|
-
A dictionary of valid model tags, returned from the
|
|
333
|
-
/api/models-tags-by-type api endpoint
|
|
334
|
-
"""
|
|
335
|
-
|
|
336
|
-
def __init__(self, model_tag_dictionary: dict):
|
|
337
|
-
keys = ["library", "language", "license", "dataset", "pipeline_tag"]
|
|
338
|
-
super().__init__(model_tag_dictionary, keys)
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
class DatasetTags(GeneralTags):
|
|
342
|
-
"""
|
|
343
|
-
A namespace object holding all available dataset tags If a tag starts with a
|
|
344
|
-
number, it will only exist in the dictionary
|
|
345
|
-
|
|
346
|
-
Example
|
|
347
|
-
|
|
348
|
-
```python
|
|
349
|
-
>>> a.size_categories["100K<n<1M"] # will work
|
|
350
|
-
>>> a["size_categories"]["100K<n<1M"] # will work
|
|
351
|
-
>>> # o.size_categories.100K<n<1M # will not work
|
|
352
|
-
```
|
|
353
|
-
|
|
354
|
-
Args:
|
|
355
|
-
dataset_tag_dictionary (`dict`):
|
|
356
|
-
A dictionary of valid dataset tags, returned from the
|
|
357
|
-
/api/datasets-tags-by-type api endpoint
|
|
358
|
-
"""
|
|
359
|
-
|
|
360
|
-
def __init__(self, dataset_tag_dictionary: dict):
|
|
361
|
-
keys = [
|
|
362
|
-
"language",
|
|
363
|
-
"multilinguality",
|
|
364
|
-
"language_creators",
|
|
365
|
-
"task_categories",
|
|
366
|
-
"size_categories",
|
|
367
|
-
"benchmark",
|
|
368
|
-
"task_ids",
|
|
369
|
-
"license",
|
|
370
|
-
]
|
|
371
|
-
super().__init__(dataset_tag_dictionary, keys)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Taken from https://github.com/mlflow/mlflow/pull/10119
|
|
2
|
+
#
|
|
3
|
+
# DO NOT use this function for security purposes (e.g., password hashing).
|
|
4
|
+
#
|
|
5
|
+
# In Python >= 3.9, insecure hashing algorithms such as MD5 fail in FIPS-compliant
|
|
6
|
+
# environments unless `usedforsecurity=False` is explicitly passed.
|
|
7
|
+
#
|
|
8
|
+
# References:
|
|
9
|
+
# - https://github.com/mlflow/mlflow/issues/9905
|
|
10
|
+
# - https://github.com/mlflow/mlflow/pull/10119
|
|
11
|
+
# - https://docs.python.org/3/library/hashlib.html
|
|
12
|
+
# - https://github.com/huggingface/transformers/pull/27038
|
|
13
|
+
#
|
|
14
|
+
# Usage:
|
|
15
|
+
# ```python
|
|
16
|
+
# # Use
|
|
17
|
+
# from huggingface_hub.utils.insecure_hashlib import sha256
|
|
18
|
+
# # instead of
|
|
19
|
+
# from hashlib import sha256
|
|
20
|
+
#
|
|
21
|
+
# # Use
|
|
22
|
+
# from huggingface_hub.utils import insecure_hashlib
|
|
23
|
+
# # instead of
|
|
24
|
+
# import hashlib
|
|
25
|
+
# ```
|
|
26
|
+
import functools
|
|
27
|
+
import hashlib
|
|
28
|
+
import sys
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
|
|
32
|
+
md5 = functools.partial(hashlib.md5, **_kwargs)
|
|
33
|
+
sha1 = functools.partial(hashlib.sha1, **_kwargs)
|
|
34
|
+
sha256 = functools.partial(hashlib.sha256, **_kwargs)
|
huggingface_hub/utils/logging.py
CHANGED
|
@@ -50,18 +50,16 @@ def _get_library_root_logger() -> logging.Logger:
|
|
|
50
50
|
|
|
51
51
|
def _get_default_logging_level():
|
|
52
52
|
"""
|
|
53
|
-
If
|
|
54
|
-
|
|
55
|
-
`_default_log_level`
|
|
53
|
+
If `HF_HUB_VERBOSITY` env var is set to one of the valid choices return that as the new default level. If it is not
|
|
54
|
+
- fall back to `_default_log_level`
|
|
56
55
|
"""
|
|
57
|
-
env_level_str = os.getenv("
|
|
56
|
+
env_level_str = os.getenv("HF_HUB_VERBOSITY", None)
|
|
58
57
|
if env_level_str:
|
|
59
58
|
if env_level_str in log_levels:
|
|
60
59
|
return log_levels[env_level_str]
|
|
61
60
|
else:
|
|
62
61
|
logging.getLogger().warning(
|
|
63
|
-
f"Unknown option
|
|
64
|
-
f"has to be one of: { ', '.join(log_levels.keys()) }"
|
|
62
|
+
f"Unknown option HF_HUB_VERBOSITY={env_level_str}, has to be one of: { ', '.join(log_levels.keys()) }"
|
|
65
63
|
)
|
|
66
64
|
return _default_log_level
|
|
67
65
|
|
huggingface_hub/utils/sha.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""Utilities to efficiently compute the SHA 256 hash of a bunch of bytes."""
|
|
2
|
-
from hashlib import sha256
|
|
3
2
|
from typing import BinaryIO, Optional
|
|
4
3
|
|
|
4
|
+
from .insecure_hashlib import sha256
|
|
5
|
+
|
|
5
6
|
|
|
6
7
|
def sha_fileobj(fileobj: BinaryIO, chunk_size: Optional[int] = None) -> bytes:
|
|
7
8
|
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: huggingface-hub
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.19.0
|
|
4
4
|
Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
|
|
5
5
|
Home-page: https://github.com/huggingface/huggingface_hub
|
|
6
6
|
Author: Hugging Face, Inc.
|
|
@@ -33,7 +33,7 @@ Requires-Dist: packaging >=20.9
|
|
|
33
33
|
Provides-Extra: all
|
|
34
34
|
Requires-Dist: InquirerPy ==0.3.4 ; extra == 'all'
|
|
35
35
|
Requires-Dist: aiohttp ; extra == 'all'
|
|
36
|
-
Requires-Dist: pydantic <
|
|
36
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'all'
|
|
37
37
|
Requires-Dist: jedi ; extra == 'all'
|
|
38
38
|
Requires-Dist: Jinja2 ; extra == 'all'
|
|
39
39
|
Requires-Dist: pytest ; extra == 'all'
|
|
@@ -47,9 +47,9 @@ Requires-Dist: soundfile ; extra == 'all'
|
|
|
47
47
|
Requires-Dist: Pillow ; extra == 'all'
|
|
48
48
|
Requires-Dist: gradio ; extra == 'all'
|
|
49
49
|
Requires-Dist: numpy ; extra == 'all'
|
|
50
|
-
Requires-Dist:
|
|
51
|
-
Requires-Dist: ruff >=0.0.241 ; extra == 'all'
|
|
50
|
+
Requires-Dist: ruff >=0.1.3 ; extra == 'all'
|
|
52
51
|
Requires-Dist: mypy ==1.5.1 ; extra == 'all'
|
|
52
|
+
Requires-Dist: typing-extensions >=4.8.0 ; extra == 'all'
|
|
53
53
|
Requires-Dist: types-PyYAML ; extra == 'all'
|
|
54
54
|
Requires-Dist: types-requests ; extra == 'all'
|
|
55
55
|
Requires-Dist: types-simplejson ; extra == 'all'
|
|
@@ -61,7 +61,7 @@ Requires-Dist: InquirerPy ==0.3.4 ; extra == 'cli'
|
|
|
61
61
|
Provides-Extra: dev
|
|
62
62
|
Requires-Dist: InquirerPy ==0.3.4 ; extra == 'dev'
|
|
63
63
|
Requires-Dist: aiohttp ; extra == 'dev'
|
|
64
|
-
Requires-Dist: pydantic <
|
|
64
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'dev'
|
|
65
65
|
Requires-Dist: jedi ; extra == 'dev'
|
|
66
66
|
Requires-Dist: Jinja2 ; extra == 'dev'
|
|
67
67
|
Requires-Dist: pytest ; extra == 'dev'
|
|
@@ -75,9 +75,9 @@ Requires-Dist: soundfile ; extra == 'dev'
|
|
|
75
75
|
Requires-Dist: Pillow ; extra == 'dev'
|
|
76
76
|
Requires-Dist: gradio ; extra == 'dev'
|
|
77
77
|
Requires-Dist: numpy ; extra == 'dev'
|
|
78
|
-
Requires-Dist:
|
|
79
|
-
Requires-Dist: ruff >=0.0.241 ; extra == 'dev'
|
|
78
|
+
Requires-Dist: ruff >=0.1.3 ; extra == 'dev'
|
|
80
79
|
Requires-Dist: mypy ==1.5.1 ; extra == 'dev'
|
|
80
|
+
Requires-Dist: typing-extensions >=4.8.0 ; extra == 'dev'
|
|
81
81
|
Requires-Dist: types-PyYAML ; extra == 'dev'
|
|
82
82
|
Requires-Dist: types-requests ; extra == 'dev'
|
|
83
83
|
Requires-Dist: types-simplejson ; extra == 'dev'
|
|
@@ -87,7 +87,7 @@ Requires-Dist: types-urllib3 ; extra == 'dev'
|
|
|
87
87
|
Provides-Extra: docs
|
|
88
88
|
Requires-Dist: InquirerPy ==0.3.4 ; extra == 'docs'
|
|
89
89
|
Requires-Dist: aiohttp ; extra == 'docs'
|
|
90
|
-
Requires-Dist: pydantic <
|
|
90
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'docs'
|
|
91
91
|
Requires-Dist: jedi ; extra == 'docs'
|
|
92
92
|
Requires-Dist: Jinja2 ; extra == 'docs'
|
|
93
93
|
Requires-Dist: pytest ; extra == 'docs'
|
|
@@ -101,9 +101,9 @@ Requires-Dist: soundfile ; extra == 'docs'
|
|
|
101
101
|
Requires-Dist: Pillow ; extra == 'docs'
|
|
102
102
|
Requires-Dist: gradio ; extra == 'docs'
|
|
103
103
|
Requires-Dist: numpy ; extra == 'docs'
|
|
104
|
-
Requires-Dist:
|
|
105
|
-
Requires-Dist: ruff >=0.0.241 ; extra == 'docs'
|
|
104
|
+
Requires-Dist: ruff >=0.1.3 ; extra == 'docs'
|
|
106
105
|
Requires-Dist: mypy ==1.5.1 ; extra == 'docs'
|
|
106
|
+
Requires-Dist: typing-extensions >=4.8.0 ; extra == 'docs'
|
|
107
107
|
Requires-Dist: types-PyYAML ; extra == 'docs'
|
|
108
108
|
Requires-Dist: types-requests ; extra == 'docs'
|
|
109
109
|
Requires-Dist: types-simplejson ; extra == 'docs'
|
|
@@ -118,10 +118,9 @@ Requires-Dist: fastai >=2.4 ; extra == 'fastai'
|
|
|
118
118
|
Requires-Dist: fastcore >=1.3.27 ; extra == 'fastai'
|
|
119
119
|
Provides-Extra: inference
|
|
120
120
|
Requires-Dist: aiohttp ; extra == 'inference'
|
|
121
|
-
Requires-Dist: pydantic <
|
|
121
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'inference'
|
|
122
122
|
Provides-Extra: quality
|
|
123
|
-
Requires-Dist:
|
|
124
|
-
Requires-Dist: ruff >=0.0.241 ; extra == 'quality'
|
|
123
|
+
Requires-Dist: ruff >=0.1.3 ; extra == 'quality'
|
|
125
124
|
Requires-Dist: mypy ==1.5.1 ; extra == 'quality'
|
|
126
125
|
Provides-Extra: tensorflow
|
|
127
126
|
Requires-Dist: tensorflow ; extra == 'tensorflow'
|
|
@@ -130,7 +129,7 @@ Requires-Dist: graphviz ; extra == 'tensorflow'
|
|
|
130
129
|
Provides-Extra: testing
|
|
131
130
|
Requires-Dist: InquirerPy ==0.3.4 ; extra == 'testing'
|
|
132
131
|
Requires-Dist: aiohttp ; extra == 'testing'
|
|
133
|
-
Requires-Dist: pydantic <
|
|
132
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'testing'
|
|
134
133
|
Requires-Dist: jedi ; extra == 'testing'
|
|
135
134
|
Requires-Dist: Jinja2 ; extra == 'testing'
|
|
136
135
|
Requires-Dist: pytest ; extra == 'testing'
|
|
@@ -147,13 +146,14 @@ Requires-Dist: numpy ; extra == 'testing'
|
|
|
147
146
|
Provides-Extra: torch
|
|
148
147
|
Requires-Dist: torch ; extra == 'torch'
|
|
149
148
|
Provides-Extra: typing
|
|
149
|
+
Requires-Dist: typing-extensions >=4.8.0 ; extra == 'typing'
|
|
150
150
|
Requires-Dist: types-PyYAML ; extra == 'typing'
|
|
151
151
|
Requires-Dist: types-requests ; extra == 'typing'
|
|
152
152
|
Requires-Dist: types-simplejson ; extra == 'typing'
|
|
153
153
|
Requires-Dist: types-toml ; extra == 'typing'
|
|
154
154
|
Requires-Dist: types-tqdm ; extra == 'typing'
|
|
155
155
|
Requires-Dist: types-urllib3 ; extra == 'typing'
|
|
156
|
-
Requires-Dist: pydantic <
|
|
156
|
+
Requires-Dist: pydantic <3.0,>1.1 ; extra == 'typing'
|
|
157
157
|
|
|
158
158
|
<p align="center">
|
|
159
159
|
<br/>
|
|
@@ -177,6 +177,7 @@ Requires-Dist: pydantic <2.0 ; extra == 'typing'
|
|
|
177
177
|
<p>
|
|
178
178
|
<b>English</b> |
|
|
179
179
|
<a href="https://github.com/huggingface/huggingface_hub/blob/main/README_de.md">Deutsch</a> |
|
|
180
|
+
<a href="https://github.com/huggingface/huggingface_hub/blob/main/README_hi.md">हिंदी</a> |
|
|
180
181
|
<a href="https://github.com/huggingface/huggingface_hub/blob/main/README_ko.md">한국어</a>
|
|
181
182
|
<p>
|
|
182
183
|
</h4>
|