huggingface-hub 0.18.0rc0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +31 -5
- huggingface_hub/_commit_api.py +7 -11
- huggingface_hub/_inference_endpoints.py +348 -0
- huggingface_hub/_login.py +9 -7
- huggingface_hub/_multi_commits.py +1 -1
- huggingface_hub/_snapshot_download.py +6 -7
- huggingface_hub/_space_api.py +7 -4
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +7 -7
- huggingface_hub/commands/lfs.py +3 -6
- huggingface_hub/commands/user.py +1 -4
- huggingface_hub/constants.py +27 -0
- huggingface_hub/file_download.py +142 -134
- huggingface_hub/hf_api.py +1058 -503
- huggingface_hub/hf_file_system.py +57 -12
- huggingface_hub/hub_mixin.py +3 -5
- huggingface_hub/inference/_client.py +43 -8
- huggingface_hub/inference/_common.py +8 -16
- huggingface_hub/inference/_generated/_async_client.py +41 -8
- huggingface_hub/inference/_text_generation.py +43 -0
- huggingface_hub/inference_api.py +1 -1
- huggingface_hub/lfs.py +32 -14
- huggingface_hub/repocard_data.py +7 -0
- huggingface_hub/repository.py +19 -3
- huggingface_hub/templates/datasetcard_template.md +83 -43
- huggingface_hub/templates/modelcard_template.md +4 -3
- huggingface_hub/utils/__init__.py +1 -1
- huggingface_hub/utils/_cache_assets.py +3 -3
- huggingface_hub/utils/_cache_manager.py +6 -7
- huggingface_hub/utils/_datetime.py +3 -1
- huggingface_hub/utils/_errors.py +10 -0
- huggingface_hub/utils/_hf_folder.py +4 -2
- huggingface_hub/utils/_http.py +10 -1
- huggingface_hub/utils/_runtime.py +4 -2
- huggingface_hub/utils/endpoint_helpers.py +27 -175
- huggingface_hub/utils/insecure_hashlib.py +34 -0
- huggingface_hub/utils/logging.py +4 -6
- huggingface_hub/utils/sha.py +2 -1
- {huggingface_hub-0.18.0rc0.dist-info → huggingface_hub-0.19.0.dist-info}/METADATA +16 -15
- huggingface_hub-0.19.0.dist-info/RECORD +74 -0
- {huggingface_hub-0.18.0rc0.dist-info → huggingface_hub-0.19.0.dist-info}/WHEEL +1 -1
- huggingface_hub-0.18.0rc0.dist-info/RECORD +0 -72
- {huggingface_hub-0.18.0rc0.dist-info → huggingface_hub-0.19.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.18.0rc0.dist-info → huggingface_hub-0.19.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.18.0rc0.dist-info → huggingface_hub-0.19.0.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED
@@ -1,5 +1,6 @@
 import copy
 import fnmatch
+import inspect
 import io
 import json
 import os
@@ -7,33 +8,39 @@ import re
 import shutil
 import stat
 import tempfile
+import time
 import uuid
 import warnings
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from hashlib import sha256
 from pathlib import Path
 from typing import Any, BinaryIO, Dict, Generator, Literal, Optional, Tuple, Union
 from urllib.parse import quote, urlparse
 
 import requests
 from filelock import FileLock
-from requests.exceptions import ProxyError, Timeout
 
 from huggingface_hub import constants
 
 from . import __version__  # noqa: F401 # for backward compatibility
 from .constants import (
+    DEFAULT_ETAG_TIMEOUT,
+    DEFAULT_REQUEST_TIMEOUT,
     DEFAULT_REVISION,
+    DOWNLOAD_CHUNK_SIZE,
     ENDPOINT,
+    HF_HUB_CACHE,
     HF_HUB_DISABLE_SYMLINKS_WARNING,
+    HF_HUB_DOWNLOAD_TIMEOUT,
     HF_HUB_ENABLE_HF_TRANSFER,
+    HF_HUB_ETAG_TIMEOUT,
+    HF_TRANSFER_CONCURRENCY,
     HUGGINGFACE_CO_URL_TEMPLATE,
     HUGGINGFACE_HEADER_X_LINKED_ETAG,
     HUGGINGFACE_HEADER_X_LINKED_SIZE,
     HUGGINGFACE_HEADER_X_REPO_COMMIT,
-    HUGGINGFACE_HUB_CACHE,
+    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
     REPO_ID_SEPARATOR,
     REPO_TYPES,
     REPO_TYPES_URL_PREFIXES,
@@ -52,10 +59,10 @@ from .utils import (
     get_graphviz_version,  # noqa: F401 # for backward compatibility
     get_jinja_version,  # noqa: F401 # for backward compatibility
     get_pydot_version,  # noqa: F401 # for backward compatibility
+    get_session,
     get_tf_version,  # noqa: F401 # for backward compatibility
     get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
-    http_backoff,
     is_fastai_available,  # noqa: F401 # for backward compatibility
     is_fastcore_available,  # noqa: F401 # for backward compatibility
     is_graphviz_available,  # noqa: F401 # for backward compatibility
@@ -64,12 +71,14 @@ from .utils import (
     is_tf_available,  # noqa: F401 # for backward compatibility
     is_torch_available,  # noqa: F401 # for backward compatibility
     logging,
+    reset_sessions,
     tqdm,
     validate_hf_hub_args,
 )
 from .utils._headers import _http_user_agent
 from .utils._runtime import _PY_VERSION  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
+from .utils.insecure_hashlib import sha256
 
 
 logger = logging.get_logger(__name__)
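Note: `sha256` is now imported from a new `huggingface_hub/utils/insecure_hashlib.py` module (added in this release, see the file list above) instead of the standard `hashlib`. The module's contents are not shown in this diff; a minimal sketch of the usual pattern, assuming it wraps hashlib with `usedforsecurity=False` (available since Python 3.9) so that FIPS-restricted interpreters accept non-cryptographic uses such as cache keys:

import functools
import hashlib
import sys

# Flag the hashes as non-security-critical so FIPS-restricted builds allow them.
# `usedforsecurity` only exists on Python >= 3.9.
_kwargs = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}

md5 = functools.partial(hashlib.md5, **_kwargs)
sha1 = functools.partial(hashlib.sha1, **_kwargs)
sha256 = functools.partial(hashlib.sha256, **_kwargs)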
@@ -95,7 +104,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
     """
     # Defaults to HF cache
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     cache_dir = str(Path(cache_dir).expanduser().resolve())  # make it unique
 
     # Check symlink compatibility only once (per cache directory) at first time use
@@ -200,9 +209,6 @@ def hf_hub_url(
         revision (`str`, *optional*):
             An optional Git revision id which can be a branch name, a tag, or a
             commit hash.
-        endpoint (`str`, *optional*):
-            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
-            environment variable.
 
     Example:
 
@@ -319,7 +325,7 @@ def filename_to_url(
     )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
 
@@ -367,46 +373,24 @@ def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
 
 
 def _request_wrapper(
-    method: HTTP_METHOD_T,
-    url: str,
-    *,
-    max_retries: int = 0,
-    base_wait_time: float = 0.5,
-    max_wait_time: float = 2,
-    timeout: Optional[float] = 10.0,
-    follow_relative_redirects: bool = False,
-    **params,
+    method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
 ) -> requests.Response:
     """Wrapper around requests methods to add several features.
 
     What it does:
-        1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1).
-           If enabled, an `OfflineModeIsEnabled` exception is raised.
-        2. Follow relative redirects if `follow_relative_redirects=True` even when the
-           `allow_redirection` kwarg is set to False.
-        3. Retry in case request fails with a `Timeout` or `ProxyError`, with exponential backoff.
+        1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1). If enabled, a
+           `OfflineModeIsEnabled` exception is raised.
+        2. Follow relative redirects if `follow_relative_redirects=True` even when `allow_redirection=False`.
 
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
         url (`str`):
             The URL of the resource to fetch.
-        max_retries (`int`, *optional*, defaults to `0`):
-            Maximum number of retries, defaults to 0 (no retries).
-        base_wait_time (`float`, *optional*, defaults to `0.5`):
-            Duration (in seconds) to wait before retrying the first time.
-            Wait time between retries then grows exponentially, capped by
-            `max_wait_time`.
-        max_wait_time (`float`, *optional*, defaults to `2`):
-            Maximum amount of time between two retries, in seconds.
-        timeout (`float`, *optional*, defaults to `10`):
-            How many seconds to wait for the server to send data before
-            giving up which is passed to `requests.request`.
         follow_relative_redirects (`bool`, *optional*, defaults to `False`)
-            If True, relative redirection (redirection to the same site) will be
-            resolved even when `allow_redirection` kwarg is set to False. Useful when
-            we want to follow a redirection to a renamed repository without following
-            redirection to a CDN.
+            If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
+            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
+            following redirection to a CDN.
         **params (`dict`, *optional*):
             Params to pass to `requests.request`.
     """
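Note: step 1 of the docstring relies on `_raise_if_offline_mode_is_enabled`, defined earlier in this file but not shown in the diff. A rough sketch of such a guard; the exception base class and the accepted env values here are assumptions:

import os


class OfflineModeIsEnabled(ConnectionError):
    """Raised when a network call is attempted while HF_HUB_OFFLINE is set (sketch)."""


def _raise_if_offline(msg=None):
    # Refuse to touch the network when offline mode is requested via env variable.
    if os.environ.get("HF_HUB_OFFLINE", "").upper() in ("1", "ON", "YES", "TRUE"):
        raise OfflineModeIsEnabled(msg or "Offline mode is enabled.")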
@@ -418,10 +402,6 @@ def _request_wrapper(
         response = _request_wrapper(
             method=method,
             url=url,
-            max_retries=max_retries,
-            base_wait_time=base_wait_time,
-            max_wait_time=max_wait_time,
-            timeout=timeout,
             follow_relative_redirects=False,
             **params,
         )
@@ -437,38 +417,14 @@ def _request_wrapper(
                 #
                 # Highly inspired by `resolve_redirects` from requests library.
                 # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
-                return _request_wrapper(
-                    method=method,
-                    url=urlparse(url)._replace(path=parsed_target.path).geturl(),
-                    max_retries=max_retries,
-                    base_wait_time=base_wait_time,
-                    max_wait_time=max_wait_time,
-                    timeout=timeout,
-                    follow_relative_redirects=True,  # resolve recursively
-                    **params,
-                )
+                next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
         return response
 
-    # 3. Exponential backoff
-    return http_backoff(
-        method=method,
-        url=url,
-        max_retries=max_retries,
-        base_wait_time=base_wait_time,
-        max_wait_time=max_wait_time,
-        retry_on_exceptions=(Timeout, ProxyError),
-        retry_on_status_codes=(),
-        timeout=timeout,
-        **params,
-    )
-
-
-def _request_with_retry(*args, **kwargs) -> requests.Response:
-    """Deprecated method. Please use `_request_wrapper` instead.
-
-    Alias to keep backward compatibility (used in Transformers).
-    """
-    return _request_wrapper(*args, **kwargs)
+    # Perform request and return if status_code is not in the retry list.
+    response = get_session().request(method=method, url=url, **params)
+    hf_raise_for_status(response)
+    return response
 
 
 def http_get(
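Note: the relative-redirect resolution above keeps the scheme and host of the original URL and only swaps in the path from the `Location` header. A standalone illustration of the `urlparse(...)._replace(path=...)` trick (helper name and URLs are illustrative):

from urllib.parse import urlparse


def resolve_relative_location(current_url: str, location: str) -> str:
    # `location` is a relative target such as "/new-name/resolve/main/config.json";
    # keep scheme + host from `current_url` and replace only the path.
    parsed_target = urlparse(location)
    return urlparse(current_url)._replace(path=parsed_target.path).geturl()


print(resolve_relative_location(
    "https://huggingface.co/old-name/resolve/main/config.json",
    "/new-name/resolve/main/config.json",
))  # -> https://huggingface.co/new-name/resolve/main/config.json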
@@ -478,49 +434,39 @@ def http_get(
     proxies=None,
     resume_size: float = 0,
     headers: Optional[Dict[str, str]] = None,
-    timeout: Optional[float] = 10.0,
-    max_retries: int = 0,
     expected_size: Optional[int] = None,
+    _nb_retries: int = 5,
 ):
     """
     Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
+
+    If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a
+    transient error (network outage?). We log a warning message and try to resume the download a few times before
+    giving up. The method gives up after 5 attempts if no new data has been received from the server.
     """
-    if not resume_size:
-        if HF_HUB_ENABLE_HF_TRANSFER:
+    hf_transfer = None
+    if HF_HUB_ENABLE_HF_TRANSFER:
+        if resume_size != 0:
+            warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
+        elif proxies is not None:
+            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        else:
             try:
-                # Download file using an external Rust-based package. Download is faster
-                # (~2x speed-up) but support less features (no progress bars).
-                from hf_transfer import download
-
-                logger.debug(f"Download {url} using HF_TRANSFER.")
-                max_files = 100
-                chunk_size = 10 * 1024 * 1024  # 10 MB
-                download(url, temp_file.name, max_files, chunk_size, headers=headers)
-                return
+                import hf_transfer  # type: ignore[no-redef]
             except ImportError:
                 raise ValueError(
                     "Fast download using 'hf_transfer' is enabled"
                     " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
                     " available in your environment. Try `pip install hf_transfer`."
                 )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
 
+    initial_headers = headers
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = "bytes=%d-" % (resume_size,)
 
     r = _request_wrapper(
-        method="GET",
-        url=url,
-        stream=True,
-        proxies=proxies,
-        headers=headers,
-        timeout=timeout,
-        max_retries=max_retries,
+        method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=HF_HUB_DOWNLOAD_TIMEOUT
     )
     hf_raise_for_status(r)
     content_length = r.headers.get("Content-Length")
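Note: the `Range` header set above is what makes resuming a partial download possible. A standalone sketch of the same request pattern (URL and helper name are placeholders):

import requests


def request_remaining_bytes(url: str, resume_size: int) -> requests.Response:
    # Ask the server for bytes from `resume_size` onward; a 206 Partial Content
    # status means the Range request was honored.
    headers = {"Range": "bytes=%d-" % resume_size} if resume_size > 0 else {}
    return requests.get(url, headers=headers, stream=True, timeout=10.0)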
@@ -541,28 +487,90 @@ def http_get(
     if len(displayed_name) > 40:
         displayed_name = f"(…){displayed_name[-40:]}"
 
-    progress = tqdm(
+    consistency_error_message = (
+        f"Consistency check failed: file should be of size {expected_size} but has size"
+        f" {{actual_size}} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
+        " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
+        " know by opening an issue on https://github.com/huggingface/huggingface_hub."
+    )
+
+    # Stream file to buffer
+    with tqdm(
         unit="B",
         unit_scale=True,
         total=total,
         initial=resume_size,
         desc=displayed_name,
         disable=bool(logger.getEffectiveLevel() == logging.NOTSET),
-    )
-    for chunk in r.iter_content(chunk_size=10 * 1024 * 1024):
-        if chunk:  # filter out keep-alive new chunks
-            progress.update(len(chunk))
-            temp_file.write(chunk)
-    progress.close()
-
-    if expected_size is not None and expected_size != temp_file.tell():
-        raise EnvironmentError(
-            f"Consistency check failed: file should be of size {expected_size} but has size"
-            f" {temp_file.tell()} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
-            " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
-            " know by opening an issue on https://github.com/huggingface/huggingface_hub."
-        )
+    ) as progress:
+        if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
+            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
+            if not supports_callback:
+                warnings.warn(
+                    "You are using an outdated version of `hf_transfer`. "
+                    "Consider upgrading to latest version to enable progress bars "
+                    "using `pip install -U hf_transfer`."
+                )
+            try:
+                hf_transfer.download(
+                    url=url,
+                    filename=temp_file.name,
+                    max_files=HF_TRANSFER_CONCURRENCY,
+                    chunk_size=DOWNLOAD_CHUNK_SIZE,
+                    headers=headers,
+                    parallel_failures=3,
+                    max_retries=5,
+                    **({"callback": progress.update} if supports_callback else {}),
+                )
+            except Exception as e:
+                raise RuntimeError(
+                    "An error occurred while downloading using `hf_transfer`. Consider"
+                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                ) from e
+            if not supports_callback:
+                progress.update(total)
+            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                raise EnvironmentError(
+                    consistency_error_message.format(
+                        actual_size=os.path.getsize(temp_file.name),
+                    )
+                )
+            return
+        new_resume_size = resume_size
+        try:
+            for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
+                if chunk:  # filter out keep-alive new chunks
+                    progress.update(len(chunk))
+                    temp_file.write(chunk)
+                    new_resume_size += len(chunk)
+                    # Some data has been downloaded from the server so we reset the number of retries.
+                    _nb_retries = 5
+        except (requests.ConnectionError, requests.ReadTimeout) as e:
+            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most
+            # likely a transient error (network outage?). We log a warning message and try to resume the download a
+            # few times before giving up. The retry mechanism is basic but should be enough in most cases.
+            if _nb_retries <= 0:
+                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                raise
+            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+            time.sleep(1)
+            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
+            return http_get(
+                url=url,
+                temp_file=temp_file,
+                proxies=proxies,
+                resume_size=new_resume_size,
+                headers=initial_headers,
+                expected_size=expected_size,
+                _nb_retries=_nb_retries - 1,
+            )
 
+    if expected_size is not None and expected_size != temp_file.tell():
+        raise EnvironmentError(
+            consistency_error_message.format(
+                actual_size=temp_file.tell(),
+            )
+        )
 
 
 @validate_hf_hub_args
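Note: the `supports_callback` logic above is a small capability-detection pattern: inspect the installed function's signature before passing a keyword argument that only newer versions accept. A generic sketch of the same idea:

import inspect


def call_with_optional_callback(fn, *args, callback=None, **kwargs):
    # Pass `callback` only when the installed version of `fn` accepts it,
    # mirroring the `supports_callback` check in the hunk above.
    if callback is not None and "callback" in inspect.signature(fn).parameters:
        kwargs["callback"] = callback
    return fn(*args, **kwargs)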
@@ -576,7 +584,7 @@ def cached_download(
     force_download: bool = False,
     force_filename: Optional[str] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float = 10,
+    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
     resume_download: bool = False,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
@@ -656,6 +664,10 @@ def cached_download(
 
     </Tip>
     """
+    if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
+        # Respect environment variable above user value
+        etag_timeout = HF_HUB_ETAG_TIMEOUT
+
     if not legacy_cache_layout:
         warnings.warn(
             "'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to"
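Note: the new block gives the `HF_HUB_ETAG_TIMEOUT` environment variable precedence over a caller-supplied `etag_timeout`. A sketch of that precedence rule in isolation; the 10-second default is an assumption here (the real constant lives in `constants.py`):

import os

_DEFAULT_ETAG_TIMEOUT = 10.0  # assumed default


def effective_etag_timeout(user_value: float) -> float:
    # An env variable explicitly changed from the default wins over the
    # caller's value; otherwise the caller's value is kept.
    env_value = float(os.environ.get("HF_HUB_ETAG_TIMEOUT", _DEFAULT_ETAG_TIMEOUT))
    return env_value if env_value != _DEFAULT_ETAG_TIMEOUT else user_value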
@@ -664,7 +676,7 @@ def cached_download(
         )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
 
@@ -995,7 +1007,6 @@ def hf_hub_download(
     subfolder: Optional[str] = None,
     repo_type: Optional[str] = None,
     revision: Optional[str] = None,
-    endpoint: Optional[str] = None,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
@@ -1005,11 +1016,12 @@
     force_download: bool = False,
     force_filename: Optional[str] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float = 10,
+    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
     resume_download: bool = False,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
     legacy_cache_layout: bool = False,
+    endpoint: Optional[str] = None,
 ) -> str:
     """Download a given file if it's not already present in the local cache.
 
@@ -1069,9 +1081,6 @@
         revision (`str`, *optional*):
             An optional Git revision id which can be a branch name, a tag, or a
             commit hash.
-        endpoint (`str`, *optional*):
-            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
-            environment variable.
         library_name (`str`, *optional*):
             The name of the library to which the object corresponds.
         library_version (`str`, *optional*):
@@ -1138,6 +1147,10 @@
 
     </Tip>
     """
+    if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
+        # Respect environment variable above user value
+        etag_timeout = HF_HUB_ETAG_TIMEOUT
+
     if force_filename is not None:
         warnings.warn(
             "The `force_filename` parameter is deprecated as a new caching system, "
@@ -1173,13 +1186,14 @@
     )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if revision is None:
         revision = DEFAULT_REVISION
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if isinstance(local_dir, Path):
         local_dir = str(local_dir)
+    locks_dir = os.path.join(cache_dir, ".locks")
 
     if subfolder == "":
         subfolder = None
@@ -1393,7 +1407,8 @@
         return pointer_path
 
     # Prevent parallel downloads of the same file with a lock.
-    lock_path = blob_path + ".lock"
+    # etag could be duplicated across repos,
+    lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
 
     # Some Windows versions do not allow for paths longer than 255 characters.
     # In this case, we must specify it is an extended path by using the "\\?\" prefix.
@@ -1403,6 +1418,7 @@
     if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
         blob_path = "\\\\?\\" + os.path.abspath(blob_path)
 
+    Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
     with FileLock(lock_path):
         # If the download just completed while the lock was activated.
         if os.path.exists(pointer_path) and not force_download:
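Note: taken together, the lock-related hunks move lock files from sitting next to each blob to a per-repo `.locks/` tree keyed by etag, with parent folders created on demand. A condensed sketch of the resulting layout (paths are illustrative):

import os
from pathlib import Path

from filelock import FileLock

cache_dir = os.path.expanduser("~/.cache/huggingface/hub")  # illustrative
locks_dir = os.path.join(cache_dir, ".locks")

# One lock per (repo, etag): etags may repeat across repos, hence the per-repo folder.
lock_path = os.path.join(locks_dir, "models--org--repo", "0123abcd.lock")
Path(lock_path).parent.mkdir(parents=True, exist_ok=True)

with FileLock(lock_path):
    ...  # download and move the file into place while holding the lock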
@@ -1477,11 +1493,6 @@
             _chmod_and_replace(temp_file.name, local_dir_filepath)
             pointer_path = local_dir_filepath  # for return value
 
-    try:
-        os.remove(lock_path)
-    except OSError:
-        pass
-
     return pointer_path
 
 
@@ -1542,7 +1553,7 @@ def try_to_load_from_cache(
     if repo_type not in REPO_TYPES:
         raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
 
     object_id = repo_id.replace("/", "--")
     repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
@@ -1583,7 +1594,7 @@ def get_hf_file_metadata(
     url: str,
     token: Union[bool, str, None] = None,
     proxies: Optional[Dict] = None,
-    timeout: Optional[float] = 10,
+    timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
 
@@ -1624,12 +1635,9 @@
     # Return
     return HfFileMetadata(
         commit_hash=r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
-        etag=_normalize_etag(
-            # We favor a custom header indicating the etag of the linked resource, and
-            # we fallback to the regular etag header.
-            r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG)
-            or r.headers.get("ETag")
-        ),
+        # We favor a custom header indicating the etag of the linked resource, and
+        # we fallback to the regular etag header.
+        etag=_normalize_etag(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
         # Either from response headers (if redirected) or defaults to request url
         # Do not use directly `url`, as `_request_wrapper` might have followed relative
         # redirects.
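Note: `_normalize_etag` itself is not part of this diff. A plausible sketch of what such a helper does, to be treated as an assumption rather than the actual implementation:

from typing import Optional


def normalize_etag(etag: Optional[str]) -> Optional[str]:
    # Drop the weak-validator prefix (W/) and surrounding quotes so etags from
    # different servers compare consistently.
    if etag is None:
        return None
    return etag.lstrip("W/").strip('"')


assert normalize_etag('W/"a1b2c3"') == "a1b2c3"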