huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub has been flagged as potentially problematic on the registry page (see that page for details).
- huggingface_hub/__init__.py +46 -45
- huggingface_hub/_commit_api.py +28 -28
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +176 -20
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_login.py +13 -39
- huggingface_hub/_oauth.py +10 -14
- huggingface_hub/_snapshot_download.py +14 -28
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +13 -14
- huggingface_hub/_upload_large_folder.py +120 -13
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/_cli_utils.py +2 -2
- huggingface_hub/cli/auth.py +8 -6
- huggingface_hub/cli/cache.py +18 -20
- huggingface_hub/cli/download.py +4 -4
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +599 -22
- huggingface_hub/cli/lfs.py +4 -4
- huggingface_hub/cli/repo.py +11 -7
- huggingface_hub/cli/repo_files.py +2 -2
- huggingface_hub/cli/upload.py +4 -4
- huggingface_hub/cli/upload_large_folder.py +3 -3
- huggingface_hub/commands/_cli_utils.py +2 -2
- huggingface_hub/commands/delete_cache.py +13 -13
- huggingface_hub/commands/download.py +4 -13
- huggingface_hub/commands/lfs.py +4 -4
- huggingface_hub/commands/repo_files.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/commands/tag.py +1 -3
- huggingface_hub/commands/upload.py +4 -4
- huggingface_hub/commands/upload_large_folder.py +3 -3
- huggingface_hub/commands/user.py +4 -5
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +3 -41
- huggingface_hub/dataclasses.py +16 -19
- huggingface_hub/errors.py +42 -29
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +162 -259
- huggingface_hub/hf_api.py +841 -616
- huggingface_hub/hf_file_system.py +98 -62
- huggingface_hub/hub_mixin.py +37 -57
- huggingface_hub/inference/_client.py +257 -325
- huggingface_hub/inference/_common.py +110 -124
- huggingface_hub/inference/_generated/_async_client.py +307 -432
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +2 -3
- huggingface_hub/inference/_mcp/mcp_client.py +58 -30
- huggingface_hub/inference/_mcp/types.py +10 -7
- huggingface_hub/inference/_mcp/utils.py +11 -7
- huggingface_hub/inference/_providers/__init__.py +4 -2
- huggingface_hub/inference/_providers/_common.py +49 -25
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +52 -21
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +28 -20
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +20 -31
- huggingface_hub/repocard.py +18 -18
- huggingface_hub/repocard_data.py +56 -56
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +10 -4
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +31 -31
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +25 -21
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +4 -4
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +366 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +16 -13
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +32 -11
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
- huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED
@@ -1,6 +1,5 @@
 import copy
 import errno
-import inspect
 import os
 import re
 import shutil
@@ -10,26 +9,19 @@ import uuid
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, BinaryIO,
+from typing import Any, BinaryIO, NoReturn, Optional, Union
 from urllib.parse import quote, urlparse
 
-import requests
+import httpx
 
-from . import (
-    __version__,  # noqa: F401 # for backward compatibility
-    constants,
-)
+from . import constants
 from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
-from .constants import (
-    HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401 # for backward compatibility
-    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
-)
 from .errors import (
-    EntryNotFoundError,
     FileMetadataError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
+    RemoteEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
@@ -39,30 +31,15 @@ from .utils import (
     WeakFileLock,
     XetFileData,
     build_hf_headers,
-    get_fastai_version,  # noqa: F401 # for backward compatibility
-    get_fastcore_version,  # noqa: F401 # for backward compatibility
-    get_graphviz_version,  # noqa: F401 # for backward compatibility
-    get_jinja_version,  # noqa: F401 # for backward compatibility
-    get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_tf_version,  # noqa: F401 # for backward compatibility
-    get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
-    is_fastai_available,  # noqa: F401 # for backward compatibility
-    is_fastcore_available,  # noqa: F401 # for backward compatibility
-    is_graphviz_available,  # noqa: F401 # for backward compatibility
-    is_jinja_available,  # noqa: F401 # for backward compatibility
-    is_pydot_available,  # noqa: F401 # for backward compatibility
-    is_tf_available,  # noqa: F401 # for backward compatibility
-    is_torch_available,  # noqa: F401 # for backward compatibility
     logging,
     parse_xet_file_data_from_response,
     refresh_xet_connection_info,
-    reset_sessions,
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header, http_backoff
-from .utils._runtime import
+from .utils._http import _adjust_range_header, http_backoff, http_stream_backoff
+from .utils._runtime import is_xet_available
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
 from .utils.tqdm import _get_progress_bar_context
@@ -83,7 +60,7 @@ REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
 # Regex to check if the file etag IS a valid sha256
 REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
 
-_are_symlinks_supported_in_dir:
+_are_symlinks_supported_in_dir: dict[str, bool] = {}
 
 
 def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
@@ -252,7 +229,7 @@ def hf_hub_url(
 
     if revision is None:
         revision = constants.DEFAULT_REVISION
-    url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+    url = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
         repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
     )
     # Update endpoint if provided
@@ -261,11 +238,10 @@ def hf_hub_url(
     return url
 
 
-def _request_wrapper(
-
-
-
-    `allow_redirection=False`.
+def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kwargs) -> httpx.Response:
+    """Perform an HTTP request with backoff and follow relative redirects only.
+
+    This is useful to follow a redirection to a renamed repository without following redirection to a CDN.
 
     A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
 
@@ -274,61 +250,59 @@ def _request_wrapper(
             HTTP method, such as 'GET' or 'HEAD'.
         url (`str`):
            The URL of the resource to fetch.
-
-
-            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
-            following redirection to a CDN.
-        **params (`dict`, *optional*):
-            Params to pass to `requests.request`.
+        **httpx_kwargs (`dict`, *optional*):
+            Params to pass to `httpx.request`.
     """
-
-
-        response =
+    while True:
+        # Make the request
+        response = http_backoff(
             method=method,
             url=url,
-
-
+            **httpx_kwargs,
+            follow_redirects=False,
+            retry_on_exceptions=(),
+            retry_on_status_codes=(429,),
         )
+        hf_raise_for_status(response)
 
-        #
-        # This is useful in case of a renamed repository.
+        # Check if response is a relative redirect
         if 300 <= response.status_code <= 399:
             parsed_target = urlparse(response.headers["Location"])
             if parsed_target.netloc == "":
-                #
-
-
-
-
-
-
-                return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
-        return response
-
-    # Perform request and return if status_code is not in the retry list.
-    response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
-    hf_raise_for_status(response)
+                # Relative redirect -> update URL and retry
+                url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                continue
+
+        # Break if no relative redirect
+        break
+
     return response
 
 
-def _get_file_length_from_http_response(response:
+def _get_file_length_from_http_response(response: httpx.Response) -> Optional[int]:
     """
     Get the length of the file from the HTTP response headers.
 
     This function extracts the file size from the HTTP response headers, either from the
     `Content-Range` or `Content-Length` header, if available (in that order).
-            The HTTP response object containing the headers.
-        `int` or `None`: The length of the file in bytes if the information is available,
-        otherwise `None`.
 
     Args:
-        response (`
+        response (`httpx.Response`):
             The HTTP response object.
 
     Returns:
         `int` or `None`: The length of the file in bytes, or None if not available.
     """
 
+    # If HTTP response contains compressed body (e.g. gzip), the `Content-Length` header will
+    # contain the length of the compressed body, not the uncompressed file size.
+    # And at the start of transmission there's no way to know the uncompressed file size for gzip,
+    # thus we return None in that case.
+    content_encoding = response.headers.get("Content-Encoding", "identity").lower()
+    if content_encoding != "identity":
+        # gzip/br/deflate/zstd etc
+        return None
+
     content_range = response.headers.get("Content-Range")
     if content_range is not None:
         return int(content_range.rsplit("/")[-1])
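For reference, the redirect-handling idea introduced above (the old `_request_wrapper(follow_relative_redirects=True)` helper is replaced by `_httpx_follow_relative_redirects`, which re-issues the request only when the `Location` header is a relative path, so a renamed repository is followed but an absolute redirect to a CDN is not) can be reproduced with plain `httpx`. The sketch below is illustrative only and is not the library's code; `http_backoff` and its retry parameters are internal helpers not shown here.

from urllib.parse import urlparse

import httpx


def head_following_relative_redirects(url: str, **kwargs) -> httpx.Response:
    """Issue HEAD requests, following only *relative* redirects (same host)."""
    with httpx.Client(follow_redirects=False) as client:
        while True:
            response = client.request("HEAD", url, **kwargs)
            if 300 <= response.status_code <= 399:
                target = urlparse(response.headers["Location"])
                if target.netloc == "":
                    # Relative redirect (e.g. the repo was renamed): retry on the new path.
                    url = urlparse(url)._replace(path=target.path).geturl()
                    continue
            # Absolute redirect (e.g. to a CDN) or final answer: stop here.
            return response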
@@ -340,13 +314,13 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
     return None
 
 
+@validate_hf_hub_args
 def http_get(
     url: str,
     temp_file: BinaryIO,
     *,
-    proxies: Optional[Dict] = None,
     resume_size: int = 0,
-    headers: Optional[
+    headers: Optional[dict[str, Any]] = None,
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
     _nb_retries: int = 5,
@@ -364,8 +338,6 @@ def http_get(
             The URL of the file to download.
         temp_file (`BinaryIO`):
             The file-like object where to save the file.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
         resume_size (`int`, *optional*):
             The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a
             positive number, the download will resume at the given position.
@@ -387,8 +359,6 @@ def http_get(
     if constants.HF_HUB_ENABLE_HF_TRANSFER:
         if resume_size != 0:
             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
-        elif proxies is not None:
-            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
         elif has_custom_range_header:
             warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
         else:
@@ -417,107 +387,97 @@ def http_get(
             " Try `pip install hf_transfer` or `pip install hf_xet`."
         )
 
-
-        method="GET",
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    with http_stream_backoff(
+        method="GET",
+        url=url,
+        headers=headers,
+        timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+        retry_on_exceptions=(),
+        retry_on_status_codes=(429,),
+    ) as response:
+        hf_raise_for_status(response)
+        total: Optional[int] = _get_file_length_from_http_response(response)
+
+        if displayed_filename is None:
+            displayed_filename = url
+            content_disposition = response.headers.get("Content-Disposition")
+            if content_disposition is not None:
+                match = HEADER_FILENAME_PATTERN.search(content_disposition)
+                if match is not None:
+                    # Means file is on CDN
+                    displayed_filename = match.groupdict()["filename"]
+
+        # Truncate filename if too long to display
+        if len(displayed_filename) > 40:
+            displayed_filename = f"(…){displayed_filename[-40:]}"
+
+        consistency_error_message = (
+            f"Consistency check failed: file should be of size {expected_size} but has size"
+            f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+            " Please retry with `force_download=True`."
+        )
+        progress_cm = _get_progress_bar_context(
+            desc=displayed_filename,
+            log_level=logger.getEffectiveLevel(),
+            total=total,
+            initial=resume_size,
+            name="huggingface_hub.http_get",
+            _tqdm_bar=_tqdm_bar,
+        )
 
-
-
-
-
-
-
-
-
-
-
-
-
+        with progress_cm as progress:
+            if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
+                try:
+                    hf_transfer.download(
+                        url=url,
+                        filename=temp_file.name,
+                        max_files=constants.HF_TRANSFER_CONCURRENCY,
+                        chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                        headers=initial_headers,
+                        parallel_failures=3,
+                        max_retries=5,
+                        callback=progress.update,
+                    )
+                except Exception as e:
+                    raise RuntimeError(
+                        "An error occurred while downloading using `hf_transfer`. Consider"
+                        " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                    ) from e
+                if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                    raise EnvironmentError(
+                        consistency_error_message.format(
+                            actual_size=os.path.getsize(temp_file.name),
+                        )
+                    )
+                return
 
-
-        if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
-            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-            if not supports_callback:
-                warnings.warn(
-                    "You are using an outdated version of `hf_transfer`. "
-                    "Consider upgrading to latest version to enable progress bars "
-                    "using `pip install -U hf_transfer`."
-                )
+            new_resume_size = resume_size
            try:
-
+                for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                    if chunk:  # filter out keep-alive new chunks
+                        progress.update(len(chunk))
+                        temp_file.write(chunk)
+                        new_resume_size += len(chunk)
+                        # Some data has been downloaded from the server so we reset the number of retries.
+                        _nb_retries = 5
+            except (httpx.ConnectError, httpx.TimeoutException) as e:
+                # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+                # a transient error (network outage?). We log a warning message and try to resume the download a few times
+                # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+                if _nb_retries <= 0:
+                    logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                    raise
+                logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+                time.sleep(1)
+                return http_get(
                    url=url,
-
-
-                    chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                    temp_file=temp_file,
+                    resume_size=new_resume_size,
                    headers=initial_headers,
-
-
-
-                )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
-            if not supports_callback:
-                progress.update(total)
-            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                raise EnvironmentError(
-                    consistency_error_message.format(
-                        actual_size=os.path.getsize(temp_file.name),
-                    )
+                    expected_size=expected_size,
+                    _nb_retries=_nb_retries - 1,
+                    _tqdm_bar=_tqdm_bar,
                )
-            return
-            new_resume_size = resume_size
-            try:
-                for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
-                    if chunk:  # filter out keep-alive new chunks
-                        progress.update(len(chunk))
-                        temp_file.write(chunk)
-                        new_resume_size += len(chunk)
-                        # Some data has been downloaded from the server so we reset the number of retries.
-                        _nb_retries = 5
-            except (requests.ConnectionError, requests.ReadTimeout) as e:
-                # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-                # a transient error (network outage?). We log a warning message and try to resume the download a few times
-                # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-                if _nb_retries <= 0:
-                    logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                    raise
-                logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-                time.sleep(1)
-                reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
-                return http_get(
-                    url=url,
-                    temp_file=temp_file,
-                    proxies=proxies,
-                    resume_size=new_resume_size,
-                    headers=initial_headers,
-                    expected_size=expected_size,
-                    _nb_retries=_nb_retries - 1,
-                    _tqdm_bar=_tqdm_bar,
-                )
 
     if expected_size is not None and expected_size != temp_file.tell():
         raise EnvironmentError(
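The rewritten `http_get` above streams the body with httpx (`response.iter_bytes`) instead of requests' `iter_content`, and resumes interrupted downloads by calling itself again with the updated `resume_size`. The same pattern can be sketched in a stripped-down, standalone form, using a plain `Range` header instead of the internal `http_stream_backoff` / `_adjust_range_header` helpers; this is an illustration under the assumption that the server honors `Range` requests, not the library's implementation.

import httpx


def stream_to_file(url: str, path: str, chunk_size: int = 10 * 1024 * 1024, max_retries: int = 5) -> None:
    """Download `url` to `path`, resuming after transient network errors."""
    resume_size = 0
    while True:
        headers = {"Range": f"bytes={resume_size}-"} if resume_size else {}
        mode = "ab" if resume_size else "wb"  # append only when resuming a partial file
        try:
            with open(path, mode) as f, httpx.stream("GET", url, headers=headers, follow_redirects=True) as response:
                response.raise_for_status()
                for chunk in response.iter_bytes(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        resume_size += len(chunk)
            return
        except (httpx.ConnectError, httpx.TimeoutException):
            # Transient error: retry a few times, resuming from the bytes already written.
            if max_retries <= 0:
                raise
            max_retries -= 1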
@@ -531,7 +491,7 @@ def xet_get(
     *,
     incomplete_path: Path,
     xet_file_data: XetFileData,
-    headers:
+    headers: dict[str, str],
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
     _tqdm_bar: Optional[tqdm] = None,
@@ -544,7 +504,7 @@ def xet_get(
             The path to the file to download.
         xet_file_data (`XetFileData`):
             The file metadata needed to make the request to the xet storage service.
-        headers (`
+        headers (`dict[str, str]`):
             The headers to send to the xet storage service.
         expected_size (`int`, *optional*):
             The expected size of the file to download. If set, the download will raise an error if the size of the
@@ -591,7 +551,7 @@ def xet_get(
 
     connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
 
-    def token_refresher() ->
+    def token_refresher() -> tuple[str, int]:
         connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
         if connection_info is None:
             raise ValueError("Failed to refresh token using xet metadata.")
@@ -818,17 +778,13 @@ def hf_hub_download(
     library_version: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
     local_dir: Union[str, Path, None] = None,
-    user_agent: Union[
+    user_agent: Union[dict, str, None] = None,
     force_download: bool = False,
-    proxies: Optional[Dict] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
-    headers: Optional[
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-    resume_download: Optional[bool] = None,
-    force_filename: Optional[str] = None,
-    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
 ) -> str:
     """Download a given file if it's not already present in the local cache.
 
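The signature hunk above drops the `proxies`, `resume_download`, `force_filename` and `local_dir_use_symlinks` keyword arguments from `hf_hub_download`. A hedged migration sketch for 0.x call sites follows; the proxy handling is an assumption based on the httpx import at the top of this diff (httpx honors the standard proxy environment variables by default), and the proxy URL below is hypothetical.

import os

from huggingface_hub import hf_hub_download

# 0.x call sites passed e.g. proxies={"https": "..."}, resume_download=True, local_dir_use_symlinks="auto".
# In 1.0 those kwargs are gone: downloads already resume when possible and local_dir no longer uses symlinks.
# If a proxy is needed, configure it through the environment (assumption: httpx picks this up by default).
os.environ.setdefault("HTTPS_PROXY", "http://proxy.example.com:3128")  # hypothetical proxy

path = hf_hub_download(
    repo_id="gpt2",
    filename="config.json",
    force_download=False,  # set True to discard any cached or incomplete file
)
print(path)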
@@ -891,9 +847,6 @@ def hf_hub_download(
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in
             the local cache.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
             data before giving up which is passed to `requests.request`.
@@ -917,7 +870,7 @@ def hf_hub_download(
             or because it is set to `private` and you do not have access.
         [`~utils.RevisionNotFoundError`]
             If the revision to download from cannot be found.
-        [`~utils.
+        [`~utils.RemoteEntryNotFoundError`]
             If the file to download cannot be found.
         [`~utils.LocalEntryNotFoundError`]
             If network is disabled or unavailable and file is not found in cache.
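Per the docstring hunk above, a missing remote file now surfaces as `RemoteEntryNotFoundError` (replacing the old `EntryNotFoundError`, which is also dropped from the imports at the top of this diff). A minimal sketch of the corresponding error handling, assuming both classes remain importable from `huggingface_hub.errors` as the import hunk suggests:

from huggingface_hub import hf_hub_download
from huggingface_hub.errors import LocalEntryNotFoundError, RemoteEntryNotFoundError

try:
    path = hf_hub_download(repo_id="gpt2", filename="does-not-exist.bin")
except RemoteEntryNotFoundError:
    # The repo and revision exist, but this file is not part of them.
    print("file not found on the Hub")
except LocalEntryNotFoundError:
    # Network unavailable (or HF_HUB_OFFLINE=1) and the file is not in the local cache.
    print("file not cached and network unavailable")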
@@ -933,20 +886,6 @@ def hf_hub_download(
         # Respect environment variable above user value
         etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
 
-    if force_filename is not None:
-        warnings.warn(
-            "The `force_filename` parameter is deprecated as a new caching system, "
-            "which keeps the filenames as they are on the Hub, is now in place.",
-            FutureWarning,
-        )
-    if resume_download is not None:
-        warnings.warn(
-            "`resume_download` is deprecated and will be removed in version 1.0.0. "
-            "Downloads always resume when possible. "
-            "If you want to force a new download, use `force_download=True`.",
-            FutureWarning,
-        )
-
     if cache_dir is None:
         cache_dir = constants.HF_HUB_CACHE
     if revision is None:
@@ -976,15 +915,6 @@ def hf_hub_download(
     )
 
     if local_dir is not None:
-        if local_dir_use_symlinks != "auto":
-            warnings.warn(
-                "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
-                "The process to download files to a local folder has been updated and do "
-                "not rely on symlinks anymore. You only need to pass a destination folder "
-                "as`local_dir`.\n"
-                "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
-            )
-
         return _hf_hub_download_to_local_dir(
             # Destination
             local_dir=local_dir,
@@ -997,7 +927,6 @@ def hf_hub_download(
             endpoint=endpoint,
             etag_timeout=etag_timeout,
             headers=hf_headers,
-            proxies=proxies,
             token=token,
             # Additional options
             cache_dir=cache_dir,
@@ -1017,7 +946,6 @@ def hf_hub_download(
         endpoint=endpoint,
         etag_timeout=etag_timeout,
         headers=hf_headers,
-        proxies=proxies,
         token=token,
         # Additional options
         local_files_only=local_files_only,
@@ -1037,8 +965,7 @@ def _hf_hub_download_to_cache_dir(
     # HTTP info
     endpoint: Optional[str],
     etag_timeout: float,
-    headers:
-    proxies: Optional[Dict],
+    headers: dict[str, str],
     token: Optional[Union[bool, str]],
     # Additional options
     local_files_only: bool,
@@ -1074,7 +1001,6 @@ def _hf_hub_download_to_cache_dir(
         repo_type=repo_type,
         revision=revision,
         endpoint=endpoint,
-        proxies=proxies,
         etag_timeout=etag_timeout,
         headers=headers,
         token=token,
@@ -1170,7 +1096,6 @@ def _hf_hub_download_to_cache_dir(
         incomplete_path=Path(blob_path + ".incomplete"),
         destination_path=Path(blob_path),
         url_to_download=url_to_download,
-        proxies=proxies,
         headers=headers,
         expected_size=expected_size,
         filename=filename,
@@ -1196,8 +1121,7 @@ def _hf_hub_download_to_local_dir(
     # HTTP info
     endpoint: Optional[str],
     etag_timeout: float,
-    headers:
-    proxies: Optional[Dict],
+    headers: dict[str, str],
     token: Union[bool, str, None],
     # Additional options
     cache_dir: str,
@@ -1233,7 +1157,6 @@ def _hf_hub_download_to_local_dir(
         repo_type=repo_type,
         revision=revision,
         endpoint=endpoint,
-        proxies=proxies,
         etag_timeout=etag_timeout,
         headers=headers,
         token=token,
@@ -1299,7 +1222,6 @@ def _hf_hub_download_to_local_dir(
         incomplete_path=paths.incomplete_path(etag),
         destination_path=paths.file_path,
         url_to_download=url_to_download,
-        proxies=proxies,
         headers=headers,
         expected_size=expected_size,
         filename=filename,
@@ -1409,12 +1331,11 @@ def try_to_load_from_cache(
 def get_hf_file_metadata(
     url: str,
     token: Union[bool, str, None] = None,
-    proxies: Optional[Dict] = None,
     timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Union[
-    headers: Optional[
+    user_agent: Union[dict, str, None] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
@@ -1428,9 +1349,6 @@ def get_hf_file_metadata(
             folder.
             - If `False` or `None`, no token is provided.
             - If a string, it's used as the authentication token.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         timeout (`float`, *optional*, defaults to 10):
             How many seconds to wait for the server to send metadata before giving up.
         library_name (`str`, *optional*):
@@ -1458,31 +1376,23 @@ def get_hf_file_metadata(
     hf_headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file
 
     # Retrieve metadata
-
-
-        url=url,
-        headers=hf_headers,
-        allow_redirects=False,
-        follow_relative_redirects=True,
-        proxies=proxies,
-        timeout=timeout,
-    )
-    hf_raise_for_status(r)
+    response = _httpx_follow_relative_redirects(method="HEAD", url=url, headers=hf_headers, timeout=timeout)
+    hf_raise_for_status(response)
 
     # Return
     return HfFileMetadata(
-        commit_hash=
-        # We favor a custom header indicating the etag of the linked resource, and
-
-
+        commit_hash=response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+        # We favor a custom header indicating the etag of the linked resource, and we fallback to the regular etag header.
+        etag=_normalize_etag(
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or response.headers.get("ETag")
+        ),
         # Either from response headers (if redirected) or defaults to request url
-        # Do not use directly `url
-        #
-        location=r.headers.get("Location") or r.request.url,  # type: ignore
+        # Do not use directly `url` as we might have followed relative redirects.
+        location=response.headers.get("Location") or str(response.request.url),  # type: ignore
         size=_int_or_none(
-
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or response.headers.get("Content-Length")
         ),
-        xet_file_data=parse_xet_file_data_from_response(
+        xet_file_data=parse_xet_file_data_from_response(response, endpoint=endpoint),  # type: ignore
     )
 
 
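The hunk above rewrites `get_hf_file_metadata` to issue a single HEAD request through the new httpx helper and to populate `HfFileMetadata` (commit_hash, etag, location, size, xet_file_data) from the response headers. A short usage sketch, assuming both `hf_hub_url` and `get_hf_file_metadata` remain re-exported at the package root as in 0.x:

from huggingface_hub import get_hf_file_metadata, hf_hub_url

# Build the resolve URL for a file, then fetch its metadata with one HEAD request.
url = hf_hub_url(repo_id="gpt2", filename="config.json")
meta = get_hf_file_metadata(url)

print(meta.commit_hash)  # commit the file resolves to (X-Repo-Commit header)
print(meta.etag)         # normalized etag of the blob
print(meta.location)     # final URL; may differ from `url` after a relative redirect
print(meta.size)         # size in bytes, from X-Linked-Size or Content-Length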
@@ -1493,19 +1403,18 @@ def _get_metadata_or_catch_error(
     repo_type: str,
     revision: str,
     endpoint: Optional[str],
-    proxies: Optional[Dict],
     etag_timeout: Optional[float],
-    headers:
+    headers: dict[str, str],  # mutated inplace!
     token: Union[bool, str, None],
     local_files_only: bool,
     relative_filename: Optional[str] = None,  # only used to store `.no_exists` in cache
     storage_folder: Optional[str] = None,  # only used to store `.no_exists` in cache
 ) -> Union[
     # Either an exception is caught and returned
-
+    tuple[None, None, None, None, None, Exception],
     # Or the metadata is returned as
     # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
-
+    tuple[str, str, str, int, Optional[XetFileData], None],
 ]:
     """Get metadata for a file on the Hub, safely handling network issues.
 
@@ -1542,9 +1451,9 @@ def _get_metadata_or_catch_error(
|
|
|
1542
1451
|
try:
|
|
1543
1452
|
try:
|
|
1544
1453
|
metadata = get_hf_file_metadata(
|
|
1545
|
-
url=url,
|
|
1454
|
+
url=url, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
|
|
1546
1455
|
)
|
|
1547
|
-
except
|
|
1456
|
+
except RemoteEntryNotFoundError as http_error:
|
|
1548
1457
|
if storage_folder is not None and relative_filename is not None:
|
|
1549
1458
|
# Cache the non-existence of the file
|
|
1550
1459
|
commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
|
|
@@ -1595,21 +1504,17 @@ def _get_metadata_or_catch_error(
        if urlparse(url).netloc != urlparse(metadata.location).netloc:
            # Remove authorization header when downloading a LFS blob
            headers.pop("authorization", None)
-    except
-        # Actually raise
+    except httpx.ProxyError:
+        # Actually raise on proxy error
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ) as error:
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
         # Otherwise, our Internet connection is down.
         # etag is None
         head_error_call = error
-    except (RevisionNotFoundError,
+    except (RevisionNotFoundError, RemoteEntryNotFoundError):
         # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
         raise
-    except
+    except HfHubHTTPError as error:
         # Multiple reasons for an http error:
         # - Repository is private and invalid/missing token sent
         # - Repository is gated and invalid/missing token sent
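The hunk above maps the old requests exception classes onto their httpx counterparts in the metadata lookup: proxy failures re-raise, connection and timeout errors are treated as "offline", and remaining HTTP errors are handled as `HfHubHTTPError`. For callers doing similar triage themselves, the mapping between the two exception hierarchies can be sketched as follows (illustrative only, not the library's code):

import httpx


def classify_head_failure(url: str) -> str:
    """Tiny triage helper mirroring the except-clauses in the hunk above."""
    try:
        response = httpx.head(url, timeout=10)
        response.raise_for_status()
        return "ok"
    except httpx.ProxyError:
        raise  # misconfigured proxy: let it surface, as the hunk does
    except (httpx.ConnectError, httpx.TimeoutException):
        return "offline"  # treated as "no internet" -> fall back to the local cache
    except httpx.HTTPStatusError:
        return "http-error"  # repo private/gated/missing, rate limited, ...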
@@ -1667,8 +1572,7 @@ def _download_to_tmp_and_move(
     incomplete_path: Path,
     destination_path: Path,
     url_to_download: str,
-
-    headers: Dict[str, str],
+    headers: dict[str, str],
     expected_size: Optional[int],
     filename: str,
     force_download: bool,
@@ -1692,14 +1596,14 @@ def _download_to_tmp_and_move(
        # Do nothing if already exists (except if force_download=True)
        return
 
-    if incomplete_path.exists() and (force_download or
+    if incomplete_path.exists() and (force_download or constants.HF_HUB_ENABLE_HF_TRANSFER):
         # By default, we will try to resume the download if possible.
         # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
         # not resume the download => delete the incomplete file.
         message = f"Removing incomplete file '{incomplete_path}'"
         if force_download:
             message += " (force_download=True)"
-        elif constants.HF_HUB_ENABLE_HF_TRANSFER
+        elif constants.HF_HUB_ENABLE_HF_TRANSFER:
             message += " (hf_transfer=True)"
         logger.info(message)
         incomplete_path.unlink(missing_ok=True)
@@ -1736,7 +1640,6 @@ def _download_to_tmp_and_move(
            http_get(
                url_to_download,
                f,
-                proxies=proxies,
                resume_size=resume_size,
                headers=headers,
                expected_size=expected_size,