huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This release has been flagged as potentially problematic.

Files changed (132)
  1. huggingface_hub/__init__.py +33 -45
  2. huggingface_hub/_commit_api.py +39 -43
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +17 -43
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +135 -50
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +18 -32
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +143 -39
  16. huggingface_hub/cli/auth.py +105 -171
  17. huggingface_hub/cli/cache.py +594 -361
  18. huggingface_hub/cli/download.py +120 -112
  19. huggingface_hub/cli/hf.py +38 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +282 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -220
  26. huggingface_hub/cli/upload_large_folder.py +91 -106
  27. huggingface_hub/community.py +5 -5
  28. huggingface_hub/constants.py +17 -52
  29. huggingface_hub/dataclasses.py +135 -21
  30. huggingface_hub/errors.py +47 -30
  31. huggingface_hub/fastai_utils.py +8 -9
  32. huggingface_hub/file_download.py +351 -303
  33. huggingface_hub/hf_api.py +398 -570
  34. huggingface_hub/hf_file_system.py +101 -66
  35. huggingface_hub/hub_mixin.py +32 -54
  36. huggingface_hub/inference/_client.py +177 -162
  37. huggingface_hub/inference/_common.py +38 -54
  38. huggingface_hub/inference/_generated/_async_client.py +218 -258
  39. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  40. huggingface_hub/inference/_generated/types/base.py +10 -7
  41. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  42. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  43. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  44. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  45. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  46. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  47. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  48. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  49. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  50. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  51. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  52. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  53. huggingface_hub/inference/_generated/types/translation.py +2 -2
  54. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  55. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  56. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  57. huggingface_hub/inference/_mcp/agent.py +3 -3
  58. huggingface_hub/inference/_mcp/constants.py +1 -2
  59. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  60. huggingface_hub/inference/_mcp/types.py +10 -10
  61. huggingface_hub/inference/_mcp/utils.py +4 -4
  62. huggingface_hub/inference/_providers/__init__.py +12 -4
  63. huggingface_hub/inference/_providers/_common.py +62 -24
  64. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  65. huggingface_hub/inference/_providers/cohere.py +3 -3
  66. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  67. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  68. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  69. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  70. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  71. huggingface_hub/inference/_providers/nebius.py +10 -10
  72. huggingface_hub/inference/_providers/novita.py +5 -5
  73. huggingface_hub/inference/_providers/nscale.py +4 -4
  74. huggingface_hub/inference/_providers/replicate.py +15 -15
  75. huggingface_hub/inference/_providers/sambanova.py +6 -6
  76. huggingface_hub/inference/_providers/together.py +7 -7
  77. huggingface_hub/lfs.py +21 -94
  78. huggingface_hub/repocard.py +15 -16
  79. huggingface_hub/repocard_data.py +57 -57
  80. huggingface_hub/serialization/__init__.py +0 -1
  81. huggingface_hub/serialization/_base.py +9 -9
  82. huggingface_hub/serialization/_dduf.py +7 -7
  83. huggingface_hub/serialization/_torch.py +28 -28
  84. huggingface_hub/utils/__init__.py +11 -6
  85. huggingface_hub/utils/_auth.py +5 -5
  86. huggingface_hub/utils/_cache_manager.py +49 -74
  87. huggingface_hub/utils/_deprecation.py +1 -1
  88. huggingface_hub/utils/_dotenv.py +3 -3
  89. huggingface_hub/utils/_fixes.py +0 -10
  90. huggingface_hub/utils/_git_credential.py +3 -3
  91. huggingface_hub/utils/_headers.py +7 -29
  92. huggingface_hub/utils/_http.py +371 -208
  93. huggingface_hub/utils/_pagination.py +4 -4
  94. huggingface_hub/utils/_parsing.py +98 -0
  95. huggingface_hub/utils/_paths.py +5 -5
  96. huggingface_hub/utils/_runtime.py +59 -23
  97. huggingface_hub/utils/_safetensors.py +21 -21
  98. huggingface_hub/utils/_subprocess.py +9 -9
  99. huggingface_hub/utils/_telemetry.py +3 -3
  100. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
  101. huggingface_hub/utils/_typing.py +3 -3
  102. huggingface_hub/utils/_validators.py +53 -72
  103. huggingface_hub/utils/_xet.py +16 -16
  104. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  105. huggingface_hub/utils/insecure_hashlib.py +3 -9
  106. huggingface_hub/utils/tqdm.py +3 -3
  107. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
  108. huggingface_hub-1.0.0.dist-info/RECORD +152 -0
  109. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
  110. huggingface_hub/commands/__init__.py +0 -27
  111. huggingface_hub/commands/delete_cache.py +0 -476
  112. huggingface_hub/commands/download.py +0 -204
  113. huggingface_hub/commands/env.py +0 -39
  114. huggingface_hub/commands/huggingface_cli.py +0 -65
  115. huggingface_hub/commands/lfs.py +0 -200
  116. huggingface_hub/commands/repo.py +0 -151
  117. huggingface_hub/commands/repo_files.py +0 -132
  118. huggingface_hub/commands/scan_cache.py +0 -183
  119. huggingface_hub/commands/tag.py +0 -161
  120. huggingface_hub/commands/upload.py +0 -318
  121. huggingface_hub/commands/upload_large_folder.py +0 -131
  122. huggingface_hub/commands/user.py +0 -208
  123. huggingface_hub/commands/version.py +0 -40
  124. huggingface_hub/inference_api.py +0 -217
  125. huggingface_hub/keras_mixin.py +0 -497
  126. huggingface_hub/repository.py +0 -1471
  127. huggingface_hub/serialization/_tensorflow.py +0 -92
  128. huggingface_hub/utils/_hf_folder.py +0 -68
  129. huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
  130. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
  131. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
  132. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
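
The per-file diff shown below is for huggingface_hub/file_download.py (entry 32 above). Its most visible API change is the new `dry_run` parameter on `hf_hub_download`, which returns a `DryRunFileInfo` instead of a local path. A minimal usage sketch, not taken from the diff itself (the repo id and filename are placeholders; field names follow the `DryRunFileInfo` dataclass added below):

from huggingface_hub import hf_hub_download

# New in 1.0.0: dry_run=True reports what would happen without downloading anything.
info = hf_hub_download("gpt2", "config.json", dry_run=True)
print(info.filename, info.file_size, info.is_cached, info.will_download)

# Default call (dry_run=False) still returns the local path of the downloaded file.
if info.will_download:
    local_path = hf_hub_download("gpt2", "config.json")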
@@ -1,6 +1,5 @@
  import copy
  import errno
- import inspect
  import os
  import re
  import shutil
@@ -10,26 +9,19 @@ import uuid
  import warnings
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union
+ from typing import Any, BinaryIO, Literal, NoReturn, Optional, Union, overload
  from urllib.parse import quote, urlparse

- import requests
+ import httpx

- from . import (
-     __version__, # noqa: F401 # for backward compatibility
-     constants,
- )
+ from . import constants
  from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
- from .constants import (
-     HUGGINGFACE_CO_URL_TEMPLATE, # noqa: F401 # for backward compatibility
-     HUGGINGFACE_HUB_CACHE, # noqa: F401 # for backward compatibility
- )
  from .errors import (
-     EntryNotFoundError,
      FileMetadataError,
      GatedRepoError,
      HfHubHTTPError,
      LocalEntryNotFoundError,
+     RemoteEntryNotFoundError,
      RepositoryNotFoundError,
      RevisionNotFoundError,
  )
@@ -39,30 +31,15 @@ from .utils import (
      WeakFileLock,
      XetFileData,
      build_hf_headers,
-     get_fastai_version, # noqa: F401 # for backward compatibility
-     get_fastcore_version, # noqa: F401 # for backward compatibility
-     get_graphviz_version, # noqa: F401 # for backward compatibility
-     get_jinja_version, # noqa: F401 # for backward compatibility
-     get_pydot_version, # noqa: F401 # for backward compatibility
-     get_tf_version, # noqa: F401 # for backward compatibility
-     get_torch_version, # noqa: F401 # for backward compatibility
      hf_raise_for_status,
-     is_fastai_available, # noqa: F401 # for backward compatibility
-     is_fastcore_available, # noqa: F401 # for backward compatibility
-     is_graphviz_available, # noqa: F401 # for backward compatibility
-     is_jinja_available, # noqa: F401 # for backward compatibility
-     is_pydot_available, # noqa: F401 # for backward compatibility
-     is_tf_available, # noqa: F401 # for backward compatibility
-     is_torch_available, # noqa: F401 # for backward compatibility
      logging,
      parse_xet_file_data_from_response,
      refresh_xet_connection_info,
-     reset_sessions,
      tqdm,
      validate_hf_hub_args,
  )
- from .utils._http import _adjust_range_header, http_backoff
- from .utils._runtime import _PY_VERSION, is_xet_available # noqa: F401 # for backward compatibility
+ from .utils._http import _adjust_range_header, http_backoff, http_stream_backoff
+ from .utils._runtime import is_xet_available
  from .utils._typing import HTTP_METHOD_T
  from .utils.sha import sha_fileobj
  from .utils.tqdm import _get_progress_bar_context
@@ -83,7 +60,7 @@ REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
  # Regex to check if the file etag IS a valid sha256
  REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")

- _are_symlinks_supported_in_dir: Dict[str, bool] = {}
+ _are_symlinks_supported_in_dir: dict[str, bool] = {}


  def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
@@ -172,6 +149,34 @@ class HfFileMetadata:
      xet_file_data: Optional[XetFileData]


+ @dataclass
+ class DryRunFileInfo:
+     """Information returned when performing a dry run of a file download.
+
+     Returned by [`hf_hub_download`] when `dry_run=True`.
+
+     Args:
+         commit_hash (`str`):
+             The commit_hash related to the file.
+         file_size (`int`):
+             Size of the file. In case of an LFS file, contains the size of the actual LFS file, not the pointer.
+         filename (`str`):
+             Name of the file in the repo.
+         is_cached (`bool`):
+             Whether the file is already cached locally.
+         will_download (`bool`):
+             Whether the file will be downloaded if `hf_hub_download` is called with `dry_run=False`.
+             In practice, will_download is `True` if the file is not cached or if `force_download=True`.
+     """
+
+     commit_hash: str
+     file_size: int
+     filename: str
+     local_path: str
+     is_cached: bool
+     will_download: bool
+
+
  @validate_hf_hub_args
  def hf_hub_url(
      repo_id: str,
@@ -249,7 +254,7 @@ def hf_hub_url(

      if revision is None:
          revision = constants.DEFAULT_REVISION
-     url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+     url = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
          repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
      )
      # Update endpoint if provided
@@ -258,11 +263,10 @@
      return url


- def _request_wrapper(
-     method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
- ) -> requests.Response:
-     """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
-     `allow_redirection=False`.
+ def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kwargs) -> httpx.Response:
+     """Perform an HTTP request with backoff and follow relative redirects only.
+
+     This is useful to follow a redirection to a renamed repository without following redirection to a CDN.

      A backoff mechanism retries the HTTP call on 5xx errors and network errors.

@@ -271,44 +275,36 @@
              HTTP method, such as 'GET' or 'HEAD'.
          url (`str`):
              The URL of the resource to fetch.
-         follow_relative_redirects (`bool`, *optional*, defaults to `False`)
-             If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
-             kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
-             following redirection to a CDN.
-         **params (`dict`, *optional*):
-             Params to pass to `requests.request`.
+         **httpx_kwargs (`dict`, *optional*):
+             Params to pass to `httpx.request`.
      """
-     # Recursively follow relative redirects
-     if follow_relative_redirects:
-         response = _request_wrapper(
+     while True:
+         # Make the request
+         response = http_backoff(
              method=method,
              url=url,
-             follow_relative_redirects=False,
-             **params,
+             **httpx_kwargs,
+             follow_redirects=False,
+             retry_on_exceptions=(),
+             retry_on_status_codes=(429,),
          )
+         hf_raise_for_status(response)

-         # If redirection, we redirect only relative paths.
-         # This is useful in case of a renamed repository.
+         # Check if response is a relative redirect
          if 300 <= response.status_code <= 399:
              parsed_target = urlparse(response.headers["Location"])
              if parsed_target.netloc == "":
-                 # This means it is a relative 'location' headers, as allowed by RFC 7231.
-                 # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
-                 # We want to follow this relative redirect !
-                 #
-                 # Highly inspired by `resolve_redirects` from requests library.
-                 # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
-                 next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
-                 return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
-         return response
-
-     # Perform request and return if status_code is not in the retry list.
-     response = http_backoff(method=method, url=url, **params)
-     hf_raise_for_status(response)
+                 # Relative redirect -> update URL and retry
+                 url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                 continue
+
+         # Break if no relative redirect
+         break
+
      return response


- def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+ def _get_file_length_from_http_response(response: httpx.Response) -> Optional[int]:
      """
      Get the length of the file from the HTTP response headers.

@@ -316,7 +312,7 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
      `Content-Range` or `Content-Length` header, if available (in that order).

      Args:
-         response (`requests.Response`):
+         response (`httpx.Response`):
              The HTTP response object.

      Returns:
@@ -343,13 +339,13 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
      return None


+ @validate_hf_hub_args
  def http_get(
      url: str,
      temp_file: BinaryIO,
      *,
-     proxies: Optional[Dict] = None,
      resume_size: int = 0,
-     headers: Optional[Dict[str, Any]] = None,
+     headers: Optional[dict[str, Any]] = None,
      expected_size: Optional[int] = None,
      displayed_filename: Optional[str] = None,
      _nb_retries: int = 5,
@@ -367,8 +363,6 @@ def http_get(
              The URL of the file to download.
          temp_file (`BinaryIO`):
              The file-like object where to save the file.
-         proxies (`dict`, *optional*):
-             Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
          resume_size (`int`, *optional*):
              The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a
              positive number, the download will resume at the given position.
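
The hunks above and below show that `http_get` drops its `proxies` argument and moves from `requests` to httpx streaming via `http_stream_backoff`, keeping the resume-on-transient-error behaviour. A hedged sketch of calling it directly on a resolved URL, assuming the 1.0.0 signature shown here (not an official example; proxies must now be configured at the httpx/environment level rather than passed in):

from huggingface_hub import hf_hub_url
from huggingface_hub.file_download import http_get

url = hf_hub_url("gpt2", "config.json")  # placeholder repo/file
with open("config.json", "wb") as tmp_file:
    # Streams the response in DOWNLOAD_CHUNK_SIZE chunks and retries/resumes on transient network errors.
    http_get(url, tmp_file)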
@@ -385,138 +379,83 @@ def http_get(
          # If the file is already fully downloaded, we don't need to download it again.
          return

-     has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers)
-     hf_transfer = None
-     if constants.HF_HUB_ENABLE_HF_TRANSFER:
-         if resume_size != 0:
-             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
-         elif proxies is not None:
-             warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
-         elif has_custom_range_header:
-             warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
-         else:
-             try:
-                 import hf_transfer # type: ignore[no-redef]
-             except ImportError:
-                 raise ValueError(
-                     "Fast download using 'hf_transfer' is enabled"
-                     " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
-                     " available in your environment. Try `pip install hf_transfer`."
-                 )
-
      initial_headers = headers
      headers = copy.deepcopy(headers) or {}
      if resume_size > 0:
          headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
      elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
-         # Any files over 50GB will not be available through basic http request.
-         # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header.
-         # Since hf_transfer splits the download into chunks, the process will succeed afterwards.
-         if hf_transfer:
-             headers["Range"] = "bytes=0-0"
-         else:
-             raise ValueError(
-                 "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead."
-                 " Try `pip install hf_transfer` or `pip install hf_xet`."
-             )
-
-     r = _request_wrapper(
-         method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
-     )
-
-     hf_raise_for_status(r)
-     total: Optional[int] = _get_file_length_from_http_response(r)
-
-     if displayed_filename is None:
-         displayed_filename = url
-         content_disposition = r.headers.get("Content-Disposition")
-         if content_disposition is not None:
-             match = HEADER_FILENAME_PATTERN.search(content_disposition)
-             if match is not None:
-                 # Means file is on CDN
-                 displayed_filename = match.groupdict()["filename"]
-
-     # Truncate filename if too long to display
-     if len(displayed_filename) > 40:
-         displayed_filename = f"(…){displayed_filename[-40:]}"
+         # Any files over 50GB will not be available through basic http requests.
+         raise ValueError(
+             "The file is too large to be downloaded using the regular download method. "
+             " Install `hf_xet` with `pip install hf_xet` for xet-powered downloads."
+         )

-     consistency_error_message = (
-         f"Consistency check failed: file should be of size {expected_size} but has size"
-         f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
-         " Please retry with `force_download=True`."
-     )
-     progress_cm = _get_progress_bar_context(
-         desc=displayed_filename,
-         log_level=logger.getEffectiveLevel(),
-         total=total,
-         initial=resume_size,
-         name="huggingface_hub.http_get",
-         _tqdm_bar=_tqdm_bar,
-     )
+     with http_stream_backoff(
+         method="GET",
+         url=url,
+         headers=headers,
+         timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+         retry_on_exceptions=(),
+         retry_on_status_codes=(429,),
+     ) as response:
+         hf_raise_for_status(response)
+         total: Optional[int] = _get_file_length_from_http_response(response)
+
+         if displayed_filename is None:
+             displayed_filename = url
+             content_disposition = response.headers.get("Content-Disposition")
+             if content_disposition is not None:
+                 match = HEADER_FILENAME_PATTERN.search(content_disposition)
+                 if match is not None:
+                     # Means file is on CDN
+                     displayed_filename = match.groupdict()["filename"]
+
+         # Truncate filename if too long to display
+         if len(displayed_filename) > 40:
+             displayed_filename = f"(…){displayed_filename[-40:]}"
+
+         consistency_error_message = (
+             f"Consistency check failed: file should be of size {expected_size} but has size"
+             f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+             " Please retry with `force_download=True`."
+         )
+         progress_cm = _get_progress_bar_context(
+             desc=displayed_filename,
+             log_level=logger.getEffectiveLevel(),
+             total=total,
+             initial=resume_size,
+             name="huggingface_hub.http_get",
+             _tqdm_bar=_tqdm_bar,
+         )

-     with progress_cm as progress:
-         if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
-             supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-             if not supports_callback:
-                 warnings.warn(
-                     "You are using an outdated version of `hf_transfer`. "
-                     "Consider upgrading to latest version to enable progress bars "
-                     "using `pip install -U hf_transfer`."
-                 )
+         with progress_cm as progress:
+             new_resume_size = resume_size
              try:
-                 hf_transfer.download(
+                 for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                     if chunk: # filter out keep-alive new chunks
+                         progress.update(len(chunk))
+                         temp_file.write(chunk)
+                         new_resume_size += len(chunk)
+                         # Some data has been downloaded from the server so we reset the number of retries.
+                         _nb_retries = 5
+             except (httpx.ConnectError, httpx.TimeoutException) as e:
+                 # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+                 # a transient error (network outage?). We log a warning message and try to resume the download a few times
+                 # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+                 if _nb_retries <= 0:
+                     logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                     raise
+                 logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+                 time.sleep(1)
+                 return http_get(
                      url=url,
-                     filename=temp_file.name,
-                     max_files=constants.HF_TRANSFER_CONCURRENCY,
-                     chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                     temp_file=temp_file,
+                     resume_size=new_resume_size,
                      headers=initial_headers,
-                     parallel_failures=3,
-                     max_retries=5,
-                     **({"callback": progress.update} if supports_callback else {}),
-                 )
-             except Exception as e:
-                 raise RuntimeError(
-                     "An error occurred while downloading using `hf_transfer`. Consider"
-                     " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                 ) from e
-             if not supports_callback:
-                 progress.update(total)
-             if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                 raise EnvironmentError(
-                     consistency_error_message.format(
-                         actual_size=os.path.getsize(temp_file.name),
-                     )
+                     expected_size=expected_size,
+                     _nb_retries=_nb_retries - 1,
+                     _tqdm_bar=_tqdm_bar,
                  )
-             return
-         new_resume_size = resume_size
-         try:
-             for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
-                 if chunk: # filter out keep-alive new chunks
-                     progress.update(len(chunk))
-                     temp_file.write(chunk)
-                     new_resume_size += len(chunk)
-                     # Some data has been downloaded from the server so we reset the number of retries.
-                     _nb_retries = 5
-         except (requests.ConnectionError, requests.ReadTimeout) as e:
-             # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-             # a transient error (network outage?). We log a warning message and try to resume the download a few times
-             # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-             if _nb_retries <= 0:
-                 logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                 raise
-             logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-             time.sleep(1)
-             reset_sessions() # In case of SSLError it's best to reset the shared requests.Session objects
-             return http_get(
-                 url=url,
-                 temp_file=temp_file,
-                 proxies=proxies,
-                 resume_size=new_resume_size,
-                 headers=initial_headers,
-                 expected_size=expected_size,
-                 _nb_retries=_nb_retries - 1,
-                 _tqdm_bar=_tqdm_bar,
-             )

      if expected_size is not None and expected_size != temp_file.tell():
          raise EnvironmentError(
@@ -530,7 +469,7 @@ def xet_get(
      *,
      incomplete_path: Path,
      xet_file_data: XetFileData,
-     headers: Dict[str, str],
+     headers: dict[str, str],
      expected_size: Optional[int] = None,
      displayed_filename: Optional[str] = None,
      _tqdm_bar: Optional[tqdm] = None,
@@ -543,7 +482,7 @@ def xet_get(
              The path to the file to download.
          xet_file_data (`XetFileData`):
              The file metadata needed to make the request to the xet storage service.
-         headers (`Dict[str, str]`):
+         headers (`dict[str, str]`):
              The headers to send to the xet storage service.
          expected_size (`int`, *optional*):
              The expected size of the file to download. If set, the download will raise an error if the size of the
@@ -590,7 +529,7 @@ def xet_get(

      connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)

-     def token_refresher() -> Tuple[str, int]:
+     def token_refresher() -> tuple[str, int]:
          connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
          if connection_info is None:
              raise ValueError("Failed to refresh token using xet metadata.")
@@ -805,6 +744,75 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
      pass


+ @overload
+ def hf_hub_download(
+     repo_id: str,
+     filename: str,
+     *,
+     subfolder: Optional[str] = None,
+     repo_type: Optional[str] = None,
+     revision: Optional[str] = None,
+     library_name: Optional[str] = None,
+     library_version: Optional[str] = None,
+     cache_dir: Union[str, Path, None] = None,
+     local_dir: Union[str, Path, None] = None,
+     user_agent: Union[dict, str, None] = None,
+     force_download: bool = False,
+     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+     token: Union[bool, str, None] = None,
+     local_files_only: bool = False,
+     headers: Optional[dict[str, str]] = None,
+     endpoint: Optional[str] = None,
+     dry_run: Literal[False] = False,
+ ) -> str: ...
+
+
+ @overload
+ def hf_hub_download(
+     repo_id: str,
+     filename: str,
+     *,
+     subfolder: Optional[str] = None,
+     repo_type: Optional[str] = None,
+     revision: Optional[str] = None,
+     library_name: Optional[str] = None,
+     library_version: Optional[str] = None,
+     cache_dir: Union[str, Path, None] = None,
+     local_dir: Union[str, Path, None] = None,
+     user_agent: Union[dict, str, None] = None,
+     force_download: bool = False,
+     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+     token: Union[bool, str, None] = None,
+     local_files_only: bool = False,
+     headers: Optional[dict[str, str]] = None,
+     endpoint: Optional[str] = None,
+     dry_run: Literal[True] = True,
+ ) -> DryRunFileInfo: ...
+
+
+ @overload
+ def hf_hub_download(
+     repo_id: str,
+     filename: str,
+     *,
+     subfolder: Optional[str] = None,
+     repo_type: Optional[str] = None,
+     revision: Optional[str] = None,
+     library_name: Optional[str] = None,
+     library_version: Optional[str] = None,
+     cache_dir: Union[str, Path, None] = None,
+     local_dir: Union[str, Path, None] = None,
+     user_agent: Union[dict, str, None] = None,
+     force_download: bool = False,
+     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+     token: Union[bool, str, None] = None,
+     local_files_only: bool = False,
+     headers: Optional[dict[str, str]] = None,
+     endpoint: Optional[str] = None,
+     dry_run: bool = False,
+ ) -> Union[str, DryRunFileInfo]: ...
+
+
  @validate_hf_hub_args
  def hf_hub_download(
      repo_id: str,
@@ -817,18 +825,15 @@ def hf_hub_download(
      library_version: Optional[str] = None,
      cache_dir: Union[str, Path, None] = None,
      local_dir: Union[str, Path, None] = None,
-     user_agent: Union[Dict, str, None] = None,
+     user_agent: Union[dict, str, None] = None,
      force_download: bool = False,
-     proxies: Optional[Dict] = None,
      etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
      token: Union[bool, str, None] = None,
      local_files_only: bool = False,
-     headers: Optional[Dict[str, str]] = None,
+     headers: Optional[dict[str, str]] = None,
      endpoint: Optional[str] = None,
-     resume_download: Optional[bool] = None,
-     force_filename: Optional[str] = None,
-     local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
- ) -> str:
+     dry_run: bool = False,
+ ) -> Union[str, DryRunFileInfo]:
      """Download a given file if it's not already present in the local cache.

      The new cache file layout looks like this:
@@ -890,9 +895,6 @@ def hf_hub_download(
          force_download (`bool`, *optional*, defaults to `False`):
              Whether the file should be downloaded even if it already exists in
              the local cache.
-         proxies (`dict`, *optional*):
-             Dictionary mapping protocol to the URL of the proxy passed to
-             `requests.request`.
          etag_timeout (`float`, *optional*, defaults to `10`):
              When fetching ETag, how many seconds to wait for the server to send
              data before giving up which is passed to `requests.request`.
@@ -906,9 +908,14 @@ def hf_hub_download(
              local cached file if it exists.
          headers (`dict`, *optional*):
              Additional headers to be sent with the request.
+         dry_run (`bool`, *optional*, defaults to `False`):
+             If `True`, perform a dry run without actually downloading the file. Returns a
+             [`DryRunFileInfo`] object containing information about what would be downloaded.

      Returns:
-         `str`: Local path of file or if networking is off, last version of file cached on disk.
+         `str` or [`DryRunFileInfo`]:
+             - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
+             - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.

      Raises:
          [`~utils.RepositoryNotFoundError`]
@@ -916,7 +923,7 @@ def hf_hub_download(
              or because it is set to `private` and you do not have access.
          [`~utils.RevisionNotFoundError`]
              If the revision to download from cannot be found.
-         [`~utils.EntryNotFoundError`]
+         [`~utils.RemoteEntryNotFoundError`]
              If the file to download cannot be found.
          [`~utils.LocalEntryNotFoundError`]
              If network is disabled or unavailable and file is not found in cache.
@@ -932,20 +939,6 @@ def hf_hub_download(
          # Respect environment variable above user value
          etag_timeout = constants.HF_HUB_ETAG_TIMEOUT

-     if force_filename is not None:
-         warnings.warn(
-             "The `force_filename` parameter is deprecated as a new caching system, "
-             "which keeps the filenames as they are on the Hub, is now in place.",
-             FutureWarning,
-         )
-     if resume_download is not None:
-         warnings.warn(
-             "`resume_download` is deprecated and will be removed in version 1.0.0. "
-             "Downloads always resume when possible. "
-             "If you want to force a new download, use `force_download=True`.",
-             FutureWarning,
-         )
-
      if cache_dir is None:
          cache_dir = constants.HF_HUB_CACHE
      if revision is None:
@@ -975,15 +968,6 @@ def hf_hub_download(
      )

      if local_dir is not None:
-         if local_dir_use_symlinks != "auto":
-             warnings.warn(
-                 "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
-                 "The process to download files to a local folder has been updated and do "
-                 "not rely on symlinks anymore. You only need to pass a destination folder "
-                 "as`local_dir`.\n"
-                 "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
-             )
-
          return _hf_hub_download_to_local_dir(
              # Destination
              local_dir=local_dir,
@@ -996,12 +980,12 @@ def hf_hub_download(
              endpoint=endpoint,
              etag_timeout=etag_timeout,
              headers=hf_headers,
-             proxies=proxies,
              token=token,
              # Additional options
              cache_dir=cache_dir,
              force_download=force_download,
              local_files_only=local_files_only,
+             dry_run=dry_run,
          )
      else:
          return _hf_hub_download_to_cache_dir(
@@ -1016,11 +1000,11 @@ def hf_hub_download(
              endpoint=endpoint,
              etag_timeout=etag_timeout,
              headers=hf_headers,
-             proxies=proxies,
              token=token,
              # Additional options
              local_files_only=local_files_only,
              force_download=force_download,
+             dry_run=dry_run,
          )


@@ -1036,13 +1020,13 @@ def _hf_hub_download_to_cache_dir(
      # HTTP info
      endpoint: Optional[str],
      etag_timeout: float,
-     headers: Dict[str, str],
-     proxies: Optional[Dict],
+     headers: dict[str, str],
      token: Optional[Union[bool, str]],
      # Additional options
      local_files_only: bool,
      force_download: bool,
- ) -> str:
+     dry_run: bool,
+ ) -> Union[str, DryRunFileInfo]:
      """Download a given file to a cache folder, if not already present.

      Method should not be called directly. Please use `hf_hub_download` instead.
@@ -1062,8 +1046,18 @@ def _hf_hub_download_to_cache_dir(
      # if user provides a commit_hash and they already have the file on disk, shortcut everything.
      if REGEX_COMMIT_HASH.match(revision):
          pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
-         if os.path.exists(pointer_path) and not force_download:
-             return pointer_path
+         if os.path.exists(pointer_path):
+             if dry_run:
+                 return DryRunFileInfo(
+                     commit_hash=revision,
+                     file_size=os.path.getsize(pointer_path),
+                     filename=filename,
+                     is_cached=True,
+                     local_path=pointer_path,
+                     will_download=force_download,
+                 )
+             if not force_download:
+                 return pointer_path

      # Try to get metadata (etag, commit_hash, url, size) from the server.
      # If we can't, a HEAD request error is returned.
@@ -1073,7 +1067,6 @@ def _hf_hub_download_to_cache_dir(
          repo_type=repo_type,
          revision=revision,
          endpoint=endpoint,
-         proxies=proxies,
          etag_timeout=etag_timeout,
          headers=headers,
          token=token,
@@ -1107,8 +1100,18 @@ def _hf_hub_download_to_cache_dir(
          # Return pointer file if exists
          if commit_hash is not None:
              pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
-             if os.path.exists(pointer_path) and not force_download:
-                 return pointer_path
+             if os.path.exists(pointer_path):
+                 if dry_run:
+                     return DryRunFileInfo(
+                         commit_hash=commit_hash,
+                         file_size=os.path.getsize(pointer_path),
+                         filename=filename,
+                         is_cached=True,
+                         local_path=pointer_path,
+                         will_download=force_download,
+                     )
+                 if not force_download:
+                     return pointer_path

          # Otherwise, raise appropriate error
          _raise_on_head_call_error(head_call_error, force_download, local_files_only)
@@ -1121,6 +1124,17 @@ def _hf_hub_download_to_cache_dir(
      blob_path = os.path.join(storage_folder, "blobs", etag)
      pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)

+     if dry_run:
+         is_cached = os.path.exists(pointer_path) or os.path.exists(blob_path)
+         return DryRunFileInfo(
+             commit_hash=commit_hash,
+             file_size=expected_size,
+             filename=filename,
+             is_cached=is_cached,
+             local_path=pointer_path,
+             will_download=force_download or not is_cached,
+         )
+
      os.makedirs(os.path.dirname(blob_path), exist_ok=True)
      os.makedirs(os.path.dirname(pointer_path), exist_ok=True)

@@ -1169,7 +1183,6 @@ def _hf_hub_download_to_cache_dir(
              incomplete_path=Path(blob_path + ".incomplete"),
              destination_path=Path(blob_path),
              url_to_download=url_to_download,
-             proxies=proxies,
              headers=headers,
              expected_size=expected_size,
              filename=filename,
@@ -1195,14 +1208,14 @@ def _hf_hub_download_to_local_dir(
      # HTTP info
      endpoint: Optional[str],
      etag_timeout: float,
-     headers: Dict[str, str],
-     proxies: Optional[Dict],
+     headers: dict[str, str],
      token: Union[bool, str, None],
      # Additional options
      cache_dir: str,
      force_download: bool,
      local_files_only: bool,
- ) -> str:
+     dry_run: bool,
+ ) -> Union[str, DryRunFileInfo]:
      """Download a given file to a local folder, if not already present.

      Method should not be called directly. Please use `hf_hub_download` instead.
@@ -1217,13 +1230,23 @@ def _hf_hub_download_to_local_dir(

      # Local file exists + metadata exists + commit_hash matches => return file
      if (
-         not force_download
-         and REGEX_COMMIT_HASH.match(revision)
+         REGEX_COMMIT_HASH.match(revision)
          and paths.file_path.is_file()
          and local_metadata is not None
          and local_metadata.commit_hash == revision
      ):
-         return str(paths.file_path)
+         local_file = str(paths.file_path)
+         if dry_run:
+             return DryRunFileInfo(
+                 commit_hash=revision,
+                 file_size=os.path.getsize(local_file),
+                 filename=filename,
+                 is_cached=True,
+                 local_path=local_file,
+                 will_download=force_download,
+             )
+         if not force_download:
+             return local_file

      # Local file doesn't exist or commit_hash doesn't match => we need the etag
      (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
@@ -1232,7 +1255,6 @@ def _hf_hub_download_to_local_dir(
          repo_type=repo_type,
          revision=revision,
          endpoint=endpoint,
-         proxies=proxies,
          etag_timeout=etag_timeout,
          headers=headers,
          token=token,
@@ -1241,11 +1263,24 @@ def _hf_hub_download_to_local_dir(

      if head_call_error is not None:
          # No HEAD call but local file exists => default to local file
-         if not force_download and paths.file_path.is_file():
-             logger.warning(
-                 f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
-             )
-             return str(paths.file_path)
+         if paths.file_path.is_file():
+             if dry_run or not force_download:
+                 logger.warning(
+                     f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
+                 )
+             local_path = str(paths.file_path)
+             if dry_run and local_metadata is not None:
+                 return DryRunFileInfo(
+                     commit_hash=local_metadata.commit_hash,
+                     file_size=os.path.getsize(local_path),
+                     filename=filename,
+                     is_cached=True,
+                     local_path=local_path,
+                     will_download=force_download,
+                 )
+             if not force_download:
+                 return local_path
+
          # Otherwise => raise
          _raise_on_head_call_error(head_call_error, force_download, local_files_only)

@@ -1260,6 +1295,15 @@ def _hf_hub_download_to_local_dir(
          # etag matches => update metadata and return file
          if local_metadata is not None and local_metadata.etag == etag:
              write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+             if dry_run:
+                 return DryRunFileInfo(
+                     commit_hash=commit_hash,
+                     file_size=expected_size,
+                     filename=filename,
+                     is_cached=True,
+                     local_path=str(paths.file_path),
+                     will_download=False,
+                 )
              return str(paths.file_path)

          # metadata is outdated + etag is a sha256
@@ -1271,6 +1315,15 @@ def _hf_hub_download_to_local_dir(
                  file_hash = sha_fileobj(f).hex()
              if file_hash == etag:
                  write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+                 if dry_run:
+                     return DryRunFileInfo(
+                         commit_hash=commit_hash,
+                         file_size=expected_size,
+                         filename=filename,
+                         is_cached=True,
+                         local_path=str(paths.file_path),
+                         will_download=False,
+                     )
                  return str(paths.file_path)

      # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
@@ -1289,8 +1342,28 @@ def _hf_hub_download_to_local_dir(
              paths.file_path.parent.mkdir(parents=True, exist_ok=True)
              shutil.copyfile(cached_path, paths.file_path)
          write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+         if dry_run:
+             return DryRunFileInfo(
+                 commit_hash=commit_hash,
+                 file_size=expected_size,
+                 filename=filename,
+                 is_cached=True,
+                 local_path=str(paths.file_path),
+                 will_download=False,
+             )
          return str(paths.file_path)

+     if dry_run:
+         is_cached = paths.file_path.is_file()
+         return DryRunFileInfo(
+             commit_hash=commit_hash,
+             file_size=expected_size,
+             filename=filename,
+             is_cached=is_cached,
+             local_path=str(paths.file_path),
+             will_download=force_download or not is_cached,
+         )
+
      # Otherwise, let's download the file!
      with WeakFileLock(paths.lock_path):
          paths.file_path.unlink(missing_ok=True) # delete outdated file first
@@ -1298,7 +1371,6 @@ def _hf_hub_download_to_local_dir(
              incomplete_path=paths.incomplete_path(etag),
              destination_path=paths.file_path,
              url_to_download=url_to_download,
-             proxies=proxies,
              headers=headers,
              expected_size=expected_size,
              filename=filename,
@@ -1408,12 +1480,11 @@ def try_to_load_from_cache(
  def get_hf_file_metadata(
      url: str,
      token: Union[bool, str, None] = None,
-     proxies: Optional[Dict] = None,
      timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
      library_name: Optional[str] = None,
      library_version: Optional[str] = None,
-     user_agent: Union[Dict, str, None] = None,
-     headers: Optional[Dict[str, str]] = None,
+     user_agent: Union[dict, str, None] = None,
+     headers: Optional[dict[str, str]] = None,
      endpoint: Optional[str] = None,
  ) -> HfFileMetadata:
      """Fetch metadata of a file versioned on the Hub for a given url.
@@ -1427,9 +1498,6 @@ def get_hf_file_metadata(
                    folder.
                  - If `False` or `None`, no token is provided.
                  - If a string, it's used as the authentication token.
-         proxies (`dict`, *optional*):
-             Dictionary mapping protocol to the URL of the proxy passed to
-             `requests.request`.
          timeout (`float`, *optional*, defaults to 10):
              How many seconds to wait for the server to send metadata before giving up.
          library_name (`str`, *optional*):
@@ -1457,31 +1525,23 @@ def get_hf_file_metadata(
      hf_headers["Accept-Encoding"] = "identity" # prevent any compression => we want to know the real size of the file

      # Retrieve metadata
-     r = _request_wrapper(
-         method="HEAD",
-         url=url,
-         headers=hf_headers,
-         allow_redirects=False,
-         follow_relative_redirects=True,
-         proxies=proxies,
-         timeout=timeout,
-     )
-     hf_raise_for_status(r)
+     response = _httpx_follow_relative_redirects(method="HEAD", url=url, headers=hf_headers, timeout=timeout)
+     hf_raise_for_status(response)

      # Return
      return HfFileMetadata(
-         commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
-         # We favor a custom header indicating the etag of the linked resource, and
-         # we fallback to the regular etag header.
-         etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
+         commit_hash=response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+         # We favor a custom header indicating the etag of the linked resource, and we fallback to the regular etag header.
+         etag=_normalize_etag(
+             response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or response.headers.get("ETag")
+         ),
          # Either from response headers (if redirected) or defaults to request url
-         # Do not use directly `url`, as `_request_wrapper` might have followed relative
-         # redirects.
-         location=r.headers.get("Location") or r.request.url, # type: ignore
+         # Do not use directly `url` as we might have followed relative redirects.
+         location=response.headers.get("Location") or str(response.request.url), # type: ignore
          size=_int_or_none(
-             r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
+             response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or response.headers.get("Content-Length")
          ),
-         xet_file_data=parse_xet_file_data_from_response(r, endpoint=endpoint), # type: ignore
+         xet_file_data=parse_xet_file_data_from_response(response, endpoint=endpoint), # type: ignore
      )


@@ -1492,19 +1552,18 @@ def _get_metadata_or_catch_error(
      repo_type: str,
      revision: str,
      endpoint: Optional[str],
-     proxies: Optional[Dict],
      etag_timeout: Optional[float],
-     headers: Dict[str, str], # mutated inplace!
+     headers: dict[str, str], # mutated inplace!
      token: Union[bool, str, None],
      local_files_only: bool,
      relative_filename: Optional[str] = None, # only used to store `.no_exists` in cache
      storage_folder: Optional[str] = None, # only used to store `.no_exists` in cache
  ) -> Union[
      # Either an exception is caught and returned
-     Tuple[None, None, None, None, None, Exception],
+     tuple[None, None, None, None, None, Exception],
      # Or the metadata is returned as
      # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
-     Tuple[str, str, str, int, Optional[XetFileData], None],
+     tuple[str, str, str, int, Optional[XetFileData], None],
  ]:
      """Get metadata for a file on the Hub, safely handling network issues.

@@ -1541,9 +1600,9 @@ def _get_metadata_or_catch_error(
      try:
          try:
              metadata = get_hf_file_metadata(
-                 url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
+                 url=url, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
              )
-         except EntryNotFoundError as http_error:
+         except RemoteEntryNotFoundError as http_error:
              if storage_folder is not None and relative_filename is not None:
                  # Cache the non-existence of the file
                  commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
@@ -1594,21 +1653,17 @@ def _get_metadata_or_catch_error(
              if urlparse(url).netloc != urlparse(metadata.location).netloc:
                  # Remove authorization header when downloading a LFS blob
                  headers.pop("authorization", None)
-     except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
-         # Actually raise for those subclasses of ConnectionError
+     except httpx.ProxyError:
+         # Actually raise on proxy error
          raise
-     except (
-         requests.exceptions.ConnectionError,
-         requests.exceptions.Timeout,
-         OfflineModeIsEnabled,
-     ) as error:
+     except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
          # Otherwise, our Internet connection is down.
          # etag is None
          head_error_call = error
-     except (RevisionNotFoundError, EntryNotFoundError):
+     except (RevisionNotFoundError, RemoteEntryNotFoundError):
          # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
          raise
-     except requests.HTTPError as error:
+     except HfHubHTTPError as error:
          # Multiple reasons for an http error:
          # - Repository is private and invalid/missing token sent
          # - Repository is gated and invalid/missing token sent
@@ -1666,8 +1721,7 @@ def _download_to_tmp_and_move(
      incomplete_path: Path,
      destination_path: Path,
      url_to_download: str,
-     proxies: Optional[Dict],
-     headers: Dict[str, str],
+     headers: dict[str, str],
      expected_size: Optional[int],
      filename: str,
      force_download: bool,
@@ -1679,7 +1733,7 @@ def _download_to_tmp_and_move(
      Internal logic:
      - return early if file is already downloaded
      - resume download if possible (from incomplete file)
-     - do not resume download if `force_download=True` or `HF_HUB_ENABLE_HF_TRANSFER=True`
+     - do not resume download if `force_download=True`
      - check disk space before downloading
      - download content to a temporary file
      - set correct permissions on temporary file
@@ -1691,16 +1745,11 @@ def _download_to_tmp_and_move(
          # Do nothing if already exists (except if force_download=True)
          return

-     if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
+     if incomplete_path.exists() and force_download:
          # By default, we will try to resume the download if possible.
-         # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
+         # However, if the user has set `force_download=True`, then we should
          # not resume the download => delete the incomplete file.
-         message = f"Removing incomplete file '{incomplete_path}'"
-         if force_download:
-             message += " (force_download=True)"
-         elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
-             message += " (hf_transfer=True)"
-         logger.info(message)
+         logger.info(f"Removing incomplete file '{incomplete_path}' (force_download=True)")
          incomplete_path.unlink(missing_ok=True)

      with incomplete_path.open("ab") as f:
@@ -1735,7 +1784,6 @@ def _download_to_tmp_and_move(
          http_get(
              url_to_download,
              f,
-             proxies=proxies,
              resume_size=resume_size,
              headers=headers,
              expected_size=expected_size,
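
Beyond the `proxies` removal, the diff also changes what errors callers should expect: `requests` exceptions are replaced by `httpx` ones internally, and `EntryNotFoundError` becomes `RemoteEntryNotFoundError` in the import list and the `Raises:` docs. A hedged sketch of error handling against the 1.0.0 API, using only the exception names imported in the diff (repo and filename are placeholders):

from huggingface_hub import hf_hub_download
from huggingface_hub.errors import HfHubHTTPError, LocalEntryNotFoundError, RemoteEntryNotFoundError

try:
    path = hf_hub_download("gpt2", "does-not-exist.bin")
except RemoteEntryNotFoundError:
    print("The file does not exist on the Hub.")
except LocalEntryNotFoundError:
    print("Network unavailable and the file is not in the local cache.")
except HfHubHTTPError as err:
    print(f"Other HTTP error from the Hub: {err}")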