huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Files changed (127)
  1. huggingface_hub/__init__.py +28 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +80 -3
  16. huggingface_hub/cli/auth.py +104 -150
  17. huggingface_hub/cli/cache.py +102 -126
  18. huggingface_hub/cli/download.py +93 -110
  19. huggingface_hub/cli/hf.py +37 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +158 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -212
  26. huggingface_hub/cli/upload_large_folder.py +90 -105
  27. huggingface_hub/commands/_cli_utils.py +2 -2
  28. huggingface_hub/commands/delete_cache.py +11 -11
  29. huggingface_hub/commands/download.py +4 -13
  30. huggingface_hub/commands/lfs.py +4 -4
  31. huggingface_hub/commands/repo_files.py +2 -2
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -22
  39. huggingface_hub/errors.py +43 -30
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +154 -253
  42. huggingface_hub/hf_api.py +329 -558
  43. huggingface_hub/hf_file_system.py +104 -62
  44. huggingface_hub/hub_mixin.py +32 -54
  45. huggingface_hub/inference/_client.py +178 -163
  46. huggingface_hub/inference/_common.py +38 -54
  47. huggingface_hub/inference/_generated/_async_client.py +219 -259
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/agent.py +3 -3
  67. huggingface_hub/inference/_mcp/constants.py +1 -2
  68. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  69. huggingface_hub/inference/_mcp/types.py +10 -10
  70. huggingface_hub/inference/_mcp/utils.py +4 -4
  71. huggingface_hub/inference/_providers/__init__.py +2 -13
  72. huggingface_hub/inference/_providers/_common.py +24 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +24 -33
  87. huggingface_hub/repocard.py +16 -17
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +3 -3
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +369 -209
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +3 -3
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
  115. huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
  116. huggingface_hub/inference/_providers/publicai.py +0 -6
  117. huggingface_hub/inference/_providers/scaleway.py +0 -28
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.1.dist-info/RECORD +0 -168
  124. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
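
The diff below covers `huggingface_hub/file_download.py`, where the headline change is the HTTP backend migration from `requests` to `httpx`. A minimal caller-side sketch of what the exception renames imply, based only on what is visible in this diff (`requests.ConnectionError`/`requests.ReadTimeout` handling becomes `httpx.ConnectError`/`httpx.TimeoutException`); the helper name is hypothetical:

import httpx

def fetch(url: str) -> bytes:
    # Hypothetical caller-side adaptation: catch the httpx equivalents of the
    # requests exceptions that 0.35.x-era code would have handled.
    try:
        response = httpx.get(url, follow_redirects=True, timeout=10.0)
        response.raise_for_status()
        return response.content
    except (httpx.ConnectError, httpx.TimeoutException):
        raise  # transient network error: retry or surface to the user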
@@ -1,6 +1,5 @@
  import copy
  import errno
- import inspect
  import os
  import re
  import shutil
@@ -10,26 +9,19 @@ import uuid
  import warnings
  from dataclasses import dataclass
  from pathlib import Path
- from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union
+ from typing import Any, BinaryIO, NoReturn, Optional, Union
  from urllib.parse import quote, urlparse

- import requests
+ import httpx

- from . import (
-     __version__,  # noqa: F401 # for backward compatibility
-     constants,
- )
+ from . import constants
  from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
- from .constants import (
-     HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401 # for backward compatibility
-     HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
- )
  from .errors import (
-     EntryNotFoundError,
      FileMetadataError,
      GatedRepoError,
      HfHubHTTPError,
      LocalEntryNotFoundError,
+     RemoteEntryNotFoundError,
      RepositoryNotFoundError,
      RevisionNotFoundError,
  )
@@ -39,30 +31,15 @@ from .utils import (
      WeakFileLock,
      XetFileData,
      build_hf_headers,
-     get_fastai_version,  # noqa: F401 # for backward compatibility
-     get_fastcore_version,  # noqa: F401 # for backward compatibility
-     get_graphviz_version,  # noqa: F401 # for backward compatibility
-     get_jinja_version,  # noqa: F401 # for backward compatibility
-     get_pydot_version,  # noqa: F401 # for backward compatibility
-     get_tf_version,  # noqa: F401 # for backward compatibility
-     get_torch_version,  # noqa: F401 # for backward compatibility
      hf_raise_for_status,
-     is_fastai_available,  # noqa: F401 # for backward compatibility
-     is_fastcore_available,  # noqa: F401 # for backward compatibility
-     is_graphviz_available,  # noqa: F401 # for backward compatibility
-     is_jinja_available,  # noqa: F401 # for backward compatibility
-     is_pydot_available,  # noqa: F401 # for backward compatibility
-     is_tf_available,  # noqa: F401 # for backward compatibility
-     is_torch_available,  # noqa: F401 # for backward compatibility
      logging,
      parse_xet_file_data_from_response,
      refresh_xet_connection_info,
-     reset_sessions,
      tqdm,
      validate_hf_hub_args,
  )
- from .utils._http import _adjust_range_header, http_backoff
- from .utils._runtime import _PY_VERSION, is_xet_available  # noqa: F401 # for backward compatibility
+ from .utils._http import _adjust_range_header, http_backoff, http_stream_backoff
+ from .utils._runtime import is_xet_available
  from .utils._typing import HTTP_METHOD_T
  from .utils.sha import sha_fileobj
  from .utils.tqdm import _get_progress_bar_context
@@ -83,7 +60,7 @@ REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
  # Regex to check if the file etag IS a valid sha256
  REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")

- _are_symlinks_supported_in_dir: Dict[str, bool] = {}
+ _are_symlinks_supported_in_dir: dict[str, bool] = {}


  def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
@@ -252,7 +229,7 @@ def hf_hub_url(

      if revision is None:
          revision = constants.DEFAULT_REVISION
-     url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+     url = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
          repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
      )
      # Update endpoint if provided
@@ -261,57 +238,48 @@
      return url


- def _request_wrapper(
-     method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
- ) -> requests.Response:
-     """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
-     `allow_redirection=False`.
+ def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kwargs) -> httpx.Response:
+     """Perform an HTTP request with backoff and follow relative redirects only.
+
+     This is useful to follow a redirection to a renamed repository without following redirection to a CDN.

-     A backoff mechanism retries the HTTP call on 5xx errors and network errors.
+     A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.

      Args:
          method (`str`):
              HTTP method, such as 'GET' or 'HEAD'.
          url (`str`):
              The URL of the resource to fetch.
-         follow_relative_redirects (`bool`, *optional*, defaults to `False`)
-             If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
-             kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
-             following redirection to a CDN.
-         **params (`dict`, *optional*):
-             Params to pass to `requests.request`.
+         **httpx_kwargs (`dict`, *optional*):
+             Params to pass to `httpx.request`.
      """
-     # Recursively follow relative redirects
-     if follow_relative_redirects:
-         response = _request_wrapper(
+     while True:
+         # Make the request
+         response = http_backoff(
              method=method,
              url=url,
-             follow_relative_redirects=False,
-             **params,
+             **httpx_kwargs,
+             follow_redirects=False,
+             retry_on_exceptions=(),
+             retry_on_status_codes=(429,),
          )
+         hf_raise_for_status(response)

-         # If redirection, we redirect only relative paths.
-         # This is useful in case of a renamed repository.
+         # Check if response is a relative redirect
          if 300 <= response.status_code <= 399:
              parsed_target = urlparse(response.headers["Location"])
              if parsed_target.netloc == "":
-                 # This means it is a relative 'location' headers, as allowed by RFC 7231.
-                 # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
-                 # We want to follow this relative redirect !
-                 #
-                 # Highly inspired by `resolve_redirects` from requests library.
-                 # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
-                 next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
-                 return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
-         return response
-
-     # Perform request and return if status_code is not in the retry list.
-     response = http_backoff(method=method, url=url, **params)
-     hf_raise_for_status(response)
+                 # Relative redirect -> update URL and retry
+                 url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                 continue
+
+         # Break if no relative redirect
+         break
+
      return response


- def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+ def _get_file_length_from_http_response(response: httpx.Response) -> Optional[int]:
      """
      Get the length of the file from the HTTP response headers.

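The recursive `_request_wrapper` above is replaced by an iterative `_httpx_follow_relative_redirects`. A standalone sketch of the same loop using a plain `httpx.Client` (the retry layer that `http_backoff` adds in the real helper is omitted):

from urllib.parse import urlparse

import httpx

def follow_relative_redirects(method: str, url: str, **kwargs) -> httpx.Response:
    # Follow redirects only when the Location header is relative (same host),
    # e.g. a renamed repo, while leaving absolute redirects (e.g. to a CDN) alone.
    with httpx.Client(follow_redirects=False) as client:
        while True:
            response = client.request(method, url, **kwargs)
            if 300 <= response.status_code <= 399:
                target = urlparse(response.headers["Location"])
                if target.netloc == "":
                    url = urlparse(url)._replace(path=target.path).geturl()
                    continue
            return response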
@@ -319,7 +287,7 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
      `Content-Range` or `Content-Length` header, if available (in that order).

      Args:
-         response (`requests.Response`):
+         response (`httpx.Response`):
              The HTTP response object.

      Returns:
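
The docstring favors `Content-Range` over `Content-Length`: on a 206 partial response, only `Content-Range` carries the full file size. A hedged sketch of that parsing (the helper is hypothetical; the real logic lives in the unchanged body of `_get_file_length_from_http_response`):

import re
from typing import Optional

def file_length_from_headers(headers: dict) -> Optional[int]:
    # "Content-Range: bytes 0-99/2000" -> the total size is after the slash.
    content_range = headers.get("Content-Range")
    if content_range is not None:
        match = re.search(r"/(\d+)\s*$", content_range)
        if match is not None:
            return int(match.group(1))
    content_length = headers.get("Content-Length")
    return int(content_length) if content_length is not None else None

assert file_length_from_headers({"Content-Range": "bytes 0-99/2000"}) == 2000
assert file_length_from_headers({"Content-Length": "123"}) == 123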
@@ -346,13 +314,13 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
      return None


+ @validate_hf_hub_args
  def http_get(
      url: str,
      temp_file: BinaryIO,
      *,
-     proxies: Optional[Dict] = None,
      resume_size: int = 0,
-     headers: Optional[Dict[str, Any]] = None,
+     headers: Optional[dict[str, Any]] = None,
      expected_size: Optional[int] = None,
      displayed_filename: Optional[str] = None,
      _nb_retries: int = 5,
@@ -370,8 +338,6 @@ def http_get(
          The URL of the file to download.
      temp_file (`BinaryIO`):
          The file-like object where to save the file.
-     proxies (`dict`, *optional*):
-         Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
      resume_size (`int`, *optional*):
          The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a
          positive number, the download will resume at the given position.
@@ -393,8 +359,6 @@ def http_get(
      if constants.HF_HUB_ENABLE_HF_TRANSFER:
          if resume_size != 0:
              warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
-         elif proxies is not None:
-             warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
          elif has_custom_range_header:
              warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
          else:
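
With the `proxies` branch gone, `hf_transfer` now falls back to the regular download path only for resumed downloads and custom `Range` headers. For reference, opting into `hf_transfer` is still done via the `HF_HUB_ENABLE_HF_TRANSFER` environment variable visible in this diff, typically set before `huggingface_hub` is imported:

import os

os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"  # read when huggingface_hub loads its constants

from huggingface_hub import hf_hub_download  # noqa: E402  (import after the env var is set)

path = hf_hub_download(repo_id="gpt2", filename="model.safetensors")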
@@ -423,103 +387,97 @@ def http_get(
              " Try `pip install hf_transfer` or `pip install hf_xet`."
          )

-     r = _request_wrapper(
-         method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
-     )
-
-     hf_raise_for_status(r)
-     total: Optional[int] = _get_file_length_from_http_response(r)
-
-     if displayed_filename is None:
-         displayed_filename = url
-         content_disposition = r.headers.get("Content-Disposition")
-         if content_disposition is not None:
-             match = HEADER_FILENAME_PATTERN.search(content_disposition)
-             if match is not None:
-                 # Means file is on CDN
-                 displayed_filename = match.groupdict()["filename"]
-
-     # Truncate filename if too long to display
-     if len(displayed_filename) > 40:
-         displayed_filename = f"(…){displayed_filename[-40:]}"
+     with http_stream_backoff(
+         method="GET",
+         url=url,
+         headers=headers,
+         timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+         retry_on_exceptions=(),
+         retry_on_status_codes=(429,),
+     ) as response:
+         hf_raise_for_status(response)
+         total: Optional[int] = _get_file_length_from_http_response(response)
+
+         if displayed_filename is None:
+             displayed_filename = url
+             content_disposition = response.headers.get("Content-Disposition")
+             if content_disposition is not None:
+                 match = HEADER_FILENAME_PATTERN.search(content_disposition)
+                 if match is not None:
+                     # Means file is on CDN
+                     displayed_filename = match.groupdict()["filename"]
+
+         # Truncate filename if too long to display
+         if len(displayed_filename) > 40:
+             displayed_filename = f"(…){displayed_filename[-40:]}"
+
+         consistency_error_message = (
+             f"Consistency check failed: file should be of size {expected_size} but has size"
+             f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+             " Please retry with `force_download=True`."
+         )
+         progress_cm = _get_progress_bar_context(
+             desc=displayed_filename,
+             log_level=logger.getEffectiveLevel(),
+             total=total,
+             initial=resume_size,
+             name="huggingface_hub.http_get",
+             _tqdm_bar=_tqdm_bar,
+         )

-     consistency_error_message = (
-         f"Consistency check failed: file should be of size {expected_size} but has size"
-         f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
-         " Please retry with `force_download=True`."
-     )
-     progress_cm = _get_progress_bar_context(
-         desc=displayed_filename,
-         log_level=logger.getEffectiveLevel(),
-         total=total,
-         initial=resume_size,
-         name="huggingface_hub.http_get",
-         _tqdm_bar=_tqdm_bar,
-     )
+         with progress_cm as progress:
+             if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
+                 try:
+                     hf_transfer.download(
+                         url=url,
+                         filename=temp_file.name,
+                         max_files=constants.HF_TRANSFER_CONCURRENCY,
+                         chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                         headers=initial_headers,
+                         parallel_failures=3,
+                         max_retries=5,
+                         callback=progress.update,
+                     )
+                 except Exception as e:
+                     raise RuntimeError(
+                         "An error occurred while downloading using `hf_transfer`. Consider"
+                         " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                     ) from e
+                 if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                     raise EnvironmentError(
+                         consistency_error_message.format(
+                             actual_size=os.path.getsize(temp_file.name),
+                         )
+                     )
+                 return

-     with progress_cm as progress:
-         if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
-             supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-             if not supports_callback:
-                 warnings.warn(
-                     "You are using an outdated version of `hf_transfer`. "
-                     "Consider upgrading to latest version to enable progress bars "
-                     "using `pip install -U hf_transfer`."
-                 )
+             new_resume_size = resume_size
              try:
-                 hf_transfer.download(
+                 for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                     if chunk:  # filter out keep-alive new chunks
+                         progress.update(len(chunk))
+                         temp_file.write(chunk)
+                         new_resume_size += len(chunk)
+                         # Some data has been downloaded from the server so we reset the number of retries.
+                         _nb_retries = 5
+             except (httpx.ConnectError, httpx.TimeoutException) as e:
+                 # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+                 # a transient error (network outage?). We log a warning message and try to resume the download a few times
+                 # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+                 if _nb_retries <= 0:
+                     logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                     raise
+                 logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+                 time.sleep(1)
+                 return http_get(
                      url=url,
-                     filename=temp_file.name,
-                     max_files=constants.HF_TRANSFER_CONCURRENCY,
-                     chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                     temp_file=temp_file,
+                     resume_size=new_resume_size,
                      headers=initial_headers,
-                     parallel_failures=3,
-                     max_retries=5,
-                     **({"callback": progress.update} if supports_callback else {}),
-                 )
-             except Exception as e:
-                 raise RuntimeError(
-                     "An error occurred while downloading using `hf_transfer`. Consider"
-                     " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                 ) from e
-             if not supports_callback:
-                 progress.update(total)
-             if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                 raise EnvironmentError(
-                     consistency_error_message.format(
-                         actual_size=os.path.getsize(temp_file.name),
-                     )
+                     expected_size=expected_size,
+                     _nb_retries=_nb_retries - 1,
+                     _tqdm_bar=_tqdm_bar,
                  )
-             return
-         new_resume_size = resume_size
-         try:
-             for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
-                 if chunk:  # filter out keep-alive new chunks
-                     progress.update(len(chunk))
-                     temp_file.write(chunk)
-                     new_resume_size += len(chunk)
-                     # Some data has been downloaded from the server so we reset the number of retries.
-                     _nb_retries = 5
-         except (requests.ConnectionError, requests.ReadTimeout) as e:
-             # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-             # a transient error (network outage?). We log a warning message and try to resume the download a few times
-             # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-             if _nb_retries <= 0:
-                 logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                 raise
-             logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-             time.sleep(1)
-             reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
-             return http_get(
-                 url=url,
-                 temp_file=temp_file,
-                 proxies=proxies,
-                 resume_size=new_resume_size,
-                 headers=initial_headers,
-                 expected_size=expected_size,
-                 _nb_retries=_nb_retries - 1,
-                 _tqdm_bar=_tqdm_bar,
-             )

      if expected_size is not None and expected_size != temp_file.tell():
          raise EnvironmentError(
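
The rewritten `http_get` streams inside the `http_stream_backoff` context manager and recovers from transient failures by recursing with an advanced `resume_size`. A self-contained sketch of the same resume-via-`Range` pattern with plain `httpx` (function name hypothetical, retry policy simplified):

import time

import httpx

def download_with_resume(url: str, path: str, max_retries: int = 5) -> None:
    with open(path, "ab") as f:
        for attempt in range(max_retries + 1):
            # Ask the server to start where the local file currently ends.
            headers = {"Range": f"bytes={f.tell()}-"} if f.tell() else {}
            try:
                with httpx.stream("GET", url, headers=headers, follow_redirects=True) as r:
                    r.raise_for_status()
                    for chunk in r.iter_bytes(chunk_size=1024 * 1024):
                        f.write(chunk)
                return
            except (httpx.ConnectError, httpx.TimeoutException):
                if attempt == max_retries:
                    raise
                time.sleep(1)  # transient error: retry from the current offset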
@@ -533,7 +491,7 @@ def xet_get(
      *,
      incomplete_path: Path,
      xet_file_data: XetFileData,
-     headers: Dict[str, str],
+     headers: dict[str, str],
      expected_size: Optional[int] = None,
      displayed_filename: Optional[str] = None,
      _tqdm_bar: Optional[tqdm] = None,
@@ -546,7 +504,7 @@ def xet_get(
          The path to the file to download.
      xet_file_data (`XetFileData`):
          The file metadata needed to make the request to the xet storage service.
-     headers (`Dict[str, str]`):
+     headers (`dict[str, str]`):
          The headers to send to the xet storage service.
      expected_size (`int`, *optional*):
          The expected size of the file to download. If set, the download will raise an error if the size of the
@@ -593,7 +551,7 @@ def xet_get(

      connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)

-     def token_refresher() -> Tuple[str, int]:
+     def token_refresher() -> tuple[str, int]:
          connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
          if connection_info is None:
              raise ValueError("Failed to refresh token using xet metadata.")
@@ -820,17 +778,13 @@ def hf_hub_download(
      library_version: Optional[str] = None,
      cache_dir: Union[str, Path, None] = None,
      local_dir: Union[str, Path, None] = None,
-     user_agent: Union[Dict, str, None] = None,
+     user_agent: Union[dict, str, None] = None,
      force_download: bool = False,
-     proxies: Optional[Dict] = None,
      etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
      token: Union[bool, str, None] = None,
      local_files_only: bool = False,
-     headers: Optional[Dict[str, str]] = None,
+     headers: Optional[dict[str, str]] = None,
      endpoint: Optional[str] = None,
-     resume_download: Optional[bool] = None,
-     force_filename: Optional[str] = None,
-     local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
  ) -> str:
      """Download a given file if it's not already present in the local cache.

@@ -893,9 +847,6 @@ def hf_hub_download(
      force_download (`bool`, *optional*, defaults to `False`):
          Whether the file should be downloaded even if it already exists in
          the local cache.
-     proxies (`dict`, *optional*):
-         Dictionary mapping protocol to the URL of the proxy passed to
-         `requests.request`.
      etag_timeout (`float`, *optional*, defaults to `10`):
          When fetching ETag, how many seconds to wait for the server to send
          data before giving up which is passed to `requests.request`.
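
The `proxies` argument is removed without a direct replacement in this diff. Since `httpx` honors the standard proxy environment variables by default (`trust_env=True`), one hedged migration path is:

import os

# Hypothetical proxy URL; httpx picks up HTTPS_PROXY/HTTP_PROXY on its own,
# which can stand in for the removed `proxies={...}` keyword argument.
os.environ["HTTPS_PROXY"] = "http://proxy.example:3128"

from huggingface_hub import hf_hub_download  # noqa: E402

path = hf_hub_download(repo_id="gpt2", filename="config.json")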
@@ -919,7 +870,7 @@ def hf_hub_download(
          or because it is set to `private` and you do not have access.
      [`~utils.RevisionNotFoundError`]
          If the revision to download from cannot be found.
-     [`~utils.EntryNotFoundError`]
+     [`~utils.RemoteEntryNotFoundError`]
          If the file to download cannot be found.
      [`~utils.LocalEntryNotFoundError`]
          If network is disabled or unavailable and file is not found in cache.
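
The raised-error docs switch from `EntryNotFoundError` to `RemoteEntryNotFoundError`, matching the import change at the top of the file. A hedged caller-side sketch, assuming the new name is exposed in `huggingface_hub.errors` as the diff's import block indicates:

from huggingface_hub import hf_hub_download
from huggingface_hub.errors import RemoteEntryNotFoundError

try:
    path = hf_hub_download(repo_id="gpt2", filename="missing-file.bin")
except RemoteEntryNotFoundError:
    path = None  # file is absent on the Hub; 0.35.x raised EntryNotFoundError here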
@@ -935,20 +886,6 @@ def hf_hub_download(
          # Respect environment variable above user value
          etag_timeout = constants.HF_HUB_ETAG_TIMEOUT

-     if force_filename is not None:
-         warnings.warn(
-             "The `force_filename` parameter is deprecated as a new caching system, "
-             "which keeps the filenames as they are on the Hub, is now in place.",
-             FutureWarning,
-         )
-     if resume_download is not None:
-         warnings.warn(
-             "`resume_download` is deprecated and will be removed in version 1.0.0. "
-             "Downloads always resume when possible. "
-             "If you want to force a new download, use `force_download=True`.",
-             FutureWarning,
-         )
-
      if cache_dir is None:
          cache_dir = constants.HF_HUB_CACHE
      if revision is None:
@@ -978,15 +915,6 @@ def hf_hub_download(
      )

      if local_dir is not None:
-         if local_dir_use_symlinks != "auto":
-             warnings.warn(
-                 "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
-                 "The process to download files to a local folder has been updated and do "
-                 "not rely on symlinks anymore. You only need to pass a destination folder "
-                 "as`local_dir`.\n"
-                 "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
-             )
-
          return _hf_hub_download_to_local_dir(
              # Destination
              local_dir=local_dir,
@@ -999,7 +927,6 @@ def hf_hub_download(
              endpoint=endpoint,
              etag_timeout=etag_timeout,
              headers=hf_headers,
-             proxies=proxies,
              token=token,
              # Additional options
              cache_dir=cache_dir,
@@ -1019,7 +946,6 @@ def hf_hub_download(
          endpoint=endpoint,
          etag_timeout=etag_timeout,
          headers=hf_headers,
-         proxies=proxies,
          token=token,
          # Additional options
          local_files_only=local_files_only,
@@ -1039,8 +965,7 @@ def _hf_hub_download_to_cache_dir(
      # HTTP info
      endpoint: Optional[str],
      etag_timeout: float,
-     headers: Dict[str, str],
-     proxies: Optional[Dict],
+     headers: dict[str, str],
      token: Optional[Union[bool, str]],
      # Additional options
      local_files_only: bool,
@@ -1076,7 +1001,6 @@ def _hf_hub_download_to_cache_dir(
          repo_type=repo_type,
          revision=revision,
          endpoint=endpoint,
-         proxies=proxies,
          etag_timeout=etag_timeout,
          headers=headers,
          token=token,
@@ -1172,7 +1096,6 @@ def _hf_hub_download_to_cache_dir(
          incomplete_path=Path(blob_path + ".incomplete"),
          destination_path=Path(blob_path),
          url_to_download=url_to_download,
-         proxies=proxies,
          headers=headers,
          expected_size=expected_size,
          filename=filename,
@@ -1198,8 +1121,7 @@ def _hf_hub_download_to_local_dir(
      # HTTP info
      endpoint: Optional[str],
      etag_timeout: float,
-     headers: Dict[str, str],
-     proxies: Optional[Dict],
+     headers: dict[str, str],
      token: Union[bool, str, None],
      # Additional options
      cache_dir: str,
@@ -1235,7 +1157,6 @@ def _hf_hub_download_to_local_dir(
          repo_type=repo_type,
          revision=revision,
          endpoint=endpoint,
-         proxies=proxies,
          etag_timeout=etag_timeout,
          headers=headers,
          token=token,
@@ -1301,7 +1222,6 @@ def _hf_hub_download_to_local_dir(
          incomplete_path=paths.incomplete_path(etag),
          destination_path=paths.file_path,
          url_to_download=url_to_download,
-         proxies=proxies,
          headers=headers,
          expected_size=expected_size,
          filename=filename,
@@ -1411,12 +1331,11 @@ def try_to_load_from_cache(
  def get_hf_file_metadata(
      url: str,
      token: Union[bool, str, None] = None,
-     proxies: Optional[Dict] = None,
      timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
      library_name: Optional[str] = None,
      library_version: Optional[str] = None,
-     user_agent: Union[Dict, str, None] = None,
-     headers: Optional[Dict[str, str]] = None,
+     user_agent: Union[dict, str, None] = None,
+     headers: Optional[dict[str, str]] = None,
      endpoint: Optional[str] = None,
  ) -> HfFileMetadata:
      """Fetch metadata of a file versioned on the Hub for a given url.
@@ -1430,9 +1349,6 @@ def get_hf_file_metadata(
              folder.
          - If `False` or `None`, no token is provided.
          - If a string, it's used as the authentication token.
-     proxies (`dict`, *optional*):
-         Dictionary mapping protocol to the URL of the proxy passed to
-         `requests.request`.
      timeout (`float`, *optional*, defaults to 10):
          How many seconds to wait for the server to send metadata before giving up.
      library_name (`str`, *optional*):
@@ -1460,31 +1376,23 @@ def get_hf_file_metadata(
      hf_headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file

      # Retrieve metadata
-     r = _request_wrapper(
-         method="HEAD",
-         url=url,
-         headers=hf_headers,
-         allow_redirects=False,
-         follow_relative_redirects=True,
-         proxies=proxies,
-         timeout=timeout,
-     )
-     hf_raise_for_status(r)
+     response = _httpx_follow_relative_redirects(method="HEAD", url=url, headers=hf_headers, timeout=timeout)
+     hf_raise_for_status(response)

      # Return
      return HfFileMetadata(
-         commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
-         # We favor a custom header indicating the etag of the linked resource, and
-         # we fallback to the regular etag header.
-         etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
+         commit_hash=response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+         # We favor a custom header indicating the etag of the linked resource, and we fallback to the regular etag header.
+         etag=_normalize_etag(
+             response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or response.headers.get("ETag")
+         ),
          # Either from response headers (if redirected) or defaults to request url
-         # Do not use directly `url`, as `_request_wrapper` might have followed relative
-         # redirects.
-         location=r.headers.get("Location") or r.request.url,  # type: ignore
+         # Do not use directly `url` as we might have followed relative redirects.
+         location=response.headers.get("Location") or str(response.request.url),  # type: ignore
          size=_int_or_none(
-             r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
+             response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or response.headers.get("Content-Length")
          ),
-         xet_file_data=parse_xet_file_data_from_response(r, endpoint=endpoint),  # type: ignore
+         xet_file_data=parse_xet_file_data_from_response(response, endpoint=endpoint),  # type: ignore
      )


@@ -1495,19 +1403,18 @@ def _get_metadata_or_catch_error(
      repo_type: str,
      revision: str,
      endpoint: Optional[str],
-     proxies: Optional[Dict],
      etag_timeout: Optional[float],
-     headers: Dict[str, str],  # mutated inplace!
+     headers: dict[str, str],  # mutated inplace!
      token: Union[bool, str, None],
      local_files_only: bool,
      relative_filename: Optional[str] = None,  # only used to store `.no_exists` in cache
      storage_folder: Optional[str] = None,  # only used to store `.no_exists` in cache
  ) -> Union[
      # Either an exception is caught and returned
-     Tuple[None, None, None, None, None, Exception],
+     tuple[None, None, None, None, None, Exception],
      # Or the metadata is returned as
      # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
-     Tuple[str, str, str, int, Optional[XetFileData], None],
+     tuple[str, str, str, int, Optional[XetFileData], None],
  ]:
      """Get metadata for a file on the Hub, safely handling network issues.

@@ -1544,9 +1451,9 @@ def _get_metadata_or_catch_error(
      try:
          try:
              metadata = get_hf_file_metadata(
-                 url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
+                 url=url, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
              )
-         except EntryNotFoundError as http_error:
+         except RemoteEntryNotFoundError as http_error:
              if storage_folder is not None and relative_filename is not None:
                  # Cache the non-existence of the file
                  commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
@@ -1597,21 +1504,17 @@ def _get_metadata_or_catch_error(
          if urlparse(url).netloc != urlparse(metadata.location).netloc:
              # Remove authorization header when downloading a LFS blob
              headers.pop("authorization", None)
-     except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
-         # Actually raise for those subclasses of ConnectionError
+     except httpx.ProxyError:
+         # Actually raise on proxy error
          raise
-     except (
-         requests.exceptions.ConnectionError,
-         requests.exceptions.Timeout,
-         OfflineModeIsEnabled,
-     ) as error:
+     except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
          # Otherwise, our Internet connection is down.
          # etag is None
          head_error_call = error
-     except (RevisionNotFoundError, EntryNotFoundError):
+     except (RevisionNotFoundError, RemoteEntryNotFoundError):
          # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
          raise
-     except requests.HTTPError as error:
+     except HfHubHTTPError as error:
          # Multiple reasons for an http error:
          #     - Repository is private and invalid/missing token sent
          #     - Repository is gated and invalid/missing token sent
@@ -1669,8 +1572,7 @@ def _download_to_tmp_and_move(
      incomplete_path: Path,
      destination_path: Path,
      url_to_download: str,
-     proxies: Optional[Dict],
-     headers: Dict[str, str],
+     headers: dict[str, str],
      expected_size: Optional[int],
      filename: str,
      force_download: bool,
@@ -1694,14 +1596,14 @@ def _download_to_tmp_and_move(
          # Do nothing if already exists (except if force_download=True)
          return

-     if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
+     if incomplete_path.exists() and (force_download or constants.HF_HUB_ENABLE_HF_TRANSFER):
          # By default, we will try to resume the download if possible.
          # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
          # not resume the download => delete the incomplete file.
          message = f"Removing incomplete file '{incomplete_path}'"
          if force_download:
              message += " (force_download=True)"
-         elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
+         elif constants.HF_HUB_ENABLE_HF_TRANSFER:
              message += " (hf_transfer=True)"
          logger.info(message)
          incomplete_path.unlink(missing_ok=True)
@@ -1738,7 +1640,6 @@ def _download_to_tmp_and_move(
          http_get(
              url_to_download,
              f,
-             proxies=proxies,
              resume_size=resume_size,
              headers=headers,
              expected_size=expected_size,