huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (127)
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +176 -20
  6. huggingface_hub/_local_folder.py +1 -1
  7. huggingface_hub/_login.py +13 -39
  8. huggingface_hub/_oauth.py +10 -14
  9. huggingface_hub/_snapshot_download.py +14 -28
  10. huggingface_hub/_space_api.py +4 -4
  11. huggingface_hub/_tensorboard_logger.py +13 -14
  12. huggingface_hub/_upload_large_folder.py +120 -13
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +2 -2
  15. huggingface_hub/cli/_cli_utils.py +2 -2
  16. huggingface_hub/cli/auth.py +8 -6
  17. huggingface_hub/cli/cache.py +18 -20
  18. huggingface_hub/cli/download.py +4 -4
  19. huggingface_hub/cli/hf.py +2 -5
  20. huggingface_hub/cli/jobs.py +599 -22
  21. huggingface_hub/cli/lfs.py +4 -4
  22. huggingface_hub/cli/repo.py +11 -7
  23. huggingface_hub/cli/repo_files.py +2 -2
  24. huggingface_hub/cli/upload.py +4 -4
  25. huggingface_hub/cli/upload_large_folder.py +3 -3
  26. huggingface_hub/commands/_cli_utils.py +2 -2
  27. huggingface_hub/commands/delete_cache.py +13 -13
  28. huggingface_hub/commands/download.py +4 -13
  29. huggingface_hub/commands/lfs.py +4 -4
  30. huggingface_hub/commands/repo_files.py +2 -2
  31. huggingface_hub/commands/scan_cache.py +1 -1
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -19
  39. huggingface_hub/errors.py +42 -29
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +162 -259
  42. huggingface_hub/hf_api.py +841 -616
  43. huggingface_hub/hf_file_system.py +98 -62
  44. huggingface_hub/hub_mixin.py +37 -57
  45. huggingface_hub/inference/_client.py +257 -325
  46. huggingface_hub/inference/_common.py +110 -124
  47. huggingface_hub/inference/_generated/_async_client.py +307 -432
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  67. huggingface_hub/inference/_mcp/agent.py +3 -3
  68. huggingface_hub/inference/_mcp/cli.py +1 -1
  69. huggingface_hub/inference/_mcp/constants.py +2 -3
  70. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  71. huggingface_hub/inference/_mcp/types.py +10 -7
  72. huggingface_hub/inference/_mcp/utils.py +11 -7
  73. huggingface_hub/inference/_providers/__init__.py +4 -2
  74. huggingface_hub/inference/_providers/_common.py +49 -25
  75. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  76. huggingface_hub/inference/_providers/cohere.py +3 -3
  77. huggingface_hub/inference/_providers/fal_ai.py +52 -21
  78. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  79. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  80. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +10 -10
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +4 -4
  85. huggingface_hub/inference/_providers/replicate.py +15 -15
  86. huggingface_hub/inference/_providers/sambanova.py +6 -6
  87. huggingface_hub/inference/_providers/together.py +7 -7
  88. huggingface_hub/lfs.py +20 -31
  89. huggingface_hub/repocard.py +18 -18
  90. huggingface_hub/repocard_data.py +56 -56
  91. huggingface_hub/serialization/__init__.py +0 -1
  92. huggingface_hub/serialization/_base.py +9 -9
  93. huggingface_hub/serialization/_dduf.py +7 -7
  94. huggingface_hub/serialization/_torch.py +28 -28
  95. huggingface_hub/utils/__init__.py +10 -4
  96. huggingface_hub/utils/_auth.py +5 -5
  97. huggingface_hub/utils/_cache_manager.py +31 -31
  98. huggingface_hub/utils/_deprecation.py +1 -1
  99. huggingface_hub/utils/_dotenv.py +25 -21
  100. huggingface_hub/utils/_fixes.py +0 -10
  101. huggingface_hub/utils/_git_credential.py +4 -4
  102. huggingface_hub/utils/_headers.py +7 -29
  103. huggingface_hub/utils/_http.py +366 -208
  104. huggingface_hub/utils/_pagination.py +4 -4
  105. huggingface_hub/utils/_paths.py +5 -5
  106. huggingface_hub/utils/_runtime.py +16 -13
  107. huggingface_hub/utils/_safetensors.py +21 -21
  108. huggingface_hub/utils/_subprocess.py +9 -9
  109. huggingface_hub/utils/_telemetry.py +3 -3
  110. huggingface_hub/utils/_typing.py +25 -5
  111. huggingface_hub/utils/_validators.py +53 -72
  112. huggingface_hub/utils/_xet.py +16 -16
  113. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  114. huggingface_hub/utils/insecure_hashlib.py +3 -9
  115. huggingface_hub/utils/tqdm.py +3 -3
  116. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  117. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
  124. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
 import copy
 import errno
-import inspect
 import os
 import re
 import shutil
@@ -10,26 +9,19 @@ import uuid
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, BinaryIO, Dict, Literal, NoReturn, Optional, Tuple, Union
+from typing import Any, BinaryIO, NoReturn, Optional, Union
 from urllib.parse import quote, urlparse
 
-import requests
+import httpx
 
-from . import (
-    __version__,  # noqa: F401 # for backward compatibility
-    constants,
-)
+from . import constants
 from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
-from .constants import (
-    HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401 # for backward compatibility
-    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
-)
 from .errors import (
-    EntryNotFoundError,
     FileMetadataError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
+    RemoteEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
@@ -39,30 +31,15 @@ from .utils import (
     WeakFileLock,
     XetFileData,
     build_hf_headers,
-    get_fastai_version,  # noqa: F401 # for backward compatibility
-    get_fastcore_version,  # noqa: F401 # for backward compatibility
-    get_graphviz_version,  # noqa: F401 # for backward compatibility
-    get_jinja_version,  # noqa: F401 # for backward compatibility
-    get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_tf_version,  # noqa: F401 # for backward compatibility
-    get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
-    is_fastai_available,  # noqa: F401 # for backward compatibility
-    is_fastcore_available,  # noqa: F401 # for backward compatibility
-    is_graphviz_available,  # noqa: F401 # for backward compatibility
-    is_jinja_available,  # noqa: F401 # for backward compatibility
-    is_pydot_available,  # noqa: F401 # for backward compatibility
-    is_tf_available,  # noqa: F401 # for backward compatibility
-    is_torch_available,  # noqa: F401 # for backward compatibility
     logging,
     parse_xet_file_data_from_response,
     refresh_xet_connection_info,
-    reset_sessions,
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header, http_backoff
-from .utils._runtime import _PY_VERSION, is_xet_available  # noqa: F401 # for backward compatibility
+from .utils._http import _adjust_range_header, http_backoff, http_stream_backoff
+from .utils._runtime import is_xet_available
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
 from .utils.tqdm import _get_progress_bar_context
@@ -83,7 +60,7 @@ REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
 # Regex to check if the file etag IS a valid sha256
 REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
 
-_are_symlinks_supported_in_dir: Dict[str, bool] = {}
+_are_symlinks_supported_in_dir: dict[str, bool] = {}
 
 
 def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
@@ -252,7 +229,7 @@ def hf_hub_url(
 
     if revision is None:
         revision = constants.DEFAULT_REVISION
-    url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+    url = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
         repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
     )
     # Update endpoint if provided
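
Aside: `hf_hub_url` is public API and is unaffected here beyond the constant lookup; a quick sanity check of the URL it builds, assuming the default endpoint:

from huggingface_hub import hf_hub_url

# Default endpoint assumed; the template is
# "https://huggingface.co/{repo_id}/resolve/{revision}/{filename}".
url = hf_hub_url(repo_id="gpt2", filename="config.json")
print(url)  # https://huggingface.co/gpt2/resolve/main/config.json
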
@@ -261,11 +238,10 @@
     return url
 
 
-def _request_wrapper(
-    method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
-) -> requests.Response:
-    """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
-    `allow_redirection=False`.
+def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kwargs) -> httpx.Response:
+    """Perform an HTTP request with backoff and follow relative redirects only.
+
+    This is useful to follow a redirection to a renamed repository without following redirection to a CDN.
 
     A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
 
@@ -274,61 +250,59 @@
             HTTP method, such as 'GET' or 'HEAD'.
         url (`str`):
             The URL of the resource to fetch.
-        follow_relative_redirects (`bool`, *optional*, defaults to `False`)
-            If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
-            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
-            following redirection to a CDN.
-        **params (`dict`, *optional*):
-            Params to pass to `requests.request`.
+        **httpx_kwargs (`dict`, *optional*):
+            Params to pass to `httpx.request`.
     """
-    # Recursively follow relative redirects
-    if follow_relative_redirects:
-        response = _request_wrapper(
+    while True:
+        # Make the request
+        response = http_backoff(
             method=method,
             url=url,
-            follow_relative_redirects=False,
-            **params,
+            **httpx_kwargs,
+            follow_redirects=False,
+            retry_on_exceptions=(),
+            retry_on_status_codes=(429,),
         )
+        hf_raise_for_status(response)
 
-        # If redirection, we redirect only relative paths.
-        # This is useful in case of a renamed repository.
+        # Check if response is a relative redirect
         if 300 <= response.status_code <= 399:
             parsed_target = urlparse(response.headers["Location"])
             if parsed_target.netloc == "":
-                # This means it is a relative 'location' headers, as allowed by RFC 7231.
-                # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
-                # We want to follow this relative redirect !
-                #
-                # Highly inspired by `resolve_redirects` from requests library.
-                # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
-                next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
-                return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
-        return response
-
-    # Perform request and return if status_code is not in the retry list.
-    response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
-    hf_raise_for_status(response)
+                # Relative redirect -> update URL and retry
+                url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                continue
+
+        # Break if no relative redirect
+        break
+
     return response
 
 
-def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+def _get_file_length_from_http_response(response: httpx.Response) -> Optional[int]:
     """
     Get the length of the file from the HTTP response headers.
 
     This function extracts the file size from the HTTP response headers, either from the
     `Content-Range` or `Content-Length` header, if available (in that order).
-        The HTTP response object containing the headers.
-    `int` or `None`: The length of the file in bytes if the information is available,
-    otherwise `None`.
 
     Args:
-        response (`requests.Response`):
+        response (`httpx.Response`):
             The HTTP response object.
 
     Returns:
         `int` or `None`: The length of the file in bytes, or None if not available.
     """
 
+    # If HTTP response contains compressed body (e.g. gzip), the `Content-Length` header will
+    # contain the length of the compressed body, not the uncompressed file size.
+    # And at the start of transmission there's no way to know the uncompressed file size for gzip,
+    # thus we return None in that case.
+    content_encoding = response.headers.get("Content-Encoding", "identity").lower()
+    if content_encoding != "identity":
+        # gzip/br/deflate/zstd etc
+        return None
+
     content_range = response.headers.get("Content-Range")
     if content_range is not None:
         return int(content_range.rsplit("/")[-1])
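
Note: the rewritten helper above follows only same-site redirects. A minimal standalone sketch of the same pattern with plain `httpx` (hypothetical helper name, no backoff), for readers migrating similar `requests`-based code:

import httpx
from urllib.parse import urlparse


def follow_relative_redirects(method: str, url: str, **kwargs) -> httpx.Response:
    # Follow only redirects whose Location has an empty netloc (same site,
    # e.g. a renamed repo); never cross-origin redirects such as a CDN.
    while True:
        response = httpx.request(method, url, follow_redirects=False, **kwargs)
        if 300 <= response.status_code <= 399:
            target = urlparse(response.headers["Location"])
            if target.netloc == "":
                url = urlparse(url)._replace(path=target.path).geturl()
                continue
        return response
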
@@ -340,13 +314,13 @@ def _get_file_length_from_http_response(response: requests.Response) -> Optional
     return None
 
 
+@validate_hf_hub_args
 def http_get(
     url: str,
     temp_file: BinaryIO,
     *,
-    proxies: Optional[Dict] = None,
     resume_size: int = 0,
-    headers: Optional[Dict[str, Any]] = None,
+    headers: Optional[dict[str, Any]] = None,
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
     _nb_retries: int = 5,
@@ -364,8 +338,6 @@
             The URL of the file to download.
         temp_file (`BinaryIO`):
             The file-like object where to save the file.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
         resume_size (`int`, *optional*):
             The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a
             positive number, the download will resume at the given position.
@@ -387,8 +359,6 @@
     if constants.HF_HUB_ENABLE_HF_TRANSFER:
         if resume_size != 0:
             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
-        elif proxies is not None:
-            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
         elif has_custom_range_header:
             warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
         else:
@@ -417,107 +387,97 @@
             " Try `pip install hf_transfer` or `pip install hf_xet`."
         )
 
-    r = _request_wrapper(
-        method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
-    )
-
-    hf_raise_for_status(r)
-    content_length = _get_file_length_from_http_response(r)
-
-    # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
-    # If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
-    total = resume_size + int(content_length) if content_length is not None else None
-
-    if displayed_filename is None:
-        displayed_filename = url
-        content_disposition = r.headers.get("Content-Disposition")
-        if content_disposition is not None:
-            match = HEADER_FILENAME_PATTERN.search(content_disposition)
-            if match is not None:
-                # Means file is on CDN
-                displayed_filename = match.groupdict()["filename"]
-
-    # Truncate filename if too long to display
-    if len(displayed_filename) > 40:
-        displayed_filename = f"(…){displayed_filename[-40:]}"
+    with http_stream_backoff(
+        method="GET",
+        url=url,
+        headers=headers,
+        timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+        retry_on_exceptions=(),
+        retry_on_status_codes=(429,),
+    ) as response:
+        hf_raise_for_status(response)
+        total: Optional[int] = _get_file_length_from_http_response(response)
+
+        if displayed_filename is None:
+            displayed_filename = url
+            content_disposition = response.headers.get("Content-Disposition")
+            if content_disposition is not None:
+                match = HEADER_FILENAME_PATTERN.search(content_disposition)
+                if match is not None:
+                    # Means file is on CDN
+                    displayed_filename = match.groupdict()["filename"]
+
+        # Truncate filename if too long to display
+        if len(displayed_filename) > 40:
+            displayed_filename = f"(…){displayed_filename[-40:]}"
+
+        consistency_error_message = (
+            f"Consistency check failed: file should be of size {expected_size} but has size"
+            f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+            " Please retry with `force_download=True`."
+        )
+        progress_cm = _get_progress_bar_context(
+            desc=displayed_filename,
+            log_level=logger.getEffectiveLevel(),
+            total=total,
+            initial=resume_size,
+            name="huggingface_hub.http_get",
+            _tqdm_bar=_tqdm_bar,
+        )
 
-    consistency_error_message = (
-        f"Consistency check failed: file should be of size {expected_size} but has size"
-        f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
-        " Please retry with `force_download=True`."
-    )
-    progress_cm = _get_progress_bar_context(
-        desc=displayed_filename,
-        log_level=logger.getEffectiveLevel(),
-        total=total,
-        initial=resume_size,
-        name="huggingface_hub.http_get",
-        _tqdm_bar=_tqdm_bar,
-    )
+        with progress_cm as progress:
+            if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
+                try:
+                    hf_transfer.download(
+                        url=url,
+                        filename=temp_file.name,
+                        max_files=constants.HF_TRANSFER_CONCURRENCY,
+                        chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                        headers=initial_headers,
+                        parallel_failures=3,
+                        max_retries=5,
+                        callback=progress.update,
+                    )
+                except Exception as e:
+                    raise RuntimeError(
+                        "An error occurred while downloading using `hf_transfer`. Consider"
+                        " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                    ) from e
+                if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                    raise EnvironmentError(
+                        consistency_error_message.format(
+                            actual_size=os.path.getsize(temp_file.name),
+                        )
+                    )
+                return
 
-    with progress_cm as progress:
-        if hf_transfer and total is not None and total > 5 * constants.DOWNLOAD_CHUNK_SIZE:
-            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-            if not supports_callback:
-                warnings.warn(
-                    "You are using an outdated version of `hf_transfer`. "
-                    "Consider upgrading to latest version to enable progress bars "
-                    "using `pip install -U hf_transfer`."
-                )
+            new_resume_size = resume_size
             try:
-                hf_transfer.download(
+                for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                    if chunk:  # filter out keep-alive new chunks
+                        progress.update(len(chunk))
+                        temp_file.write(chunk)
+                        new_resume_size += len(chunk)
+                        # Some data has been downloaded from the server so we reset the number of retries.
+                        _nb_retries = 5
+            except (httpx.ConnectError, httpx.TimeoutException) as e:
+                # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+                # a transient error (network outage?). We log a warning message and try to resume the download a few times
+                # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+                if _nb_retries <= 0:
+                    logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                    raise
+                logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+                time.sleep(1)
+                return http_get(
                     url=url,
-                    filename=temp_file.name,
-                    max_files=constants.HF_TRANSFER_CONCURRENCY,
-                    chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
+                    temp_file=temp_file,
+                    resume_size=new_resume_size,
                     headers=initial_headers,
-                    parallel_failures=3,
-                    max_retries=5,
-                    **({"callback": progress.update} if supports_callback else {}),
-                )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
-            if not supports_callback:
-                progress.update(total)
-            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                raise EnvironmentError(
-                    consistency_error_message.format(
-                        actual_size=os.path.getsize(temp_file.name),
-                    )
+                    expected_size=expected_size,
+                    _nb_retries=_nb_retries - 1,
+                    _tqdm_bar=_tqdm_bar,
                 )
-            return
-        new_resume_size = resume_size
-        try:
-            for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
-                if chunk:  # filter out keep-alive new chunks
-                    progress.update(len(chunk))
-                    temp_file.write(chunk)
-                    new_resume_size += len(chunk)
-                    # Some data has been downloaded from the server so we reset the number of retries.
-                    _nb_retries = 5
-        except (requests.ConnectionError, requests.ReadTimeout) as e:
-            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-            # a transient error (network outage?). We log a warning message and try to resume the download a few times
-            # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-            if _nb_retries <= 0:
-                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                raise
-            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-            time.sleep(1)
-            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
-            return http_get(
-                url=url,
-                temp_file=temp_file,
-                proxies=proxies,
-                resume_size=new_resume_size,
-                headers=initial_headers,
-                expected_size=expected_size,
-                _nb_retries=_nb_retries - 1,
-                _tqdm_bar=_tqdm_bar,
-            )
 
     if expected_size is not None and expected_size != temp_file.tell():
         raise EnvironmentError(
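
Note: the `Content-Encoding` guard introduced above matters because, for a compressed response, `Content-Length` is the wire size of the compressed body, not the file size. A self-contained illustration of the same check (example.org is just a placeholder host):

import httpx

response = httpx.get("https://example.org", headers={"Accept-Encoding": "gzip"})
encoding = response.headers.get("Content-Encoding", "identity").lower()
content_length = response.headers.get("Content-Length")
# Mirror the new logic: trust Content-Length only for identity encoding.
total = int(content_length) if encoding == "identity" and content_length is not None else None
print(encoding, content_length, total)
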
@@ -531,7 +491,7 @@ def xet_get(
     *,
     incomplete_path: Path,
    xet_file_data: XetFileData,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
     _tqdm_bar: Optional[tqdm] = None,
@@ -544,7 +504,7 @@
             The path to the file to download.
         xet_file_data (`XetFileData`):
             The file metadata needed to make the request to the xet storage service.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             The headers to send to the xet storage service.
         expected_size (`int`, *optional*):
             The expected size of the file to download. If set, the download will raise an error if the size of the
@@ -591,7 +551,7 @@
 
     connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
 
-    def token_refresher() -> Tuple[str, int]:
+    def token_refresher() -> tuple[str, int]:
         connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
         if connection_info is None:
             raise ValueError("Failed to refresh token using xet metadata.")
@@ -818,17 +778,13 @@
     library_version: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
     local_dir: Union[str, Path, None] = None,
-    user_agent: Union[Dict, str, None] = None,
+    user_agent: Union[dict, str, None] = None,
     force_download: bool = False,
-    proxies: Optional[Dict] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
-    headers: Optional[Dict[str, str]] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-    resume_download: Optional[bool] = None,
-    force_filename: Optional[str] = None,
-    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
 ) -> str:
     """Download a given file if it's not already present in the local cache.
 
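Migration note: with `proxies=` gone from `hf_hub_download`, proxy configuration moves to the HTTP backend. One approach that should keep working is httpx's standard environment-variable support (httpx honours `HTTP_PROXY`/`HTTPS_PROXY` when `trust_env` is left at its default); whether the library's shared client keeps that default is an assumption here:

import os

# Placeholder proxy URL; set before the first request so the shared client
# picks it up (assumption: the library does not disable trust_env).
os.environ["HTTPS_PROXY"] = "http://proxy.example:3128"

from huggingface_hub import hf_hub_download

path = hf_hub_download(repo_id="gpt2", filename="config.json")
print(path)
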
@@ -891,9 +847,6 @@
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in
             the local cache.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
             data before giving up which is passed to `requests.request`.
@@ -917,7 +870,7 @@
             or because it is set to `private` and you do not have access.
         [`~utils.RevisionNotFoundError`]
             If the revision to download from cannot be found.
-        [`~utils.EntryNotFoundError`]
+        [`~utils.RemoteEntryNotFoundError`]
             If the file to download cannot be found.
         [`~utils.LocalEntryNotFoundError`]
             If network is disabled or unavailable and file is not found in cache.
@@ -933,20 +886,6 @@
         # Respect environment variable above user value
         etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
 
-    if force_filename is not None:
-        warnings.warn(
-            "The `force_filename` parameter is deprecated as a new caching system, "
-            "which keeps the filenames as they are on the Hub, is now in place.",
-            FutureWarning,
-        )
-    if resume_download is not None:
-        warnings.warn(
-            "`resume_download` is deprecated and will be removed in version 1.0.0. "
-            "Downloads always resume when possible. "
-            "If you want to force a new download, use `force_download=True`.",
-            FutureWarning,
-        )
-
     if cache_dir is None:
         cache_dir = constants.HF_HUB_CACHE
     if revision is None:
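
Migration note: `resume_download`, `force_filename` and `local_dir_use_symlinks` are now removed rather than merely deprecated, so old call sites raise a `TypeError`. The fix is mechanical:

from huggingface_hub import hf_hub_download

# Before (0.x): hf_hub_download("gpt2", "config.json", resume_download=True)
# After (1.0): downloads always resume when possible, so simply drop the kwarg.
path = hf_hub_download(repo_id="gpt2", filename="config.json")

# To discard a partial download and start from scratch instead:
path = hf_hub_download(repo_id="gpt2", filename="config.json", force_download=True)
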
@@ -976,15 +915,6 @@
     )
 
     if local_dir is not None:
-        if local_dir_use_symlinks != "auto":
-            warnings.warn(
-                "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
-                "The process to download files to a local folder has been updated and do "
-                "not rely on symlinks anymore. You only need to pass a destination folder "
-                "as`local_dir`.\n"
-                "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
-            )
-
         return _hf_hub_download_to_local_dir(
             # Destination
             local_dir=local_dir,
@@ -997,7 +927,6 @@
             endpoint=endpoint,
             etag_timeout=etag_timeout,
             headers=hf_headers,
-            proxies=proxies,
             token=token,
             # Additional options
             cache_dir=cache_dir,
@@ -1017,7 +946,6 @@
         endpoint=endpoint,
         etag_timeout=etag_timeout,
         headers=hf_headers,
-        proxies=proxies,
         token=token,
         # Additional options
         local_files_only=local_files_only,
@@ -1037,8 +965,7 @@ def _hf_hub_download_to_cache_dir(
     # HTTP info
     endpoint: Optional[str],
     etag_timeout: float,
-    headers: Dict[str, str],
-    proxies: Optional[Dict],
+    headers: dict[str, str],
     token: Optional[Union[bool, str]],
     # Additional options
     local_files_only: bool,
@@ -1074,7 +1001,6 @@
         repo_type=repo_type,
         revision=revision,
         endpoint=endpoint,
-        proxies=proxies,
         etag_timeout=etag_timeout,
         headers=headers,
         token=token,
@@ -1170,7 +1096,6 @@
         incomplete_path=Path(blob_path + ".incomplete"),
         destination_path=Path(blob_path),
         url_to_download=url_to_download,
-        proxies=proxies,
         headers=headers,
         expected_size=expected_size,
         filename=filename,
@@ -1196,8 +1121,7 @@ def _hf_hub_download_to_local_dir(
     # HTTP info
     endpoint: Optional[str],
     etag_timeout: float,
-    headers: Dict[str, str],
-    proxies: Optional[Dict],
+    headers: dict[str, str],
     token: Union[bool, str, None],
     # Additional options
     cache_dir: str,
@@ -1233,7 +1157,6 @@
         repo_type=repo_type,
         revision=revision,
         endpoint=endpoint,
-        proxies=proxies,
         etag_timeout=etag_timeout,
         headers=headers,
         token=token,
@@ -1299,7 +1222,6 @@
         incomplete_path=paths.incomplete_path(etag),
         destination_path=paths.file_path,
         url_to_download=url_to_download,
-        proxies=proxies,
         headers=headers,
         expected_size=expected_size,
         filename=filename,
@@ -1409,12 +1331,11 @@ def try_to_load_from_cache(
 def get_hf_file_metadata(
     url: str,
     token: Union[bool, str, None] = None,
-    proxies: Optional[Dict] = None,
     timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Union[Dict, str, None] = None,
-    headers: Optional[Dict[str, str]] = None,
+    user_agent: Union[dict, str, None] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
@@ -1428,9 +1349,6 @@
             folder.
             - If `False` or `None`, no token is provided.
             - If a string, it's used as the authentication token.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         timeout (`float`, *optional*, defaults to 10):
             How many seconds to wait for the server to send metadata before giving up.
         library_name (`str`, *optional*):
@@ -1458,31 +1376,23 @@
     hf_headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file
 
     # Retrieve metadata
-    r = _request_wrapper(
-        method="HEAD",
-        url=url,
-        headers=hf_headers,
-        allow_redirects=False,
-        follow_relative_redirects=True,
-        proxies=proxies,
-        timeout=timeout,
-    )
-    hf_raise_for_status(r)
+    response = _httpx_follow_relative_redirects(method="HEAD", url=url, headers=hf_headers, timeout=timeout)
+    hf_raise_for_status(response)
 
     # Return
     return HfFileMetadata(
-        commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
-        # We favor a custom header indicating the etag of the linked resource, and
-        # we fallback to the regular etag header.
-        etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
+        commit_hash=response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+        # We favor a custom header indicating the etag of the linked resource, and we fallback to the regular etag header.
+        etag=_normalize_etag(
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or response.headers.get("ETag")
+        ),
         # Either from response headers (if redirected) or defaults to request url
-        # Do not use directly `url`, as `_request_wrapper` might have followed relative
-        # redirects.
-        location=r.headers.get("Location") or r.request.url,  # type: ignore
+        # Do not use directly `url` as we might have followed relative redirects.
+        location=response.headers.get("Location") or str(response.request.url),  # type: ignore
         size=_int_or_none(
-            r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or response.headers.get("Content-Length")
         ),
-        xet_file_data=parse_xet_file_data_from_response(r, endpoint=endpoint),  # type: ignore
+        xet_file_data=parse_xet_file_data_from_response(response, endpoint=endpoint),  # type: ignore
     )
 
 
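Both `get_hf_file_metadata` and `hf_hub_url` are public, so the new behaviour is easy to inspect directly; note that `location` is now explicitly coerced to `str` (httpx request URLs are `httpx.URL` objects):

from huggingface_hub import get_hf_file_metadata, hf_hub_url

url = hf_hub_url(repo_id="gpt2", filename="config.json")
metadata = get_hf_file_metadata(url)
# etag/size prefer the X-Linked-* headers; commit_hash comes from X-Repo-Commit;
# location reflects any relative redirect that was followed.
print(metadata.commit_hash, metadata.etag, metadata.size, metadata.location)
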
@@ -1493,19 +1403,18 @@ def _get_metadata_or_catch_error(
     repo_type: str,
     revision: str,
     endpoint: Optional[str],
-    proxies: Optional[Dict],
     etag_timeout: Optional[float],
-    headers: Dict[str, str],  # mutated inplace!
+    headers: dict[str, str],  # mutated inplace!
     token: Union[bool, str, None],
     local_files_only: bool,
     relative_filename: Optional[str] = None,  # only used to store `.no_exists` in cache
     storage_folder: Optional[str] = None,  # only used to store `.no_exists` in cache
 ) -> Union[
     # Either an exception is caught and returned
-    Tuple[None, None, None, None, None, Exception],
+    tuple[None, None, None, None, None, Exception],
     # Or the metadata is returned as
     # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
-    Tuple[str, str, str, int, Optional[XetFileData], None],
+    tuple[str, str, str, int, Optional[XetFileData], None],
 ]:
     """Get metadata for a file on the Hub, safely handling network issues.
 
@@ -1542,9 +1451,9 @@
     try:
         try:
             metadata = get_hf_file_metadata(
-                url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
+                url=url, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
             )
-        except EntryNotFoundError as http_error:
+        except RemoteEntryNotFoundError as http_error:
             if storage_folder is not None and relative_filename is not None:
                 # Cache the non-existence of the file
                 commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
@@ -1595,21 +1504,17 @@
         if urlparse(url).netloc != urlparse(metadata.location).netloc:
             # Remove authorization header when downloading a LFS blob
             headers.pop("authorization", None)
-    except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
-        # Actually raise for those subclasses of ConnectionError
+    except httpx.ProxyError:
+        # Actually raise on proxy error
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ) as error:
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
         # Otherwise, our Internet connection is down.
         # etag is None
         head_error_call = error
-    except (RevisionNotFoundError, EntryNotFoundError):
+    except (RevisionNotFoundError, RemoteEntryNotFoundError):
         # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
         raise
-    except requests.HTTPError as error:
+    except HfHubHTTPError as error:
         # Multiple reasons for an http error:
         # - Repository is private and invalid/missing token sent
         # - Repository is gated and invalid/missing token sent
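
Downstream `try/except` blocks need the matching update, since network failures now surface as httpx exceptions and HTTP errors as `HfHubHTTPError` subclasses. A sketch, assuming the 1.0 names shown in this diff are final:

import httpx
from huggingface_hub import hf_hub_download
from huggingface_hub.errors import LocalEntryNotFoundError, RemoteEntryNotFoundError

try:
    path = hf_hub_download(repo_id="gpt2", filename="does-not-exist.bin")
except RemoteEntryNotFoundError:
    print("file not found on the Hub")  # was EntryNotFoundError in 0.x
except LocalEntryNotFoundError:
    print("offline and not in local cache")
except httpx.TimeoutException:
    print("network timeout")  # was requests.Timeout in 0.x
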
@@ -1667,8 +1572,7 @@ def _download_to_tmp_and_move(
     incomplete_path: Path,
     destination_path: Path,
     url_to_download: str,
-    proxies: Optional[Dict],
-    headers: Dict[str, str],
+    headers: dict[str, str],
     expected_size: Optional[int],
     filename: str,
     force_download: bool,
@@ -1692,14 +1596,14 @@
         # Do nothing if already exists (except if force_download=True)
         return
 
-    if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
+    if incomplete_path.exists() and (force_download or constants.HF_HUB_ENABLE_HF_TRANSFER):
         # By default, we will try to resume the download if possible.
         # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
         # not resume the download => delete the incomplete file.
         message = f"Removing incomplete file '{incomplete_path}'"
         if force_download:
             message += " (force_download=True)"
-        elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
+        elif constants.HF_HUB_ENABLE_HF_TRANSFER:
             message += " (hf_transfer=True)"
         logger.info(message)
         incomplete_path.unlink(missing_ok=True)
@@ -1736,7 +1640,6 @@
         http_get(
             url_to_download,
             f,
-            proxies=proxies,
             resume_size=resume_size,
             headers=headers,
             expected_size=expected_size,