huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (132) hide show
  1. huggingface_hub/__init__.py +33 -45
  2. huggingface_hub/_commit_api.py +39 -43
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +17 -43
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +135 -50
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +18 -32
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +143 -39
  16. huggingface_hub/cli/auth.py +105 -171
  17. huggingface_hub/cli/cache.py +594 -361
  18. huggingface_hub/cli/download.py +120 -112
  19. huggingface_hub/cli/hf.py +38 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +282 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -220
  26. huggingface_hub/cli/upload_large_folder.py +91 -106
  27. huggingface_hub/community.py +5 -5
  28. huggingface_hub/constants.py +17 -52
  29. huggingface_hub/dataclasses.py +135 -21
  30. huggingface_hub/errors.py +47 -30
  31. huggingface_hub/fastai_utils.py +8 -9
  32. huggingface_hub/file_download.py +351 -303
  33. huggingface_hub/hf_api.py +398 -570
  34. huggingface_hub/hf_file_system.py +101 -66
  35. huggingface_hub/hub_mixin.py +32 -54
  36. huggingface_hub/inference/_client.py +177 -162
  37. huggingface_hub/inference/_common.py +38 -54
  38. huggingface_hub/inference/_generated/_async_client.py +218 -258
  39. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  40. huggingface_hub/inference/_generated/types/base.py +10 -7
  41. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  42. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  43. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  44. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  45. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  46. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  47. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  48. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  49. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  50. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  51. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  52. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  53. huggingface_hub/inference/_generated/types/translation.py +2 -2
  54. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  55. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  56. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  57. huggingface_hub/inference/_mcp/agent.py +3 -3
  58. huggingface_hub/inference/_mcp/constants.py +1 -2
  59. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  60. huggingface_hub/inference/_mcp/types.py +10 -10
  61. huggingface_hub/inference/_mcp/utils.py +4 -4
  62. huggingface_hub/inference/_providers/__init__.py +12 -4
  63. huggingface_hub/inference/_providers/_common.py +62 -24
  64. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  65. huggingface_hub/inference/_providers/cohere.py +3 -3
  66. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  67. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  68. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  69. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  70. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  71. huggingface_hub/inference/_providers/nebius.py +10 -10
  72. huggingface_hub/inference/_providers/novita.py +5 -5
  73. huggingface_hub/inference/_providers/nscale.py +4 -4
  74. huggingface_hub/inference/_providers/replicate.py +15 -15
  75. huggingface_hub/inference/_providers/sambanova.py +6 -6
  76. huggingface_hub/inference/_providers/together.py +7 -7
  77. huggingface_hub/lfs.py +21 -94
  78. huggingface_hub/repocard.py +15 -16
  79. huggingface_hub/repocard_data.py +57 -57
  80. huggingface_hub/serialization/__init__.py +0 -1
  81. huggingface_hub/serialization/_base.py +9 -9
  82. huggingface_hub/serialization/_dduf.py +7 -7
  83. huggingface_hub/serialization/_torch.py +28 -28
  84. huggingface_hub/utils/__init__.py +11 -6
  85. huggingface_hub/utils/_auth.py +5 -5
  86. huggingface_hub/utils/_cache_manager.py +49 -74
  87. huggingface_hub/utils/_deprecation.py +1 -1
  88. huggingface_hub/utils/_dotenv.py +3 -3
  89. huggingface_hub/utils/_fixes.py +0 -10
  90. huggingface_hub/utils/_git_credential.py +3 -3
  91. huggingface_hub/utils/_headers.py +7 -29
  92. huggingface_hub/utils/_http.py +371 -208
  93. huggingface_hub/utils/_pagination.py +4 -4
  94. huggingface_hub/utils/_parsing.py +98 -0
  95. huggingface_hub/utils/_paths.py +5 -5
  96. huggingface_hub/utils/_runtime.py +59 -23
  97. huggingface_hub/utils/_safetensors.py +21 -21
  98. huggingface_hub/utils/_subprocess.py +9 -9
  99. huggingface_hub/utils/_telemetry.py +3 -3
  100. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
  101. huggingface_hub/utils/_typing.py +3 -3
  102. huggingface_hub/utils/_validators.py +53 -72
  103. huggingface_hub/utils/_xet.py +16 -16
  104. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  105. huggingface_hub/utils/insecure_hashlib.py +3 -9
  106. huggingface_hub/utils/tqdm.py +3 -3
  107. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
  108. huggingface_hub-1.0.0.dist-info/RECORD +152 -0
  109. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
  110. huggingface_hub/commands/__init__.py +0 -27
  111. huggingface_hub/commands/delete_cache.py +0 -476
  112. huggingface_hub/commands/download.py +0 -204
  113. huggingface_hub/commands/env.py +0 -39
  114. huggingface_hub/commands/huggingface_cli.py +0 -65
  115. huggingface_hub/commands/lfs.py +0 -200
  116. huggingface_hub/commands/repo.py +0 -151
  117. huggingface_hub/commands/repo_files.py +0 -132
  118. huggingface_hub/commands/scan_cache.py +0 -183
  119. huggingface_hub/commands/tag.py +0 -161
  120. huggingface_hub/commands/upload.py +0 -318
  121. huggingface_hub/commands/upload_large_folder.py +0 -131
  122. huggingface_hub/commands/user.py +0 -208
  123. huggingface_hub/commands/version.py +0 -40
  124. huggingface_hub/inference_api.py +0 -217
  125. huggingface_hub/keras_mixin.py +0 -497
  126. huggingface_hub/repository.py +0 -1471
  127. huggingface_hub/serialization/_tensorflow.py +0 -92
  128. huggingface_hub/utils/_hf_folder.py +0 -68
  129. huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
  130. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
  131. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
  132. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
@@ -2,7 +2,7 @@ import time
2
2
  from dataclasses import dataclass, field
3
3
  from datetime import datetime
4
4
  from enum import Enum
5
- from typing import TYPE_CHECKING, Dict, Optional, Union
5
+ from typing import TYPE_CHECKING, Optional, Union
6
6
 
7
7
  from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
8
8
 
@@ -62,7 +62,7 @@ class InferenceEndpoint:
62
62
  The timestamp of the last update of the Inference Endpoint.
63
63
  type ([`InferenceEndpointType`]):
64
64
  The type of the Inference Endpoint (public, protected, private).
65
- raw (`Dict`):
65
+ raw (`dict`):
66
66
  The raw dictionary data returned from the API.
67
67
  token (`str` or `bool`, *optional*):
68
68
  Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
@@ -112,7 +112,7 @@ class InferenceEndpoint:
112
112
  type: InferenceEndpointType = field(repr=False, init=False)
113
113
 
114
114
  # Raw dict from the API
115
- raw: Dict = field(repr=False)
115
+ raw: dict = field(repr=False)
116
116
 
117
117
  # Internal fields
118
118
  _token: Union[str, bool, None] = field(repr=False, compare=False)
@@ -120,7 +120,7 @@ class InferenceEndpoint:
120
120
 
121
121
  @classmethod
122
122
  def from_raw(
123
- cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
123
+ cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
124
124
  ) -> "InferenceEndpoint":
125
125
  """Initialize object from raw dictionary."""
126
126
  if api is None:
@@ -260,8 +260,8 @@ class InferenceEndpoint:
260
260
  framework: Optional[str] = None,
261
261
  revision: Optional[str] = None,
262
262
  task: Optional[str] = None,
263
- custom_image: Optional[Dict] = None,
264
- secrets: Optional[Dict[str, str]] = None,
263
+ custom_image: Optional[dict] = None,
264
+ secrets: Optional[dict[str, str]] = None,
265
265
  ) -> "InferenceEndpoint":
266
266
  """Update the Inference Endpoint.
267
267
 
@@ -293,10 +293,10 @@ class InferenceEndpoint:
293
293
  The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
294
294
  task (`str`, *optional*):
295
295
  The task on which to deploy the model (e.g. `"text-classification"`).
296
- custom_image (`Dict`, *optional*):
296
+ custom_image (`dict`, *optional*):
297
297
  A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
298
298
  Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
299
- secrets (`Dict[str, str]`, *optional*):
299
+ secrets (`dict[str, str]`, *optional*):
300
300
  Secret values to inject in the container environment.
301
301
  Returns:
302
302
  [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
@@ -15,7 +15,7 @@
15
15
  from dataclasses import dataclass
16
16
  from datetime import datetime
17
17
  from enum import Enum
18
- from typing import Any, Dict, List, Optional, Union
18
+ from typing import Any, Optional, Union
19
19
 
20
20
  from huggingface_hub import constants
21
21
  from huggingface_hub._space_api import SpaceHardware
@@ -71,13 +71,13 @@ class JobInfo:
71
71
  space_id (`str` or `None`):
72
72
  The Docker image from Hugging Face Spaces used for the Job.
73
73
  Can be None if docker_image is present instead.
74
- command (`List[str]` or `None`):
74
+ command (`list[str]` or `None`):
75
75
  Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
76
- arguments (`List[str]` or `None`):
76
+ arguments (`list[str]` or `None`):
77
77
  Arguments passed to the command
78
- environment (`Dict[str]` or `None`):
78
+ environment (`dict[str, Any]` or `None`):
79
79
  Environment variables of the Job as a dictionary.
80
- secrets (`Dict[str]` or `None`):
80
+ secrets (`dict[str, Any]` or `None`):
81
81
  Secret environment variables of the Job (encrypted).
82
82
  flavor (`str` or `None`):
83
83
  Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
@@ -111,10 +111,10 @@ class JobInfo:
111
111
  created_at: Optional[datetime]
112
112
  docker_image: Optional[str]
113
113
  space_id: Optional[str]
114
- command: Optional[List[str]]
115
- arguments: Optional[List[str]]
116
- environment: Optional[Dict[str, Any]]
117
- secrets: Optional[Dict[str, Any]]
114
+ command: Optional[list[str]]
115
+ arguments: Optional[list[str]]
116
+ environment: Optional[dict[str, Any]]
117
+ secrets: Optional[dict[str, Any]]
118
118
  flavor: Optional[SpaceHardware]
119
119
  status: JobStatus
120
120
  owner: JobOwner
@@ -148,13 +148,13 @@ class JobInfo:
148
148
  class JobSpec:
149
149
  docker_image: Optional[str]
150
150
  space_id: Optional[str]
151
- command: Optional[List[str]]
152
- arguments: Optional[List[str]]
153
- environment: Optional[Dict[str, Any]]
154
- secrets: Optional[Dict[str, Any]]
151
+ command: Optional[list[str]]
152
+ arguments: Optional[list[str]]
153
+ environment: Optional[dict[str, Any]]
154
+ secrets: Optional[dict[str, Any]]
155
155
  flavor: Optional[SpaceHardware]
156
156
  timeout: Optional[int]
157
- tags: Optional[List[str]]
157
+ tags: Optional[list[str]]
158
158
  arch: Optional[str]
159
159
 
160
160
  def __init__(self, **kwargs) -> None:
@@ -202,7 +202,7 @@ class ScheduledJobInfo:
202
202
  Scheduled Job ID.
203
203
  created_at (`datetime` or `None`):
204
204
  When the scheduled Job was created.
205
- tags (`List[str]` or `None`):
205
+ tags (`list[str]` or `None`):
206
206
  The tags of the scheduled Job.
207
207
  schedule (`str` or `None`):
208
208
  One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
@@ -263,14 +263,14 @@ class ScheduledJobInfo:
263
263
  def _create_job_spec(
264
264
  *,
265
265
  image: str,
266
- command: List[str],
267
- env: Optional[Dict[str, Any]],
268
- secrets: Optional[Dict[str, Any]],
266
+ command: list[str],
267
+ env: Optional[dict[str, Any]],
268
+ secrets: Optional[dict[str, Any]],
269
269
  flavor: Optional[SpaceHardware],
270
270
  timeout: Optional[Union[int, float, str]],
271
- ) -> Dict[str, Any]:
271
+ ) -> dict[str, Any]:
272
272
  # prepare job spec to send to HF Jobs API
273
- job_spec: Dict[str, Any] = {
273
+ job_spec: dict[str, Any] = {
274
274
  "command": command,
275
275
  "arguments": [],
276
276
  "environment": env or {},
huggingface_hub/_login.py CHANGED
@@ -19,9 +19,11 @@ from getpass import getpass
19
19
  from pathlib import Path
20
20
  from typing import Optional
21
21
 
22
+ import typer
23
+
22
24
  from . import constants
23
- from .commands._cli_utils import ANSI
24
25
  from .utils import (
26
+ ANSI,
25
27
  capture_output,
26
28
  get_token,
27
29
  is_google_colab,
@@ -41,7 +43,6 @@ from .utils._auth import (
41
43
  _save_token,
42
44
  get_stored_tokens,
43
45
  )
44
- from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args
45
46
 
46
47
 
47
48
  logger = logging.get_logger(__name__)
@@ -55,18 +56,11 @@ _HF_LOGO_ASCII = """
55
56
  """
56
57
 
57
58
 
58
- @_deprecate_arguments(
59
- version="1.0",
60
- deprecated_args="write_permission",
61
- custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
62
- )
63
- @_deprecate_positional_args(version="1.0")
64
59
  def login(
65
60
  token: Optional[str] = None,
66
61
  *,
67
62
  add_to_git_credential: bool = False,
68
- new_session: bool = True,
69
- write_permission: bool = False,
63
+ skip_if_logged_in: bool = False,
70
64
  ) -> None:
71
65
  """Login the machine to access the Hub.
72
66
 
@@ -96,10 +90,8 @@ def login(
96
90
  is configured, a warning will be displayed to the user. If `token` is `None`,
97
91
  the value of `add_to_git_credential` is ignored and will be prompted again
98
92
  to the end user.
99
- new_session (`bool`, defaults to `True`):
100
- If `True`, will request a token even if one is already saved on the machine.
101
- write_permission (`bool`):
102
- Ignored and deprecated argument.
93
+ skip_if_logged_in (`bool`, defaults to `False`):
94
+ If `True`, do not prompt for token if user is already logged in.
103
95
  Raises:
104
96
  [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
105
97
  If an organization token is passed. Only personal account tokens are valid
@@ -119,9 +111,9 @@ def login(
119
111
  )
120
112
  _login(token, add_to_git_credential=add_to_git_credential)
121
113
  elif is_notebook():
122
- notebook_login(new_session=new_session)
114
+ notebook_login(skip_if_logged_in=skip_if_logged_in)
123
115
  else:
124
- interpreter_login(new_session=new_session)
116
+ interpreter_login(skip_if_logged_in=skip_if_logged_in)
125
117
 
126
118
 
127
119
  def logout(token_name: Optional[str] = None) -> None:
@@ -236,13 +228,7 @@ def auth_list() -> None:
236
228
  ###
237
229
 
238
230
 
239
- @_deprecate_arguments(
240
- version="1.0",
241
- deprecated_args="write_permission",
242
- custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
243
- )
244
- @_deprecate_positional_args(version="1.0")
245
- def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
231
+ def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
246
232
  """
247
233
  Displays a prompt to log in to the HF website and store the token.
248
234
 
@@ -253,17 +239,13 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
253
239
  For more details, see [`login`].
254
240
 
255
241
  Args:
256
- new_session (`bool`, defaults to `True`):
257
- If `True`, will request a token even if one is already saved on the machine.
258
- write_permission (`bool`):
259
- Ignored and deprecated argument.
242
+ skip_if_logged_in (`bool`, defaults to `False`):
243
+ If `True`, do not prompt for token if user is already logged in.
260
244
  """
261
- if not new_session and get_token() is not None:
245
+ if skip_if_logged_in and get_token() is not None:
262
246
  logger.info("User is already logged in.")
263
247
  return
264
248
 
265
- from .commands.delete_cache import _ask_for_confirmation_no_tui
266
-
267
249
  print(_HF_LOGO_ASCII)
268
250
  if get_token() is not None:
269
251
  logger.info(
@@ -279,7 +261,7 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = Fals
279
261
  if os.name == "nt":
280
262
  logger.info("Token can be pasted using 'Right-Click'.")
281
263
  token = getpass("Enter your token (input will not be visible): ")
282
- add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
264
+ add_to_git_credential = typer.confirm("Add token as git credential?")
283
265
 
284
266
  _login(token=token, add_to_git_credential=add_to_git_credential)
285
267
 
@@ -308,13 +290,7 @@ NOTEBOOK_LOGIN_TOKEN_HTML_END = """
308
290
  notebooks. </center>"""
309
291
 
310
292
 
311
- @_deprecate_arguments(
312
- version="1.0",
313
- deprecated_args="write_permission",
314
- custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
315
- )
316
- @_deprecate_positional_args(version="1.0")
317
- def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
293
+ def notebook_login(*, skip_if_logged_in: bool = False) -> None:
318
294
  """
319
295
  Displays a widget to log in to the HF website and store the token.
320
296
 
@@ -325,10 +301,8 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
325
301
  For more details, see [`login`].
326
302
 
327
303
  Args:
328
- new_session (`bool`, defaults to `True`):
329
- If `True`, will request a token even if one is already saved on the machine.
330
- write_permission (`bool`):
331
- Ignored and deprecated argument.
304
+ skip_if_logged_in (`bool`, defaults to `False`):
305
+ If `True`, do not prompt for token if user is already logged in.
332
306
  """
333
307
  try:
334
308
  import ipywidgets.widgets as widgets # type: ignore
@@ -338,7 +312,7 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False)
338
312
  "The `notebook_login` function can only be used in a notebook (Jupyter or"
339
313
  " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
340
314
  )
341
- if not new_session and get_token() is not None:
315
+ if not skip_if_logged_in and get_token() is not None:
342
316
  logger.info("User is already logged in.")
343
317
  return
344
318
 
huggingface_hub/_oauth.py CHANGED
@@ -6,7 +6,7 @@ import time
6
6
  import urllib.parse
7
7
  import warnings
8
8
  from dataclasses import dataclass
9
- from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
9
+ from typing import TYPE_CHECKING, Literal, Optional, Union
10
10
 
11
11
  from . import constants
12
12
  from .hf_api import whoami
@@ -39,7 +39,7 @@ class OAuthOrgInfo:
39
39
  Whether the org has a payment method set up. Hugging Face field.
40
40
  role_in_org (`Optional[str]`, *optional*):
41
41
  The user's role in the org. Hugging Face field.
42
- security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
42
+ security_restrictions (`Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
43
43
  Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
44
44
  """
45
45
 
@@ -50,7 +50,7 @@ class OAuthOrgInfo:
50
50
  is_enterprise: bool
51
51
  can_pay: Optional[bool] = None
52
52
  role_in_org: Optional[str] = None
53
- security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
53
+ security_restrictions: Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]] = None
54
54
 
55
55
 
56
56
  @dataclass
@@ -79,7 +79,7 @@ class OAuthUserInfo:
79
79
  Whether the user is a pro user. Hugging Face field.
80
80
  can_pay (`Optional[bool]`, *optional*):
81
81
  Whether the user has a payment method set up. Hugging Face field.
82
- orgs (`Optional[List[OrgInfo]]`, *optional*):
82
+ orgs (`Optional[list[OrgInfo]]`, *optional*):
83
83
  List of organizations the user is part of. Hugging Face field.
84
84
  """
85
85
 
@@ -93,7 +93,7 @@ class OAuthUserInfo:
93
93
  website: Optional[str]
94
94
  is_pro: bool
95
95
  can_pay: Optional[bool]
96
- orgs: Optional[List[OAuthOrgInfo]]
96
+ orgs: Optional[list[OAuthOrgInfo]]
97
97
 
98
98
 
99
99
  @dataclass
@@ -306,7 +306,7 @@ def _add_oauth_routes(app: "fastapi.FastAPI", route_prefix: str) -> None:
306
306
  target_url = request.query_params.get("_target_url")
307
307
 
308
308
  # Build redirect URI with the same query params as before and bump nb_redirects count
309
- query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
309
+ query_params: dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
310
310
  if target_url:
311
311
  query_params["_target_url"] = target_url
312
312
 
@@ -406,7 +406,7 @@ def _get_redirect_target(request: "fastapi.Request", default_target: str = "/")
406
406
  return request.query_params.get("_target_url", default_target)
407
407
 
408
408
 
409
- def _get_mocked_oauth_info() -> Dict:
409
+ def _get_mocked_oauth_info() -> dict:
410
410
  token = get_token()
411
411
  if token is None:
412
412
  raise ValueError(
@@ -449,7 +449,7 @@ def _get_mocked_oauth_info() -> Dict:
449
449
  }
450
450
 
451
451
 
452
- def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]:
452
+ def _get_oauth_uris(route_prefix: str = "/") -> tuple[str, str, str]:
453
453
  route_prefix = route_prefix.strip("/")
454
454
  if route_prefix:
455
455
  route_prefix = f"/{route_prefix}"
@@ -1,20 +1,21 @@
1
1
  import os
2
2
  from pathlib import Path
3
- from typing import Dict, Iterable, List, Literal, Optional, Type, Union
3
+ from typing import Iterable, List, Literal, Optional, Union, overload
4
4
 
5
- import requests
5
+ import httpx
6
6
  from tqdm.auto import tqdm as base_tqdm
7
7
  from tqdm.contrib.concurrent import thread_map
8
8
 
9
9
  from . import constants
10
10
  from .errors import (
11
+ DryRunError,
11
12
  GatedRepoError,
12
13
  HfHubHTTPError,
13
14
  LocalEntryNotFoundError,
14
15
  RepositoryNotFoundError,
15
16
  RevisionNotFoundError,
16
17
  )
17
- from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
18
+ from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
18
19
  from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
19
20
  from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
20
21
  from .utils import tqdm as hf_tqdm
@@ -25,6 +26,81 @@ logger = logging.get_logger(__name__)
25
26
  VERY_LARGE_REPO_THRESHOLD = 50000 # After this limit, we don't consider `repo_info.siblings` to be reliable enough
26
27
 
27
28
 
29
+ @overload
30
+ def snapshot_download(
31
+ repo_id: str,
32
+ *,
33
+ repo_type: Optional[str] = None,
34
+ revision: Optional[str] = None,
35
+ cache_dir: Union[str, Path, None] = None,
36
+ local_dir: Union[str, Path, None] = None,
37
+ library_name: Optional[str] = None,
38
+ library_version: Optional[str] = None,
39
+ user_agent: Optional[Union[dict, str]] = None,
40
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
41
+ force_download: bool = False,
42
+ token: Optional[Union[bool, str]] = None,
43
+ local_files_only: bool = False,
44
+ allow_patterns: Optional[Union[list[str], str]] = None,
45
+ ignore_patterns: Optional[Union[list[str], str]] = None,
46
+ max_workers: int = 8,
47
+ tqdm_class: Optional[type[base_tqdm]] = None,
48
+ headers: Optional[dict[str, str]] = None,
49
+ endpoint: Optional[str] = None,
50
+ dry_run: Literal[False] = False,
51
+ ) -> str: ...
52
+
53
+
54
+ @overload
55
+ def snapshot_download(
56
+ repo_id: str,
57
+ *,
58
+ repo_type: Optional[str] = None,
59
+ revision: Optional[str] = None,
60
+ cache_dir: Union[str, Path, None] = None,
61
+ local_dir: Union[str, Path, None] = None,
62
+ library_name: Optional[str] = None,
63
+ library_version: Optional[str] = None,
64
+ user_agent: Optional[Union[dict, str]] = None,
65
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
66
+ force_download: bool = False,
67
+ token: Optional[Union[bool, str]] = None,
68
+ local_files_only: bool = False,
69
+ allow_patterns: Optional[Union[list[str], str]] = None,
70
+ ignore_patterns: Optional[Union[list[str], str]] = None,
71
+ max_workers: int = 8,
72
+ tqdm_class: Optional[type[base_tqdm]] = None,
73
+ headers: Optional[dict[str, str]] = None,
74
+ endpoint: Optional[str] = None,
75
+ dry_run: Literal[True] = True,
76
+ ) -> list[DryRunFileInfo]: ...
77
+
78
+
79
+ @overload
80
+ def snapshot_download(
81
+ repo_id: str,
82
+ *,
83
+ repo_type: Optional[str] = None,
84
+ revision: Optional[str] = None,
85
+ cache_dir: Union[str, Path, None] = None,
86
+ local_dir: Union[str, Path, None] = None,
87
+ library_name: Optional[str] = None,
88
+ library_version: Optional[str] = None,
89
+ user_agent: Optional[Union[dict, str]] = None,
90
+ etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
91
+ force_download: bool = False,
92
+ token: Optional[Union[bool, str]] = None,
93
+ local_files_only: bool = False,
94
+ allow_patterns: Optional[Union[list[str], str]] = None,
95
+ ignore_patterns: Optional[Union[list[str], str]] = None,
96
+ max_workers: int = 8,
97
+ tqdm_class: Optional[type[base_tqdm]] = None,
98
+ headers: Optional[dict[str, str]] = None,
99
+ endpoint: Optional[str] = None,
100
+ dry_run: bool = False,
101
+ ) -> Union[str, list[DryRunFileInfo]]: ...
102
+
103
+
28
104
  @validate_hf_hub_args
29
105
  def snapshot_download(
30
106
  repo_id: str,
@@ -35,22 +111,19 @@ def snapshot_download(
35
111
  local_dir: Union[str, Path, None] = None,
36
112
  library_name: Optional[str] = None,
37
113
  library_version: Optional[str] = None,
38
- user_agent: Optional[Union[Dict, str]] = None,
39
- proxies: Optional[Dict] = None,
114
+ user_agent: Optional[Union[dict, str]] = None,
40
115
  etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
41
116
  force_download: bool = False,
42
117
  token: Optional[Union[bool, str]] = None,
43
118
  local_files_only: bool = False,
44
- allow_patterns: Optional[Union[List[str], str]] = None,
45
- ignore_patterns: Optional[Union[List[str], str]] = None,
119
+ allow_patterns: Optional[Union[list[str], str]] = None,
120
+ ignore_patterns: Optional[Union[list[str], str]] = None,
46
121
  max_workers: int = 8,
47
- tqdm_class: Optional[Type[base_tqdm]] = None,
48
- headers: Optional[Dict[str, str]] = None,
122
+ tqdm_class: Optional[type[base_tqdm]] = None,
123
+ headers: Optional[dict[str, str]] = None,
49
124
  endpoint: Optional[str] = None,
50
- # Deprecated args
51
- local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
52
- resume_download: Optional[bool] = None,
53
- ) -> str:
125
+ dry_run: bool = False,
126
+ ) -> Union[str, list[DryRunFileInfo]]:
54
127
  """Download repo files.
55
128
 
56
129
  Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
@@ -85,12 +158,9 @@ def snapshot_download(
85
158
  The version of the library.
86
159
  user_agent (`str`, `dict`, *optional*):
87
160
  The user-agent info in the form of a dictionary or a string.
88
- proxies (`dict`, *optional*):
89
- Dictionary mapping protocol to the URL of the proxy passed to
90
- `requests.request`.
91
161
  etag_timeout (`float`, *optional*, defaults to `10`):
92
162
  When fetching ETag, how many seconds to wait for the server to send
93
- data before giving up which is passed to `requests.request`.
163
+ data before giving up which is passed to `httpx.request`.
94
164
  force_download (`bool`, *optional*, defaults to `False`):
95
165
  Whether the file should be downloaded even if it already exists in the local cache.
96
166
  token (`str`, `bool`, *optional*):
@@ -103,9 +173,9 @@ def snapshot_download(
103
173
  local_files_only (`bool`, *optional*, defaults to `False`):
104
174
  If `True`, avoid downloading the file and return the path to the
105
175
  local cached file if it exists.
106
- allow_patterns (`List[str]` or `str`, *optional*):
176
+ allow_patterns (`list[str]` or `str`, *optional*):
107
177
  If provided, only files matching at least one pattern are downloaded.
108
- ignore_patterns (`List[str]` or `str`, *optional*):
178
+ ignore_patterns (`list[str]` or `str`, *optional*):
109
179
  If provided, files matching any of the patterns are not downloaded.
110
180
  max_workers (`int`, *optional*):
111
181
  Number of concurrent threads to download files (1 thread = 1 file download).
@@ -116,9 +186,14 @@ def snapshot_download(
116
186
  Note that the `tqdm_class` is not passed to each individual download.
117
187
  Defaults to the custom HF progress bar that can be disabled by setting
118
188
  `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
189
+ dry_run (`bool`, *optional*, defaults to `False`):
190
+ If `True`, perform a dry run without actually downloading the files. Returns a list of
191
+ [`DryRunFileInfo`] objects containing information about what would be downloaded.
119
192
 
120
193
  Returns:
121
- `str`: folder path of the repo snapshot.
194
+ `str` or list of [`DryRunFileInfo`]:
195
+ - If `dry_run=False`: Local snapshot path.
196
+ - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.
122
197
 
123
198
  Raises:
124
199
  [`~utils.RepositoryNotFoundError`]
@@ -163,14 +238,10 @@ def snapshot_download(
163
238
  try:
164
239
  # if we have internet connection we want to list files to download
165
240
  repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision)
166
- except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
167
- # Actually raise for those subclasses of ConnectionError
241
+ except httpx.ProxyError:
242
+ # Actually raise on proxy error
168
243
  raise
169
- except (
170
- requests.exceptions.ConnectionError,
171
- requests.exceptions.Timeout,
172
- OfflineModeIsEnabled,
173
- ) as error:
244
+ except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
174
245
  # Internet connection is down
175
246
  # => will try to use local files only
176
247
  api_call_error = error
@@ -178,7 +249,7 @@ def snapshot_download(
178
249
  except RevisionNotFoundError:
179
250
  # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
180
251
  raise
181
- except requests.HTTPError as error:
252
+ except HfHubHTTPError as error:
182
253
  # Multiple reasons for an http error:
183
254
  # - Repository is private and invalid/missing token sent
184
255
  # - Repository is gated and invalid/missing token sent
@@ -198,6 +269,11 @@ def snapshot_download(
198
269
  # - f the specified revision is a branch or tag, look inside "refs".
199
270
  # => if local_dir is not None, we will return the path to the local folder if it exists.
200
271
  if repo_info is None:
272
+ if dry_run:
273
+ raise DryRunError(
274
+ "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token."
275
+ ) from api_call_error
276
+
201
277
  # Try to get which commit hash corresponds to the specified revision
202
278
  commit_hash = None
203
279
  if REGEX_COMMIT_HASH.match(revision):
@@ -284,6 +360,8 @@ def snapshot_download(
284
360
  tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
285
361
  else:
286
362
  tqdm_desc = "Fetching ... files"
363
+ if dry_run:
364
+ tqdm_desc = "[dry-run] " + tqdm_desc
287
365
 
288
366
  commit_hash = repo_info.sha
289
367
  snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
@@ -299,33 +377,36 @@ def snapshot_download(
299
377
  except OSError as e:
300
378
  logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")
301
379
 
380
+ results: List[Union[str, DryRunFileInfo]] = []
381
+
302
382
  # we pass the commit_hash to hf_hub_download
303
383
  # so no network call happens if we already
304
384
  # have the file locally.
305
- def _inner_hf_hub_download(repo_file: str):
306
- return hf_hub_download(
307
- repo_id,
308
- filename=repo_file,
309
- repo_type=repo_type,
310
- revision=commit_hash,
311
- endpoint=endpoint,
312
- cache_dir=cache_dir,
313
- local_dir=local_dir,
314
- local_dir_use_symlinks=local_dir_use_symlinks,
315
- library_name=library_name,
316
- library_version=library_version,
317
- user_agent=user_agent,
318
- proxies=proxies,
319
- etag_timeout=etag_timeout,
320
- resume_download=resume_download,
321
- force_download=force_download,
322
- token=token,
323
- headers=headers,
385
+ def _inner_hf_hub_download(repo_file: str) -> None:
386
+ results.append(
387
+ hf_hub_download( # type: ignore[no-matching-overload] # ty not happy, don't know why :/
388
+ repo_id,
389
+ filename=repo_file,
390
+ repo_type=repo_type,
391
+ revision=commit_hash,
392
+ endpoint=endpoint,
393
+ cache_dir=cache_dir,
394
+ local_dir=local_dir,
395
+ library_name=library_name,
396
+ library_version=library_version,
397
+ user_agent=user_agent,
398
+ etag_timeout=etag_timeout,
399
+ force_download=force_download,
400
+ token=token,
401
+ headers=headers,
402
+ dry_run=dry_run,
403
+ )
324
404
  )
325
405
 
326
- if constants.HF_HUB_ENABLE_HF_TRANSFER:
327
- # when using hf_transfer we don't want extra parallelism
328
- # from the one hf_transfer provides
406
+ if constants.HF_XET_HIGH_PERFORMANCE and not dry_run:
407
+ # when using hf_xet high performance we don't want extra parallelism
408
+ # from the one hf_xet provides
409
+ # TODO: revisit this when xet_session is implemented
329
410
  for file in filtered_repo_files:
330
411
  _inner_hf_hub_download(file)
331
412
  else:
@@ -338,6 +419,10 @@ def snapshot_download(
338
419
  tqdm_class=tqdm_class or hf_tqdm,
339
420
  )
340
421
 
422
+ if dry_run:
423
+ assert all(isinstance(r, DryRunFileInfo) for r in results)
424
+ return results # type: ignore
425
+
341
426
  if local_dir is not None:
342
427
  return str(os.path.realpath(local_dir))
343
428
  return snapshot_folder