huggingface-hub 0.36.0__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +33 -45
- huggingface_hub/_commit_api.py +39 -43
- huggingface_hub/_commit_scheduler.py +11 -8
- huggingface_hub/_inference_endpoints.py +8 -8
- huggingface_hub/_jobs_api.py +20 -20
- huggingface_hub/_login.py +17 -43
- huggingface_hub/_oauth.py +8 -8
- huggingface_hub/_snapshot_download.py +135 -50
- huggingface_hub/_space_api.py +4 -4
- huggingface_hub/_tensorboard_logger.py +5 -5
- huggingface_hub/_upload_large_folder.py +18 -32
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +2 -2
- huggingface_hub/cli/__init__.py +0 -14
- huggingface_hub/cli/_cli_utils.py +143 -39
- huggingface_hub/cli/auth.py +105 -171
- huggingface_hub/cli/cache.py +594 -361
- huggingface_hub/cli/download.py +120 -112
- huggingface_hub/cli/hf.py +38 -41
- huggingface_hub/cli/jobs.py +689 -1017
- huggingface_hub/cli/lfs.py +120 -143
- huggingface_hub/cli/repo.py +282 -216
- huggingface_hub/cli/repo_files.py +50 -84
- huggingface_hub/cli/system.py +6 -25
- huggingface_hub/cli/upload.py +198 -220
- huggingface_hub/cli/upload_large_folder.py +91 -106
- huggingface_hub/community.py +5 -5
- huggingface_hub/constants.py +17 -52
- huggingface_hub/dataclasses.py +135 -21
- huggingface_hub/errors.py +47 -30
- huggingface_hub/fastai_utils.py +8 -9
- huggingface_hub/file_download.py +351 -303
- huggingface_hub/hf_api.py +398 -570
- huggingface_hub/hf_file_system.py +101 -66
- huggingface_hub/hub_mixin.py +32 -54
- huggingface_hub/inference/_client.py +177 -162
- huggingface_hub/inference/_common.py +38 -54
- huggingface_hub/inference/_generated/_async_client.py +218 -258
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +10 -10
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/agent.py +3 -3
- huggingface_hub/inference/_mcp/constants.py +1 -2
- huggingface_hub/inference/_mcp/mcp_client.py +33 -22
- huggingface_hub/inference/_mcp/types.py +10 -10
- huggingface_hub/inference/_mcp/utils.py +4 -4
- huggingface_hub/inference/_providers/__init__.py +12 -4
- huggingface_hub/inference/_providers/_common.py +62 -24
- huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
- huggingface_hub/inference/_providers/cohere.py +3 -3
- huggingface_hub/inference/_providers/fal_ai.py +25 -25
- huggingface_hub/inference/_providers/featherless_ai.py +4 -4
- huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
- huggingface_hub/inference/_providers/hf_inference.py +13 -13
- huggingface_hub/inference/_providers/hyperbolic.py +4 -4
- huggingface_hub/inference/_providers/nebius.py +10 -10
- huggingface_hub/inference/_providers/novita.py +5 -5
- huggingface_hub/inference/_providers/nscale.py +4 -4
- huggingface_hub/inference/_providers/replicate.py +15 -15
- huggingface_hub/inference/_providers/sambanova.py +6 -6
- huggingface_hub/inference/_providers/together.py +7 -7
- huggingface_hub/lfs.py +21 -94
- huggingface_hub/repocard.py +15 -16
- huggingface_hub/repocard_data.py +57 -57
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +9 -9
- huggingface_hub/serialization/_dduf.py +7 -7
- huggingface_hub/serialization/_torch.py +28 -28
- huggingface_hub/utils/__init__.py +11 -6
- huggingface_hub/utils/_auth.py +5 -5
- huggingface_hub/utils/_cache_manager.py +49 -74
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +3 -3
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +3 -3
- huggingface_hub/utils/_headers.py +7 -29
- huggingface_hub/utils/_http.py +371 -208
- huggingface_hub/utils/_pagination.py +4 -4
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +59 -23
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +9 -9
- huggingface_hub/utils/_telemetry.py +3 -3
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
- huggingface_hub/utils/_typing.py +3 -3
- huggingface_hub/utils/_validators.py +53 -72
- huggingface_hub/utils/_xet.py +16 -16
- huggingface_hub/utils/_xet_progress_reporting.py +1 -1
- huggingface_hub/utils/insecure_hashlib.py +3 -9
- huggingface_hub/utils/tqdm.py +3 -3
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
- huggingface_hub-1.0.0.dist-info/RECORD +152 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
- huggingface_hub/commands/__init__.py +0 -27
- huggingface_hub/commands/delete_cache.py +0 -476
- huggingface_hub/commands/download.py +0 -204
- huggingface_hub/commands/env.py +0 -39
- huggingface_hub/commands/huggingface_cli.py +0 -65
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo.py +0 -151
- huggingface_hub/commands/repo_files.py +0 -132
- huggingface_hub/commands/scan_cache.py +0 -183
- huggingface_hub/commands/tag.py +0 -161
- huggingface_hub/commands/upload.py +0 -318
- huggingface_hub/commands/upload_large_folder.py +0 -131
- huggingface_hub/commands/user.py +0 -208
- huggingface_hub/commands/version.py +0 -40
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -497
- huggingface_hub/repository.py +0 -1471
- huggingface_hub/serialization/_tensorflow.py +0 -92
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.36.0.dist-info/RECORD +0 -170
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
huggingface_hub/_inference_endpoints.py
CHANGED

@@ -2,7 +2,7 @@ import time
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
@@ -62,7 +62,7 @@ class InferenceEndpoint:
             The timestamp of the last update of the Inference Endpoint.
         type ([`InferenceEndpointType`]):
             The type of the Inference Endpoint (public, protected, private).
-        raw (`Dict`):
+        raw (`dict`):
             The raw dictionary data returned from the API.
         token (`str` or `bool`, *optional*):
             Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
@@ -112,7 +112,7 @@ class InferenceEndpoint:
     type: InferenceEndpointType = field(repr=False, init=False)
 
     # Raw dict from the API
-    raw: Dict = field(repr=False)
+    raw: dict = field(repr=False)
 
     # Internal fields
     _token: Union[str, bool, None] = field(repr=False, compare=False)
@@ -120,7 +120,7 @@ class InferenceEndpoint:
 
     @classmethod
     def from_raw(
-        cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+        cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
     ) -> "InferenceEndpoint":
         """Initialize object from raw dictionary."""
         if api is None:
@@ -260,8 +260,8 @@ class InferenceEndpoint:
         framework: Optional[str] = None,
         revision: Optional[str] = None,
         task: Optional[str] = None,
-        custom_image: Optional[Dict] = None,
-        secrets: Optional[Dict[str, str]] = None,
+        custom_image: Optional[dict] = None,
+        secrets: Optional[dict[str, str]] = None,
     ) -> "InferenceEndpoint":
         """Update the Inference Endpoint.
 
@@ -293,10 +293,10 @@ class InferenceEndpoint:
                 The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
             task (`str`, *optional*):
                 The task on which to deploy the model (e.g. `"text-classification"`).
-            custom_image (`Dict`, *optional*):
+            custom_image (`dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
-            secrets (`Dict[str, str]`, *optional*):
+            secrets (`dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
         Returns:
             [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.

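From a caller's perspective the `Dict` to `dict` modernization above changes nothing: plain dictionaries are accepted as before. A minimal sketch of the updated `update()` signature in use (endpoint name, namespace, and image fields are illustrative, not taken from this diff):

from huggingface_hub import get_inference_endpoint

endpoint = get_inference_endpoint("my-endpoint", namespace="my-org")  # hypothetical names
endpoint.update(
    custom_image={  # plain dict, matching the new `Optional[dict]` annotation
        "url": "ghcr.io/huggingface/text-generation-inference:latest",  # illustrative image
        "health_route": "/health",
        "port": 80,
    },
    secrets={"HF_TOKEN": "hf_..."},  # `dict[str, str]`, injected into the container env
)
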
huggingface_hub/_jobs_api.py
CHANGED

@@ -15,7 +15,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 
 from huggingface_hub import constants
 from huggingface_hub._space_api import SpaceHardware
@@ -71,13 +71,13 @@ class JobInfo:
         space_id (`str` or `None`):
             The Docker image from Hugging Face Spaces used for the Job.
             Can be None if docker_image is present instead.
-        command (`List[str]` or `None`):
+        command (`list[str]` or `None`):
             Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
-        arguments (`List[str]` or `None`):
+        arguments (`list[str]` or `None`):
             Arguments passed to the command
-        environment (`Dict[str]` or `None`):
+        environment (`dict[str]` or `None`):
             Environment variables of the Job as a dictionary.
-        secrets (`Dict[str]` or `None`):
+        secrets (`dict[str]` or `None`):
             Secret environment variables of the Job (encrypted).
         flavor (`str` or `None`):
             Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
@@ -111,10 +111,10 @@ class JobInfo:
     created_at: Optional[datetime]
     docker_image: Optional[str]
     space_id: Optional[str]
-    command: Optional[List[str]]
-    arguments: Optional[List[str]]
-    environment: Optional[Dict[str, Any]]
-    secrets: Optional[Dict[str, Any]]
+    command: Optional[list[str]]
+    arguments: Optional[list[str]]
+    environment: Optional[dict[str, Any]]
+    secrets: Optional[dict[str, Any]]
     flavor: Optional[SpaceHardware]
     status: JobStatus
    owner: JobOwner
@@ -148,13 +148,13 @@ class JobInfo:
 class JobSpec:
     docker_image: Optional[str]
     space_id: Optional[str]
-    command: Optional[List[str]]
-    arguments: Optional[List[str]]
-    environment: Optional[Dict[str, Any]]
-    secrets: Optional[Dict[str, Any]]
+    command: Optional[list[str]]
+    arguments: Optional[list[str]]
+    environment: Optional[dict[str, Any]]
+    secrets: Optional[dict[str, Any]]
     flavor: Optional[SpaceHardware]
     timeout: Optional[int]
-    tags: Optional[List[str]]
+    tags: Optional[list[str]]
     arch: Optional[str]
 
     def __init__(self, **kwargs) -> None:
@@ -202,7 +202,7 @@ class ScheduledJobInfo:
         Scheduled Job ID.
     created_at (`datetime` or `None`):
         When the scheduled Job was created.
-    tags (`List[str]` or `None`):
+    tags (`list[str]` or `None`):
         The tags of the scheduled Job.
     schedule (`str` or `None`):
         One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
@@ -263,14 +263,14 @@ class ScheduledJobInfo:
 def _create_job_spec(
     *,
     image: str,
-    command: List[str],
-    env: Optional[Dict[str, Any]],
-    secrets: Optional[Dict[str, Any]],
+    command: list[str],
+    env: Optional[dict[str, Any]],
+    secrets: Optional[dict[str, Any]],
     flavor: Optional[SpaceHardware],
     timeout: Optional[Union[int, float, str]],
-) -> Dict[str, Any]:
+) -> dict[str, Any]:
     # prepare job spec to send to HF Jobs API
-    job_spec: Dict[str, Any] = {
+    job_spec: dict[str, Any] = {
         "command": command,
         "arguments": [],
         "environment": env or {},

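For reference, a sketch of the payload shape `_create_job_spec` assembles, matching the keys visible in the hunk above (the values are illustrative, and the real function also fills fields such as flavor and timeout that the hunk truncates):

from typing import Any

job_spec: dict[str, Any] = {  # builtin generics, as annotated in the diff above
    "command": ["python", "-c", "print('hello world')"],
    "arguments": [],
    "environment": {"LOG_LEVEL": "debug"},  # illustrative env var
}
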
huggingface_hub/_login.py
CHANGED

@@ -19,9 +19,11 @@ from getpass import getpass
 from pathlib import Path
 from typing import Optional
 
+import typer
+
 from . import constants
-from .commands._cli_utils import ANSI
 from .utils import (
+    ANSI,
     capture_output,
     get_token,
     is_google_colab,
@@ -41,7 +43,6 @@ from .utils._auth import (
     _save_token,
     get_stored_tokens,
 )
-from .utils._deprecation import _deprecate_arguments, _deprecate_positional_args
 
 
 logger = logging.get_logger(__name__)
@@ -55,18 +56,11 @@ _HF_LOGO_ASCII = """
 """
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
 def login(
     token: Optional[str] = None,
     *,
     add_to_git_credential: bool = False,
-    new_session: bool = True,
-    write_permission: bool = False,
+    skip_if_logged_in: bool = False,
 ) -> None:
     """Login the machine to access the Hub.
 
@@ -96,10 +90,8 @@ def login(
             is configured, a warning will be displayed to the user. If `token` is `None`,
             the value of `add_to_git_credential` is ignored and will be prompted again
             to the end user.
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     Raises:
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If an organization token is passed. Only personal account tokens are valid
@@ -119,9 +111,9 @@ def login(
         )
         _login(token, add_to_git_credential=add_to_git_credential)
     elif is_notebook():
-        notebook_login(new_session=new_session, write_permission=write_permission)
+        notebook_login(skip_if_logged_in=skip_if_logged_in)
     else:
-        interpreter_login(new_session=new_session, write_permission=write_permission)
+        interpreter_login(skip_if_logged_in=skip_if_logged_in)
 
 
 def logout(token_name: Optional[str] = None) -> None:
@@ -236,13 +228,7 @@ def auth_list() -> None:
 ###
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def interpreter_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a prompt to log in to the HF website and store the token.
 
@@ -253,17 +239,13 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     For more details, see [`login`].
 
     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
-    if not new_session and get_token() is not None:
+    if not skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return
 
-    from .commands.delete_cache import _ask_for_confirmation_no_tui
-
     print(_HF_LOGO_ASCII)
     if get_token() is not None:
         logger.info(
@@ -279,7 +261,7 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     if os.name == "nt":
         logger.info("Token can be pasted using 'Right-Click'.")
     token = getpass("Enter your token (input will not be visible): ")
-    add_to_git_credential = _ask_for_confirmation_no_tui("Add token as git credential?")
+    add_to_git_credential = typer.confirm("Add token as git credential?")
 
     _login(token=token, add_to_git_credential=add_to_git_credential)
 
@@ -308,13 +290,7 @@ NOTEBOOK_LOGIN_TOKEN_HTML_END = """
 notebooks. </center>"""
 
 
-@_deprecate_arguments(
-    version="1.0",
-    deprecated_args="write_permission",
-    custom_message="Fine-grained tokens added complexity to the permissions, making it irrelevant to check if a token has 'write' access.",
-)
-@_deprecate_positional_args(version="1.0")
-def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
+def notebook_login(*, skip_if_logged_in: bool = False) -> None:
     """
     Displays a widget to log in to the HF website and store the token.
 
@@ -325,10 +301,8 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
     For more details, see [`login`].
 
     Args:
-        new_session (`bool`, defaults to `True`):
-            If `True`, will request a token even if one is already saved on the machine.
-        write_permission (`bool`):
-            Ignored and deprecated argument.
+        skip_if_logged_in (`bool`, defaults to `False`):
+            If `True`, do not prompt for token if user is already logged in.
     """
     try:
        import ipywidgets.widgets as widgets  # type: ignore
@@ -338,7 +312,7 @@ def notebook_login(*, new_session: bool = True, write_permission: bool = False) -> None:
             "The `notebook_login` function can only be used in a notebook (Jupyter or"
             " Colab) and you need the `ipywidgets` module: `pip install ipywidgets`."
         )
-    if not new_session and get_token() is not None:
+    if not skip_if_logged_in and get_token() is not None:
         logger.info("User is already logged in.")
         return
 

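The removed `new_session`/`write_permission` pair is replaced by a single `skip_if_logged_in` flag. Per the new docstring, the closest equivalent of the old `new_session=False` is:

from huggingface_hub import login

# Does nothing if a token is already stored; otherwise prompts
# (or shows the notebook widget when running in a notebook).
login(skip_if_logged_in=True)
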
huggingface_hub/_oauth.py
CHANGED

@@ -6,7 +6,7 @@ import time
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Literal, Optional, Union
 
 from . import constants
 from .hf_api import whoami
@@ -39,7 +39,7 @@ class OAuthOrgInfo:
             Whether the org has a payment method set up. Hugging Face field.
         role_in_org (`Optional[str]`, *optional*):
             The user's role in the org. Hugging Face field.
-        security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+        security_restrictions (`Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
             Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """
 
@@ -50,7 +50,7 @@ class OAuthOrgInfo:
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None
+    security_restrictions: Optional[list[Literal["ip", "token-policy", "mfa", "sso"]]] = None
 
 
 @dataclass
@@ -79,7 +79,7 @@ class OAuthUserInfo:
             Whether the user is a pro user. Hugging Face field.
         can_pay (`Optional[bool]`, *optional*):
             Whether the user has a payment method set up. Hugging Face field.
-        orgs (`Optional[List[OrgInfo]]`, *optional*):
+        orgs (`Optional[list[OrgInfo]]`, *optional*):
             List of organizations the user is part of. Hugging Face field.
     """
 
@@ -93,7 +93,7 @@ class OAuthUserInfo:
     website: Optional[str]
     is_pro: bool
     can_pay: Optional[bool]
-    orgs: Optional[List[OAuthOrgInfo]]
+    orgs: Optional[list[OAuthOrgInfo]]
 
 
 @dataclass
@@ -306,7 +306,7 @@ def _add_oauth_routes(app: "fastapi.FastAPI", route_prefix: str) -> None:
         target_url = request.query_params.get("_target_url")
 
         # Build redirect URI with the same query params as before and bump nb_redirects count
-        query_params: Dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
+        query_params: dict[str, Union[int, str]] = {"_nb_redirects": nb_redirects + 1}
         if target_url:
             query_params["_target_url"] = target_url
 
@@ -406,7 +406,7 @@ def _get_redirect_target(request: "fastapi.Request", default_target: str = "/")
     return request.query_params.get("_target_url", default_target)
 
 
-def _get_mocked_oauth_info() -> Dict:
+def _get_mocked_oauth_info() -> dict:
     token = get_token()
     if token is None:
         raise ValueError(
@@ -449,7 +449,7 @@ def _get_mocked_oauth_info() -> Dict:
     }
 
 
-def _get_oauth_uris(route_prefix: str = "/") -> Tuple[str, str, str]:
+def _get_oauth_uris(route_prefix: str = "/") -> tuple[str, str, str]:
     route_prefix = route_prefix.strip("/")
     if route_prefix:
         route_prefix = f"/{route_prefix}"

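These dataclasses back the FastAPI OAuth helpers; only the annotations changed. Assuming the public `attach_huggingface_oauth` helper (which wires up the `_add_oauth_routes` function touched above) keeps its signature, usage is unchanged:

import fastapi

# Assumption: `attach_huggingface_oauth` is the public entry point that
# registers the OAuth login/callback/logout routes shown in this file.
from huggingface_hub import attach_huggingface_oauth

app = fastapi.FastAPI()
attach_huggingface_oauth(app)
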
huggingface_hub/_snapshot_download.py
CHANGED

@@ -1,20 +1,21 @@
 import os
 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Type, Union
+from typing import Iterable, List, Literal, Optional, Union, overload
 
-import requests
+import httpx
 from tqdm.auto import tqdm as base_tqdm
 from tqdm.contrib.concurrent import thread_map
 
 from . import constants
 from .errors import (
+    DryRunError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
-from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
+from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
 from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
 from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
 from .utils import tqdm as hf_tqdm
@@ -25,6 +26,81 @@ logger = logging.get_logger(__name__)
 VERY_LARGE_REPO_THRESHOLD = 50000  # After this limit, we don't consider `repo_info.siblings` to be reliable enough
 
 
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> list[DryRunFileInfo]: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]: ...
+
+
 @validate_hf_hub_args
 def snapshot_download(
     repo_id: str,
@@ -35,22 +111,19 @@ def snapshot_download(
     local_dir: Union[str, Path, None] = None,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Optional[Union[Dict, str]] = None,
-    proxies: Optional[Dict] = None,
+    user_agent: Optional[Union[dict, str]] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     force_download: bool = False,
     token: Optional[Union[bool, str]] = None,
     local_files_only: bool = False,
-    allow_patterns: Optional[Union[List[str], str]] = None,
-    ignore_patterns: Optional[Union[List[str], str]] = None,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
     max_workers: int = 8,
-    tqdm_class: Optional[Type[base_tqdm]] = None,
-    headers: Optional[Dict[str, str]] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-    # Deprecated args
-    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
-    resume_download: Optional[bool] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]:
     """Download repo files.
 
     Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
@@ -85,12 +158,9 @@ def snapshot_download(
             The version of the library.
         user_agent (`str`, `dict`, *optional*):
             The user-agent info in the form of a dictionary or a string.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
-            data before giving up which is passed to `requests.request`.
+            data before giving up which is passed to `httpx.request`.
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in the local cache.
         token (`str`, `bool`, *optional*):
@@ -103,9 +173,9 @@ def snapshot_download(
         local_files_only (`bool`, *optional*, defaults to `False`):
             If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
-        allow_patterns (`List[str]` or `str`, *optional*):
+        allow_patterns (`list[str]` or `str`, *optional*):
             If provided, only files matching at least one pattern are downloaded.
-        ignore_patterns (`List[str]` or `str`, *optional*):
+        ignore_patterns (`list[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not downloaded.
         max_workers (`int`, *optional*):
             Number of concurrent threads to download files (1 thread = 1 file download).
@@ -116,9 +186,14 @@ def snapshot_download(
             Note that the `tqdm_class` is not passed to each individual download.
             Defaults to the custom HF progress bar that can be disabled by setting
             `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the files. Returns a list of
+            [`DryRunFileInfo`] objects containing information about what would be downloaded.
 
     Returns:
-        `str`: folder path of the repo snapshot.
+        `str` or list of [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local snapshot path.
+            - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.
 
     Raises:
         [`~utils.RepositoryNotFoundError`]
@@ -163,14 +238,10 @@ def snapshot_download(
     try:
         # if we have internet connection we want to list files to download
        repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision)
-    except requests.exceptions.ProxyError:
-        # Actually raise for those subclasses of ConnectionError
+    except httpx.ProxyError:
+        # Actually raise on proxy error
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ) as error:
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
         # Internet connection is down
         # => will try to use local files only
         api_call_error = error
@@ -178,7 +249,7 @@ def snapshot_download(
     except RevisionNotFoundError:
         # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
         raise
-    except requests.HTTPError as error:
+    except HfHubHTTPError as error:
         # Multiple reasons for an http error:
         #     - Repository is private and invalid/missing token sent
         #     - Repository is gated and invalid/missing token sent
@@ -198,6 +269,11 @@ def snapshot_download(
     #  - f the specified revision is a branch or tag, look inside "refs".
     # => if local_dir is not None, we will return the path to the local folder if it exists.
     if repo_info is None:
+        if dry_run:
+            raise DryRunError(
+                "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token."
+            ) from api_call_error
+
         # Try to get which commit hash corresponds to the specified revision
         commit_hash = None
         if REGEX_COMMIT_HASH.match(revision):
@@ -284,6 +360,8 @@ def snapshot_download(
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
         tqdm_desc = "Fetching ... files"
+    if dry_run:
+        tqdm_desc = "[dry-run] " + tqdm_desc
 
     commit_hash = repo_info.sha
     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
@@ -299,33 +377,36 @@ def snapshot_download(
     except OSError as e:
         logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")
 
+    results: List[Union[str, DryRunFileInfo]] = []
+
     # we pass the commit_hash to hf_hub_download
     # so no network call happens if we already
     # have the file locally.
-    def _inner_hf_hub_download(repo_file: str):
-        return hf_hub_download(
-            repo_id,
-            filename=repo_file,
-            repo_type=repo_type,
-            revision=commit_hash,
-            endpoint=endpoint,
-            cache_dir=cache_dir,
-            local_dir=local_dir,
-            library_name=library_name,
-            library_version=library_version,
-            user_agent=user_agent,
-            proxies=proxies,
-            etag_timeout=etag_timeout,
-            force_download=force_download,
-            token=token,
-            headers=headers,
-            local_dir_use_symlinks=local_dir_use_symlinks,
-            resume_download=resume_download,
+    def _inner_hf_hub_download(repo_file: str) -> None:
+        results.append(
+            hf_hub_download(  # type: ignore[no-matching-overload] # ty not happy, don't know why :/
+                repo_id,
+                filename=repo_file,
+                repo_type=repo_type,
+                revision=commit_hash,
+                endpoint=endpoint,
+                cache_dir=cache_dir,
+                local_dir=local_dir,
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+                etag_timeout=etag_timeout,
+                force_download=force_download,
+                token=token,
+                headers=headers,
+                dry_run=dry_run,
+            )
         )
 
-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        # when using HF_TRANSFER we don't want extra parallelism
-        # from the one hf_transfer provides
+    if constants.HF_XET_HIGH_PERFORMANCE and not dry_run:
+        # when using hf_xet high performance we don't want extra parallelism
+        # from the one hf_xet provides
+        # TODO: revisit this when xet_session is implemented
         for file in filtered_repo_files:
             _inner_hf_hub_download(file)
     else:
@@ -338,6 +419,10 @@ def snapshot_download(
             tqdm_class=tqdm_class or hf_tqdm,
         )
 
+    if dry_run:
+        assert all(isinstance(r, DryRunFileInfo) for r in results)
+        return results  # type: ignore
+
     if local_dir is not None:
         return str(os.path.realpath(local_dir))
     return snapshot_folder