huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (153)
  1. huggingface_hub/__init__.py +160 -46
  2. huggingface_hub/_commit_api.py +277 -71
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +33 -22
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +241 -81
  10. huggingface_hub/_space_api.py +18 -10
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +196 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +15 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +83 -59
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +99 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +606 -346
  35. huggingface_hub/hf_api.py +2445 -1132
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +61 -66
  38. huggingface_hub/inference/_client.py +501 -630
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +536 -722
  41. huggingface_hub/inference/_generated/types/__init__.py +6 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
  50. huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  51. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  52. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  53. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  54. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  55. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  56. huggingface_hub/inference/_generated/types/text_generation.py +11 -11
  57. huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  58. huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  59. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  60. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/translation.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  65. huggingface_hub/inference/_mcp/__init__.py +0 -0
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  67. huggingface_hub/inference/_mcp/agent.py +100 -0
  68. huggingface_hub/inference/_mcp/cli.py +247 -0
  69. huggingface_hub/inference/_mcp/constants.py +81 -0
  70. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  71. huggingface_hub/inference/_mcp/types.py +45 -0
  72. huggingface_hub/inference/_mcp/utils.py +128 -0
  73. huggingface_hub/inference/_providers/__init__.py +149 -20
  74. huggingface_hub/inference/_providers/_common.py +160 -37
  75. huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
  76. huggingface_hub/inference/_providers/cerebras.py +6 -0
  77. huggingface_hub/inference/_providers/clarifai.py +13 -0
  78. huggingface_hub/inference/_providers/cohere.py +32 -0
  79. huggingface_hub/inference/_providers/fal_ai.py +231 -22
  80. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  81. huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
  82. huggingface_hub/inference/_providers/groq.py +9 -0
  83. huggingface_hub/inference/_providers/hf_inference.py +143 -33
  84. huggingface_hub/inference/_providers/hyperbolic.py +9 -5
  85. huggingface_hub/inference/_providers/nebius.py +47 -5
  86. huggingface_hub/inference/_providers/novita.py +48 -5
  87. huggingface_hub/inference/_providers/nscale.py +44 -0
  88. huggingface_hub/inference/_providers/openai.py +25 -0
  89. huggingface_hub/inference/_providers/publicai.py +6 -0
  90. huggingface_hub/inference/_providers/replicate.py +46 -9
  91. huggingface_hub/inference/_providers/sambanova.py +37 -1
  92. huggingface_hub/inference/_providers/scaleway.py +28 -0
  93. huggingface_hub/inference/_providers/together.py +34 -5
  94. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  95. huggingface_hub/inference/_providers/zai_org.py +17 -0
  96. huggingface_hub/lfs.py +33 -100
  97. huggingface_hub/repocard.py +34 -38
  98. huggingface_hub/repocard_data.py +79 -59
  99. huggingface_hub/serialization/__init__.py +0 -1
  100. huggingface_hub/serialization/_base.py +12 -15
  101. huggingface_hub/serialization/_dduf.py +8 -8
  102. huggingface_hub/serialization/_torch.py +69 -69
  103. huggingface_hub/utils/__init__.py +27 -8
  104. huggingface_hub/utils/_auth.py +7 -7
  105. huggingface_hub/utils/_cache_manager.py +92 -147
  106. huggingface_hub/utils/_chunk_utils.py +2 -3
  107. huggingface_hub/utils/_deprecation.py +1 -1
  108. huggingface_hub/utils/_dotenv.py +55 -0
  109. huggingface_hub/utils/_experimental.py +7 -5
  110. huggingface_hub/utils/_fixes.py +0 -10
  111. huggingface_hub/utils/_git_credential.py +5 -5
  112. huggingface_hub/utils/_headers.py +8 -30
  113. huggingface_hub/utils/_http.py +399 -237
  114. huggingface_hub/utils/_pagination.py +6 -6
  115. huggingface_hub/utils/_parsing.py +98 -0
  116. huggingface_hub/utils/_paths.py +5 -5
  117. huggingface_hub/utils/_runtime.py +74 -22
  118. huggingface_hub/utils/_safetensors.py +21 -21
  119. huggingface_hub/utils/_subprocess.py +13 -11
  120. huggingface_hub/utils/_telemetry.py +4 -4
  121. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  122. huggingface_hub/utils/_typing.py +25 -5
  123. huggingface_hub/utils/_validators.py +55 -74
  124. huggingface_hub/utils/_verification.py +167 -0
  125. huggingface_hub/utils/_xet.py +235 -0
  126. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  127. huggingface_hub/utils/insecure_hashlib.py +3 -5
  128. huggingface_hub/utils/logging.py +8 -11
  129. huggingface_hub/utils/tqdm.py +33 -4
  130. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
  131. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  132. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  133. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  134. huggingface_hub/commands/delete_cache.py +0 -428
  135. huggingface_hub/commands/download.py +0 -200
  136. huggingface_hub/commands/huggingface_cli.py +0 -61
  137. huggingface_hub/commands/lfs.py +0 -200
  138. huggingface_hub/commands/repo_files.py +0 -128
  139. huggingface_hub/commands/scan_cache.py +0 -181
  140. huggingface_hub/commands/tag.py +0 -159
  141. huggingface_hub/commands/upload.py +0 -299
  142. huggingface_hub/commands/upload_large_folder.py +0 -129
  143. huggingface_hub/commands/user.py +0 -304
  144. huggingface_hub/commands/version.py +0 -37
  145. huggingface_hub/inference_api.py +0 -217
  146. huggingface_hub/keras_mixin.py +0 -500
  147. huggingface_hub/repository.py +0 -1477
  148. huggingface_hub/serialization/_tensorflow.py +0 -95
  149. huggingface_hub/utils/_hf_folder.py +0 -68
  150. huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
  151. huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
  152. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  153. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/_snapshot_download.py

@@ -1,21 +1,105 @@
 import os
 from pathlib import Path
-from typing import Dict, List, Literal, Optional, Union
+from typing import Iterable, List, Literal, Optional, Union, overload

-import requests
+import httpx
 from tqdm.auto import tqdm as base_tqdm
 from tqdm.contrib.concurrent import thread_map

 from . import constants
-from .errors import GatedRepoError, LocalEntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
-from .file_download import REGEX_COMMIT_HASH, hf_hub_download, repo_folder_name
-from .hf_api import DatasetInfo, HfApi, ModelInfo, SpaceInfo
-from .utils import OfflineModeIsEnabled, filter_repo_objects, logging, validate_hf_hub_args
+from .errors import (
+    DryRunError,
+    GatedRepoError,
+    HfHubHTTPError,
+    LocalEntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+)
+from .file_download import REGEX_COMMIT_HASH, DryRunFileInfo, hf_hub_download, repo_folder_name
+from .hf_api import DatasetInfo, HfApi, ModelInfo, RepoFile, SpaceInfo
+from .utils import OfflineModeIsEnabled, filter_repo_objects, is_tqdm_disabled, logging, validate_hf_hub_args
 from .utils import tqdm as hf_tqdm


 logger = logging.get_logger(__name__)

+VERY_LARGE_REPO_THRESHOLD = 50000  # After this limit, we don't consider `repo_info.siblings` to be reliable enough
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: Literal[True] = True,
+) -> list[DryRunFileInfo]: ...
+
+
+@overload
+def snapshot_download(
+    repo_id: str,
+    *,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    user_agent: Optional[Union[dict, str]] = None,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    force_download: bool = False,
+    token: Optional[Union[bool, str]] = None,
+    local_files_only: bool = False,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
+    max_workers: int = 8,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]: ...
+

 @validate_hf_hub_args
 def snapshot_download(
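The three `@overload` stubs above let type checkers narrow `snapshot_download`'s return type from the literal value of `dry_run`. A minimal sketch of the same pattern, using a hypothetical `fetch` function that is not part of the library:

```python
from typing import Literal, Union, overload

@overload
def fetch(dry_run: Literal[False] = False) -> str: ...
@overload
def fetch(dry_run: Literal[True]) -> list[str]: ...
@overload
def fetch(dry_run: bool = False) -> Union[str, list[str]]: ...

def fetch(dry_run: bool = False) -> Union[str, list[str]]:
    # Overloads are type-checker-only; this single body runs at runtime.
    return ["would_download_a", "would_download_b"] if dry_run else "/path/to/snapshot"
```

The third, `bool`-typed overload covers call sites where `dry_run` is a variable rather than a literal.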
@@ -27,22 +111,19 @@ def snapshot_download(
     local_dir: Union[str, Path, None] = None,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Optional[Union[Dict, str]] = None,
-    proxies: Optional[Dict] = None,
+    user_agent: Optional[Union[dict, str]] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     force_download: bool = False,
     token: Optional[Union[bool, str]] = None,
     local_files_only: bool = False,
-    allow_patterns: Optional[Union[List[str], str]] = None,
-    ignore_patterns: Optional[Union[List[str], str]] = None,
+    allow_patterns: Optional[Union[list[str], str]] = None,
+    ignore_patterns: Optional[Union[list[str], str]] = None,
     max_workers: int = 8,
-    tqdm_class: Optional[base_tqdm] = None,
-    headers: Optional[Dict[str, str]] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-    # Deprecated args
-    local_dir_use_symlinks: Union[bool, Literal["auto"]] = "auto",
-    resume_download: Optional[bool] = None,
-) -> str:
+    dry_run: bool = False,
+) -> Union[str, list[DryRunFileInfo]]:
     """Download repo files.

     Download a whole snapshot of a repo's files at the specified revision. This is useful when you want all files from
@@ -77,12 +158,9 @@ def snapshot_download(
             The version of the library.
         user_agent (`str`, `dict`, *optional*):
             The user-agent info in the form of a dictionary or a string.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
-            data before giving up which is passed to `requests.request`.
+            data before giving up which is passed to `httpx.request`.
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in the local cache.
         token (`str`, `bool`, *optional*):
@@ -95,9 +173,9 @@ def snapshot_download(
         local_files_only (`bool`, *optional*, defaults to `False`):
             If `True`, avoid downloading the file and return the path to the
             local cached file if it exists.
-        allow_patterns (`List[str]` or `str`, *optional*):
+        allow_patterns (`list[str]` or `str`, *optional*):
             If provided, only files matching at least one pattern are downloaded.
-        ignore_patterns (`List[str]` or `str`, *optional*):
+        ignore_patterns (`list[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not downloaded.
         max_workers (`int`, *optional*):
             Number of concurrent threads to download files (1 thread = 1 file download).
@@ -108,9 +186,14 @@ def snapshot_download(
             Note that the `tqdm_class` is not passed to each individual download.
             Defaults to the custom HF progress bar that can be disabled by setting
             `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the files. Returns a list of
+            [`DryRunFileInfo`] objects containing information about what would be downloaded.

     Returns:
-        `str`: folder path of the repo snapshot.
+        `str` or list of [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local snapshot path.
+            - If `dry_run=True`: A list of [`DryRunFileInfo`] objects containing download information.

     Raises:
         [`~utils.RepositoryNotFoundError`]
@@ -139,28 +222,26 @@ def snapshot_download(

     storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))

+    api = HfApi(
+        library_name=library_name,
+        library_version=library_version,
+        user_agent=user_agent,
+        endpoint=endpoint,
+        headers=headers,
+        token=token,
+    )
+
     repo_info: Union[ModelInfo, DatasetInfo, SpaceInfo, None] = None
     api_call_error: Optional[Exception] = None
     if not local_files_only:
         # try/except logic to handle different errors => taken from `hf_hub_download`
         try:
             # if we have internet connection we want to list files to download
-            api = HfApi(
-                library_name=library_name,
-                library_version=library_version,
-                user_agent=user_agent,
-                endpoint=endpoint,
-                headers=headers,
-            )
-            repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision, token=token)
-        except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
-            # Actually raise for those subclasses of ConnectionError
+            repo_info = api.repo_info(repo_id=repo_id, repo_type=repo_type, revision=revision)
+        except httpx.ProxyError:
+            # Actually raise on proxy error
             raise
-        except (
-            requests.exceptions.ConnectionError,
-            requests.exceptions.Timeout,
-            OfflineModeIsEnabled,
-        ) as error:
+        except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
             # Internet connection is down
             # => will try to use local files only
             api_call_error = error
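The hunk above is part of the broader `requests` → `httpx` migration in 1.x. For readers porting similar code, a rough (assumed, not exhaustive) mapping of the exception classes involved:

```python
import httpx

# Approximate httpx equivalents of the requests exceptions previously caught here:
#   requests.exceptions.ConnectionError -> httpx.ConnectError
#   requests.exceptions.Timeout         -> httpx.TimeoutException
#   requests.exceptions.ProxyError      -> httpx.ProxyError (still re-raised immediately)
try:
    httpx.get("https://huggingface.co/api/models", timeout=1.0)
except httpx.ProxyError:
    raise  # misconfigured proxy: don't silently fall back to local files
except (httpx.ConnectError, httpx.TimeoutException):
    pass  # offline: snapshot_download falls back to the local cache
```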
@@ -168,7 +249,7 @@ def snapshot_download(
         except RevisionNotFoundError:
             # The repo was found but the revision doesn't exist on the Hub (never existed or got deleted)
             raise
-        except requests.HTTPError as error:
+        except HfHubHTTPError as error:
             # Multiple reasons for an http error:
             # - Repository is private and invalid/missing token sent
             # - Repository is gated and invalid/missing token sent
@@ -188,6 +269,11 @@ def snapshot_download(
     # - if the specified revision is a branch or tag, look inside "refs".
     # => if local_dir is not None, we will return the path to the local folder if it exists.
     if repo_info is None:
+        if dry_run:
+            raise DryRunError(
+                "Dry run cannot be performed as the repository cannot be accessed. Please check your internet connection or authentication token."
+            ) from api_call_error
+
         # Try to get which commit hash corresponds to the specified revision
         commit_hash = None
         if REGEX_COMMIT_HASH.match(revision):
@@ -200,12 +286,13 @@ def snapshot_download(
                 commit_hash = f.read()

         # Try to locate snapshot folder for this commit hash
-        if commit_hash is not None:
+        if commit_hash is not None and local_dir is None:
             snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
             if os.path.exists(snapshot_folder):
                 # Snapshot folder exists => let's return it
                 # (but we can't check if all the files are actually there)
                 return snapshot_folder
+
         # If local_dir is not None, return it if it exists and is not empty
         if local_dir is not None:
             local_dir = Path(local_dir)
@@ -227,8 +314,10 @@ def snapshot_download(
                 "outgoing traffic has been disabled. To enable repo look-ups and downloads online, set "
                 "'HF_HUB_OFFLINE=0' as environment variable."
             ) from api_call_error
-        elif isinstance(api_call_error, RepositoryNotFoundError) or isinstance(api_call_error, GatedRepoError):
-            # Repo not found => let's raise the actual error
+        elif isinstance(api_call_error, (RepositoryNotFoundError, GatedRepoError)) or (
+            isinstance(api_call_error, HfHubHTTPError) and api_call_error.response.status_code == 401
+        ):
+            # Repo not found, gated, or specific authentication error => let's raise the actual error
             raise api_call_error
         else:
             # Otherwise: most likely a connection issue or Hub downtime => let's warn the user
@@ -241,14 +330,39 @@ def snapshot_download(
     # At this stage, internet connection is up and running
     # => let's download the files!
     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
-    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
-    filtered_repo_files = list(
-        filter_repo_objects(
-            items=[f.rfilename for f in repo_info.siblings],
-            allow_patterns=allow_patterns,
-            ignore_patterns=ignore_patterns,
+
+    # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
+    # In that case, we need to use the `list_repo_tree` method to prevent caching issues.
+    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
+    unreliable_nb_files = (
+        repo_info.siblings is None
+        or len(repo_info.siblings) == 0
+        or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
+    )
+    if unreliable_nb_files:
+        logger.info(
+            "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
         )
+        repo_files = (
+            f.rfilename
+            for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type)
+            if isinstance(f, RepoFile)
+        )
+
+    filtered_repo_files: Iterable[str] = filter_repo_objects(
+        items=repo_files,
+        allow_patterns=allow_patterns,
+        ignore_patterns=ignore_patterns,
     )
+
+    if not unreliable_nb_files:
+        filtered_repo_files = list(filtered_repo_files)
+        tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
+    else:
+        tqdm_desc = "Fetching ... files"
+    if dry_run:
+        tqdm_desc = "[dry-run] " + tqdm_desc
+
     commit_hash = repo_info.sha
     snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
     # if passed revision is not identical to commit_hash
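`HfApi.list_repo_tree` pages through the repo tree lazily, which is why the code above keeps `repo_files` as a generator rather than a list when the sibling count is unreliable. A standalone usage sketch (repo id is illustrative):

```python
from huggingface_hub import HfApi
from huggingface_hub.hf_api import RepoFile

api = HfApi()
# Yields RepoFile/RepoFolder entries lazily, page by page.
for entry in api.list_repo_tree("bigscience/bloom", recursive=True):
    if isinstance(entry, RepoFile):  # folders are skipped, as in the hunk above
        print(entry.path, entry.size)
```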
@@ -263,44 +377,90 @@ def snapshot_download(
         except OSError as e:
             logger.warning(f"Ignored error while writing commit hash to {ref_path}: {e}.")

+    results: List[Union[str, DryRunFileInfo]] = []
+
+    # User can use its own tqdm class or the default one from `huggingface_hub.utils`
+    tqdm_class = tqdm_class or hf_tqdm
+
+    # Create a progress bar for the bytes downloaded
+    # This progress bar is shared across threads/files and gets updated each time we fetch
+    # metadata for a file.
+    bytes_progress = tqdm_class(
+        desc="Downloading (incomplete total...)",
+        disable=is_tqdm_disabled(log_level=logger.getEffectiveLevel()),
+        total=0,
+        initial=0,
+        unit="B",
+        unit_scale=True,
+        name="huggingface_hub.snapshot_download",
+    )
+
+    class _AggregatedTqdm:
+        """Fake tqdm object to aggregate progress into the parent `bytes_progress` bar.
+
+        In practice the `_AggregatedTqdm` object won't be displayed, it's just used to update
+        the `bytes_progress` bar from each thread/file download.
+        """
+
+        def __init__(self, *args, **kwargs):
+            # Adjust the total of the parent progress bar
+            total = kwargs.pop("total", None)
+            if total is not None:
+                bytes_progress.total += total
+                bytes_progress.refresh()
+
+            # Adjust initial of the parent progress bar
+            initial = kwargs.pop("initial", 0)
+            if initial:
+                bytes_progress.update(initial)
+
+        def __enter__(self):
+            return self
+
+        def __exit__(self, exc_type, exc_value, traceback):
+            pass
+
+        def update(self, n: Optional[Union[int, float]] = 1) -> None:
+            bytes_progress.update(n)
+
     # we pass the commit_hash to hf_hub_download
     # so no network call happens if we already
     # have the file locally.
-    def _inner_hf_hub_download(repo_file: str):
-        return hf_hub_download(
-            repo_id,
-            filename=repo_file,
-            repo_type=repo_type,
-            revision=commit_hash,
-            endpoint=endpoint,
-            cache_dir=cache_dir,
-            local_dir=local_dir,
-            local_dir_use_symlinks=local_dir_use_symlinks,
-            library_name=library_name,
-            library_version=library_version,
-            user_agent=user_agent,
-            proxies=proxies,
-            etag_timeout=etag_timeout,
-            resume_download=resume_download,
-            force_download=force_download,
-            token=token,
-            headers=headers,
+    def _inner_hf_hub_download(repo_file: str) -> None:
+        results.append(
+            hf_hub_download(  # type: ignore
+                repo_id,
+                filename=repo_file,
+                repo_type=repo_type,
+                revision=commit_hash,
+                endpoint=endpoint,
+                cache_dir=cache_dir,
+                local_dir=local_dir,
+                library_name=library_name,
+                library_version=library_version,
+                user_agent=user_agent,
+                etag_timeout=etag_timeout,
+                force_download=force_download,
+                token=token,
+                headers=headers,
+                tqdm_class=_AggregatedTqdm,  # type: ignore
+                dry_run=dry_run,
+            )
         )

-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        # when using hf_transfer we don't want extra parallelism
-        # from the one hf_transfer provides
-        for file in filtered_repo_files:
-            _inner_hf_hub_download(file)
-    else:
-        thread_map(
-            _inner_hf_hub_download,
-            filtered_repo_files,
-            desc=f"Fetching {len(filtered_repo_files)} files",
-            max_workers=max_workers,
-            # User can use its own tqdm class or the default one from `huggingface_hub.utils`
-            tqdm_class=tqdm_class or hf_tqdm,
-        )
+    thread_map(
+        _inner_hf_hub_download,
+        filtered_repo_files,
+        desc=tqdm_desc,
+        max_workers=max_workers,
+        tqdm_class=tqdm_class,
+    )
+
+    bytes_progress.set_description("Download complete")
+
+    if dry_run:
+        assert all(isinstance(r, DryRunFileInfo) for r in results)
+        return results  # type: ignore

     if local_dir is not None:
         return str(os.path.realpath(local_dir))
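Taken together, these changes let callers inspect a download plan without fetching any file content. A usage sketch of the new flag (repo id illustrative; the exact fields of `DryRunFileInfo` are defined in `file_download.py` and not shown in this diff):

```python
from huggingface_hub import snapshot_download

# Normal call: downloads everything and returns the snapshot folder path.
path = snapshot_download("gpt2")

# Dry run: resolves metadata only and returns one DryRunFileInfo per file
# that would be downloaded (after allow/ignore pattern filtering).
infos = snapshot_download("gpt2", dry_run=True, allow_patterns="*.json")
for info in infos:
    print(info)
```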
huggingface_hub/_space_api.py

@@ -15,7 +15,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Dict, Optional
+from typing import Optional

 from huggingface_hub.utils import parse_datetime

@@ -54,24 +54,32 @@ class SpaceHardware(str, Enum):
     assert SpaceHardware.CPU_BASIC == "cpu-basic"
     ```

-    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+    Taken from https://github.com/huggingface-internal/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts (private url).
     """

+    # CPU
     CPU_BASIC = "cpu-basic"
     CPU_UPGRADE = "cpu-upgrade"
+    CPU_XL = "cpu-xl"
+
+    # ZeroGPU
+    ZERO_A10G = "zero-a10g"
+
+    # GPU
     T4_SMALL = "t4-small"
     T4_MEDIUM = "t4-medium"
     L4X1 = "l4x1"
     L4X4 = "l4x4"
-    ZERO_A10G = "zero-a10g"
+    L40SX1 = "l40sx1"
+    L40SX4 = "l40sx4"
+    L40SX8 = "l40sx8"
     A10G_SMALL = "a10g-small"
     A10G_LARGE = "a10g-large"
     A10G_LARGEX2 = "a10g-largex2"
     A10G_LARGEX4 = "a10g-largex4"
     A100_LARGE = "a100-large"
-    V5E_1X1 = "v5e-1x1"
-    V5E_2X2 = "v5e-2x2"
-    V5E_2X4 = "v5e-2x4"
+    H100 = "h100"
+    H100X8 = "h100x8"


 class SpaceStorage(str, Enum):
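Because `SpaceHardware` is a `str` enum, the new flavors can be passed anywhere a raw hardware string is accepted, for instance when requesting hardware for a Space (a sketch; the Space id is illustrative):

```python
from huggingface_hub import HfApi, SpaceHardware

api = HfApi()
# str-enum members compare equal to their raw values...
assert SpaceHardware.L40SX1 == "l40sx1"
# ...and can be passed directly when upgrading an existing Space.
api.request_space_hardware(repo_id="user/my-space", hardware=SpaceHardware.A10G_SMALL)
```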
@@ -103,7 +111,7 @@ class SpaceRuntime:
            Current hardware of the space. Example: "cpu-basic". Can be `None` if Space
            is `BUILDING` for the first time.
        requested_hardware (`str` or `None`):
-           Requested hardware. Can be different than `hardware` especially if the request
+           Requested hardware. Can be different from `hardware` especially if the request
            has just been made. Example: "t4-medium". Can be `None` if no hardware has
            been requested yet.
        sleep_time (`int` or `None`):
@@ -120,9 +128,9 @@ class SpaceRuntime:
     requested_hardware: Optional[SpaceHardware]
     sleep_time: Optional[int]
     storage: Optional[SpaceStorage]
-    raw: Dict
+    raw: dict

-    def __init__(self, data: Dict) -> None:
+    def __init__(self, data: dict) -> None:
         self.stage = data["stage"]
         self.hardware = data.get("hardware", {}).get("current")
         self.requested_hardware = data.get("hardware", {}).get("requested")
@@ -152,7 +160,7 @@ class SpaceVariable:
     description: Optional[str]
     updated_at: Optional[datetime]

-    def __init__(self, key: str, values: Dict) -> None:
+    def __init__(self, key: str, values: dict) -> None:
         self.key = key
         self.value = values["value"]
         self.description = values.get("description")
huggingface_hub/_tensorboard_logger.py

@@ -14,7 +14,7 @@
 """Contains a logger to push training logs to the Hub, using Tensorboard."""

 from pathlib import Path
-from typing import TYPE_CHECKING, List, Optional, Union
+from typing import Optional, Union

 from ._commit_scheduler import CommitScheduler
 from .errors import EntryNotFoundError
@@ -26,25 +26,24 @@ from .utils import experimental
 # or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
 # from either of them.
 try:
-    from tensorboardX import SummaryWriter
+    from tensorboardX import SummaryWriter as _RuntimeSummaryWriter

     is_summary_writer_available = True
-
 except ImportError:
     try:
-        from torch.utils.tensorboard import SummaryWriter
+        from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter

-        is_summary_writer_available = False
+        is_summary_writer_available = True
     except ImportError:
         # Dummy class to avoid failing at import. Will raise on instance creation.
-        SummaryWriter = object
-        is_summary_writer_available = False
+        class _DummySummaryWriter:
+            pass

-if TYPE_CHECKING:
-    from tensorboardX import SummaryWriter
+        _RuntimeSummaryWriter = _DummySummaryWriter  # type: ignore[assignment]
+        is_summary_writer_available = False


-class HFSummaryWriter(SummaryWriter):
+class HFSummaryWriter(_RuntimeSummaryWriter):
     """
     Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.

@@ -53,11 +52,8 @@ class HFSummaryWriter(SummaryWriter):
     issue), the main script will not be interrupted. Data is automatically pushed to the Hub every `commit_every`
     minutes (default to every 5 minutes).

-    <Tip warning={true}>
-
-    `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.
-
-    </Tip>
+    > [!WARNING]
+    > `HFSummaryWriter` is experimental. Its API is subject to change in the future without prior notice.

     Args:
         repo_id (`str`):
@@ -78,10 +74,10 @@ class HFSummaryWriter(SummaryWriter):
            Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
        path_in_repo (`str`, *optional*):
            The path to the folder in the repo where the logs will be pushed. Defaults to "tensorboard/".
-       repo_allow_patterns (`List[str]` or `str`, *optional*):
+       repo_allow_patterns (`list[str]` or `str`, *optional*):
            A list of patterns to include in the upload. Defaults to `"*.tfevents.*"`. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
-       repo_ignore_patterns (`List[str]` or `str`, *optional*):
+       repo_ignore_patterns (`list[str]` or `str`, *optional*):
            A list of patterns to exclude in the upload. Check out the
            [upload guide](https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-folder) for more details.
        token (`str`, *optional*):
@@ -138,8 +134,8 @@ class HFSummaryWriter(SummaryWriter):
         repo_revision: Optional[str] = None,
         repo_private: Optional[bool] = None,
         path_in_repo: Optional[str] = "tensorboard",
-        repo_allow_patterns: Optional[Union[List[str], str]] = "*.tfevents.*",
-        repo_ignore_patterns: Optional[Union[List[str], str]] = None,
+        repo_allow_patterns: Optional[Union[list[str], str]] = "*.tfevents.*",
+        repo_ignore_patterns: Optional[Union[list[str], str]] = None,
         token: Optional[str] = None,
         **kwargs,
     ):
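After the import fallback above, `HFSummaryWriter` behaves like a regular `SummaryWriter` whose log directory is committed to the Hub in the background every `commit_every` minutes. A minimal usage sketch (repo id is illustrative; requires `tensorboardX` or `torch` to be installed):

```python
from huggingface_hub import HFSummaryWriter

writer = HFSummaryWriter(repo_id="user/my-training-logs", commit_every=5)
for step in range(100):
    writer.add_scalar("train/loss", 1.0 / (step + 1), global_step=step)
writer.close()  # standard SummaryWriter API; Hub commits are handled by the scheduler
```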