PyPI - huggingface-hub - Versions diffs - 0.29.3rc0__py3-none-any.whl → 0.30.0rc1__py3-none-any.whl - Mend

huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic. Click here for more details.

Files changed (40)

huggingface_hub/__init__.py +16 -1
huggingface_hub/_commit_api.py +142 -4
huggingface_hub/_space_api.py +15 -2
huggingface_hub/_webhooks_server.py +2 -0
huggingface_hub/commands/delete_cache.py +66 -20
huggingface_hub/commands/upload.py +16 -2
huggingface_hub/constants.py +44 -7
huggingface_hub/errors.py +19 -0
huggingface_hub/file_download.py +163 -35
huggingface_hub/hf_api.py +349 -28
huggingface_hub/hub_mixin.py +19 -4
huggingface_hub/inference/_client.py +50 -69
huggingface_hub/inference/_generated/_async_client.py +57 -76
huggingface_hub/inference/_generated/types/__init__.py +1 -0
huggingface_hub/inference/_generated/types/chat_completion.py +20 -10
huggingface_hub/inference/_generated/types/image_to_image.py +2 -0
huggingface_hub/inference/_providers/__init__.py +7 -1
huggingface_hub/inference/_providers/_common.py +9 -5
huggingface_hub/inference/_providers/black_forest_labs.py +5 -5
huggingface_hub/inference/_providers/cohere.py +1 -1
huggingface_hub/inference/_providers/fal_ai.py +64 -7
huggingface_hub/inference/_providers/fireworks_ai.py +4 -1
huggingface_hub/inference/_providers/hf_inference.py +41 -4
huggingface_hub/inference/_providers/hyperbolic.py +3 -3
huggingface_hub/inference/_providers/nebius.py +3 -3
huggingface_hub/inference/_providers/novita.py +35 -5
huggingface_hub/inference/_providers/openai.py +22 -0
huggingface_hub/inference/_providers/replicate.py +3 -3
huggingface_hub/inference/_providers/together.py +3 -3
huggingface_hub/utils/__init__.py +8 -0
huggingface_hub/utils/_http.py +4 -1
huggingface_hub/utils/_runtime.py +11 -0
huggingface_hub/utils/_xet.py +199 -0
huggingface_hub/utils/tqdm.py +30 -2
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/METADATA +3 -1
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/RECORD +40 -38
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/LICENSE +0 -0
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/WHEEL +0 -0
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/entry_points.txt +0 -0
{huggingface_hub-0.29.3rc0.dist-info → huggingface_hub-0.30.0rc1.dist-info}/top_level.txt +0 -0

huggingface_hub/__init__.py CHANGED Viewed

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
-__version__ = "0.29.3.rc0"
+__version__ = "0.30.0rc1"
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
@@ -165,6 +165,7 @@ _SUBMOD_ATTRS = {
         "create_commit",
         "create_discussion",
         "create_inference_endpoint",
+        "create_inference_endpoint_from_catalog",
         "create_pull_request",
         "create_repo",
         "create_tag",
@@ -205,7 +206,9 @@ _SUBMOD_ATTRS = {
         "list_accepted_access_requests",
         "list_collections",
         "list_datasets",
+        "list_inference_catalog",
         "list_inference_endpoints",
+        "list_lfs_files",
         "list_liked_repos",
         "list_models",
         "list_organization_members",
@@ -228,6 +231,7 @@ _SUBMOD_ATTRS = {
         "parse_safetensors_file_metadata",
         "pause_inference_endpoint",
         "pause_space",
+        "permanently_delete_lfs_files",
         "preupload_lfs_files",
         "reject_access_request",
         "rename_discussion",
@@ -296,6 +300,7 @@ _SUBMOD_ATTRS = {
         "ChatCompletionInputMessageChunkType",
         "ChatCompletionInputStreamOptions",
         "ChatCompletionInputTool",
+        "ChatCompletionInputToolCall",
         "ChatCompletionInputToolChoiceClass",
         "ChatCompletionInputToolChoiceEnum",
         "ChatCompletionInputURL",
@@ -536,6 +541,7 @@ __all__ = [
     "ChatCompletionInputMessageChunkType",
     "ChatCompletionInputStreamOptions",
     "ChatCompletionInputTool",
+    "ChatCompletionInputToolCall",
     "ChatCompletionInputToolChoiceClass",
     "ChatCompletionInputToolChoiceEnum",
     "ChatCompletionInputURL",
@@ -769,6 +775,7 @@ __all__ = [
     "create_commit",
     "create_discussion",
     "create_inference_endpoint",
+    "create_inference_endpoint_from_catalog",
     "create_pull_request",
     "create_repo",
     "create_tag",
@@ -823,7 +830,9 @@ __all__ = [
     "list_accepted_access_requests",
     "list_collections",
     "list_datasets",
+    "list_inference_catalog",
     "list_inference_endpoints",
+    "list_lfs_files",
     "list_liked_repos",
     "list_models",
     "list_organization_members",
@@ -856,6 +865,7 @@ __all__ = [
     "parse_safetensors_file_metadata",
     "pause_inference_endpoint",
     "pause_space",
+    "permanently_delete_lfs_files",
     "preupload_lfs_files",
     "push_to_hub_fastai",
     "push_to_hub_keras",
@@ -1107,6 +1117,7 @@ if TYPE_CHECKING:  # pragma: no cover
         create_commit,  # noqa: F401
         create_discussion,  # noqa: F401
         create_inference_endpoint,  # noqa: F401
+        create_inference_endpoint_from_catalog,  # noqa: F401
         create_pull_request,  # noqa: F401
         create_repo,  # noqa: F401
         create_tag,  # noqa: F401
@@ -1147,7 +1158,9 @@ if TYPE_CHECKING:  # pragma: no cover
         list_accepted_access_requests,  # noqa: F401
         list_collections,  # noqa: F401
         list_datasets,  # noqa: F401
+        list_inference_catalog,  # noqa: F401
         list_inference_endpoints,  # noqa: F401
+        list_lfs_files,  # noqa: F401
         list_liked_repos,  # noqa: F401
         list_models,  # noqa: F401
         list_organization_members,  # noqa: F401
@@ -1170,6 +1183,7 @@ if TYPE_CHECKING:  # pragma: no cover
         parse_safetensors_file_metadata,  # noqa: F401
         pause_inference_endpoint,  # noqa: F401
         pause_space,  # noqa: F401
+        permanently_delete_lfs_files,  # noqa: F401
         preupload_lfs_files,  # noqa: F401
         reject_access_request,  # noqa: F401
         rename_discussion,  # noqa: F401
@@ -1236,6 +1250,7 @@ if TYPE_CHECKING:  # pragma: no cover
         ChatCompletionInputMessageChunkType,  # noqa: F401
         ChatCompletionInputStreamOptions,  # noqa: F401
         ChatCompletionInputTool,  # noqa: F401
+        ChatCompletionInputToolCall,  # noqa: F401
         ChatCompletionInputToolChoiceClass,  # noqa: F401
         ChatCompletionInputToolChoiceEnum,  # noqa: F401
         ChatCompletionInputURL,  # noqa: F401

huggingface_hub/_commit_api.py CHANGED Viewed

@@ -4,6 +4,7 @@ Type definitions and utilities for the `create_commit` API
 import base64
 import io
+import math
 import os
 import warnings
 from collections import defaultdict
@@ -16,12 +17,14 @@ from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List,
 from tqdm.contrib.concurrent import thread_map
 from . import constants
-from .errors import EntryNotFoundError
+from .errors import EntryNotFoundError, HfHubHTTPError, XetAuthorizationError, XetRefreshTokenError
 from .file_download import hf_hub_url
 from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
+    XetTokenType,
     chunk_iterable,
+    fetch_xet_connection_info_from_repo_info,
     get_session,
     hf_raise_for_status,
     logging,
@@ -30,6 +33,7 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
+from .utils.tqdm import _get_progress_bar_context
 if TYPE_CHECKING:
@@ -47,6 +51,8 @@ UploadMode = Literal["lfs", "regular"]
 # See https://github.com/huggingface/huggingface_hub/issues/1503
 FETCH_LFS_BATCH_SIZE = 500
+UPLOAD_BATCH_MAX_NUM_FILES = 256
 @dataclass
 class CommitOperationDelete:
@@ -391,7 +397,7 @@ def _upload_lfs_files(
     #         Upload instructions are retrieved by chunk of 256 files to avoid reaching
     #         the payload limit.
     batch_actions: List[Dict] = []
-    for chunk in chunk_iterable(additions, chunk_size=256):
+    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
         batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
             upload_infos=[op.upload_info for op in chunk],
             repo_id=repo_id,
@@ -458,6 +464,138 @@ def _upload_lfs_files(
         )
+@validate_hf_hub_args
+def _upload_xet_files(
+    *,
+    additions: List[CommitOperationAdd],
+    repo_type: str,
+    repo_id: str,
+    headers: Dict[str, str],
+    endpoint: Optional[str] = None,
+    revision: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+):
+    """
+    Uploads the content of `additions` to the Hub using the xet storage protocol.
+    This chunks the files and deduplicates the chunks before uploading them to xetcas storage.
+    Args:
+        additions (`List` of `CommitOperationAdd`):
+            The files to be uploaded.
+        repo_type (`str`):
+            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+        repo_id (`str`):
+            A namespace (user or an organization) and a repo name separated
+            by a `/`.
+        headers (`Dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        endpoint: (`str`, *optional*):
+            The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`.
+        revision (`str`, *optional*):
+            The git revision to upload to.
+        create_pr (`bool`, *optional*):
+            Whether or not to create a Pull Request with that commit.
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
+        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+            If the LFS batch endpoint returned an HTTP error.
+    **How it works:**
+        The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
+            for efficient storage and transfer.
+        `hf_xet.upload_files` manages uploading files by:
+            - Taking a list of file paths to upload
+            - Breaking files into smaller chunks for efficient storage
+            - Avoiding duplicate storage by recognizing identical chunks across files
+            - Connecting to a storage server (CAS server) that manages these chunks
+        The upload process works like this:
+        1. Create a local folder at ~/.cache/huggingface/xet/chunk-cache to store file chunks for reuse.
+        2. Process files in parallel (up to 8 files at once):
+            2.1. Read the file content.
+            2.2. Split the file content into smaller chunks based on content patterns: each chunk gets a unique ID based on what's in it.
+            2.3. For each chunk:
+                - Check if it already exists in storage.
+                - Skip uploading chunks that already exist.
+            2.4. Group chunks into larger blocks for efficient transfer.
+            2.5. Upload these blocks to the storage server.
+            2.6. Create and upload information about how the file is structured.
+        3. Return reference files that contain information about the uploaded files, which can be used later to download them.
+    """
+    if len(additions) == 0:
+        return
+    # at this point, we know that hf_xet is installed
+    from hf_xet import upload_files
+    try:
+        xet_connection_info = fetch_xet_connection_info_from_repo_info(
+            token_type=XetTokenType.WRITE,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            headers=headers,
+            endpoint=endpoint,
+            params={"create_pr": "1"} if create_pr else None,
+        )
+    except HfHubHTTPError as e:
+        if e.response.status_code == 401:
+            raise XetAuthorizationError(
+                f"You are unauthorized to upload to xet storage for {repo_type}/{repo_id}. "
+                f"Please check that you have configured your access token with write access to the repo."
+            ) from e
+        raise
+    xet_endpoint = xet_connection_info.endpoint
+    access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)
+    def token_refresher() -> Tuple[str, int]:
+        new_xet_connection = fetch_xet_connection_info_from_repo_info(
+            token_type=XetTokenType.WRITE,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            headers=headers,
+            endpoint=endpoint,
+            params={"create_pr": "1"} if create_pr else None,
+        )
+        if new_xet_connection is None:
+            raise XetRefreshTokenError("Failed to refresh xet token")
+        return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
+    num_chunks = math.ceil(len(additions) / UPLOAD_BATCH_MAX_NUM_FILES)
+    num_chunks_num_digits = int(math.log10(num_chunks)) + 1
+    for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
+        _chunk = [op for op in chunk]
+        paths = [str(op.path_or_fileobj) for op in _chunk]
+        expected_size = sum([os.path.getsize(path) for path in paths])
+        if num_chunks > 1:
+            description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
+        else:
+            description = "Uploading..."
+        progress_cm = _get_progress_bar_context(
+            desc=description,
+            total=expected_size,
+            initial=0,
+            unit="B",
+            unit_scale=True,
+            name="huggingface_hub.xet_put",
+            log_level=logger.getEffectiveLevel(),
+        )
+        with progress_cm as progress:
+            def update_progress(increment: int):
+                progress.update(increment)
+            upload_files(paths, xet_endpoint, access_token_info, token_refresher, update_progress, repo_type)
+    return
 def _validate_preupload_info(preupload_info: dict):
     files = preupload_info.get("files")
     if not isinstance(files, list):
@@ -485,8 +623,8 @@ def _fetch_upload_modes(
     gitignore_content: Optional[str] = None,
 ) -> None:
     """
-    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
-    or as git LFS blob. Input `additions` are mutated in-place with the upload mode.
+    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob,
+    as a git LFS blob, or as a XET file. Input `additions` are mutated in-place with the upload mode.
     Args:
         additions (`Iterable` of :class:`CommitOperationAdd`):

huggingface_hub/_space_api.py CHANGED Viewed

@@ -54,21 +54,34 @@ class SpaceHardware(str, Enum):
     assert SpaceHardware.CPU_BASIC == "cpu-basic"
     ```
-    Taken from https://github.com/huggingface/moon-landing/blob/main/server/repo_types/SpaceInfo.ts#L73 (private url).
+    Taken from https://github.com/huggingface-internal/moon-landing/blob/main/server/repo_types/SpaceHardwareFlavor.ts (private url).
     """
+    # CPU
     CPU_BASIC = "cpu-basic"
     CPU_UPGRADE = "cpu-upgrade"
+    CPU_XL = "cpu-xl"
+    # ZeroGPU
+    ZERO_A10G = "zero-a10g"
+    # GPU
     T4_SMALL = "t4-small"
     T4_MEDIUM = "t4-medium"
     L4X1 = "l4x1"
     L4X4 = "l4x4"
-    ZERO_A10G = "zero-a10g"
+    L40SX1 = "l40sx1"
+    L40SX4 = "l40sx4"
+    L40SX8 = "l40sx8"
     A10G_SMALL = "a10g-small"
     A10G_LARGE = "a10g-large"
     A10G_LARGEX2 = "a10g-largex2"
     A10G_LARGEX4 = "a10g-largex4"
     A100_LARGE = "a100-large"
+    H100 = "h100"
+    H100X8 = "h100x8"
+    # TPU
     V5E_1X1 = "v5e-1x1"
     V5E_2X2 = "v5e-2x2"
     V5E_2X4 = "v5e-2x4"

huggingface_hub/_webhooks_server.py CHANGED Viewed

@@ -186,6 +186,8 @@ class WebhooksServer:
         # Print instructions and block main thread
         space_host = os.environ.get("SPACE_HOST")
         url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+        if url is None:
+            raise ValueError("Cannot find the URL of the app. Please provide a valid `ui` or update `gradio` version.")
         url = url.strip("/")
         message = "\nWebhooks are correctly setup and ready to use:"
         message += "\n" + "\n".join(f"  - POST {url}{webhook}" for webhook in self.registered_webhooks)

huggingface_hub/commands/delete_cache.py CHANGED Viewed

@@ -18,6 +18,7 @@ Usage:
     huggingface-cli delete-cache
     huggingface-cli delete-cache --disable-tui
     huggingface-cli delete-cache --dir ~/.cache/huggingface/hub
+    huggingface-cli delete-cache --sort=size
 NOTE:
     This command is based on `InquirerPy` to build the multiselect menu in the terminal.
@@ -50,7 +51,6 @@ NOTE:
 TODO: add support for `huggingface-cli delete-cache aaaaaa bbbbbb cccccc (...)` ?
 TODO: add "--keep-last" arg to delete revisions that are not on `main` ref
 TODO: add "--filter" arg to filter repositories by name ?
-TODO: add "--sort" arg to sort by size ?
 TODO: add "--limit" arg to limit to X repos ?
 TODO: add "-y" arg for immediate deletion ?
 See discussions in https://github.com/huggingface/huggingface_hub/issues/1025.
@@ -60,7 +60,7 @@ import os
 from argparse import Namespace, _SubParsersAction
 from functools import wraps
 from tempfile import mkstemp
-from typing import Any, Callable, Iterable, List, Optional, Union
+from typing import Any, Callable, Iterable, List, Literal, Optional, Union
 from ..utils import CachedRepoInfo, CachedRevisionInfo, HFCacheInfo, scan_cache_dir
 from . import BaseHuggingfaceCLICommand
@@ -76,6 +76,8 @@ try:
 except ImportError:
     _inquirer_py_available = False
+SortingOption_T = Literal["alphabetical", "lastUpdated", "lastUsed", "size"]
 def require_inquirer_py(fn: Callable) -> Callable:
     """Decorator to flag methods that require `InquirerPy`."""
@@ -120,11 +122,25 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
             ),
         )
+        delete_cache_parser.add_argument(
+            "--sort",
+            nargs="?",
+            choices=["alphabetical", "lastUpdated", "lastUsed", "size"],
+            help=(
+                "Sort repositories by the specified criteria. Options: "
+                "'alphabetical' (A-Z), "
+                "'lastUpdated' (newest first), "
+                "'lastUsed' (most recent first), "
+                "'size' (largest first)."
+            ),
+        )
         delete_cache_parser.set_defaults(func=DeleteCacheCommand)
     def __init__(self, args: Namespace) -> None:
         self.cache_dir: Optional[str] = args.dir
         self.disable_tui: bool = args.disable_tui
+        self.sort_by: Optional[SortingOption_T] = args.sort
     def run(self):
         """Run `delete-cache` command with or without TUI."""
@@ -133,9 +149,9 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
         # Manual review from the user
         if self.disable_tui:
-            selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[])
+            selected_hashes = _manual_review_no_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
         else:
-            selected_hashes = _manual_review_tui(hf_cache_info, preselected=[])
+            selected_hashes = _manual_review_tui(hf_cache_info, preselected=[], sort_by=self.sort_by)
         # If deletion is not cancelled
         if len(selected_hashes) > 0 and _CANCEL_DELETION_STR not in selected_hashes:
@@ -163,14 +179,35 @@ class DeleteCacheCommand(BaseHuggingfaceCLICommand):
         print("Deletion is cancelled. Do nothing.")
+def _get_repo_sorting_key(repo: CachedRepoInfo, sort_by: Optional[SortingOption_T] = None):
+    if sort_by == "alphabetical":
+        return (repo.repo_type, repo.repo_id.lower())  # by type then name
+    elif sort_by == "lastUpdated":
+        return -max(rev.last_modified for rev in repo.revisions)  # newest first
+    elif sort_by == "lastUsed":
+        return -repo.last_accessed  # most recently used first
+    elif sort_by == "size":
+        return -repo.size_on_disk  # largest first
+    else:
+        return (repo.repo_type, repo.repo_id)  # default stable order
 @require_inquirer_py
-def _manual_review_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+def _manual_review_tui(
+    hf_cache_info: HFCacheInfo,
+    preselected: List[str],
+    sort_by: Optional[SortingOption_T] = None,
+) -> List[str]:
     """Ask the user for a manual review of the revisions to delete.
     Displays a multi-select menu in the terminal (TUI).
     """
     # Define multiselect list
-    choices = _get_tui_choices_from_scan(repos=hf_cache_info.repos, preselected=preselected)
+    choices = _get_tui_choices_from_scan(
+        repos=hf_cache_info.repos,
+        preselected=preselected,
+        sort_by=sort_by,
+    )
     checkbox = inquirer.checkbox(
         message="Select revisions to delete:",
         choices=choices,  # List of revisions with some pre-selection
@@ -213,7 +250,11 @@ def _ask_for_confirmation_tui(message: str, default: bool = True) -> bool:
     return inquirer.confirm(message, default=default).execute()
-def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: List[str]) -> List:
+def _get_tui_choices_from_scan(
+    repos: Iterable[CachedRepoInfo],
+    preselected: List[str],
+    sort_by: Optional[SortingOption_T] = None,
+) -> List:
     """Build a list of choices from the scanned repos.
     Args:
@@ -221,14 +262,15 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
             List of scanned repos on which we want to delete revisions.
         preselected (*List[`str`]*):
             List of revision hashes that will be preselected.
+        sort_by (*Optional[SortingOption_T]*):
+            Sorting direction. Choices: "alphabetical", "lastUpdated", "lastUsed", "size".
     Return:
         The list of choices to pass to `inquirer.checkbox`.
     """
     choices: List[Union[Choice, Separator]] = []
-    # First choice is to cancel the deletion. If selected, nothing will be deleted,
-    # no matter the other selected items.
+    # First choice is to cancel the deletion
     choices.append(
         Choice(
             _CANCEL_DELETION_STR,
@@ -237,8 +279,10 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
         )
     )
-    # Display a separator per repo and a Choice for each revisions of the repo
-    for repo in sorted(repos, key=_repo_sorting_order):
+    # Sort repos based on specified criteria
+    sorted_repos = sorted(repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
+    for repo in sorted_repos:
         # Repo as separator
         choices.append(
             Separator(
@@ -264,7 +308,11 @@ def _get_tui_choices_from_scan(repos: Iterable[CachedRepoInfo], preselected: Lis
     return choices
-def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) -> List[str]:
+def _manual_review_no_tui(
+    hf_cache_info: HFCacheInfo,
+    preselected: List[str],
+    sort_by: Optional[SortingOption_T] = None,
+) -> List[str]:
     """Ask the user for a manual review of the revisions to delete.
     Used when TUI is disabled. Manual review happens in a separate tmp file that the
@@ -275,7 +323,10 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
     os.close(fd)
     lines = []
-    for repo in sorted(hf_cache_info.repos, key=_repo_sorting_order):
+    sorted_repos = sorted(hf_cache_info.repos, key=lambda repo: _get_repo_sorting_key(repo, sort_by))
+    for repo in sorted_repos:
         lines.append(
             f"\n# {repo.repo_type.capitalize()} {repo.repo_id} ({repo.size_on_disk_str},"
             f" used {repo.last_accessed_str})"
@@ -314,9 +365,9 @@ def _manual_review_no_tui(hf_cache_info: HFCacheInfo, preselected: List[str]) ->
         ):
             break
-    # 4. Return selected_hashes
+    # 4. Return selected_hashes sorted to maintain stable order
     os.remove(tmp_path)
-    return selected_hashes
+    return sorted(selected_hashes)  # Sort to maintain stable order
 def _ask_for_confirmation_no_tui(message: str, default: bool = True) -> bool:
@@ -418,11 +469,6 @@ _MANUAL_REVIEW_NO_TUI_INSTRUCTIONS = f"""
 """.strip()
-def _repo_sorting_order(repo: CachedRepoInfo) -> Any:
-    # First split by Dataset/Model, then sort by last accessed (oldest first)
-    return (repo.repo_type, repo.last_accessed)
 def _revision_sorting_order(revision: CachedRevisionInfo) -> Any:
     # Sort by last modified (oldest first)
     return revision.last_modified

huggingface_hub/commands/upload.py CHANGED Viewed

@@ -30,6 +30,9 @@ Usage:
     # Upload filtered directory (example: tensorboard logs except for the last run)
     huggingface-cli upload my-cool-model ./model/training /logs --include "*.tfevents.*" --exclude "*20230905*"
+    # Upload with wildcard
+    huggingface-cli upload my-cool-model "./model/training/*.safetensors"
     # Upload private dataset
     huggingface-cli upload Wauplin/my-cool-dataset ./data . --repo-type=dataset --private
@@ -69,7 +72,9 @@ class UploadCommand(BaseHuggingfaceCLICommand):
             "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
         )
         upload_parser.add_argument(
-            "local_path", nargs="?", help="Local path to the file or folder to upload. Defaults to current directory."
+            "local_path",
+            nargs="?",
+            help="Local path to the file or folder to upload. Wildcard patterns are supported. Defaults to current directory.",
         )
         upload_parser.add_argument(
             "path_in_repo",
@@ -155,7 +160,16 @@ class UploadCommand(BaseHuggingfaceCLICommand):
         repo_name: str = args.repo_id.split("/")[-1]  # e.g. "Wauplin/my-cool-model" => "my-cool-model"
         self.local_path: str
         self.path_in_repo: str
-        if args.local_path is None and os.path.isfile(repo_name):
+        if args.local_path is not None and any(c in args.local_path for c in ["*", "?", "["]):
+            if args.include is not None:
+                raise ValueError("Cannot set `--include` when passing a `local_path` containing a wildcard.")
+            if args.path_in_repo is not None and args.path_in_repo != ".":
+                raise ValueError("Cannot set `path_in_repo` when passing a `local_path` containing a wildcard.")
+            self.local_path = "."
+            self.include = args.local_path
+            self.path_in_repo = "."
+        elif args.local_path is None and os.path.isfile(repo_name):
             # Implicit case 1: user provided only a repo_id which happen to be a local file as well => upload it with same name
             self.local_path = repo_name
             self.path_in_repo = repo_name

huggingface_hub/constants.py CHANGED Viewed

@@ -78,6 +78,7 @@ INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-infere
 # See https://huggingface.co/docs/inference-endpoints/index
 INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
+INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog"
 # Proxy for third-party providers
 INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}"
@@ -113,10 +114,12 @@ WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
 # default cache
 default_home = os.path.join(os.path.expanduser("~"), ".cache")
-HF_HOME = os.path.expanduser(
-    os.getenv(
-        "HF_HOME",
-        os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
+HF_HOME = os.path.expandvars(
+    os.path.expanduser(
+        os.getenv(
+            "HF_HOME",
+            os.path.join(os.getenv("XDG_CACHE_HOME", default_home), "huggingface"),
+        )
     )
 )
 hf_cache_home = HF_HOME  # for backward compatibility. TODO: remove this in 1.0.0
@@ -129,8 +132,22 @@ HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
 HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
 # New env variables
-HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
-HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)
+HF_HUB_CACHE = os.path.expandvars(
+    os.path.expanduser(
+        os.getenv(
+            "HF_HUB_CACHE",
+            HUGGINGFACE_HUB_CACHE,
+        )
+    )
+)
+HF_ASSETS_CACHE = os.path.expandvars(
+    os.path.expanduser(
+        os.getenv(
+            "HF_ASSETS_CACHE",
+            HUGGINGFACE_ASSETS_CACHE,
+        )
+    )
+)
 HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
@@ -145,7 +162,14 @@ HF_HUB_DISABLE_TELEMETRY = (
     or _is_true(os.environ.get("DO_NOT_TRACK"))  # https://consoledonottrack.com/
 )
-HF_TOKEN_PATH = os.environ.get("HF_TOKEN_PATH", os.path.join(HF_HOME, "token"))
+HF_TOKEN_PATH = os.path.expandvars(
+    os.path.expanduser(
+        os.getenv(
+            "HF_TOKEN_PATH",
+            os.path.join(HF_HOME, "token"),
+        )
+    )
+)
 HF_STORED_TOKENS_PATH = os.path.join(os.path.dirname(HF_TOKEN_PATH), "stored_tokens")
 if _staging_mode:
@@ -233,3 +257,16 @@ ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
     "stanza",
     "timm",
 ]
+# Xet constants
+HUGGINGFACE_HEADER_X_XET_ENDPOINT = "X-Xet-Cas-Url"
+HUGGINGFACE_HEADER_X_XET_ACCESS_TOKEN = "X-Xet-Access-Token"
+HUGGINGFACE_HEADER_X_XET_EXPIRATION = "X-Xet-Token-Expiration"
+HUGGINGFACE_HEADER_X_XET_HASH = "X-Xet-Hash"
+HUGGINGFACE_HEADER_X_XET_REFRESH_ROUTE = "X-Xet-Refresh-Route"
+HUGGINGFACE_HEADER_LINK_XET_AUTH_KEY = "xet-auth"
+default_xet_cache_path = os.path.join(HF_HOME, "xet")
+HF_XET_CACHE = os.getenv("HF_XET_CACHE", default_xet_cache_path)

huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0rc1__py3-none-any.whl

Potentially problematic release.

huggingface-hub 0.29.3rc0__py3-none-any.whl → 0.30.0rc1__py3-none-any.whl