huggingface-hub 0.33.5__py3-none-any.whl → 0.35.0rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +487 -525
- huggingface_hub/_commit_api.py +21 -28
- huggingface_hub/_jobs_api.py +145 -0
- huggingface_hub/_local_folder.py +7 -1
- huggingface_hub/_login.py +5 -5
- huggingface_hub/_oauth.py +1 -1
- huggingface_hub/_snapshot_download.py +11 -6
- huggingface_hub/_upload_large_folder.py +46 -23
- huggingface_hub/cli/__init__.py +27 -0
- huggingface_hub/cli/_cli_utils.py +69 -0
- huggingface_hub/cli/auth.py +210 -0
- huggingface_hub/cli/cache.py +405 -0
- huggingface_hub/cli/download.py +181 -0
- huggingface_hub/cli/hf.py +66 -0
- huggingface_hub/cli/jobs.py +522 -0
- huggingface_hub/cli/lfs.py +198 -0
- huggingface_hub/cli/repo.py +243 -0
- huggingface_hub/cli/repo_files.py +128 -0
- huggingface_hub/cli/system.py +52 -0
- huggingface_hub/cli/upload.py +316 -0
- huggingface_hub/cli/upload_large_folder.py +132 -0
- huggingface_hub/commands/_cli_utils.py +5 -0
- huggingface_hub/commands/delete_cache.py +3 -1
- huggingface_hub/commands/download.py +4 -0
- huggingface_hub/commands/env.py +3 -0
- huggingface_hub/commands/huggingface_cli.py +2 -0
- huggingface_hub/commands/repo.py +4 -0
- huggingface_hub/commands/repo_files.py +4 -0
- huggingface_hub/commands/scan_cache.py +3 -1
- huggingface_hub/commands/tag.py +3 -1
- huggingface_hub/commands/upload.py +4 -0
- huggingface_hub/commands/upload_large_folder.py +3 -1
- huggingface_hub/commands/user.py +11 -1
- huggingface_hub/commands/version.py +3 -0
- huggingface_hub/constants.py +1 -0
- huggingface_hub/file_download.py +16 -5
- huggingface_hub/hf_api.py +519 -7
- huggingface_hub/hf_file_system.py +8 -16
- huggingface_hub/hub_mixin.py +3 -3
- huggingface_hub/inference/_client.py +38 -39
- huggingface_hub/inference/_common.py +38 -11
- huggingface_hub/inference/_generated/_async_client.py +50 -51
- huggingface_hub/inference/_generated/types/__init__.py +1 -0
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_mcp/cli.py +36 -18
- huggingface_hub/inference/_mcp/constants.py +8 -0
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_providers/__init__.py +4 -1
- huggingface_hub/inference/_providers/_common.py +3 -6
- huggingface_hub/inference/_providers/fal_ai.py +85 -42
- huggingface_hub/inference/_providers/hf_inference.py +17 -9
- huggingface_hub/inference/_providers/replicate.py +19 -1
- huggingface_hub/keras_mixin.py +2 -2
- huggingface_hub/repocard.py +1 -1
- huggingface_hub/repository.py +2 -2
- huggingface_hub/utils/_auth.py +1 -1
- huggingface_hub/utils/_cache_manager.py +2 -2
- huggingface_hub/utils/_dotenv.py +51 -0
- huggingface_hub/utils/_headers.py +1 -1
- huggingface_hub/utils/_runtime.py +1 -1
- huggingface_hub/utils/_xet.py +6 -2
- huggingface_hub/utils/_xet_progress_reporting.py +141 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/METADATA +7 -8
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/RECORD +68 -51
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/entry_points.txt +1 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.33.5.dist-info → huggingface_hub-0.35.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/_commit_api.py
CHANGED

@@ -4,7 +4,6 @@ Type definitions and utilities for the `create_commit` API
 
 import base64
 import io
-import math
 import os
 import warnings
 from collections import defaultdict
@@ -23,6 +22,7 @@ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
     XetTokenType,
+    are_progress_bars_disabled,
     chunk_iterable,
     fetch_xet_connection_info_from_repo_info,
     get_session,
@@ -33,7 +33,6 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
-from .utils.tqdm import _get_progress_bar_context
 
 
 if TYPE_CHECKING:
@@ -529,9 +528,12 @@ def _upload_xet_files(
     """
     if len(additions) == 0:
         return
+
     # at this point, we know that hf_xet is installed
     from hf_xet import upload_bytes, upload_files
 
+    from .utils._xet_progress_reporting import XetProgressReporter
+
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
@@ -567,32 +569,18 @@ def _upload_xet_files(
             raise XetRefreshTokenError("Failed to refresh xet token")
         return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
 
-    num_chunks = math.ceil(len(additions) / UPLOAD_BATCH_MAX_NUM_FILES)
-    num_chunks_num_digits = int(math.log10(num_chunks)) + 1
-    for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
-        _chunk = [op for op in chunk]
-
-        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
-        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
-        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
+    if not are_progress_bars_disabled():
+        progress = XetProgressReporter()
+        progress_callback = progress.update_progress
+    else:
+        progress, progress_callback = None, None
 
-        if num_chunks > 1:
-            description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
-        else:
-            description = "Uploading..."
-        progress_cm = _get_progress_bar_context(
-            desc=description,
-            total=expected_size,
-            initial=0,
-            unit="B",
-            unit_scale=True,
-            name="huggingface_hub.xet_put",
-            log_level=logger.getEffectiveLevel(),
-        )
-        with progress_cm as progress:
+    try:
+        for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
+            _chunk = [op for op in chunk]
 
-            def update_progress(progress_bytes: float):
-                progress.update(progress_bytes)
+            bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+            paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
 
             if len(paths_ops) > 0:
                 upload_files(
@@ -600,7 +588,7 @@ def _upload_xet_files(
                     xet_endpoint,
                     access_token_info,
                     token_refresher,
-                    update_progress,
+                    progress_callback,
                     repo_type,
                 )
             if len(bytes_ops) > 0:
@@ -609,9 +597,14 @@ def _upload_xet_files(
                     xet_endpoint,
                     access_token_info,
                     token_refresher,
-                    update_progress,
+                    progress_callback,
                     repo_type,
                 )
+
+    finally:
+        if progress is not None:
+            progress.close(False)
+
     return
 
 
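The new upload path hands a single `progress_callback` to `hf_xet`'s `upload_files`/`upload_bytes` and closes the reporter in a `finally` block. Below is a minimal sketch of that callback pattern; `SimpleReporter` is a hypothetical stand-in for `XetProgressReporter` (whose implementation lives in the new `utils/_xet_progress_reporting.py`, not shown in this hunk), and `upload_all` stands in for the actual upload calls.

from typing import Callable, Optional


class SimpleReporter:
    # Hypothetical stand-in: anything exposing `update_progress` and `close`.
    def __init__(self) -> None:
        self.total_bytes = 0

    def update_progress(self, n_bytes: int) -> None:
        # Called by the uploader as bytes go over the wire.
        self.total_bytes += n_bytes
        print(f"\ruploaded {self.total_bytes} bytes", end="")

    def close(self, success: bool) -> None:
        print("\ndone" if success else "\naborted")


def upload_all(chunks, progress_callback: Optional[Callable[[int], None]]) -> None:
    # Stand-in for the upload_files/upload_bytes calls: report per-chunk progress,
    # or skip reporting entirely when progress bars are disabled (callback is None).
    for chunk in chunks:
        if progress_callback is not None:
            progress_callback(len(chunk))


progress = SimpleReporter()
try:
    upload_all([b"abc", b"defgh"], progress.update_progress)
finally:
    # Mirrors the diff: the reporter is always closed, even when an upload raises.
    progress.close(False)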
huggingface_hub/_jobs_api.py
ADDED

@@ -0,0 +1,145 @@
+# coding=utf-8
+# Copyright 2025-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional
+
+from huggingface_hub import constants
+from huggingface_hub._space_api import SpaceHardware
+from huggingface_hub.utils._datetime import parse_datetime
+
+
+class JobStage(str, Enum):
+    """
+    Enumeration of possible stages of a Job on the Hub.
+
+    Value can be compared to a string:
+    ```py
+    assert JobStage.COMPLETED == "COMPLETED"
+    ```
+
+    Taken from https://github.com/huggingface/moon-landing/blob/main/server/job_types/JobInfo.ts#L61 (private url).
+    """
+
+    # Copied from moon-landing > server > lib > Job.ts
+    COMPLETED = "COMPLETED"
+    CANCELED = "CANCELED"
+    ERROR = "ERROR"
+    DELETED = "DELETED"
+    RUNNING = "RUNNING"
+
+
+@dataclass
+class JobStatus:
+    stage: JobStage
+    message: Optional[str]
+
+    def __init__(self, **kwargs) -> None:
+        self.stage = kwargs["stage"]
+        self.message = kwargs.get("message")
+
+
+@dataclass
+class JobOwner:
+    id: str
+    name: str
+
+
+@dataclass
+class JobInfo:
+    """
+    Contains information about a Job.
+
+    Args:
+        id (`str`):
+            Job ID.
+        created_at (`datetime` or `None`):
+            When the Job was created.
+        docker_image (`str` or `None`):
+            The Docker image from Docker Hub used for the Job.
+            Can be None if space_id is present instead.
+        space_id (`str` or `None`):
+            The Docker image from Hugging Face Spaces used for the Job.
+            Can be None if docker_image is present instead.
+        command (`List[str]` or `None`):
+            Command of the Job, e.g. `["python", "-c", "print('hello world')"]`
+        arguments (`List[str]` or `None`):
+            Arguments passed to the command
+        environment (`Dict[str, Any]` or `None`):
+            Environment variables of the Job as a dictionary.
+        secrets (`Dict[str, Any]` or `None`):
+            Secret environment variables of the Job (encrypted).
+        flavor (`str` or `None`):
+            Flavor for the hardware, as in Hugging Face Spaces. See [`SpaceHardware`] for possible values.
+            E.g. `"cpu-basic"`.
+        status (`JobStatus` or `None`):
+            Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`
+            See [`JobStage`] for possible stage values.
+        owner (`JobOwner` or `None`):
+            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")`
+
+    Example:
+
+    ```python
+    >>> from huggingface_hub import run_job
+    >>> job = run_job(
+    ...     image="python:3.12",
+    ...     command=["python", "-c", "print('Hello from the cloud!')"]
+    ... )
+    >>> job
+    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
+    >>> job.id
+    '687fb701029421ae5549d998'
+    >>> job.url
+    'https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998'
+    >>> job.status.stage
+    'RUNNING'
+    ```
+    """
+
+    id: str
+    created_at: Optional[datetime]
+    docker_image: Optional[str]
+    space_id: Optional[str]
+    command: Optional[List[str]]
+    arguments: Optional[List[str]]
+    environment: Optional[Dict[str, Any]]
+    secrets: Optional[Dict[str, Any]]
+    flavor: Optional[SpaceHardware]
+    status: Optional[JobStatus]
+    owner: Optional[JobOwner]
+
+    # Inferred fields
+    endpoint: str
+    url: str
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        created_at = kwargs.get("createdAt") or kwargs.get("created_at")
+        self.created_at = parse_datetime(created_at) if created_at else None
+        self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
+        self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
+        self.owner = JobOwner(**(kwargs["owner"] if isinstance(kwargs.get("owner"), dict) else {}))
+        self.command = kwargs.get("command")
+        self.arguments = kwargs.get("arguments")
+        self.environment = kwargs.get("environment")
+        self.secrets = kwargs.get("secrets")
+        self.flavor = kwargs.get("flavor")
+        self.status = JobStatus(**(kwargs["status"] if isinstance(kwargs.get("status"), dict) else {}))
+
+        # Inferred fields
+        self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
+        self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
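`JobInfo.__init__` accepts both the camelCase keys returned by the Hub API and snake_case keys. A short sketch, assuming the module is importable as `huggingface_hub._jobs_api` once 0.35.0rc0 is installed; the payload values are illustrative, copied from the docstring example above.

from huggingface_hub._jobs_api import JobInfo, JobStage

# Raw payload shaped like a Hub API response (camelCase keys).
payload = {
    "id": "687fb701029421ae5549d998",
    "createdAt": "2025-07-22T16:06:25.079Z",
    "dockerImage": "python:3.12",
    "owner": {"id": "5e9ecfc04957053f60648a3e", "name": "lhoestq"},
    "command": ["python", "-c", "print('Hello from the cloud!')"],
    "status": {"stage": "RUNNING", "message": None},
}

job = JobInfo(**payload)
assert job.docker_image == "python:3.12"  # camelCase key was normalized
assert job.status.stage == JobStage.RUNNING == "RUNNING"  # str-enum comparison
print(job.url)  # https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998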
huggingface_hub/_local_folder.py
CHANGED

@@ -86,7 +86,13 @@ class LocalDownloadFilePaths:
 
     def incomplete_path(self, etag: str) -> Path:
         """Return the path where a file will be temporarily downloaded before being moved to `file_path`."""
-        return self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
+        path = self.metadata_path.parent / f"{_short_hash(self.metadata_path.name)}.{etag}.incomplete"
+        resolved_path = str(path.resolve())
+        # Some Windows versions do not allow for paths longer than 255 characters.
+        # In this case, we must specify it as an extended path by using the "\\?\" prefix.
+        if len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
+            path = Path("\\\\?\\" + resolved_path)
+        return path
 
 
 @dataclass(frozen=True)
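The extended-path rule added here can be exercised in isolation. A self-contained sketch of the same logic; `extend_windows_path` is a hypothetical helper name, not part of the package.

from pathlib import Path


def extend_windows_path(path: Path) -> Path:
    # Hypothetical standalone version of the rule above: opt into Windows
    # extended-length paths ("\\?\" prefix) when the resolved path exceeds
    # 255 characters, without double-prefixing.
    resolved = str(path.resolve())
    if len(resolved) > 255 and not resolved.startswith("\\\\?\\"):
        return Path("\\\\?\\" + resolved)
    return path


p = extend_windows_path(Path("C:/cache") / ("x" * 300))
print(str(p).startswith("\\\\?\\"))  # True: the resolved path exceeded 255 chars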
huggingface_hub/_login.py
CHANGED

@@ -75,7 +75,7 @@ def login(
     components. If `token` is not provided, it will be prompted to the user either with
     a widget (in a notebook) or via the terminal.
 
-    To log in from outside of a script, one can also use `huggingface-cli login` which is
+    To log in from outside of a script, one can also use `hf auth login` which is
     a cli command that wraps [`login`].
 
     <Tip>
@@ -120,7 +120,7 @@ def login(
         logger.info(
             "The token has not been saved to the git credentials helper. Pass "
             "`add_to_git_credential=True` in this function directly or "
-            "`--add-to-git-credential` if using via `huggingface-cli` if "
+            "`--add-to-git-credential` if using via `hf` CLI if "
             "you want to set the git credential as well."
         )
     _login(token, add_to_git_credential=add_to_git_credential)
@@ -233,7 +233,7 @@ def auth_list() -> None:
         )
     elif current_token_name is None:
         logger.warning(
-            "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `huggingface-cli login` to log in."
+            "\nNote: No active token is set and no environment variable `HF_TOKEN` is found. Use `hf auth login` to log in."
         )
 
 
@@ -273,8 +273,8 @@ def interpreter_login(*, new_session: bool = True, write_permission: bool = False
     print(_HF_LOGO_ASCII)
     if get_token() is not None:
         logger.info(
-            " A token is already saved on your machine. Run `huggingface-cli"
-            " whoami` to get more information or `huggingface-cli logout` if you want"
+            " A token is already saved on your machine. Run `hf auth whoami`"
+            " to get more information or `hf auth logout` if you want"
             " to log out."
        )
        logger.info(" Setting a new token will erase the existing one.")
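The renamed CLI commands (`hf auth login`, `hf auth whoami`, `hf auth logout`) map onto the same programmatic entry points the package has long exported. A small sketch using the public API; reading the token from `HF_TOKEN` is a placeholder assumption for illustration.

import os

from huggingface_hub import login, logout, whoami

# Programmatic equivalent of `hf auth login`; the token source is a placeholder.
login(token=os.environ["HF_TOKEN"], add_to_git_credential=False)

# Equivalents of `hf auth whoami` and `hf auth logout`.
print(whoami()["name"])
logout()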
huggingface_hub/_oauth.py
CHANGED

@@ -415,7 +415,7 @@ def _get_mocked_oauth_info() -> Dict:
     if token is None:
         raise ValueError(
             "Your machine must be logged in to HF to debug an OAuth app locally. Please"
-            " run `huggingface-cli login` or set `HF_TOKEN` as environment variable "
+            " run `hf auth login` or set `HF_TOKEN` as environment variable "
             "with one of your access token. You can generate a new token in your "
             "settings page (https://huggingface.co/settings/tokens)."
         )
huggingface_hub/_snapshot_download.py
CHANGED

@@ -254,14 +254,19 @@ def snapshot_download(
     # At this stage, internet connection is up and running
     # => let's download the files!
     assert repo_info.sha is not None, "Repo info returned from server must have a revision sha."
-    assert repo_info.siblings is not None, "Repo info returned from server must have a siblings list."
 
     # Corner case: on very large repos, the siblings list in `repo_info` might not contain all files.
     # In that case, we need to use the `list_repo_tree` method to prevent caching issues.
-    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings]
-    has_many_files = len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
-    if has_many_files:
-        logger.info("The repo has more than 50,000 files. Using `list_repo_tree` to ensure all files are listed.")
+    repo_files: Iterable[str] = [f.rfilename for f in repo_info.siblings] if repo_info.siblings is not None else []
+    unreliable_nb_files = (
+        repo_info.siblings is None
+        or len(repo_info.siblings) == 0
+        or len(repo_info.siblings) > VERY_LARGE_REPO_THRESHOLD
+    )
+    if unreliable_nb_files:
+        logger.info(
+            "Number of files in the repo is unreliable. Using `list_repo_tree` to ensure all files are listed."
+        )
         repo_files = (
             f.rfilename
             for f in api.list_repo_tree(repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type)
@@ -274,7 +279,7 @@ def snapshot_download(
         ignore_patterns=ignore_patterns,
     )
 
-    if not has_many_files:
+    if not unreliable_nb_files:
         filtered_repo_files = list(filtered_repo_files)
         tqdm_desc = f"Fetching {len(filtered_repo_files)} files"
     else:
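The new guard treats three cases as "unreliable": a missing siblings list, an empty one, or one larger than `VERY_LARGE_REPO_THRESHOLD`. A standalone sketch of that predicate; the threshold value below is an assumption for illustration, since the constant's definition is not part of this diff.

from typing import Optional, Sequence

# Assumed for illustration; the actual value is defined elsewhere in
# huggingface_hub and is not shown in this diff.
VERY_LARGE_REPO_THRESHOLD = 50_000


def nb_files_is_unreliable(siblings: Optional[Sequence[str]]) -> bool:
    # Mirrors the new snapshot_download guard: fall back to list_repo_tree
    # when the siblings list is missing, empty, or suspiciously large.
    return siblings is None or len(siblings) == 0 or len(siblings) > VERY_LARGE_REPO_THRESHOLD


assert nb_files_is_unreliable(None)
assert nb_files_is_unreliable([])
assert not nb_files_is_unreliable(["config.json", "model.safetensors"])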
huggingface_hub/_upload_large_folder.py
CHANGED

@@ -33,6 +33,7 @@ from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_lo
 from .constants import DEFAULT_REVISION, REPO_TYPES
 from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
 from .utils._cache_manager import _format_size
+from .utils._runtime import is_xet_available
 from .utils.sha import sha_fileobj
 
 
@@ -45,6 +46,9 @@ WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
 COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
+UPLOAD_BATCH_SIZE_XET = 256  # Max 256 files per upload batch for XET-enabled repos
+UPLOAD_BATCH_SIZE_LFS = 1  # Otherwise, batches of 1 for regular LFS upload
+
 
 def upload_large_folder_internal(
     api: "HfApi",
@@ -93,6 +97,17 @@ def upload_large_folder_internal(
     repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
     logger.info(f"Repo created: {repo_url}")
     repo_id = repo_url.repo_id
+    # 2.1 Check if xet is enabled to set batch file upload size
+    is_xet_enabled = (
+        is_xet_available()
+        and api.repo_info(
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            expand="xetEnabled",
+        ).xet_enabled
+    )
+    upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_enabled else UPLOAD_BATCH_SIZE_LFS
 
     # 3. List files to upload
     filtered_paths_list = filter_repo_objects(
@@ -110,7 +125,7 @@ def upload_large_folder_internal(
     ]
 
     # 4. Start workers
-    status = LargeUploadStatus(items)
+    status = LargeUploadStatus(items, upload_batch_size)
     threads = [
         threading.Thread(
             target=_worker_job,
@@ -168,7 +183,7 @@ JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
 class LargeUploadStatus:
     """Contains information, queues and tasks for a large upload process."""
 
-    def __init__(self, items: List[JOB_ITEM_T]):
+    def __init__(self, items: List[JOB_ITEM_T], upload_batch_size: int = 1):
         self.items = items
         self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
         self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -179,6 +194,7 @@ class LargeUploadStatus:
         self.nb_workers_sha256: int = 0
         self.nb_workers_get_upload_mode: int = 0
         self.nb_workers_preupload_lfs: int = 0
+        self.upload_batch_size: int = upload_batch_size
         self.nb_workers_commit: int = 0
         self.nb_workers_waiting: int = 0
         self.last_commit_attempt: Optional[float] = None
@@ -353,16 +369,17 @@ def _worker_job(
                 status.nb_workers_get_upload_mode -= 1
 
         elif job == WorkerJob.PREUPLOAD_LFS:
-            item = items[0]  # single item
            try:
-                _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
-                status.queue_commit.put(item)
+                _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                for item in items:
+                    status.queue_commit.put(item)
             except KeyboardInterrupt:
                 raise
             except Exception as e:
                 logger.error(f"Failed to preupload LFS: {e}")
                 traceback.format_exc()
-                status.queue_preupload_lfs.put(item)
+                for item in items:
+                    status.queue_preupload_lfs.put(item)
 
             with status.lock:
                 status.nb_workers_preupload_lfs -= 1
@@ -417,11 +434,11 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
-    elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+    # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS
+    elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0:
         status.nb_workers_preupload_lfs += 1
         logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
-        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 1))
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
     # 5. Compute sha256 if at least 1 file and no worker is computing sha256
     elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -435,14 +452,14 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: get upload mode (no other worker getting upload mode)")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 7. Preupload LFS file if at least 1 file
+    # 7. Preupload LFS file if at least `status.upload_batch_size` files
     # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
-    elif status.queue_preupload_lfs.qsize() > 0 and (
+    elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and (
         status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
     ):
         status.nb_workers_preupload_lfs += 1
         logger.debug("Job: preupload LFS")
-        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, 1))
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
 
     # 8. Compute sha256 if at least 1 file
     elif status.queue_sha256.qsize() > 0:
@@ -456,7 +473,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: get upload mode")
         return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))
 
-    # 10. Commit if at least 1 file and 1 min since last commit attempt
+    # 10. Preupload LFS file if at least 1 file
+    elif status.queue_preupload_lfs.qsize() > 0:
+        status.nb_workers_preupload_lfs += 1
+        logger.debug("Job: preupload LFS")
+        return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
+
+    # 11. Commit if at least 1 file and 1 min since last commit attempt
     elif (
         status.nb_workers_commit == 0
         and status.queue_commit.qsize() > 0
@@ -467,7 +490,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: commit (1 min since last commit attempt)")
         return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-    # 11. Commit if at least 1 file all other queues are empty and all workers are waiting
+    # 12. Commit if at least 1 file all other queues are empty and all workers are waiting
     # e.g. when it's the last commit
     elif (
         status.nb_workers_commit == 0
@@ -483,12 +506,12 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
         logger.debug("Job: commit")
         return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))
 
-    # 12. If all queues are empty, exit
+    # 13. If all queues are empty, exit
     elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
         logger.info("All files have been processed! Exiting worker.")
         return None
 
-    # 13. If no task is available, wait
+    # 14. If no task is available, wait
     else:
         status.nb_workers_waiting += 1
         logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
@@ -531,19 +554,19 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
         metadata.save(paths)
 
 
-def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
-    """Preupload LFS file and update metadata."""
-    paths, metadata = item
-    addition = _build_hacky_operation(item)
+def _preupload_lfs(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Preupload LFS files and update metadata."""
+    additions = [_build_hacky_operation(item) for item in items]
     api.preupload_lfs_files(
         repo_id=repo_id,
         repo_type=repo_type,
         revision=revision,
-        additions=[addition],
+        additions=additions,
    )
 
-    metadata.is_uploaded = True
-    metadata.save(paths)
+    for paths, metadata in items:
+        metadata.is_uploaded = True
+        metadata.save(paths)
 
 
 def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
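The scheduler above repeatedly calls `_get_n(queue, n)` to pull a batch off a queue, with `n` now being `status.upload_batch_size` (256 for XET, 1 for plain LFS). `_get_n`'s body is not part of this diff; the following is a plausible sketch of such a non-blocking "take up to n items" helper, consistent with how it is called.

import queue
from typing import List, TypeVar

T = TypeVar("T")


def get_n(q: "queue.Queue[T]", n: int) -> List[T]:
    # Plausible reconstruction of the `_get_n` helper used above (its body is
    # not shown in this diff): drain up to `n` items without blocking.
    items: List[T] = []
    for _ in range(n):
        try:
            items.append(q.get_nowait())
        except queue.Empty:
            break
    return items


q: "queue.Queue[int]" = queue.Queue()
for i in range(3):
    q.put(i)
assert get_n(q, 256) == [0, 1, 2]  # a XET-sized batch takes whatever is ready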
huggingface_hub/cli/__init__.py
ADDED

@@ -0,0 +1,27 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+from argparse import _SubParsersAction
+
+
+class BaseHuggingfaceCLICommand(ABC):
+    @staticmethod
+    @abstractmethod
+    def register_subcommand(parser: _SubParsersAction):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def run(self):
+        raise NotImplementedError()
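A minimal sketch of how a concrete command could plug into this ABC; the `hello` command and its argparse wiring are hypothetical, for illustration only, assuming the new `huggingface_hub.cli` package is importable.

from argparse import ArgumentParser, Namespace, _SubParsersAction

from huggingface_hub.cli import BaseHuggingfaceCLICommand


class HelloCommand(BaseHuggingfaceCLICommand):
    # Hypothetical command, not part of the package.
    @staticmethod
    def register_subcommand(parser: _SubParsersAction):
        hello = parser.add_parser("hello", help="Print a greeting")
        hello.add_argument("--name", default="world")
        # Defer construction until the subcommand is actually selected.
        hello.set_defaults(func=lambda args: HelloCommand(args))

    def __init__(self, args: Namespace) -> None:
        self._name = args.name

    def run(self):
        print(f"Hello, {self._name}!")


parser = ArgumentParser("hf")
HelloCommand.register_subcommand(parser.add_subparsers())
args = parser.parse_args(["hello", "--name", "hub"])
args.func(args).run()  # Hello, hub!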
huggingface_hub/cli/_cli_utils.py
ADDED

@@ -0,0 +1,69 @@
+# Copyright 2022 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains a utility for good-looking prints."""
+
+import os
+from typing import List, Union
+
+
+class ANSI:
+    """
+    Helper for en.wikipedia.org/wiki/ANSI_escape_code
+    """
+
+    _bold = "\u001b[1m"
+    _gray = "\u001b[90m"
+    _red = "\u001b[31m"
+    _reset = "\u001b[0m"
+    _yellow = "\u001b[33m"
+
+    @classmethod
+    def bold(cls, s: str) -> str:
+        return cls._format(s, cls._bold)
+
+    @classmethod
+    def gray(cls, s: str) -> str:
+        return cls._format(s, cls._gray)
+
+    @classmethod
+    def red(cls, s: str) -> str:
+        return cls._format(s, cls._bold + cls._red)
+
+    @classmethod
+    def yellow(cls, s: str) -> str:
+        return cls._format(s, cls._yellow)
+
+    @classmethod
+    def _format(cls, s: str, code: str) -> str:
+        if os.environ.get("NO_COLOR"):
+            # See https://no-color.org/
+            return s
+        return f"{code}{s}{cls._reset}"
+
+
+def tabulate(rows: List[List[Union[str, int]]], headers: List[str]) -> str:
+    """
+    Inspired by:
+
+    - stackoverflow.com/a/8356620/593036
+    - stackoverflow.com/questions/9535954/printing-lists-as-tabular-data
+    """
+    col_widths = [max(len(str(x)) for x in col) for col in zip(*rows, headers)]
+    row_format = ("{{:{}}} " * len(headers)).format(*col_widths)
+    lines = []
+    lines.append(row_format.format(*headers))
+    lines.append(row_format.format(*["-" * w for w in col_widths]))
+    for row in rows:
+        lines.append(row_format.format(*row))
+    return "\n".join(lines)