PyPI - huggingface-hub - Versions diffs - 0.24.7__py3-none-any.whl → 0.25.1__py3-none-any.whl - Mend

huggingface-hub 0.24.7__py3-none-any.whl → 0.25.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic. Click here for more details.

Files changed (53) hide show

huggingface_hub/__init__.py +21 -1
huggingface_hub/_commit_api.py +4 -4
huggingface_hub/_inference_endpoints.py +13 -1
huggingface_hub/_local_folder.py +191 -4
huggingface_hub/_login.py +6 -6
huggingface_hub/_snapshot_download.py +8 -17
huggingface_hub/_space_api.py +5 -0
huggingface_hub/_tensorboard_logger.py +29 -13
huggingface_hub/_upload_large_folder.py +621 -0
huggingface_hub/_webhooks_server.py +1 -1
huggingface_hub/commands/_cli_utils.py +5 -0
huggingface_hub/commands/download.py +8 -0
huggingface_hub/commands/huggingface_cli.py +6 -1
huggingface_hub/commands/lfs.py +2 -1
huggingface_hub/commands/repo_files.py +2 -2
huggingface_hub/commands/scan_cache.py +99 -57
huggingface_hub/commands/tag.py +1 -1
huggingface_hub/commands/upload.py +2 -1
huggingface_hub/commands/upload_large_folder.py +129 -0
huggingface_hub/commands/version.py +37 -0
huggingface_hub/community.py +2 -2
huggingface_hub/errors.py +218 -1
huggingface_hub/fastai_utils.py +2 -3
huggingface_hub/file_download.py +61 -62
huggingface_hub/hf_api.py +783 -314
huggingface_hub/hf_file_system.py +15 -23
huggingface_hub/hub_mixin.py +27 -25
huggingface_hub/inference/_client.py +78 -127
huggingface_hub/inference/_common.py +6 -0
huggingface_hub/inference/_generated/_async_client.py +169 -144
huggingface_hub/inference/_generated/types/base.py +0 -9
huggingface_hub/inference/_templating.py +2 -3
huggingface_hub/inference_api.py +2 -2
huggingface_hub/keras_mixin.py +2 -2
huggingface_hub/lfs.py +7 -98
huggingface_hub/repocard.py +6 -5
huggingface_hub/repository.py +5 -5
huggingface_hub/serialization/_torch.py +64 -11
huggingface_hub/utils/__init__.py +13 -14
huggingface_hub/utils/_cache_manager.py +97 -14
huggingface_hub/utils/_fixes.py +18 -2
huggingface_hub/utils/_http.py +228 -2
huggingface_hub/utils/_lfs.py +110 -0
huggingface_hub/utils/_runtime.py +7 -1
huggingface_hub/utils/_token.py +3 -2
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/METADATA +2 -2
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/RECORD +51 -49
huggingface_hub/inference/_types.py +0 -52
huggingface_hub/utils/_errors.py +0 -397
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/LICENSE +0 -0
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/WHEEL +0 -0
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/entry_points.txt +0 -0
{huggingface_hub-0.24.7.dist-info → huggingface_hub-0.25.1.dist-info}/top_level.txt +0 -0

huggingface_hub/_upload_large_folder.py ADDED Viewed

@@ -0,0 +1,621 @@
+# coding=utf-8
+# Copyright 2024-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import enum
+import logging
+import os
+import queue
+import shutil
+import sys
+import threading
+import time
+import traceback
+from datetime import datetime
+from pathlib import Path
+from threading import Lock
+from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from . import constants
+from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
+from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
+from .constants import DEFAULT_REVISION, REPO_TYPES
+from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
+from .utils._cache_manager import _format_size
+from .utils.sha import sha_fileobj
+if TYPE_CHECKING:
+    from .hf_api import HfApi
+logger = logging.getLogger(__name__)
+WAITING_TIME_IF_NO_TASKS = 10  # seconds
+MAX_NB_REGULAR_FILES_PER_COMMIT = 75
+MAX_NB_LFS_FILES_PER_COMMIT = 150
+def upload_large_folder_internal(
+    api: "HfApi",
+    repo_id: str,
+    folder_path: Union[str, Path],
+    *,
+    repo_type: str,  # Repo type is required!
+    revision: Optional[str] = None,
+    private: bool = False,
+    allow_patterns: Optional[Union[List[str], str]] = None,
+    ignore_patterns: Optional[Union[List[str], str]] = None,
+    num_workers: Optional[int] = None,
+    print_report: bool = True,
+    print_report_every: int = 60,
+):
+    """Upload a large folder to the Hub in the most resilient way possible.
+    See [`HfApi.upload_large_folder`] for the full documentation.
+    Args:
+        api: Client used to create the repo; also handed to every worker thread.
+        repo_id: Target repo. Replaced by the `repo_id` of the URL returned by `api.create_repo`.
+        folder_path: Local folder to upload. It is expanded and resolved, and must be a directory.
+        repo_type: Must be one of `REPO_TYPES`. `None` is rejected explicitly.
+        revision: Target revision. Defaults to `DEFAULT_REVISION`.
+        private: Forwarded to `api.create_repo`.
+        allow_patterns: Forwarded to `filter_repo_objects`.
+        ignore_patterns: Forwarded to `filter_repo_objects`, with `DEFAULT_IGNORE_PATTERNS` always appended.
+            The caller's list is never modified.
+        num_workers: Number of worker threads. Defaults to `max(cpu_count - 2, 2)`.
+        print_report: Whether to print the status report to stdout.
+        print_report_every: Minimum number of seconds between two printed reports.
+    Raises:
+        ValueError: If `repo_type` is `None` or unknown, or if `folder_path` is not a directory.
+    """
+    # 1. Check args and setup
+    if repo_type is None:
+        raise ValueError(
+            "For large uploads, `repo_type` is explicitly required. Please set it to `model`, `dataset` or `space`."
+            " If you are using the CLI, pass it as `--repo-type=model`."
+        )
+    if repo_type not in REPO_TYPES:
+        raise ValueError(f"Invalid repo type, must be one of {REPO_TYPES}")
+    if revision is None:
+        revision = DEFAULT_REVISION
+    folder_path = Path(folder_path).expanduser().resolve()
+    if not folder_path.is_dir():
+        raise ValueError(f"Provided path: '{folder_path}' is not a directory")
+    if ignore_patterns is None:
+        ignore_patterns = []
+    elif isinstance(ignore_patterns, str):
+        ignore_patterns = [ignore_patterns]
+    # Build a new list: `+=` would extend the caller's list in place and leak the
+    # default patterns into it (accumulating them on repeated calls).
+    ignore_patterns = list(ignore_patterns) + list(DEFAULT_IGNORE_PATTERNS)
+    if num_workers is None:
+        nb_cores = os.cpu_count() or 1
+        num_workers = max(nb_cores - 2, 2)  # Use all but 2 cores, or at least 2 cores
+    # 2. Create repo if missing
+    repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
+    logger.info(f"Repo created: {repo_url}")
+    repo_id = repo_url.repo_id
+    # 3. List files to upload
+    filtered_paths_list = filter_repo_objects(
+        (path.relative_to(folder_path).as_posix() for path in folder_path.glob("**/*") if path.is_file()),
+        allow_patterns=allow_patterns,
+        ignore_patterns=ignore_patterns,
+    )
+    paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
+    logger.info(f"Found {len(paths_list)} candidate files to upload")
+    # Read metadata for each file
+    items = [
+        (paths, read_upload_metadata(folder_path, paths.path_in_repo))
+        for paths in tqdm(paths_list, desc="Recovering from metadata files")
+    ]
+    # 4. Start workers
+    status = LargeUploadStatus(items)
+    threads = [
+        threading.Thread(
+            target=_worker_job,
+            kwargs={
+                "status": status,
+                "api": api,
+                "repo_id": repo_id,
+                "repo_type": repo_type,
+                "revision": revision,
+            },
+        )
+        for _ in range(num_workers)
+    ]
+    for thread in threads:
+        thread.start()
+    # 5. Print regular reports
+    if print_report:
+        print("\n\n" + status.current_report())
+    last_report_ts = time.time()
+    while True:
+        time.sleep(1)
+        if time.time() - last_report_ts >= print_report_every:
+            if print_report:
+                _print_overwrite(status.current_report())
+            last_report_ts = time.time()
+        if status.is_done():
+            # Use the module logger: `logging.info` targets the root logger and
+            # implicitly installs a root handler when none is configured.
+            logger.info("Is done: exiting main loop")
+            break
+    for thread in threads:
+        thread.join()
+    logger.info(status.current_report())
+    logger.info("Upload is complete!")
+####################
+# Logic to manage workers and synchronize tasks
+####################
+class WorkerJob(enum.Enum):
+    # Kinds of task that `_determine_next_job` can hand to a worker; `_worker_job` dispatches on them.
+    SHA256 = enum.auto()  # handled by calling `_compute_sha256` on a single item
+    GET_UPLOAD_MODE = enum.auto()  # handled by calling `_get_upload_mode` on a batch of items
+    PREUPLOAD_LFS = enum.auto()  # handled by calling `_preupload_lfs` on a single item
+    COMMIT = enum.auto()  # handled by calling `_commit` on a batch of items
+    WAIT = enum.auto()  # if no tasks are available but we don't want to exit
+JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+class LargeUploadStatus:
+    """Shared state of a large upload: the tracked items, one queue per stage and worker counters."""
+    def __init__(self, items: List[JOB_ITEM_T]):
+        self.items = items
+        # One FIFO queue per processing stage
+        self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_preupload_lfs: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.queue_commit: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
+        self.lock = Lock()
+        # Number of workers currently busy with each kind of job
+        self.nb_workers_sha256: int = 0
+        self.nb_workers_get_upload_mode: int = 0
+        self.nb_workers_preupload_lfs: int = 0
+        self.nb_workers_commit: int = 0
+        self.nb_workers_waiting: int = 0
+        self.last_commit_attempt: Optional[float] = None
+        self._started_at = datetime.now()
+        # Route each item to the first stage it has not completed yet
+        for entry in self.items:
+            paths, metadata = entry
+            if metadata.sha256 is None:
+                self.queue_sha256.put(entry)
+                continue
+            if metadata.upload_mode is None:
+                self.queue_get_upload_mode.put(entry)
+                continue
+            if metadata.upload_mode == "lfs" and not metadata.is_uploaded:
+                self.queue_preupload_lfs.put(entry)
+                continue
+            if not metadata.is_committed:
+                self.queue_commit.put(entry)
+                continue
+            logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
+    def current_report(self) -> str:
+        """Return a multi-line text report of file progress and worker activity."""
+        files = files_ignored = 0
+        hashed = lfs = lfs_unknown = preuploaded = committed = 0
+        bytes_total = bytes_hashed = bytes_preuploaded = bytes_committed = 0
+        with self.lock:
+            for _, meta in self.items:
+                if meta.should_ignore:
+                    files_ignored += 1
+                else:
+                    files += 1
+                    bytes_total += meta.size
+                    if meta.sha256 is not None:
+                        hashed += 1
+                        bytes_hashed += meta.size
+                    if meta.upload_mode == "lfs":
+                        lfs += 1
+                    if meta.upload_mode is None:
+                        lfs_unknown += 1
+                    if meta.is_uploaded:
+                        preuploaded += 1
+                        bytes_preuploaded += meta.size
+                    if meta.is_committed:
+                        committed += 1
+                        bytes_committed += meta.size
+            total_str = _format_size(bytes_total)
+            current = datetime.now()
+            timestamp = current.strftime("%Y-%m-%d %H:%M:%S")
+            # Keep only the part before the fractional seconds
+            elapsed_str = str(current - self._started_at).partition(".")[0]
+            dashes = "-" * 10
+            parts = [
+                "\n" + dashes,
+                f" {timestamp} ({elapsed_str}) ",
+                dashes + "\n",
+                "Files:   ",
+                f"hashed {hashed}/{files} ({_format_size(bytes_hashed)}/{total_str}) | ",
+                f"pre-uploaded: {preuploaded}/{lfs} ({_format_size(bytes_preuploaded)}/{total_str})",
+            ]
+            if lfs_unknown > 0:
+                parts.append(f" (+{lfs_unknown} unsure)")
+            parts.extend(
+                [
+                    f" | committed: {committed}/{files} ({_format_size(bytes_committed)}/{total_str})",
+                    f" | ignored: {files_ignored}\n",
+                    "Workers: ",
+                    f"hashing: {self.nb_workers_sha256} | ",
+                    f"get upload mode: {self.nb_workers_get_upload_mode} | ",
+                    f"pre-uploading: {self.nb_workers_preupload_lfs} | ",
+                    f"committing: {self.nb_workers_commit} | ",
+                    f"waiting: {self.nb_workers_waiting}\n",
+                    "-" * 51,
+                ]
+            )
+            return "".join(parts)
+    def is_done(self) -> bool:
+        """Return True once every tracked item is either committed or flagged as ignored."""
+        with self.lock:
+            for _, meta in self.items:
+                if not (meta.is_committed or meta.should_ignore):
+                    return False
+            return True
+def _worker_job(
+    status: LargeUploadStatus,
+    api: "HfApi",
+    repo_id: str,
+    repo_type: str,
+    revision: str,
+):
+    """
+    Main process for a worker. The worker will perform tasks based on the priority list until all files are uploaded
+    and committed. If no tasks are available, the worker will wait for 10 seconds before checking again.
+    If a task fails for any reason, the item(s) are put back in the queue for another worker to pick up.
+    Read `upload_large_folder` docstring for more information on how tasks are prioritized.
+    Args:
+        status: Shared queues and counters. Counters are only modified while holding `status.lock`.
+        api: Client forwarded to the upload-mode, pre-upload and commit jobs.
+        repo_id: Target repo, forwarded to the jobs.
+        repo_type: Target repo type, forwarded to the jobs.
+        revision: Target revision, forwarded to the jobs.
+    Failures are logged at error level and the full traceback at debug level.
+    `KeyboardInterrupt` is always re-raised.
+    """
+    while True:
+        # Determine next task (`None` means there is nothing left to do)
+        next_job = _determine_next_job(status)
+        if next_job is None:
+            return
+        job, items = next_job
+        # Perform task
+        if job == WorkerJob.SHA256:
+            item = items[0]  # single item
+            try:
+                _compute_sha256(item)
+                status.queue_get_upload_mode.put(item)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to compute sha256: {e}")
+                # `traceback.format_exc()` only returns a string: it must be logged explicitly
+                logger.debug(traceback.format_exc())
+                status.queue_sha256.put(item)
+            with status.lock:
+                status.nb_workers_sha256 -= 1
+        elif job == WorkerJob.GET_UPLOAD_MODE:
+            try:
+                _get_upload_mode(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to get upload mode: {e}")
+                logger.debug(traceback.format_exc())
+            # Items are either:
+            # - dropped (if should_ignore)
+            # - put in LFS queue (if LFS)
+            # - put in commit queue (if regular)
+            # - or put back (if error occurred).
+            for item in items:
+                _, metadata = item
+                if metadata.should_ignore:
+                    continue
+                if metadata.upload_mode == "lfs":
+                    status.queue_preupload_lfs.put(item)
+                elif metadata.upload_mode == "regular":
+                    status.queue_commit.put(item)
+                else:
+                    status.queue_get_upload_mode.put(item)
+            with status.lock:
+                status.nb_workers_get_upload_mode -= 1
+        elif job == WorkerJob.PREUPLOAD_LFS:
+            item = items[0]  # single item
+            try:
+                _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                status.queue_commit.put(item)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to preupload LFS: {e}")
+                logger.debug(traceback.format_exc())
+                status.queue_preupload_lfs.put(item)
+            with status.lock:
+                status.nb_workers_preupload_lfs -= 1
+        elif job == WorkerJob.COMMIT:
+            try:
+                _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+            except KeyboardInterrupt:
+                raise
+            except Exception as e:
+                logger.error(f"Failed to commit: {e}")
+                logger.debug(traceback.format_exc())
+                for item in items:
+                    status.queue_commit.put(item)
+            with status.lock:
+                # Recorded on success and failure alike: it is the time of the last *attempt*
+                status.last_commit_attempt = time.time()
+                status.nb_workers_commit -= 1
+        elif job == WorkerJob.WAIT:
+            time.sleep(WAITING_TIME_IF_NO_TASKS)
+            with status.lock:
+                status.nb_workers_waiting -= 1
+def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+    """Pick the next job for a worker, following a fixed priority order.
+    The whole decision runs while holding `status.lock`. The matching `nb_workers_*` counter is
+    incremented here; the worker decrements it once the job is finished.
+    Returns:
+        A `(job, items)` tuple (`items` is empty for `WorkerJob.WAIT`), or `None` when every item is
+        committed or ignored and the worker should exit.
+    """
+    with status.lock:
+        # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
+        if (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.last_commit_attempt is not None
+            and time.time() - status.last_commit_attempt > 5 * 60
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 2. Commit if at least 150 files are ready to commit
+        elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (>=150 files ready)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 3. Get upload mode if at least 10 files
+        elif status.queue_get_upload_mode.qsize() >= 10:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode (>=10 files ready)")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
+        elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+            status.nb_workers_preupload_lfs += 1
+            logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
+            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        # 5. Compute sha256 if at least 1 file and no worker is computing sha256
+        elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
+            status.nb_workers_sha256 += 1
+            logger.debug("Job: sha256 (no other worker computing sha256)")
+            return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+        # 6. Get upload mode if at least 1 file and no worker is getting upload mode
+        elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode (no other worker getting upload mode)")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 7. Preupload LFS file if at least 1 file
+        #    Skip if hf_transfer is enabled and there is already a worker preuploading LFS
+        elif status.queue_preupload_lfs.qsize() > 0 and (
+            status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
+        ):
+            status.nb_workers_preupload_lfs += 1
+            logger.debug("Job: preupload LFS")
+            return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+        # 8. Compute sha256 if at least 1 file
+        elif status.queue_sha256.qsize() > 0:
+            status.nb_workers_sha256 += 1
+            logger.debug("Job: sha256")
+            return (WorkerJob.SHA256, _get_one(status.queue_sha256))
+        # 9. Get upload mode if at least 1 file
+        elif status.queue_get_upload_mode.qsize() > 0:
+            status.nb_workers_get_upload_mode += 1
+            logger.debug("Job: get upload mode")
+            return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        # 10. Commit if at least 1 file and 1 min since last commit attempt
+        elif (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.last_commit_attempt is not None
+            and time.time() - status.last_commit_attempt > 1 * 60
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit (1 min since last commit attempt)")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 11. Commit if at least 1 file, all other queues are empty and no worker is
+        #     hashing, getting upload modes or preuploading (e.g. when it's the last commit)
+        elif (
+            status.nb_workers_commit == 0
+            and status.queue_commit.qsize() > 0
+            and status.queue_sha256.qsize() == 0
+            and status.queue_get_upload_mode.qsize() == 0
+            and status.queue_preupload_lfs.qsize() == 0
+            and status.nb_workers_sha256 == 0
+            and status.nb_workers_get_upload_mode == 0
+            and status.nb_workers_preupload_lfs == 0
+        ):
+            status.nb_workers_commit += 1
+            logger.debug("Job: commit")
+            return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+        # 12. If every item is committed or ignored, exit
+        elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
+            logger.info("All files have been processed! Exiting worker.")
+            return None
+        # 13. If no task is available, wait
+        else:
+            status.nb_workers_waiting += 1
+            logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
+            return (WorkerJob.WAIT, [])
+####################
+# Atomic jobs (sha256, get_upload_mode, preupload_lfs, commit)
+####################
+def _compute_sha256(item: JOB_ITEM_T) -> None:
+    """Hash the item's file if no sha256 is recorded yet, then save the metadata."""
+    paths, metadata = item
+    already_hashed = metadata.sha256 is not None
+    if not already_hashed:
+        with paths.file_path.open("rb") as stream:
+            digest = sha_fileobj(stream)
+        metadata.sha256 = digest.hex()
+    # Metadata is saved even when the hash was already known
+    metadata.save(paths)
+def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Fetch the upload mode of a batch of files and store it in their metadata.
+    The `should_ignore` flag set on each operation by `_fetch_upload_modes` is stored as well.
+    """
+    additions = []
+    for entry in items:
+        additions.append(_build_hacky_operation(entry))
+    headers = api._build_hf_headers()
+    _fetch_upload_modes(
+        additions=additions,
+        repo_type=repo_type,
+        repo_id=repo_id,
+        headers=headers,
+        revision=revision,
+    )
+    # Copy the fetched values onto each item's metadata and save it
+    for (paths, metadata), operation in zip(items, additions):
+        metadata.upload_mode = operation._upload_mode
+        metadata.should_ignore = operation._should_ignore
+        metadata.save(paths)
+def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Pre-upload a single LFS file, then flag it as uploaded in its saved metadata."""
+    paths, metadata = item
+    operation = _build_hacky_operation(item)
+    upload_kwargs = {
+        "repo_id": repo_id,
+        "repo_type": repo_type,
+        "revision": revision,
+        "additions": [operation],
+    }
+    api.preupload_lfs_files(**upload_kwargs)
+    # Only reached when the pre-upload call did not raise
+    metadata.is_uploaded = True
+    metadata.save(paths)
+def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+    """Create one commit holding all given files, then flag each of them as committed."""
+    operations = []
+    for entry in items:
+        operations.append(_build_hacky_operation(entry))
+    api.create_commit(
+        repo_id=repo_id,
+        repo_type=repo_type,
+        revision=revision,
+        operations=operations,
+        commit_message="Add files using upload-large-folder tool",
+    )
+    # Only reached when the commit call did not raise
+    for entry in items:
+        paths, metadata = entry
+        metadata.is_committed = True
+        metadata.save(paths)
+####################
+# Hacks with CommitOperationAdd to bypass checks/sha256 calculation
+####################
+class HackyCommitOperationAdd(CommitOperationAdd):
+    """`CommitOperationAdd` whose post-init hook only converts a `Path` source to `str`."""
+    def __post_init__(self) -> None:
+        source = self.path_or_fileobj
+        if not isinstance(source, Path):
+            return
+        self.path_or_fileobj = str(source)
+def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
+    """Build an add-operation for `item`, filling `upload_info` from the stored sha256 and size.
+    Raises:
+        ValueError: If the metadata has no sha256 yet.
+    """
+    paths, metadata = item
+    operation = HackyCommitOperationAdd(path_in_repo=paths.path_in_repo, path_or_fileobj=paths.file_path)
+    # Keep at most the first 512 bytes of the file as the sample
+    with paths.file_path.open("rb") as handle:
+        head = handle.peek(512)
+    sample = head[:512]
+    if metadata.sha256 is None:
+        raise ValueError("sha256 must have been computed by now!")
+    digest = bytes.fromhex(metadata.sha256)
+    operation.upload_info = UploadInfo(sha256=digest, size=metadata.size, sample=sample)
+    return operation
+####################
+# Misc helpers
+####################
+def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+    """Take one item from `queue` and return it wrapped in a list."""
+    item = queue.get()
+    return [item]
+def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+    """Take up to `n` items from `queue`, capped by the size reported when the call starts."""
+    count = min(queue.qsize(), n)
+    batch: List[JOB_ITEM_T] = []
+    for _ in range(count):
+        batch.append(queue.get())
+    return batch
+def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+    """Special case for commit job: the number of items to commit depends on the type of files."""
+    # Stop as soon as the queue is empty, or once either counter reaches its limit
+    # (`MAX_NB_LFS_FILES_PER_COMMIT` LFS files or `MAX_NB_REGULAR_FILES_PER_COMMIT` other files).
+    batch: List[JOB_ITEM_T] = []
+    lfs_count = 0
+    regular_count = 0
+    while (
+        queue.qsize() != 0
+        and lfs_count < MAX_NB_LFS_FILES_PER_COMMIT
+        and regular_count < MAX_NB_REGULAR_FILES_PER_COMMIT
+    ):
+        entry = queue.get()
+        batch.append(entry)
+        _, metadata = entry
+        # Anything that is not flagged "lfs" counts as a regular file
+        if metadata.upload_mode == "lfs":
+            lfs_count += 1
+        else:
+            regular_count += 1
+    return batch
+def _print_overwrite(report: str) -> None:
+    """Print a report, overwriting the previous lines.
+    Since tqdm is using `sys.stderr` to (re-)write progress bars, we need to use `sys.stdout`
+    to print the report.
+    Note: works well only if no other process is writing to `sys.stdout`!
+    """
+    out = sys.stdout
+    report += "\n"
+    columns = shutil.get_terminal_size().columns
+    report_lines = report.splitlines()
+    # A line longer than the terminal wraps, so it occupies several rows on screen
+    rows_to_clear = 0
+    for line in report_lines:
+        rows_to_clear += len(line) // columns + 1
+    # Erase the current row, then move the cursor one row up, once per row to clear
+    for _ in range(rows_to_clear):
+        out.write("\r\033[K")  # Clear line
+        out.write("\033[F")  # Move cursor up one line
+    # Write the new report and pad the last row with spaces up to the terminal width
+    out.write(report)
+    padding = columns - len(report_lines[-1])
+    out.write(" " * padding)
+    out.flush()

huggingface_hub/_webhooks_server.py CHANGED Viewed

@@ -36,7 +36,7 @@ else:
 _global_app: Optional["WebhooksServer"] = None
-_is_local = os.getenv("SYSTEM") != "spaces"
+_is_local = os.environ.get("SPACE_ID") is None
 @experimental

huggingface_hub/commands/_cli_utils.py CHANGED Viewed

@@ -26,6 +26,7 @@ class ANSI:
     _gray = "\u001b[90m"
     _red = "\u001b[31m"
     _reset = "\u001b[0m"
+    _yellow = "\u001b[33m"
     @classmethod
     def bold(cls, s: str) -> str:
@@ -39,6 +40,10 @@ class ANSI:
     def red(cls, s: str) -> str:
         return cls._format(s, cls._bold + cls._red)
+    @classmethod
+    def yellow(cls, s: str) -> str:
+        """Return `s` formatted by `_format` with the yellow code only (no bold prefix, unlike `red`)."""
+        return cls._format(s, cls._yellow)
     @classmethod
     def _format(cls, s: str, code: str) -> str:
         if os.environ.get("NO_COLOR"):

huggingface_hub/commands/download.py CHANGED Viewed

@@ -112,6 +112,12 @@ class DownloadCommand(BaseHuggingfaceCLICommand):
             action="store_true",
             help="If True, progress bars are disabled and only the path to the download files is printed.",
         )
+        download_parser.add_argument(
+            "--max-workers",
+            type=int,
+            default=8,
+            help="Maximum number of workers to use for downloading files. Default is 8.",
+        )
         download_parser.set_defaults(func=DownloadCommand)
     def __init__(self, args: Namespace) -> None:
@@ -127,6 +133,7 @@ class DownloadCommand(BaseHuggingfaceCLICommand):
         self.force_download: bool = args.force_download
         self.resume_download: Optional[bool] = args.resume_download or None
         self.quiet: bool = args.quiet
+        self.max_workers: int = args.max_workers
         if args.local_dir_use_symlinks is not None:
             warnings.warn(
@@ -189,4 +196,5 @@ class DownloadCommand(BaseHuggingfaceCLICommand):
             token=self.token,
             local_dir=self.local_dir,
             library_name="huggingface-cli",
+            max_workers=self.max_workers,
         )

huggingface_hub/commands/huggingface_cli.py CHANGED Viewed

@@ -22,7 +22,9 @@ from huggingface_hub.commands.repo_files import RepoFilesCommand
 from huggingface_hub.commands.scan_cache import ScanCacheCommand
 from huggingface_hub.commands.tag import TagCommands
 from huggingface_hub.commands.upload import UploadCommand
+from huggingface_hub.commands.upload_large_folder import UploadLargeFolderCommand
 from huggingface_hub.commands.user import UserCommands
+from huggingface_hub.commands.version import VersionCommand
 def main():
@@ -39,10 +41,13 @@ def main():
     ScanCacheCommand.register_subcommand(commands_parser)
     DeleteCacheCommand.register_subcommand(commands_parser)
     TagCommands.register_subcommand(commands_parser)
+    VersionCommand.register_subcommand(commands_parser)
+    # Experimental
+    UploadLargeFolderCommand.register_subcommand(commands_parser)
     # Let's go
     args = parser.parse_args()
     if not hasattr(args, "func"):
         parser.print_help()
         exit(1)

huggingface-hub 0.24.7__py3-none-any.whl → 0.25.1__py3-none-any.whl

Potentially problematic release.

huggingface-hub 0.24.7__py3-none-any.whl → 0.25.1__py3-none-any.whl