huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
Files changed (153)
  1. huggingface_hub/__init__.py +160 -46
  2. huggingface_hub/_commit_api.py +277 -71
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +33 -22
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +241 -81
  10. huggingface_hub/_space_api.py +18 -10
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +196 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +15 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +83 -59
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +99 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +606 -346
  35. huggingface_hub/hf_api.py +2445 -1132
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +61 -66
  38. huggingface_hub/inference/_client.py +501 -630
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +536 -722
  41. huggingface_hub/inference/_generated/types/__init__.py +6 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
  50. huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  51. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  52. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  53. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  54. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  55. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  56. huggingface_hub/inference/_generated/types/text_generation.py +11 -11
  57. huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  58. huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  59. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  60. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/translation.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  65. huggingface_hub/inference/_mcp/__init__.py +0 -0
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  67. huggingface_hub/inference/_mcp/agent.py +100 -0
  68. huggingface_hub/inference/_mcp/cli.py +247 -0
  69. huggingface_hub/inference/_mcp/constants.py +81 -0
  70. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  71. huggingface_hub/inference/_mcp/types.py +45 -0
  72. huggingface_hub/inference/_mcp/utils.py +128 -0
  73. huggingface_hub/inference/_providers/__init__.py +149 -20
  74. huggingface_hub/inference/_providers/_common.py +160 -37
  75. huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
  76. huggingface_hub/inference/_providers/cerebras.py +6 -0
  77. huggingface_hub/inference/_providers/clarifai.py +13 -0
  78. huggingface_hub/inference/_providers/cohere.py +32 -0
  79. huggingface_hub/inference/_providers/fal_ai.py +231 -22
  80. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  81. huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
  82. huggingface_hub/inference/_providers/groq.py +9 -0
  83. huggingface_hub/inference/_providers/hf_inference.py +143 -33
  84. huggingface_hub/inference/_providers/hyperbolic.py +9 -5
  85. huggingface_hub/inference/_providers/nebius.py +47 -5
  86. huggingface_hub/inference/_providers/novita.py +48 -5
  87. huggingface_hub/inference/_providers/nscale.py +44 -0
  88. huggingface_hub/inference/_providers/openai.py +25 -0
  89. huggingface_hub/inference/_providers/publicai.py +6 -0
  90. huggingface_hub/inference/_providers/replicate.py +46 -9
  91. huggingface_hub/inference/_providers/sambanova.py +37 -1
  92. huggingface_hub/inference/_providers/scaleway.py +28 -0
  93. huggingface_hub/inference/_providers/together.py +34 -5
  94. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  95. huggingface_hub/inference/_providers/zai_org.py +17 -0
  96. huggingface_hub/lfs.py +33 -100
  97. huggingface_hub/repocard.py +34 -38
  98. huggingface_hub/repocard_data.py +79 -59
  99. huggingface_hub/serialization/__init__.py +0 -1
  100. huggingface_hub/serialization/_base.py +12 -15
  101. huggingface_hub/serialization/_dduf.py +8 -8
  102. huggingface_hub/serialization/_torch.py +69 -69
  103. huggingface_hub/utils/__init__.py +27 -8
  104. huggingface_hub/utils/_auth.py +7 -7
  105. huggingface_hub/utils/_cache_manager.py +92 -147
  106. huggingface_hub/utils/_chunk_utils.py +2 -3
  107. huggingface_hub/utils/_deprecation.py +1 -1
  108. huggingface_hub/utils/_dotenv.py +55 -0
  109. huggingface_hub/utils/_experimental.py +7 -5
  110. huggingface_hub/utils/_fixes.py +0 -10
  111. huggingface_hub/utils/_git_credential.py +5 -5
  112. huggingface_hub/utils/_headers.py +8 -30
  113. huggingface_hub/utils/_http.py +399 -237
  114. huggingface_hub/utils/_pagination.py +6 -6
  115. huggingface_hub/utils/_parsing.py +98 -0
  116. huggingface_hub/utils/_paths.py +5 -5
  117. huggingface_hub/utils/_runtime.py +74 -22
  118. huggingface_hub/utils/_safetensors.py +21 -21
  119. huggingface_hub/utils/_subprocess.py +13 -11
  120. huggingface_hub/utils/_telemetry.py +4 -4
  121. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  122. huggingface_hub/utils/_typing.py +25 -5
  123. huggingface_hub/utils/_validators.py +55 -74
  124. huggingface_hub/utils/_verification.py +167 -0
  125. huggingface_hub/utils/_xet.py +235 -0
  126. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  127. huggingface_hub/utils/insecure_hashlib.py +3 -5
  128. huggingface_hub/utils/logging.py +8 -11
  129. huggingface_hub/utils/tqdm.py +33 -4
  130. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
  131. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  132. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  133. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  134. huggingface_hub/commands/delete_cache.py +0 -428
  135. huggingface_hub/commands/download.py +0 -200
  136. huggingface_hub/commands/huggingface_cli.py +0 -61
  137. huggingface_hub/commands/lfs.py +0 -200
  138. huggingface_hub/commands/repo_files.py +0 -128
  139. huggingface_hub/commands/scan_cache.py +0 -181
  140. huggingface_hub/commands/tag.py +0 -159
  141. huggingface_hub/commands/upload.py +0 -299
  142. huggingface_hub/commands/upload_large_folder.py +0 -129
  143. huggingface_hub/commands/user.py +0 -304
  144. huggingface_hub/commands/version.py +0 -37
  145. huggingface_hub/inference_api.py +0 -217
  146. huggingface_hub/keras_mixin.py +0 -500
  147. huggingface_hub/repository.py +0 -1477
  148. huggingface_hub/serialization/_tensorflow.py +0 -95
  149. huggingface_hub/utils/_hf_folder.py +0 -68
  150. huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
  151. huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
  152. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  153. {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
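Note on the removals near the end of this list: `repository.py` (the git-based `Repository` class), `keras_mixin.py`, `inference_api.py`, and `serialization/_tensorflow.py` are gone in 1.x, and the `commands/` package is reorganised into `cli/`. As a rough, hedged illustration (repo ids below are hypothetical), the HTTP-based `HfApi` methods that remain cover the common `Repository`-style workflows:

```python
from huggingface_hub import HfApi, hf_hub_download

api = HfApi()

# Upload a local folder in a single commit (replaces git-clone/push workflows
# that relied on the removed Repository class).
api.upload_folder(
    repo_id="user/my-model",        # hypothetical repo id
    folder_path="./checkpoints",
    repo_type="model",
    commit_message="Upload checkpoints",
)

# Download a single file through the cache instead of cloning the whole repo.
local_path = hf_hub_download(repo_id="user/my-model", filename="config.json")
print(local_path)
```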
huggingface_hub/_upload_large_folder.py

@@ -24,14 +24,14 @@ import traceback
  from datetime import datetime
  from pathlib import Path
  from threading import Lock
- from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+ from typing import TYPE_CHECKING, Any, Optional, Union
+ from urllib.parse import quote

- from . import constants
  from ._commit_api import CommitOperationAdd, UploadInfo, _fetch_upload_modes
  from ._local_folder import LocalUploadFileMetadata, LocalUploadFilePaths, get_local_upload_paths, read_upload_metadata
  from .constants import DEFAULT_REVISION, REPO_TYPES
- from .utils import DEFAULT_IGNORE_PATTERNS, filter_repo_objects, tqdm
- from .utils._cache_manager import _format_size
+ from .utils import DEFAULT_IGNORE_PATTERNS, _format_size, filter_repo_objects, tqdm
+ from .utils._runtime import is_xet_available
  from .utils.sha import sha_fileobj


@@ -41,8 +41,113 @@ if TYPE_CHECKING:
  logger = logging.getLogger(__name__)

  WAITING_TIME_IF_NO_TASKS = 10 # seconds
- MAX_NB_REGULAR_FILES_PER_COMMIT = 75
- MAX_NB_LFS_FILES_PER_COMMIT = 150
+ MAX_NB_FILES_FETCH_UPLOAD_MODE = 100
+ COMMIT_SIZE_SCALE: list[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
+
+ UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos
+ UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload
+
+ # Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+ MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository
+ MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum number of files per folder
+ MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size
+ RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size
+
+
+ def _validate_upload_limits(paths_list: list[LocalUploadFilePaths]) -> None:
+     """
+     Validate upload against repository limits and warn about potential issues.
+
+     Args:
+         paths_list: List of file paths to be uploaded
+
+     Warns about:
+         - Too many files in the repository (>100k)
+         - Too many entries (files or subdirectories) in a single folder (>10k)
+         - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+     """
+     logger.info("Running validation checks on files to upload...")
+
+     # Check 1: Total file count
+     if len(paths_list) > MAX_FILES_PER_REPO:
+         logger.warning(
+             f"You are about to upload {len(paths_list):,} files. "
+             f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+             f"Consider:\n"
+             f" - Splitting your data into multiple repositories\n"
+             f" - Using fewer, larger files (e.g., parquet files)\n"
+             f" - See: https://huggingface.co/docs/hub/repositories-recommendations"
+         )
+
+     # Check 2: Files and subdirectories per folder
+     # Track immediate children (files and subdirs) for each folder
+     from collections import defaultdict
+
+     entries_per_folder: dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+     for paths in paths_list:
+         path = Path(paths.path_in_repo)
+         parts = path.parts
+
+         # Count this file in its immediate parent directory
+         parent = str(path.parent) if str(path.parent) != "." else "."
+         entries_per_folder[parent]["files"] += 1
+
+         # Track immediate subdirectories for each parent folder
+         # Walk through the path components to track parent-child relationships
+         for i, child in enumerate(parts[:-1]):
+             parent = "." if i == 0 else "/".join(parts[:i])
+             entries_per_folder[parent]["subdirs"].add(child)
+
+     # Check limits for each folder
+     for folder, data in entries_per_folder.items():
+         file_count = data["files"]
+         subdir_count = len(data["subdirs"])
+         total_entries = file_count + subdir_count
+
+         if total_entries > MAX_FILES_PER_FOLDER:
+             folder_display = "root" if folder == "." else folder
+             logger.warning(
+                 f"Folder '{folder_display}' contains {total_entries:,} entries "
+                 f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                 f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                 "Consider reorganising into sub-folders."
+             )
+
+     # Check 3: File sizes
+     large_files = []
+     very_large_files = []
+
+     for paths in paths_list:
+         size = paths.file_path.stat().st_size
+         size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits
+
+         if size_gb > MAX_FILE_SIZE_GB:
+             very_large_files.append((paths.path_in_repo, size_gb))
+         elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+             large_files.append((paths.path_in_repo, size_gb))
+
+     # Warn about very large files (>50GB)
+     if very_large_files:
+         files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+         more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+         logger.warning(
+             f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+             f" - {files_str}{more_str}\n"
+             f"These files may fail to upload. Consider splitting them into smaller chunks."
+         )
+
+     # Warn about large files (>20GB)
+     if large_files:
+         files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+         more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+         logger.warning(
+             f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+             f" - {files_str}{more_str}\n"
+             f"Large files may slow down loading and processing."
+         )
+
+     logger.info("Validation checks complete.")


  def upload_large_folder_internal(
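The `_validate_upload_limits` helper added above only warns; nothing is blocked. A minimal sketch of the size classification it applies, using the same decimal-GB thresholds (toy code for illustration, not part of the library):

```python
# Toy re-implementation of the per-file size check shown in the diff above.
RECOMMENDED_FILE_SIZE_GB = 20  # recommended per-file maximum
MAX_FILE_SIZE_GB = 50          # hard per-file limit

def classify(size_bytes: int) -> str:
    size_gb = size_bytes / 1_000_000_000  # decimal GB, as in the diff
    if size_gb > MAX_FILE_SIZE_GB:
        return "exceeds hard limit (may fail to upload)"
    if size_gb > RECOMMENDED_FILE_SIZE_GB:
        return "above recommended size (warn)"
    return "ok"

print(classify(8_000_000_000))    # ok
print(classify(25_000_000_000))   # above recommended size (warn)
print(classify(60_000_000_000))   # exceeds hard limit (may fail to upload)
```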
@@ -53,8 +158,8 @@ def upload_large_folder_internal(
      repo_type: str, # Repo type is required!
      revision: Optional[str] = None,
      private: Optional[bool] = None,
-     allow_patterns: Optional[Union[List[str], str]] = None,
-     ignore_patterns: Optional[Union[List[str], str]] = None,
+     allow_patterns: Optional[Union[list[str], str]] = None,
+     ignore_patterns: Optional[Union[list[str], str]] = None,
      num_workers: Optional[int] = None,
      print_report: bool = True,
      print_report_every: int = 60,
@@ -92,6 +197,8 @@ def upload_large_folder_internal(
      repo_url = api.create_repo(repo_id=repo_id, repo_type=repo_type, private=private, exist_ok=True)
      logger.info(f"Repo created: {repo_url}")
      repo_id = repo_url.repo_id
+     # 2.1 Check if xet is enabled to set batch file upload size
+     upload_batch_size = UPLOAD_BATCH_SIZE_XET if is_xet_available() else UPLOAD_BATCH_SIZE_LFS

      # 3. List files to upload
      filtered_paths_list = filter_repo_objects(
@@ -102,6 +209,11 @@
      paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
      logger.info(f"Found {len(paths_list)} candidate files to upload")

+     # Validate upload against repository limits
+     _validate_upload_limits(paths_list)
+
+     logger.info("Starting upload...")
+
      # Read metadata for each file
      items = [
          (paths, read_upload_metadata(folder_path, paths.path_in_repo))
@@ -109,7 +221,7 @@
      ]

      # 4. Start workers
-     status = LargeUploadStatus(items)
+     status = LargeUploadStatus(items, upload_batch_size)
      threads = [
          threading.Thread(
              target=_worker_job,
@@ -161,13 +273,13 @@ class WorkerJob(enum.Enum):
      WAIT = enum.auto() # if no tasks are available but we don't want to exit


- JOB_ITEM_T = Tuple[LocalUploadFilePaths, LocalUploadFileMetadata]
+ JOB_ITEM_T = tuple[LocalUploadFilePaths, LocalUploadFileMetadata]


  class LargeUploadStatus:
      """Contains information, queues and tasks for a large upload process."""

-     def __init__(self, items: List[JOB_ITEM_T]):
+     def __init__(self, items: list[JOB_ITEM_T], upload_batch_size: int = 1):
          self.items = items
          self.queue_sha256: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
          self.queue_get_upload_mode: "queue.Queue[JOB_ITEM_T]" = queue.Queue()
@@ -178,11 +290,14 @@ class LargeUploadStatus:
          self.nb_workers_sha256: int = 0
          self.nb_workers_get_upload_mode: int = 0
          self.nb_workers_preupload_lfs: int = 0
+         self.upload_batch_size: int = upload_batch_size
          self.nb_workers_commit: int = 0
          self.nb_workers_waiting: int = 0
          self.last_commit_attempt: Optional[float] = None

          self._started_at = datetime.now()
+         self._chunk_idx: int = 1
+         self._chunk_lock: Lock = Lock()

          # Setup queues
          for item in self.items:
@@ -198,6 +313,21 @@ class LargeUploadStatus:
              else:
                  logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")

+     def target_chunk(self) -> int:
+         with self._chunk_lock:
+             return COMMIT_SIZE_SCALE[self._chunk_idx]
+
+     def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
+         with self._chunk_lock:
+             if not success:
+                 logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                 self._chunk_idx -= 1
+             elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
+                 logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
+                 self._chunk_idx += 1
+
+             self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
+
      def current_report(self) -> str:
          """Generate a report of the current status of the large upload."""
          nb_hashed = 0
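The `target_chunk`/`update_chunk` pair added above replaces the fixed 75-regular/150-LFS commit limits with an adaptive ladder over `COMMIT_SIZE_SCALE`: the batch grows one step after a fast, full-sized successful commit and shrinks one step after a failure. A standalone sketch of that index arithmetic (illustrative only, assuming the 40-second threshold shown in the diff):

```python
COMMIT_SIZE_SCALE = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]

class ChunkLadder:
    def __init__(self) -> None:
        self.idx = 1  # start at 50 files per commit, as in the diff

    def target(self) -> int:
        return COMMIT_SIZE_SCALE[self.idx]

    def update(self, success: bool, nb_items: int, duration: float) -> None:
        if not success:
            self.idx -= 1                      # back off after a failed commit
        elif nb_items >= COMMIT_SIZE_SCALE[self.idx] and duration < 40:
            self.idx += 1                      # grow after a fast, full batch
        self.idx = max(0, min(self.idx, len(COMMIT_SIZE_SCALE) - 1))

ladder = ChunkLadder()
ladder.update(success=True, nb_items=50, duration=12)    # 50 -> 75
ladder.update(success=True, nb_items=75, duration=20)    # 75 -> 100
ladder.update(success=False, nb_items=100, duration=90)  # 100 -> 75
print(ladder.target())  # 75
```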
@@ -282,7 +412,7 @@ def _worker_job(
      Read `upload_large_folder` docstring for more information on how tasks are prioritized.
      """
      while True:
-         next_job: Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]] = None
+         next_job: Optional[tuple[WorkerJob, list[JOB_ITEM_T]]] = None

          # Determine next task
          next_job = _determine_next_job(status)
@@ -335,21 +465,24 @@
                  status.nb_workers_get_upload_mode -= 1

          elif job == WorkerJob.PREUPLOAD_LFS:
-             item = items[0] # single item
              try:
-                 _preupload_lfs(item, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
-                 status.queue_commit.put(item)
+                 _preupload_lfs(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
+                 for item in items:
+                     status.queue_commit.put(item)
              except KeyboardInterrupt:
                  raise
              except Exception as e:
                  logger.error(f"Failed to preupload LFS: {e}")
                  traceback.format_exc()
-                 status.queue_preupload_lfs.put(item)
+                 for item in items:
+                     status.queue_preupload_lfs.put(item)

              with status.lock:
                  status.nb_workers_preupload_lfs -= 1

          elif job == WorkerJob.COMMIT:
+             start_ts = time.time()
+             success = True
              try:
                  _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
              except KeyboardInterrupt:
@@ -359,6 +492,9 @@ def _worker_job(
                  traceback.format_exc()
                  for item in items:
                      status.queue_commit.put(item)
+                 success = False
+             duration = time.time() - start_ts
+             status.update_chunk(success, len(items), duration)
              with status.lock:
                  status.last_commit_attempt = time.time()
                  status.nb_workers_commit -= 1
@@ -369,7 +505,7 @@
                  status.nb_workers_waiting -= 1


- def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob, List[JOB_ITEM_T]]]:
+ def _determine_next_job(status: LargeUploadStatus) -> Optional[tuple[WorkerJob, list[JOB_ITEM_T]]]:
      with status.lock:
          # 1. Commit if more than 5 minutes since last commit attempt (and at least 1 file)
          if (
@@ -380,25 +516,25 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit (more than 5 minutes since last commit attempt)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

          # 2. Commit if at least 100 files are ready to commit
          elif status.nb_workers_commit == 0 and status.queue_commit.qsize() >= 150:
              status.nb_workers_commit += 1
              logger.debug("Job: commit (>100 files ready)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 3. Get upload mode if at least 10 files
-         elif status.queue_get_upload_mode.qsize() >= 10:
+         # 3. Get upload mode if at least 100 files
+         elif status.queue_get_upload_mode.qsize() >= MAX_NB_FILES_FETCH_UPLOAD_MODE:
              status.nb_workers_get_upload_mode += 1
-             logger.debug("Job: get upload mode (>10 files ready)")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             logger.debug(f"Job: get upload mode (>{MAX_NB_FILES_FETCH_UPLOAD_MODE} files ready)")
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
-         elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
+         # 4. Preupload LFS file if at least `status.upload_batch_size` files and no worker is preuploading LFS
+         elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size and status.nb_workers_preupload_lfs == 0:
              status.nb_workers_preupload_lfs += 1
              logger.debug("Job: preupload LFS (no other worker preuploading LFS)")
-             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))

          # 5. Compute sha256 if at least 1 file and no worker is computing sha256
          elif status.queue_sha256.qsize() > 0 and status.nb_workers_sha256 == 0:
@@ -410,16 +546,13 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
              status.nb_workers_get_upload_mode += 1
              logger.debug("Job: get upload mode (no other worker getting upload mode)")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 7. Preupload LFS file if at least 1 file
-         # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
-         elif status.queue_preupload_lfs.qsize() > 0 and (
-             status.nb_workers_preupload_lfs == 0 or not constants.HF_HUB_ENABLE_HF_TRANSFER
-         ):
+         # 7. Preupload LFS file if at least `status.upload_batch_size` files
+         elif status.queue_preupload_lfs.qsize() >= status.upload_batch_size:
              status.nb_workers_preupload_lfs += 1
              logger.debug("Job: preupload LFS")
-             return (WorkerJob.PREUPLOAD_LFS, _get_one(status.queue_preupload_lfs))
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))

          # 8. Compute sha256 if at least 1 file
          elif status.queue_sha256.qsize() > 0:
@@ -431,9 +564,15 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          elif status.queue_get_upload_mode.qsize() > 0:
              status.nb_workers_get_upload_mode += 1
              logger.debug("Job: get upload mode")
-             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+             return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, MAX_NB_FILES_FETCH_UPLOAD_MODE))

-         # 10. Commit if at least 1 file and 1 min since last commit attempt
+         # 10. Preupload LFS file if at least 1 file
+         elif status.queue_preupload_lfs.qsize() > 0:
+             status.nb_workers_preupload_lfs += 1
+             logger.debug("Job: preupload LFS")
+             return (WorkerJob.PREUPLOAD_LFS, _get_n(status.queue_preupload_lfs, status.upload_batch_size))
+
+         # 11. Commit if at least 1 file and 1 min since last commit attempt
          elif (
              status.nb_workers_commit == 0
              and status.queue_commit.qsize() > 0
@@ -442,9 +581,9 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit (1 min since last commit attempt)")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 11. Commit if at least 1 file all other queues are empty and all workers are waiting
+         # 12. Commit if at least 1 file all other queues are empty and all workers are waiting
          # e.g. when it's the last commit
          elif (
              status.nb_workers_commit == 0
@@ -458,14 +597,14 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
          ):
              status.nb_workers_commit += 1
              logger.debug("Job: commit")
-             return (WorkerJob.COMMIT, _get_items_to_commit(status.queue_commit))
+             return (WorkerJob.COMMIT, _get_n(status.queue_commit, status.target_chunk()))

-         # 12. If all queues are empty, exit
+         # 13. If all queues are empty, exit
          elif all(metadata.is_committed or metadata.should_ignore for _, metadata in status.items):
              logger.info("All files have been processed! Exiting worker.")
              return None

-         # 13. If no task is available, wait
+         # 14. If no task is available, wait
          else:
              status.nb_workers_waiting += 1
              logger.debug(f"No task available, waiting... ({WAITING_TIME_IF_NO_TASKS}s)")
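Taken together, the renumbered branches above give `_determine_next_job` roughly this priority order: scheduled commits, large ready-to-commit batches, metadata fetches of `MAX_NB_FILES_FETCH_UPLOAD_MODE` files, batched preuploads, sha256 hashing, then fallback passes and a final commit before waiting. A much-simplified sketch of that ordering (it ignores worker counts and timers, so it is not the library's logic):

```python
# Simplified sketch of the new scheduling order, given only queue sizes.
def pick_job(commit: int, upload_mode: int, preupload: int, sha256: int, batch: int) -> str:
    if commit >= 150:
        return "commit"
    if upload_mode >= 100:
        return "get_upload_mode"
    if preupload >= batch:
        return "preupload_lfs"
    if sha256 > 0:
        return "sha256"
    if upload_mode > 0:
        return "get_upload_mode"
    if preupload > 0:
        return "preupload_lfs"
    if commit > 0:
        return "commit"
    return "wait"

print(pick_job(commit=0, upload_mode=3, preupload=300, sha256=0, batch=256))  # preupload_lfs
```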
@@ -486,7 +625,7 @@ def _compute_sha256(item: JOB_ITEM_T) -> None:
      metadata.save(paths)


- def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+ def _get_upload_mode(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
      """Get upload mode for each file and update metadata.

      Also receive info if the file should be ignored.
@@ -497,31 +636,33 @@ def _get_upload_mode(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_t
          repo_type=repo_type,
          repo_id=repo_id,
          headers=api._build_hf_headers(),
-         revision=revision,
+         revision=quote(revision, safe=""),
+         endpoint=api.endpoint,
      )
      for item, addition in zip(items, additions):
          paths, metadata = item
          metadata.upload_mode = addition._upload_mode
          metadata.should_ignore = addition._should_ignore
+         metadata.remote_oid = addition._remote_oid
          metadata.save(paths)


- def _preupload_lfs(item: JOB_ITEM_T, api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
-     """Preupload LFS file and update metadata."""
-     paths, metadata = item
-     addition = _build_hacky_operation(item)
+ def _preupload_lfs(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+     """Preupload LFS files and update metadata."""
+     additions = [_build_hacky_operation(item) for item in items]
      api.preupload_lfs_files(
          repo_id=repo_id,
          repo_type=repo_type,
          revision=revision,
-         additions=[addition],
+         additions=additions,
      )

-     metadata.is_uploaded = True
-     metadata.save(paths)
+     for paths, metadata in items:
+         metadata.is_uploaded = True
+         metadata.save(paths)


- def _commit(items: List[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
+ def _commit(items: list[JOB_ITEM_T], api: "HfApi", repo_id: str, repo_type: str, revision: str) -> None:
      """Commit files to the repo."""
      additions = [_build_hacky_operation(item) for item in items]
      api.create_commit(
@@ -555,6 +696,9 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
      if metadata.sha256 is None:
          raise ValueError("sha256 must have been computed by now!")
      operation.upload_info = UploadInfo(sha256=bytes.fromhex(metadata.sha256), size=metadata.size, sample=sample)
+     operation._upload_mode = metadata.upload_mode # type: ignore[assignment]
+     operation._should_ignore = metadata.should_ignore
+     operation._remote_oid = metadata.remote_oid
      return operation


@@ -563,38 +707,14 @@ def _build_hacky_operation(item: JOB_ITEM_T) -> HackyCommitOperationAdd:
  ####################


- def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
+ def _get_one(queue: "queue.Queue[JOB_ITEM_T]") -> list[JOB_ITEM_T]:
      return [queue.get()]


- def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> List[JOB_ITEM_T]:
+ def _get_n(queue: "queue.Queue[JOB_ITEM_T]", n: int) -> list[JOB_ITEM_T]:
      return [queue.get() for _ in range(min(queue.qsize(), n))]


- def _get_items_to_commit(queue: "queue.Queue[JOB_ITEM_T]") -> List[JOB_ITEM_T]:
-     """Special case for commit job: the number of items to commit depends on the type of files."""
-     # Can take at most 50 regular files and/or 100 LFS files in a single commit
-     items: List[JOB_ITEM_T] = []
-     nb_lfs, nb_regular = 0, 0
-     while True:
-         # If empty queue => commit everything
-         if queue.qsize() == 0:
-             return items
-
-         # If we have enough items => commit them
-         if nb_lfs >= MAX_NB_LFS_FILES_PER_COMMIT or nb_regular >= MAX_NB_REGULAR_FILES_PER_COMMIT:
-             return items
-
-         # Else, get a new item and increase counter
-         item = queue.get()
-         items.append(item)
-         _, metadata = item
-         if metadata.upload_mode == "lfs":
-             nb_lfs += 1
-         else:
-             nb_regular += 1
-
-
  def _print_overwrite(report: str) -> None:
      """Print a report, overwriting the previous lines.

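Everything above is internal machinery behind the public `HfApi.upload_large_folder` API; a typical call looks roughly like this (repo id hypothetical, and `repo_type` is required for this method):

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_large_folder(
    repo_id="user/large-dataset",   # hypothetical repo id
    repo_type="dataset",
    folder_path="./data",
    # num_workers=8,                # optional: defaults to a CPU-based heuristic
)
```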
huggingface_hub/_webhooks_payload.py

@@ -14,7 +14,7 @@
  # limitations under the License.
  """Contains data structures to parse the webhooks payload."""

- from typing import List, Literal, Optional
+ from typing import Literal, Optional

  from .utils import is_pydantic_available

@@ -116,7 +116,7 @@ class WebhookPayloadRepo(ObjectId):
      name: str
      private: bool
      subdomain: Optional[str] = None
-     tags: Optional[List[str]] = None
+     tags: Optional[list[str]] = None
      type: Literal["dataset", "model", "space"]
      url: WebhookPayloadUrl

@@ -134,4 +134,4 @@ class WebhookPayload(BaseModel):
      comment: Optional[WebhookPayloadComment] = None
      webhook: WebhookPayloadWebhook
      movedTo: Optional[WebhookPayloadMovedTo] = None
-     updatedRefs: Optional[List[WebhookPayloadUpdatedRef]] = None
+     updatedRefs: Optional[list[WebhookPayloadUpdatedRef]] = None
huggingface_hub/_webhooks_server.py

@@ -18,7 +18,7 @@ import atexit
  import inspect
  import os
  from functools import wraps
- from typing import TYPE_CHECKING, Any, Callable, Dict, Optional
+ from typing import TYPE_CHECKING, Any, Callable, Optional

  from .utils import experimental, is_fastapi_available, is_gradio_available

@@ -32,7 +32,7 @@ if is_fastapi_available():
      from fastapi.responses import JSONResponse
  else:
      # Will fail at runtime if FastAPI is not available
-     FastAPI = Request = JSONResponse = None # type: ignore [misc, assignment]
+     FastAPI = Request = JSONResponse = None # type: ignore


  _global_app: Optional["WebhooksServer"] = None
@@ -50,20 +50,14 @@ class WebhooksServer:
      It is recommended to accept [`WebhookPayload`] as the first argument of the webhook function. It is a Pydantic
      model that contains all the information about the webhook event. The data will be parsed automatically for you.

-     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
      WebhooksServer and deploy it on a Space.

-     <Tip warning={true}>
+     > [!WARNING]
+     > `WebhooksServer` is experimental. Its API is subject to change in the future.

-     `WebhooksServer` is experimental. Its API is subject to change in the future.
-
-     </Tip>
-
-     <Tip warning={true}>
-
-     You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).
-
-     </Tip>
+     > [!WARNING]
+     > You must have `gradio` installed to use `WebhooksServer` (`pip install --upgrade gradio`).

      Args:
          ui (`gradio.Blocks`, optional):
@@ -115,7 +109,7 @@ class WebhooksServer:
          self._ui = ui

          self.webhook_secret = webhook_secret or os.getenv("WEBHOOK_SECRET")
-         self.registered_webhooks: Dict[str, Callable] = {}
+         self.registered_webhooks: dict[str, Callable] = {}
          _warn_on_empty_secret(self.webhook_secret)

      def add_webhook(self, path: Optional[str] = None) -> Callable:
@@ -186,6 +180,8 @@ class WebhooksServer:
          # Print instructions and block main thread
          space_host = os.environ.get("SPACE_HOST")
          url = "https://" + space_host if space_host is not None else (ui.share_url or ui.local_url)
+         if url is None:
+             raise ValueError("Cannot find the URL of the app. Please provide a valid `ui` or update `gradio` version.")
          url = url.strip("/")
          message = "\nWebhooks are correctly setup and ready to use:"
          message += "\n" + "\n".join(f" - POST {url}{webhook}" for webhook in self.registered_webhooks)
@@ -235,20 +231,14 @@ def webhook_endpoint(path: Optional[str] = None) -> Callable:
      you can use [`WebhooksServer`] directly. You can register multiple webhook endpoints (to the same server) by using
      this decorator multiple times.

-     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to setup your
+     Check out the [webhooks guide](../guides/webhooks_server) for a step-by-step tutorial on how to set up your
      server and deploy it on a Space.

-     <Tip warning={true}>
-
-     `webhook_endpoint` is experimental. Its API is subject to change in the future.
-
-     </Tip>
-
-     <Tip warning={true}>
-
-     You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).
+     > [!WARNING]
+     > `webhook_endpoint` is experimental. Its API is subject to change in the future.

-     </Tip>
+     > [!WARNING]
+     > You must have `gradio` installed to use `webhook_endpoint` (`pip install --upgrade gradio`).

      Args:
          path (`str`, optional):
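For context on the docstrings edited above, a minimal `webhook_endpoint` usage sketch (handler name and logic are illustrative only; running it requires `gradio`):

```python
from huggingface_hub import WebhookPayload, webhook_endpoint

@webhook_endpoint
async def trigger_training(payload: WebhookPayload) -> None:
    # Illustrative handler: react to updates on a dataset repo.
    if payload.repo is not None and payload.repo.type == "dataset" and payload.event.action == "update":
        print(f"Dataset {payload.repo.name} updated")
```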
huggingface_hub/{commands → cli}/__init__.py

@@ -1,4 +1,4 @@
- # Copyright 2020 The HuggingFace Team. All rights reserved.
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -11,17 +11,3 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
- from abc import ABC, abstractmethod
- from argparse import _SubParsersAction
-
-
- class BaseHuggingfaceCLICommand(ABC):
-     @staticmethod
-     @abstractmethod
-     def register_subcommand(parser: _SubParsersAction):
-         raise NotImplementedError()
-
-     @abstractmethod
-     def run(self):
-         raise NotImplementedError()