huggingface-hub 0.35.0rc0__py3-none-any.whl → 0.35.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
This release has been flagged as potentially problematic.
- huggingface_hub/__init__.py +19 -1
- huggingface_hub/_jobs_api.py +168 -12
- huggingface_hub/_local_folder.py +1 -1
- huggingface_hub/_oauth.py +5 -9
- huggingface_hub/_tensorboard_logger.py +9 -10
- huggingface_hub/_upload_large_folder.py +108 -1
- huggingface_hub/cli/auth.py +4 -1
- huggingface_hub/cli/cache.py +7 -9
- huggingface_hub/cli/hf.py +2 -5
- huggingface_hub/cli/jobs.py +591 -13
- huggingface_hub/cli/repo.py +10 -4
- huggingface_hub/commands/delete_cache.py +2 -2
- huggingface_hub/commands/scan_cache.py +1 -1
- huggingface_hub/dataclasses.py +3 -0
- huggingface_hub/file_download.py +12 -10
- huggingface_hub/hf_api.py +549 -95
- huggingface_hub/hf_file_system.py +4 -10
- huggingface_hub/hub_mixin.py +5 -3
- huggingface_hub/inference/_client.py +98 -181
- huggingface_hub/inference/_common.py +72 -70
- huggingface_hub/inference/_generated/_async_client.py +116 -201
- huggingface_hub/inference/_generated/types/chat_completion.py +2 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
- huggingface_hub/inference/_mcp/cli.py +1 -1
- huggingface_hub/inference/_mcp/constants.py +1 -1
- huggingface_hub/inference/_mcp/mcp_client.py +28 -11
- huggingface_hub/inference/_mcp/types.py +3 -0
- huggingface_hub/inference/_mcp/utils.py +7 -3
- huggingface_hub/inference/_providers/__init__.py +13 -0
- huggingface_hub/inference/_providers/_common.py +29 -4
- huggingface_hub/inference/_providers/black_forest_labs.py +1 -1
- huggingface_hub/inference/_providers/fal_ai.py +33 -2
- huggingface_hub/inference/_providers/hf_inference.py +15 -7
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +1 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/lfs.py +2 -4
- huggingface_hub/repocard.py +2 -1
- huggingface_hub/utils/_dotenv.py +24 -20
- huggingface_hub/utils/_git_credential.py +1 -1
- huggingface_hub/utils/_http.py +3 -5
- huggingface_hub/utils/_runtime.py +1 -0
- huggingface_hub/utils/_typing.py +24 -4
- huggingface_hub/utils/_xet_progress_reporting.py +31 -10
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/METADATA +7 -4
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/RECORD +50 -48
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-0.35.1.dist-info}/top_level.txt +0 -0
huggingface_hub/__init__.py CHANGED

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING


-__version__ = "0.35.0rc0"
+__version__ = "0.35.1"

 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
@@ -182,6 +182,8 @@ _SUBMOD_ATTRS = {
         "create_inference_endpoint_from_catalog",
         "create_pull_request",
         "create_repo",
+        "create_scheduled_job",
+        "create_scheduled_uv_job",
         "create_tag",
         "create_webhook",
         "dataset_info",
@@ -192,6 +194,7 @@ _SUBMOD_ATTRS = {
         "delete_folder",
         "delete_inference_endpoint",
         "delete_repo",
+        "delete_scheduled_job",
         "delete_space_secret",
         "delete_space_storage",
         "delete_space_variable",
@@ -219,6 +222,7 @@ _SUBMOD_ATTRS = {
         "get_webhook",
         "grant_access",
         "inspect_job",
+        "inspect_scheduled_job",
         "list_accepted_access_requests",
         "list_collections",
         "list_datasets",
@@ -259,6 +263,7 @@ _SUBMOD_ATTRS = {
         "request_space_storage",
         "restart_space",
         "resume_inference_endpoint",
+        "resume_scheduled_job",
         "revision_exists",
         "run_as_future",
         "run_job",
@@ -267,6 +272,7 @@ _SUBMOD_ATTRS = {
         "set_space_sleep_time",
         "space_info",
         "super_squash_history",
+        "suspend_scheduled_job",
         "unlike",
         "update_collection_item",
         "update_collection_metadata",
@@ -828,6 +834,8 @@ __all__ = [
     "create_inference_endpoint_from_catalog",
     "create_pull_request",
     "create_repo",
+    "create_scheduled_job",
+    "create_scheduled_uv_job",
     "create_tag",
     "create_webhook",
     "dataset_info",
@@ -838,6 +846,7 @@ __all__ = [
     "delete_folder",
     "delete_inference_endpoint",
     "delete_repo",
+    "delete_scheduled_job",
     "delete_space_secret",
     "delete_space_storage",
     "delete_space_variable",
@@ -878,6 +887,7 @@ __all__ = [
     "hf_hub_download",
     "hf_hub_url",
     "inspect_job",
+    "inspect_scheduled_job",
     "interpreter_login",
     "list_accepted_access_requests",
     "list_collections",
@@ -933,6 +943,7 @@ __all__ = [
     "request_space_storage",
     "restart_space",
     "resume_inference_endpoint",
+    "resume_scheduled_job",
     "revision_exists",
     "run_as_future",
     "run_job",
@@ -949,6 +960,7 @@ __all__ = [
     "split_tf_state_dict_into_shards",
     "split_torch_state_dict_into_shards",
     "super_squash_history",
+    "suspend_scheduled_job",
     "try_to_load_from_cache",
     "unlike",
     "update_collection_item",
@@ -1190,6 +1202,8 @@ if TYPE_CHECKING: # pragma: no cover
         create_inference_endpoint_from_catalog, # noqa: F401
         create_pull_request, # noqa: F401
         create_repo, # noqa: F401
+        create_scheduled_job, # noqa: F401
+        create_scheduled_uv_job, # noqa: F401
         create_tag, # noqa: F401
         create_webhook, # noqa: F401
         dataset_info, # noqa: F401
@@ -1200,6 +1214,7 @@ if TYPE_CHECKING: # pragma: no cover
         delete_folder, # noqa: F401
         delete_inference_endpoint, # noqa: F401
         delete_repo, # noqa: F401
+        delete_scheduled_job, # noqa: F401
         delete_space_secret, # noqa: F401
         delete_space_storage, # noqa: F401
         delete_space_variable, # noqa: F401
@@ -1227,6 +1242,7 @@ if TYPE_CHECKING: # pragma: no cover
         get_webhook, # noqa: F401
         grant_access, # noqa: F401
         inspect_job, # noqa: F401
+        inspect_scheduled_job, # noqa: F401
         list_accepted_access_requests, # noqa: F401
         list_collections, # noqa: F401
         list_datasets, # noqa: F401
@@ -1267,6 +1283,7 @@ if TYPE_CHECKING: # pragma: no cover
         request_space_storage, # noqa: F401
         restart_space, # noqa: F401
         resume_inference_endpoint, # noqa: F401
+        resume_scheduled_job, # noqa: F401
         revision_exists, # noqa: F401
         run_as_future, # noqa: F401
         run_job, # noqa: F401
@@ -1275,6 +1292,7 @@ if TYPE_CHECKING: # pragma: no cover
         set_space_sleep_time, # noqa: F401
         space_info, # noqa: F401
         super_squash_history, # noqa: F401
+        suspend_scheduled_job, # noqa: F401
         unlike, # noqa: F401
         update_collection_item, # noqa: F401
         update_collection_metadata, # noqa: F401
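The new exports above back the scheduled Jobs feature. A minimal, hypothetical usage sketch follows: the function names and the create_scheduled_job arguments come from this diff, while passing the job id to the inspect/delete helpers is an assumption.

# Hypothetical sketch: exercising the newly exported scheduled-job helpers.
# create_scheduled_job arguments mirror the docstring example in _jobs_api.py below;
# id-based calls to inspect_scheduled_job / delete_scheduled_job are assumptions.
from huggingface_hub import create_scheduled_job, delete_scheduled_job, inspect_scheduled_job

scheduled = create_scheduled_job(
    image="python:3.12",
    command=["python", "-c", "print('Hello from the cloud!')"],
    schedule="@hourly",
)
print(inspect_scheduled_job(scheduled.id).status.next_job_run_at)
delete_scheduled_job(scheduled.id)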
huggingface_hub/_jobs_api.py CHANGED

@@ -15,7 +15,7 @@
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 from huggingface_hub import constants
 from huggingface_hub._space_api import SpaceHardware
@@ -47,15 +47,12 @@ class JobStatus:
     stage: JobStage
     message: Optional[str]

-    def __init__(self, **kwargs) -> None:
-        self.stage = kwargs["stage"]
-        self.message = kwargs.get("message")
-

 @dataclass
 class JobOwner:
     id: str
     name: str
+    type: str


 @dataclass
@@ -88,8 +85,8 @@ class JobInfo:
         status: (`JobStatus` or `None`):
             Status of the Job, e.g. `JobStatus(stage="RUNNING", message=None)`
             See [`JobStage`] for possible stage values.
-
-            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq")`
+        owner: (`JobOwner` or `None`):
+            Owner of the Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`

     Example:

@@ -100,7 +97,7 @@ class JobInfo:
     ... command=["python", "-c", "print('Hello from the cloud!')"]
     ... )
     >>> job
-    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
+    JobInfo(id='687fb701029421ae5549d998', created_at=datetime.datetime(2025, 7, 22, 16, 6, 25, 79000, tzinfo=datetime.timezone.utc), docker_image='python:3.12', space_id=None, command=['python', '-c', "print('Hello from the cloud!')"], arguments=[], environment={}, secrets={}, flavor='cpu-basic', status=JobStatus(stage='RUNNING', message=None), owner=JobOwner(id='5e9ecfc04957053f60648a3e', name='lhoestq', type='user'), endpoint='https://huggingface.co', url='https://huggingface.co/jobs/lhoestq/687fb701029421ae5549d998')
     >>> job.id
     '687fb701029421ae5549d998'
     >>> job.url
@@ -119,8 +116,8 @@ class JobInfo:
     environment: Optional[Dict[str, Any]]
     secrets: Optional[Dict[str, Any]]
     flavor: Optional[SpaceHardware]
-    status:
-    owner:
+    status: JobStatus
+    owner: JobOwner

     # Inferred fields
     endpoint: str
@@ -132,14 +129,173 @@ class JobInfo:
         self.created_at = parse_datetime(created_at) if created_at else None
         self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
         self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
-
+        owner = kwargs.get("owner", {})
+        self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
         self.command = kwargs.get("command")
         self.arguments = kwargs.get("arguments")
         self.environment = kwargs.get("environment")
         self.secrets = kwargs.get("secrets")
         self.flavor = kwargs.get("flavor")
-
+        status = kwargs.get("status", {})
+        self.status = JobStatus(stage=status["stage"], message=status.get("message"))

         # Inferred fields
         self.endpoint = kwargs.get("endpoint", constants.ENDPOINT)
         self.url = f"{self.endpoint}/jobs/{self.owner.name}/{self.id}"
+
+
+@dataclass
+class JobSpec:
+    docker_image: Optional[str]
+    space_id: Optional[str]
+    command: Optional[List[str]]
+    arguments: Optional[List[str]]
+    environment: Optional[Dict[str, Any]]
+    secrets: Optional[Dict[str, Any]]
+    flavor: Optional[SpaceHardware]
+    timeout: Optional[int]
+    tags: Optional[List[str]]
+    arch: Optional[str]
+
+    def __init__(self, **kwargs) -> None:
+        self.docker_image = kwargs.get("dockerImage") or kwargs.get("docker_image")
+        self.space_id = kwargs.get("spaceId") or kwargs.get("space_id")
+        self.command = kwargs.get("command")
+        self.arguments = kwargs.get("arguments")
+        self.environment = kwargs.get("environment")
+        self.secrets = kwargs.get("secrets")
+        self.flavor = kwargs.get("flavor")
+        self.timeout = kwargs.get("timeout")
+        self.tags = kwargs.get("tags")
+        self.arch = kwargs.get("arch")
+
+
+@dataclass
+class LastJobInfo:
+    id: str
+    at: datetime
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        self.at = parse_datetime(kwargs["at"])
+
+
+@dataclass
+class ScheduledJobStatus:
+    last_job: Optional[LastJobInfo]
+    next_job_run_at: Optional[datetime]
+
+    def __init__(self, **kwargs) -> None:
+        last_job = kwargs.get("lastJob") or kwargs.get("last_job")
+        self.last_job = LastJobInfo(**last_job) if last_job else None
+        next_job_run_at = kwargs.get("nextJobRunAt") or kwargs.get("next_job_run_at")
+        self.next_job_run_at = parse_datetime(str(next_job_run_at)) if next_job_run_at else None
+
+
+@dataclass
+class ScheduledJobInfo:
+    """
+    Contains information about a Job.
+
+    Args:
+        id (`str`):
+            Scheduled Job ID.
+        created_at (`datetime` or `None`):
+            When the scheduled Job was created.
+        tags (`List[str]` or `None`):
+            The tags of the scheduled Job.
+        schedule (`str` or `None`):
+            One of "@annually", "@yearly", "@monthly", "@weekly", "@daily", "@hourly", or a
+            CRON schedule expression (e.g., '0 9 * * 1' for 9 AM every Monday).
+        suspend (`bool` or `None`):
+            Whether the scheduled job is suspended (paused).
+        concurrency (`bool` or `None`):
+            Whether multiple instances of this Job can run concurrently.
+        status (`ScheduledJobStatus` or `None`):
+            Status of the scheduled Job.
+        owner: (`JobOwner` or `None`):
+            Owner of the scheduled Job, e.g. `JobOwner(id="5e9ecfc04957053f60648a3e", name="lhoestq", type="user")`
+        job_spec: (`JobSpec` or `None`):
+            Specifications of the Job.
+
+    Example:
+
+    ```python
+    >>> from huggingface_hub import run_job
+    >>> scheduled_job = create_scheduled_job(
+    ...     image="python:3.12",
+    ...     command=["python", "-c", "print('Hello from the cloud!')"],
+    ...     schedule="@hourly",
+    ... )
+    >>> scheduled_job.id
+    '687fb701029421ae5549d999'
+    >>> scheduled_job.status.next_job_run_at
+    datetime.datetime(2025, 7, 22, 17, 6, 25, 79000, tzinfo=datetime.timezone.utc)
+    ```
+    """
+
+    id: str
+    created_at: Optional[datetime]
+    job_spec: JobSpec
+    schedule: Optional[str]
+    suspend: Optional[bool]
+    concurrency: Optional[bool]
+    status: ScheduledJobStatus
+    owner: JobOwner
+
+    def __init__(self, **kwargs) -> None:
+        self.id = kwargs["id"]
+        created_at = kwargs.get("createdAt") or kwargs.get("created_at")
+        self.created_at = parse_datetime(created_at) if created_at else None
+        self.job_spec = JobSpec(**(kwargs.get("job_spec") or kwargs.get("jobSpec", {})))
+        self.schedule = kwargs.get("schedule")
+        self.suspend = kwargs.get("suspend")
+        self.concurrency = kwargs.get("concurrency")
+        status = kwargs.get("status", {})
+        self.status = ScheduledJobStatus(
+            last_job=status.get("last_job") or status.get("lastJob"),
+            next_job_run_at=status.get("next_job_run_at") or status.get("nextJobRunAt"),
+        )
+        owner = kwargs.get("owner", {})
+        self.owner = JobOwner(id=owner["id"], name=owner["name"], type=owner["type"])
+
+
+def _create_job_spec(
+    *,
+    image: str,
+    command: List[str],
+    env: Optional[Dict[str, Any]],
+    secrets: Optional[Dict[str, Any]],
+    flavor: Optional[SpaceHardware],
+    timeout: Optional[Union[int, float, str]],
+) -> Dict[str, Any]:
+    # prepare job spec to send to HF Jobs API
+    job_spec: Dict[str, Any] = {
+        "command": command,
+        "arguments": [],
+        "environment": env or {},
+        "flavor": flavor or SpaceHardware.CPU_BASIC,
+    }
+    # secrets are optional
+    if secrets:
+        job_spec["secrets"] = secrets
+    # timeout is optional
+    if timeout:
+        time_units_factors = {"s": 1, "m": 60, "h": 3600, "d": 3600 * 24}
+        if isinstance(timeout, str) and timeout[-1] in time_units_factors:
+            job_spec["timeoutSeconds"] = int(float(timeout[:-1]) * time_units_factors[timeout[-1]])
+        else:
+            job_spec["timeoutSeconds"] = int(timeout)
+    # input is either from docker hub or from HF spaces
+    for prefix in (
+        "https://huggingface.co/spaces/",
+        "https://hf.co/spaces/",
+        "huggingface.co/spaces/",
+        "hf.co/spaces/",
+    ):
+        if image.startswith(prefix):
+            job_spec["spaceId"] = image[len(prefix) :]
+            break
+    else:
+        job_spec["dockerImage"] = image
+    return job_spec
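To make the new _create_job_spec helper concrete, here is a small sketch of the mapping it performs, based only on the code above; the Space URL and environment values are made up.

# Sketch of the payload produced by _create_job_spec (argument values are illustrative).
from huggingface_hub._jobs_api import _create_job_spec

spec = _create_job_spec(
    image="https://huggingface.co/spaces/my-user/my-space",  # hypothetical Space URL
    command=["python", "run.py"],
    env={"N_ROWS": "1000"},
    secrets=None,
    flavor=None,    # falls back to SpaceHardware.CPU_BASIC ("cpu-basic")
    timeout="30m",  # "30m" is converted to timeoutSeconds=1800
)
# Expected shape: {"command": [...], "arguments": [], "environment": {...},
#                  "flavor": "cpu-basic", "timeoutSeconds": 1800, "spaceId": "my-user/my-space"}
# A plain image name (e.g. "python:3.12") would set "dockerImage" instead of "spaceId".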
huggingface_hub/_local_folder.py CHANGED

@@ -90,7 +90,7 @@ class LocalDownloadFilePaths:
         resolved_path = str(path.resolve())
         # Some Windows versions do not allow for paths longer than 255 characters.
         # In this case, we must specify it as an extended path by using the "\\?\" prefix.
-        if len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
+        if os.name == "nt" and len(resolved_path) > 255 and not resolved_path.startswith("\\\\?\\"):
             path = Path("\\\\?\\" + resolved_path)
         return path

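The fix gates the extended-path prefix on Windows only, so long paths on other platforms are no longer rewritten; a standalone sketch of the same guard:

# Standalone illustration of the guarded long-path logic (not library code).
import os
from pathlib import Path

def _with_extended_prefix(path: Path) -> Path:
    resolved = str(path.resolve())
    # Only Windows needs the "\\?\" extended-path prefix for paths longer than 255 characters.
    if os.name == "nt" and len(resolved) > 255 and not resolved.startswith("\\\\?\\"):
        return Path("\\\\?\\" + resolved)
    return path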
huggingface_hub/_oauth.py CHANGED

@@ -6,7 +6,7 @@ import time
 import urllib.parse
 import warnings
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union

 from . import constants
 from .hf_api import whoami
@@ -39,10 +39,8 @@ class OAuthOrgInfo:
             Whether the org has a payment method set up. Hugging Face field.
         role_in_org (`Optional[str]`, *optional*):
             The user's role in the org. Hugging Face field.
-
-
-        missing_mfa (`Optional[bool]`, *optional*):
-            Indicates if the user granted the OAuth app access to the org but didn't complete MFA. Hugging Face field.
+        security_restrictions (`Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]]`, *optional*):
+            Array of security restrictions that the user hasn't completed for this org. Possible values: "ip", "token-policy", "mfa", "sso". Hugging Face field.
     """

     sub: str
@@ -52,8 +50,7 @@ class OAuthOrgInfo:
     is_enterprise: bool
     can_pay: Optional[bool] = None
     role_in_org: Optional[str] = None
-
-    missing_mfa: Optional[bool] = None
+    security_restrictions: Optional[List[Literal["ip", "token-policy", "mfa", "sso"]]] = None


 @dataclass
@@ -221,8 +218,7 @@ def parse_huggingface_oauth(request: "fastapi.Request") -> Optional[OAuthInfo]:
             is_enterprise=org.get("isEnterprise"),
             can_pay=org.get("canPay"),
             role_in_org=org.get("roleInOrg"),
-
-            missing_mfa=org.get("missingMFA"),
+            security_restrictions=org.get("securityRestrictions"),
         )
         for org in orgs_data
     ]
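Code that previously read missing_mfa needs to switch to the new security_restrictions list; a hypothetical migration sketch:

# Hypothetical migration for downstream code; `org` is assumed to be an OAuthOrgInfo
# obtained from parse_huggingface_oauth().
def org_needs_mfa(org) -> bool:
    restrictions = org.security_restrictions or []
    return "mfa" in restrictions  # previously: bool(org.missing_mfa)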
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"""Contains a logger to push training logs to the Hub, using Tensorboard."""
|
|
15
15
|
|
|
16
16
|
from pathlib import Path
|
|
17
|
-
from typing import
|
|
17
|
+
from typing import List, Optional, Union
|
|
18
18
|
|
|
19
19
|
from ._commit_scheduler import CommitScheduler
|
|
20
20
|
from .errors import EntryNotFoundError
|
|
@@ -26,25 +26,24 @@ from .utils import experimental
|
|
|
26
26
|
# or from 'torch.utils.tensorboard'. Both are compatible so let's try to load
|
|
27
27
|
# from either of them.
|
|
28
28
|
try:
|
|
29
|
-
from tensorboardX import SummaryWriter
|
|
29
|
+
from tensorboardX import SummaryWriter as _RuntimeSummaryWriter
|
|
30
30
|
|
|
31
31
|
is_summary_writer_available = True
|
|
32
|
-
|
|
33
32
|
except ImportError:
|
|
34
33
|
try:
|
|
35
|
-
from torch.utils.tensorboard import SummaryWriter
|
|
34
|
+
from torch.utils.tensorboard import SummaryWriter as _RuntimeSummaryWriter
|
|
36
35
|
|
|
37
|
-
is_summary_writer_available =
|
|
36
|
+
is_summary_writer_available = True
|
|
38
37
|
except ImportError:
|
|
39
38
|
# Dummy class to avoid failing at import. Will raise on instance creation.
|
|
40
|
-
|
|
41
|
-
|
|
39
|
+
class _DummySummaryWriter:
|
|
40
|
+
pass
|
|
42
41
|
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
_RuntimeSummaryWriter = _DummySummaryWriter # type: ignore[assignment]
|
|
43
|
+
is_summary_writer_available = False
|
|
45
44
|
|
|
46
45
|
|
|
47
|
-
class HFSummaryWriter(
|
|
46
|
+
class HFSummaryWriter(_RuntimeSummaryWriter):
|
|
48
47
|
"""
|
|
49
48
|
Wrapper around the tensorboard's `SummaryWriter` to push training logs to the Hub.
|
|
50
49
|
|
|
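The refactor only renames the runtime base class, so user-facing usage should be unchanged; a minimal sketch, assuming a tensorboard backend is installed and using an illustrative repo_id:

# Minimal usage sketch (repo_id and values are illustrative; requires tensorboardX or torch).
from huggingface_hub import HFSummaryWriter

writer = HFSummaryWriter(repo_id="my-user/tb-logs", commit_every=5)  # periodically pushes logs to the Hub
writer.add_scalar("train/loss", 0.42, global_step=1)  # standard SummaryWriter API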
huggingface_hub/_upload_large_folder.py CHANGED

@@ -24,7 +24,7 @@ import traceback
 from datetime import datetime
 from pathlib import Path
 from threading import Lock
-from typing import TYPE_CHECKING, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 from urllib.parse import quote

 from . import constants
@@ -49,6 +49,108 @@ COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 UPLOAD_BATCH_SIZE_XET = 256 # Max 256 files per upload batch for XET-enabled repos
 UPLOAD_BATCH_SIZE_LFS = 1 # Otherwise, batches of 1 for regular LFS upload

+# Repository limits (from https://huggingface.co/docs/hub/repositories-recommendations)
+MAX_FILES_PER_REPO = 100_000 # Recommended maximum number of files per repository
+MAX_FILES_PER_FOLDER = 10_000 # Recommended maximum number of files per folder
+MAX_FILE_SIZE_GB = 50 # Hard limit for individual file size
+RECOMMENDED_FILE_SIZE_GB = 20 # Recommended maximum for individual file size
+
+
+def _validate_upload_limits(paths_list: List[LocalUploadFilePaths]) -> None:
+    """
+    Validate upload against repository limits and warn about potential issues.
+
+    Args:
+        paths_list: List of file paths to be uploaded
+
+    Warns about:
+        - Too many files in the repository (>100k)
+        - Too many entries (files or subdirectories) in a single folder (>10k)
+        - Files exceeding size limits (>20GB recommended, >50GB hard limit)
+    """
+    logger.info("Running validation checks on files to upload...")
+
+    # Check 1: Total file count
+    if len(paths_list) > MAX_FILES_PER_REPO:
+        logger.warning(
+            f"You are about to upload {len(paths_list):,} files. "
+            f"This exceeds the recommended limit of {MAX_FILES_PER_REPO:,} files per repository.\n"
+            f"Consider:\n"
+            f" - Splitting your data into multiple repositories\n"
+            f" - Using fewer, larger files (e.g., parquet files)\n"
+            f" - See: https://huggingface.co/docs/hub/repositories-recommendations"
+        )
+
+    # Check 2: Files and subdirectories per folder
+    # Track immediate children (files and subdirs) for each folder
+    from collections import defaultdict
+
+    entries_per_folder: Dict[str, Any] = defaultdict(lambda: {"files": 0, "subdirs": set()})
+
+    for paths in paths_list:
+        path = Path(paths.path_in_repo)
+        parts = path.parts
+
+        # Count this file in its immediate parent directory
+        parent = str(path.parent) if str(path.parent) != "." else "."
+        entries_per_folder[parent]["files"] += 1
+
+        # Track immediate subdirectories for each parent folder
+        # Walk through the path components to track parent-child relationships
+        for i, child in enumerate(parts[:-1]):
+            parent = "." if i == 0 else "/".join(parts[:i])
+            entries_per_folder[parent]["subdirs"].add(child)
+
+    # Check limits for each folder
+    for folder, data in entries_per_folder.items():
+        file_count = data["files"]
+        subdir_count = len(data["subdirs"])
+        total_entries = file_count + subdir_count
+
+        if total_entries > MAX_FILES_PER_FOLDER:
+            folder_display = "root" if folder == "." else folder
+            logger.warning(
+                f"Folder '{folder_display}' contains {total_entries:,} entries "
+                f"({file_count:,} files and {subdir_count:,} subdirectories). "
+                f"This exceeds the recommended {MAX_FILES_PER_FOLDER:,} entries per folder.\n"
+                "Consider reorganising into sub-folders."
+            )
+
+    # Check 3: File sizes
+    large_files = []
+    very_large_files = []
+
+    for paths in paths_list:
+        size = paths.file_path.stat().st_size
+        size_gb = size / 1_000_000_000 # Use decimal GB as per Hub limits
+
+        if size_gb > MAX_FILE_SIZE_GB:
+            very_large_files.append((paths.path_in_repo, size_gb))
+        elif size_gb > RECOMMENDED_FILE_SIZE_GB:
+            large_files.append((paths.path_in_repo, size_gb))
+
+    # Warn about very large files (>50GB)
+    if very_large_files:
+        files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in very_large_files[:5])
+        more_str = f"\n ... and {len(very_large_files) - 5} more files" if len(very_large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(very_large_files)} files exceeding the {MAX_FILE_SIZE_GB}GB hard limit:\n"
+            f" - {files_str}{more_str}\n"
+            f"These files may fail to upload. Consider splitting them into smaller chunks."
+        )
+
+    # Warn about large files (>20GB)
+    if large_files:
+        files_str = "\n - ".join(f"{path}: {size:.1f}GB" for path, size in large_files[:5])
+        more_str = f"\n ... and {len(large_files) - 5} more files" if len(large_files) > 5 else ""
+        logger.warning(
+            f"Found {len(large_files)} files larger than {RECOMMENDED_FILE_SIZE_GB}GB (recommended limit):\n"
+            f" - {files_str}{more_str}\n"
+            f"Large files may slow down loading and processing."
+        )
+
+    logger.info("Validation checks complete.")
+

 def upload_large_folder_internal(
     api: "HfApi",
@@ -118,6 +220,11 @@ def upload_large_folder_internal(
     paths_list = [get_local_upload_paths(folder_path, relpath) for relpath in filtered_paths_list]
     logger.info(f"Found {len(paths_list)} candidate files to upload")

+    # Validate upload against repository limits
+    _validate_upload_limits(paths_list)
+
+    logger.info("Starting upload...")
+
     # Read metadata for each file
     items = [
         (paths, read_upload_metadata(folder_path, paths.path_in_repo))
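The per-folder counting in Check 2 tracks immediate files and immediate subdirectories separately; a standalone sketch with made-up paths shows what it accumulates:

# Standalone sketch of the per-folder counting logic above, with made-up paths.
from collections import defaultdict
from pathlib import Path

paths_in_repo = ["data/train/0.parquet", "data/train/1.parquet", "data/val/0.parquet", "README.md"]
entries = defaultdict(lambda: {"files": 0, "subdirs": set()})
for p in paths_in_repo:
    path = Path(p)
    parent = str(path.parent) if str(path.parent) != "." else "."
    entries[parent]["files"] += 1            # each file is counted in its immediate parent
    parts = path.parts
    for i, child in enumerate(parts[:-1]):   # each ancestor records its immediate subdirectory
        parent = "." if i == 0 else "/".join(parts[:i])
        entries[parent]["subdirs"].add(child)
# entries["data/train"]["files"] == 2; entries["data"]["subdirs"] == {"train", "val"};
# the root "." holds README.md plus the "data" subdirectory.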
huggingface_hub/cli/auth.py CHANGED

@@ -62,6 +62,9 @@ class AuthCommands(BaseHuggingfaceCLICommand):
         auth_parser = parser.add_parser("auth", help="Manage authentication (login, logout, etc.).")
         auth_subparsers = auth_parser.add_subparsers(help="Authentication subcommands")

+        # Show help if no subcommand is provided
+        auth_parser.set_defaults(func=lambda args: auth_parser.print_help())
+
         # Add 'login' as a subcommand of 'auth'
         login_parser = auth_subparsers.add_parser(
             "login", help="Log in using a token from huggingface.co/settings/tokens"
@@ -197,7 +200,7 @@ class AuthWhoami(BaseAuthCommand):
             exit()
         try:
             info = self._api.whoami(token)
-            print(info["name"])
+            print(ANSI.bold("user: "), info["name"])
             orgs = [org["name"] for org in info["orgs"]]
             if orgs:
                 print(ANSI.bold("orgs: "), ",".join(orgs))
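The set_defaults(func=...) trick makes a bare "hf auth" invocation print the auth-specific help; a self-contained argparse sketch of the pattern (command names are illustrative, not library code):

# Self-contained illustration of the "print help when no subcommand" pattern.
import argparse

parser = argparse.ArgumentParser(prog="hf")
subparsers = parser.add_subparsers()

auth = subparsers.add_parser("auth")
auth_sub = auth.add_subparsers()
auth.set_defaults(func=lambda args: auth.print_help())  # fallback when only "hf auth" is given
auth_sub.add_parser("whoami").set_defaults(func=lambda args: print("whoami called"))

args = parser.parse_args(["auth"])
args.func(args)  # prints the auth help instead of raising AttributeError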
huggingface_hub/cli/cache.py CHANGED

@@ -21,13 +21,7 @@ from functools import wraps
 from tempfile import mkstemp
 from typing import Any, Callable, Iterable, List, Literal, Optional, Union

-from ..utils import (
-    CachedRepoInfo,
-    CachedRevisionInfo,
-    CacheNotFound,
-    HFCacheInfo,
-    scan_cache_dir,
-)
+from ..utils import CachedRepoInfo, CachedRevisionInfo, CacheNotFound, HFCacheInfo, scan_cache_dir
 from . import BaseHuggingfaceCLICommand
 from ._cli_utils import ANSI, tabulate

@@ -52,7 +46,7 @@ def require_inquirer_py(fn: Callable) -> Callable:
         if not _inquirer_py_available:
             raise ImportError(
                 "The 'cache delete' command requires extra dependencies for the TUI.\n"
-                "Please run 'pip install huggingface_hub[cli]' to install them.\n"
+                "Please run 'pip install \"huggingface_hub[cli]\"' to install them.\n"
                 "Otherwise, disable TUI using the '--disable-tui' flag."
             )
         return fn(*args, **kwargs)
@@ -65,6 +59,10 @@ class CacheCommand(BaseHuggingfaceCLICommand):
     def register_subcommand(parser: _SubParsersAction):
         cache_parser = parser.add_parser("cache", help="Manage local cache directory.")
         cache_subparsers = cache_parser.add_subparsers(dest="cache_command", help="Cache subcommands")
+
+        # Show help if no subcommand is provided
+        cache_parser.set_defaults(func=lambda args: cache_parser.print_help())
+
         # Scan subcommand
         scan_parser = cache_subparsers.add_parser("scan", help="Scan cache directory.")
         scan_parser.add_argument(
@@ -145,7 +143,7 @@ class CacheCommand(BaseHuggingfaceCLICommand):
         if self.verbosity >= 3:
             print(ANSI.gray(message))
             for warning in hf_cache_info.warnings:
-                print(ANSI.gray(warning))
+                print(ANSI.gray(str(warning)))
         else:
             print(ANSI.gray(message + " Use -vvv to print details."))
huggingface_hub/cli/hf.py CHANGED

@@ -47,10 +47,6 @@ def main():
     # LFS commands (hidden in --help)
     LfsCommands.register_subcommand(commands_parser)

-    # Legacy commands
-
-    # Experimental
-
     # Let's go
     args = parser.parse_args()
     if not hasattr(args, "func"):
@@ -59,7 +55,8 @@ def main():

     # Run
     service = args.func(args)
-    service
+    if service is not None:
+        service.run()


 if __name__ == "__main__":