PyPI - huggingface-hub - Versions diffs - 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl - Mend

huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic. Click here for more details.

Files changed (132) hide show

huggingface_hub/__init__.py +33 -45
huggingface_hub/_commit_api.py +39 -43
huggingface_hub/_commit_scheduler.py +11 -8
huggingface_hub/_inference_endpoints.py +8 -8
huggingface_hub/_jobs_api.py +20 -20
huggingface_hub/_login.py +17 -43
huggingface_hub/_oauth.py +8 -8
huggingface_hub/_snapshot_download.py +135 -50
huggingface_hub/_space_api.py +4 -4
huggingface_hub/_tensorboard_logger.py +5 -5
huggingface_hub/_upload_large_folder.py +18 -32
huggingface_hub/_webhooks_payload.py +3 -3
huggingface_hub/_webhooks_server.py +2 -2
huggingface_hub/cli/__init__.py +0 -14
huggingface_hub/cli/_cli_utils.py +143 -39
huggingface_hub/cli/auth.py +105 -171
huggingface_hub/cli/cache.py +594 -361
huggingface_hub/cli/download.py +120 -112
huggingface_hub/cli/hf.py +38 -41
huggingface_hub/cli/jobs.py +689 -1017
huggingface_hub/cli/lfs.py +120 -143
huggingface_hub/cli/repo.py +282 -216
huggingface_hub/cli/repo_files.py +50 -84
huggingface_hub/cli/system.py +6 -25
huggingface_hub/cli/upload.py +198 -220
huggingface_hub/cli/upload_large_folder.py +91 -106
huggingface_hub/community.py +5 -5
huggingface_hub/constants.py +17 -52
huggingface_hub/dataclasses.py +135 -21
huggingface_hub/errors.py +47 -30
huggingface_hub/fastai_utils.py +8 -9
huggingface_hub/file_download.py +351 -303
huggingface_hub/hf_api.py +398 -570
huggingface_hub/hf_file_system.py +101 -66
huggingface_hub/hub_mixin.py +32 -54
huggingface_hub/inference/_client.py +177 -162
huggingface_hub/inference/_common.py +38 -54
huggingface_hub/inference/_generated/_async_client.py +218 -258
huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
huggingface_hub/inference/_generated/types/base.py +10 -7
huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
huggingface_hub/inference/_generated/types/summarization.py +2 -2
huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
huggingface_hub/inference/_generated/types/text_generation.py +10 -10
huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
huggingface_hub/inference/_generated/types/token_classification.py +2 -2
huggingface_hub/inference/_generated/types/translation.py +2 -2
huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
huggingface_hub/inference/_mcp/agent.py +3 -3
huggingface_hub/inference/_mcp/constants.py +1 -2
huggingface_hub/inference/_mcp/mcp_client.py +33 -22
huggingface_hub/inference/_mcp/types.py +10 -10
huggingface_hub/inference/_mcp/utils.py +4 -4
huggingface_hub/inference/_providers/__init__.py +12 -4
huggingface_hub/inference/_providers/_common.py +62 -24
huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
huggingface_hub/inference/_providers/cohere.py +3 -3
huggingface_hub/inference/_providers/fal_ai.py +25 -25
huggingface_hub/inference/_providers/featherless_ai.py +4 -4
huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
huggingface_hub/inference/_providers/hf_inference.py +13 -13
huggingface_hub/inference/_providers/hyperbolic.py +4 -4
huggingface_hub/inference/_providers/nebius.py +10 -10
huggingface_hub/inference/_providers/novita.py +5 -5
huggingface_hub/inference/_providers/nscale.py +4 -4
huggingface_hub/inference/_providers/replicate.py +15 -15
huggingface_hub/inference/_providers/sambanova.py +6 -6
huggingface_hub/inference/_providers/together.py +7 -7
huggingface_hub/lfs.py +21 -94
huggingface_hub/repocard.py +15 -16
huggingface_hub/repocard_data.py +57 -57
huggingface_hub/serialization/__init__.py +0 -1
huggingface_hub/serialization/_base.py +9 -9
huggingface_hub/serialization/_dduf.py +7 -7
huggingface_hub/serialization/_torch.py +28 -28
huggingface_hub/utils/__init__.py +11 -6
huggingface_hub/utils/_auth.py +5 -5
huggingface_hub/utils/_cache_manager.py +49 -74
huggingface_hub/utils/_deprecation.py +1 -1
huggingface_hub/utils/_dotenv.py +3 -3
huggingface_hub/utils/_fixes.py +0 -10
huggingface_hub/utils/_git_credential.py +3 -3
huggingface_hub/utils/_headers.py +7 -29
huggingface_hub/utils/_http.py +371 -208
huggingface_hub/utils/_pagination.py +4 -4
huggingface_hub/utils/_parsing.py +98 -0
huggingface_hub/utils/_paths.py +5 -5
huggingface_hub/utils/_runtime.py +59 -23
huggingface_hub/utils/_safetensors.py +21 -21
huggingface_hub/utils/_subprocess.py +9 -9
huggingface_hub/utils/_telemetry.py +3 -3
huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
huggingface_hub/utils/_typing.py +3 -3
huggingface_hub/utils/_validators.py +53 -72
huggingface_hub/utils/_xet.py +16 -16
huggingface_hub/utils/_xet_progress_reporting.py +1 -1
huggingface_hub/utils/insecure_hashlib.py +3 -9
huggingface_hub/utils/tqdm.py +3 -3
{huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
huggingface_hub-1.0.0.dist-info/RECORD +152 -0
{huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
huggingface_hub/commands/__init__.py +0 -27
huggingface_hub/commands/delete_cache.py +0 -476
huggingface_hub/commands/download.py +0 -204
huggingface_hub/commands/env.py +0 -39
huggingface_hub/commands/huggingface_cli.py +0 -65
huggingface_hub/commands/lfs.py +0 -200
huggingface_hub/commands/repo.py +0 -151
huggingface_hub/commands/repo_files.py +0 -132
huggingface_hub/commands/scan_cache.py +0 -183
huggingface_hub/commands/tag.py +0 -161
huggingface_hub/commands/upload.py +0 -318
huggingface_hub/commands/upload_large_folder.py +0 -131
huggingface_hub/commands/user.py +0 -208
huggingface_hub/commands/version.py +0 -40
huggingface_hub/inference_api.py +0 -217
huggingface_hub/keras_mixin.py +0 -497
huggingface_hub/repository.py +0 -1471
huggingface_hub/serialization/_tensorflow.py +0 -92
huggingface_hub/utils/_hf_folder.py +0 -68
huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
{huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
{huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
{huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0

huggingface_hub/cli/upload_large_folder.py CHANGED Viewed

@@ -15,118 +15,103 @@
 """Contains command to upload a large folder with the CLI."""
 import os
-from argparse import Namespace, _SubParsersAction
-from typing import List, Optional
+from typing import Annotated, Optional
+import typer
 from huggingface_hub import logging
-from huggingface_hub.commands import BaseHuggingfaceCLICommand
-from huggingface_hub.hf_api import HfApi
-from huggingface_hub.utils import disable_progress_bars
+from huggingface_hub.utils import ANSI, disable_progress_bars
-from ._cli_utils import ANSI
+from ._cli_utils import PrivateOpt, RepoIdArg, RepoType, RepoTypeOpt, RevisionOpt, TokenOpt, get_hf_api
 logger = logging.get_logger(__name__)
-class UploadLargeFolderCommand(BaseHuggingfaceCLICommand):
-    @staticmethod
-    def register_subcommand(parser: _SubParsersAction):
-        subparser = parser.add_parser(
-            "upload-large-folder",
-            help="Upload a large folder to the Hub. Recommended for resumable uploads.",
-        )
-        subparser.add_argument(
-            "repo_id", type=str, help="The ID of the repo to upload to (e.g. `username/repo-name`)."
-        )
-        subparser.add_argument("local_path", type=str, help="Local path to the file or folder to upload.")
-        subparser.add_argument(
-            "--repo-type",
-            choices=["model", "dataset", "space"],
-            help="Type of the repo to upload to (e.g. `dataset`).",
-        )
-        subparser.add_argument(
-            "--revision",
-            type=str,
-            help=("An optional Git revision to push to. It can be a branch name or a PR reference."),
-        )
-        subparser.add_argument(
-            "--private",
-            action="store_true",
-            help=(
-                "Whether to create a private repo if repo doesn't exist on the Hub. Ignored if the repo already exists."
-            ),
-        )
-        subparser.add_argument("--include", nargs="*", type=str, help="Glob patterns to match files to upload.")
-        subparser.add_argument("--exclude", nargs="*", type=str, help="Glob patterns to exclude from files to upload.")
-        subparser.add_argument(
-            "--token", type=str, help="A User Access Token generated from https://huggingface.co/settings/tokens"
-        )
-        subparser.add_argument(
-            "--num-workers", type=int, help="Number of workers to use to hash, upload and commit files."
-        )
-        subparser.add_argument("--no-report", action="store_true", help="Whether to disable regular status report.")
-        subparser.add_argument("--no-bars", action="store_true", help="Whether to disable progress bars.")
-        subparser.set_defaults(func=UploadLargeFolderCommand)
-    def __init__(self, args: Namespace) -> None:
-        self.repo_id: str = args.repo_id
-        self.local_path: str = args.local_path
-        self.repo_type: str = args.repo_type
-        self.revision: Optional[str] = args.revision
-        self.private: bool = args.private
-        self.include: Optional[List[str]] = args.include
-        self.exclude: Optional[List[str]] = args.exclude
-        self.api: HfApi = HfApi(token=args.token, library_name="huggingface-cli")
-        self.num_workers: Optional[int] = args.num_workers
-        self.no_report: bool = args.no_report
-        self.no_bars: bool = args.no_bars
-        if not os.path.isdir(self.local_path):
-            raise ValueError("Large upload is only supported for folders.")
-    def run(self) -> None:
-        logging.set_verbosity_info()
-        print(
-            ANSI.yellow(
-                "You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
-                "This is a new feature so feedback is very welcome!\n"
-                "\n"
-                "A few things to keep in mind:\n"
-                "  - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
-                "  - Do not start several processes in parallel.\n"
-                "  - You can interrupt and resume the process at any time. "
-                "The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
-                "  - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
-                "\n"
-                f"Some temporary metadata will be stored under `{self.local_path}/.cache/huggingface`.\n"
-                "  - You must not modify those files manually.\n"
-                "  - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
-                "  - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
-                "\n"
-                "If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
-                "You can also entirely disable the status report with `--no-report`.\n"
-                "\n"
-                "For more details, run `hf upload-large-folder --help` or check the documentation at "
-                "https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
-            )
-        )
-        if self.no_bars:
-            disable_progress_bars()
-        self.api.upload_large_folder(
-            repo_id=self.repo_id,
-            folder_path=self.local_path,
-            repo_type=self.repo_type,
-            revision=self.revision,
-            private=self.private,
-            allow_patterns=self.include,
-            ignore_patterns=self.exclude,
-            num_workers=self.num_workers,
-            print_report=not self.no_report,
+def upload_large_folder(
+    repo_id: RepoIdArg,
+    local_path: Annotated[
+        str,
+        typer.Argument(
+            help="Local path to the folder to upload.",
+        ),
+    ],
+    repo_type: RepoTypeOpt = RepoType.model,
+    revision: RevisionOpt = None,
+    private: PrivateOpt = False,
+    include: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to match files to upload.",
+        ),
+    ] = None,
+    exclude: Annotated[
+        Optional[list[str]],
+        typer.Option(
+            help="Glob patterns to exclude from files to upload.",
+        ),
+    ] = None,
+    token: TokenOpt = None,
+    num_workers: Annotated[
+        Optional[int],
+        typer.Option(
+            help="Number of workers to use to hash, upload and commit files.",
+        ),
+    ] = None,
+    no_report: Annotated[
+        bool,
+        typer.Option(
+            help="Whether to disable regular status report.",
+        ),
+    ] = False,
+    no_bars: Annotated[
+        bool,
+        typer.Option(
+            help="Whether to disable progress bars.",
+        ),
+    ] = False,
+) -> None:
+    """Upload a large folder to the Hub. Recommended for resumable uploads."""
+    if not os.path.isdir(local_path):
+        raise typer.BadParameter("Large upload is only supported for folders.", param_hint="local_path")
+    print(
+        ANSI.yellow(
+            "You are about to upload a large folder to the Hub using `hf upload-large-folder`. "
+            "This is a new feature so feedback is very welcome!\n"
+            "\n"
+            "A few things to keep in mind:\n"
+            "  - Repository limits still apply: https://huggingface.co/docs/hub/repositories-recommendations\n"
+            "  - Do not start several processes in parallel.\n"
+            "  - You can interrupt and resume the process at any time. "
+            "The script will pick up where it left off except for partially uploaded files that would have to be entirely reuploaded.\n"
+            "  - Do not upload the same folder to several repositories. If you need to do so, you must delete the `./.cache/huggingface/` folder first.\n"
+            "\n"
+            f"Some temporary metadata will be stored under `{local_path}/.cache/huggingface`.\n"
+            "  - You must not modify those files manually.\n"
+            "  - You must not delete the `./.cache/huggingface/` folder while a process is running.\n"
+            "  - You can delete the `./.cache/huggingface/` folder to reinitialize the upload state when process is not running. Files will have to be hashed and preuploaded again, except for already committed files.\n"
+            "\n"
+            "If the process output is too verbose, you can disable the progress bars with `--no-bars`. "
+            "You can also entirely disable the status report with `--no-report`.\n"
+            "\n"
+            "For more details, run `hf upload-large-folder --help` or check the documentation at "
+            "https://huggingface.co/docs/huggingface_hub/guides/upload#upload-a-large-folder."
         )
+    )
+    if no_bars:
+        disable_progress_bars()
+    api = get_hf_api(token=token)
+    api.upload_large_folder(
+        repo_id=repo_id,
+        folder_path=local_path,
+        repo_type=repo_type.value,
+        revision=revision,
+        private=private,
+        allow_patterns=include,
+        ignore_patterns=exclude,
+        num_workers=num_workers,
+        print_report=not no_report,
+    )

huggingface_hub/community.py CHANGED Viewed

@@ -7,7 +7,7 @@ for more information on Pull Requests, Discussions, and the community tab.
 from dataclasses import dataclass
 from datetime import datetime
-from typing import List, Literal, Optional, TypedDict, Union
+from typing import Literal, Optional, TypedDict, Union
 from . import constants
 from .utils import parse_datetime
@@ -116,7 +116,7 @@ class DiscussionWithDetails(Discussion):
             The `datetime` of creation of the Discussion / Pull Request.
         events (`list` of [`DiscussionEvent`])
             The list of [`DiscussionEvents`] in this Discussion or Pull Request.
-        conflicting_files (`Union[List[str], bool, None]`, *optional*):
+        conflicting_files (`Union[list[str], bool, None]`, *optional*):
             A list of conflicting files if this is a Pull Request.
             `None` if `self.is_pull_request` is `False`.
             `True` if there are conflicting files but the list can't be retrieved.
@@ -136,8 +136,8 @@ class DiscussionWithDetails(Discussion):
             (property) URL of the discussion on the Hub.
     """
-    events: List["DiscussionEvent"]
-    conflicting_files: Union[List[str], bool, None]
+    events: list["DiscussionEvent"]
+    conflicting_files: Union[list[str], bool, None]
     target_branch: Optional[str]
     merge_commit_oid: Optional[str]
     diff: Optional[str]
@@ -230,7 +230,7 @@ class DiscussionComment(DiscussionEvent):
         return self._event["data"]["latest"].get("author", {}).get("name", "deleted")
     @property
-    def edit_history(self) -> List[dict]:
+    def edit_history(self) -> list[dict]:
         """The edit history of the comment"""
         return self._event["data"]["history"]

huggingface_hub/constants.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 import re
 import typing
-from typing import Literal, Optional, Tuple
+from typing import Literal, Optional
 # Possible values for env variables
@@ -35,7 +35,6 @@ DEFAULT_ETAG_TIMEOUT = 10
 DEFAULT_DOWNLOAD_TIMEOUT = 10
 DEFAULT_REQUEST_TIMEOUT = 10
 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
-HF_TRANSFER_CONCURRENCY = 100
 MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000  # 50 GB
 # Constants for serialization
@@ -118,9 +117,9 @@ REPO_TYPES_MAPPING = {
 }
 DiscussionTypeFilter = Literal["all", "discussion", "pull_request"]
-DISCUSSION_TYPES: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
+DISCUSSION_TYPES: tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionTypeFilter)
 DiscussionStatusFilter = Literal["all", "open", "closed"]
-DISCUSSION_STATUS: Tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
+DISCUSSION_STATUS: tuple[DiscussionTypeFilter, ...] = typing.get_args(DiscussionStatusFilter)
 # Webhook subscription types
 WEBHOOK_DOMAIN_T = Literal["repo", "discussions"]
@@ -135,7 +134,6 @@ HF_HOME = os.path.expandvars(
         )
     )
 )
-hf_cache_home = HF_HOME  # for backward compatibility. TODO: remove this in 1.0.0
 default_cache_path = os.path.join(HF_HOME, "hub")
 default_assets_cache_path = os.path.join(HF_HOME, "assets")
@@ -164,6 +162,10 @@ HF_ASSETS_CACHE = os.path.expandvars(
 HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
+# File created to mark that the version check has been done.
+# Check is performed once per 24 hours at most.
+CHECK_FOR_UPDATE_DONE_PATH = os.path.join(HF_HOME, ".check_for_update_done")
 # If set, log level will be set to DEBUG and all requests made to the Hub will be logged
 # as curl commands for reproducibility.
 HF_DEBUG = _is_true(os.environ.get("HF_DEBUG"))
@@ -212,18 +214,18 @@ HF_HUB_DISABLE_EXPERIMENTAL_WARNING: bool = _is_true(os.environ.get("HF_HUB_DISA
 # Disable sending the cached token by default is all HTTP requests to the Hub
 HF_HUB_DISABLE_IMPLICIT_TOKEN: bool = _is_true(os.environ.get("HF_HUB_DISABLE_IMPLICIT_TOKEN"))
-# Enable fast-download using external dependency "hf_transfer"
-# See:
-# - https://pypi.org/project/hf-transfer/
-# - https://github.com/huggingface/hf_transfer (private)
-HF_HUB_ENABLE_HF_TRANSFER: bool = _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))
+HF_XET_HIGH_PERFORMANCE: bool = _is_true(os.environ.get("HF_XET_HIGH_PERFORMANCE"))
+# hf_transfer is not used anymore. Let's warn user is case they set the env variable
+if _is_true(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER")) and not HF_XET_HIGH_PERFORMANCE:
+    import warnings
-# UNUSED
-# We don't use symlinks in local dir anymore.
-HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
-    _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
-)
+    warnings.warn(
+        "The `HF_HUB_ENABLE_HF_TRANSFER` environment variable is deprecated as 'hf_transfer' is not used anymore. "
+        "Please use `HF_XET_HIGH_PERFORMANCE` instead to enable high performance transfer with Xet. "
+        "Visit https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfxethighperformance for more details.",
+        DeprecationWarning,
+    )
 # Used to override the etag timeout on a system level
 HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
@@ -234,43 +236,6 @@ HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")
 # Allows to add information about the requester in the user-agent (eg. partner name)
 HF_HUB_USER_AGENT_ORIGIN: Optional[str] = os.environ.get("HF_HUB_USER_AGENT_ORIGIN")
-# List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
-# deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
-# default. We still keep the full list of supported frameworks in case we want to scan all of them.
-MAIN_INFERENCE_API_FRAMEWORKS = [
-    "diffusers",
-    "sentence-transformers",
-    "text-generation-inference",
-    "transformers",
-]
-ALL_INFERENCE_API_FRAMEWORKS = MAIN_INFERENCE_API_FRAMEWORKS + [
-    "adapter-transformers",
-    "allennlp",
-    "asteroid",
-    "bertopic",
-    "doctr",
-    "espnet",
-    "fairseq",
-    "fastai",
-    "fasttext",
-    "flair",
-    "k2",
-    "keras",
-    "mindspore",
-    "nemo",
-    "open_clip",
-    "paddlenlp",
-    "peft",
-    "pyannote-audio",
-    "sklearn",
-    "spacy",
-    "span-marker",
-    "speechbrain",
-    "stanza",
-    "timm",
-]
 # If OAuth didn't work after 2 redirects, there's likely a third-party cookie issue in the Space iframe view.
 # In this case, we redirect the user to the non-iframe view.
 OAUTH_MAX_REDIRECTS = 2

huggingface_hub/dataclasses.py CHANGED Viewed

@@ -1,15 +1,13 @@
 import inspect
-from dataclasses import _MISSING_TYPE, MISSING, Field, field, fields
-from functools import wraps
+from dataclasses import _MISSING_TYPE, MISSING, Field, field, fields, make_dataclass
+from functools import lru_cache, wraps
 from typing import (
+    Annotated,
     Any,
     Callable,
-    Dict,
     ForwardRef,
-    List,
     Literal,
     Optional,
-    Tuple,
     Type,
     TypeVar,
     Union,
@@ -18,6 +16,19 @@ from typing import (
     overload,
 )
+try:
+    # Python 3.11+
+    from typing import NotRequired, Required  # type: ignore
+except ImportError:
+    try:
+        # In case typing_extensions is installed
+        from typing_extensions import NotRequired, Required  # type: ignore
+    except ImportError:
+        # Fallback: create dummy types that will never match
+        Required = type("Required", (), {})  # type: ignore
+        NotRequired = type("NotRequired", (), {})  # type: ignore
 from .errors import (
     StrictDataclassClassValidationError,
     StrictDataclassDefinitionError,
@@ -27,6 +38,9 @@ from .errors import (
 Validator_T = Callable[[Any], None]
 T = TypeVar("T")
+TypedDictType = TypeVar("TypedDictType", bound=dict[str, Any])
+_TYPED_DICT_DEFAULT_VALUE = object()  # used as default value in TypedDict fields (to distinguish from None)
 # The overload decorator helps type checkers understand the different return types
@@ -103,7 +117,7 @@ def strict(
             )
         # List and store validators
-        field_validators: Dict[str, List[Validator_T]] = {}
+        field_validators: dict[str, list[Validator_T]] = {}
         for f in fields(cls):  # type: ignore [arg-type]
             validators = []
             validators.append(_create_type_validator(f))
@@ -238,15 +252,101 @@ def strict(
     return wrap(cls) if cls is not None else wrap
+def validate_typed_dict(schema: type[TypedDictType], data: dict) -> None:
+    """
+    Validate that a dictionary conforms to the types defined in a TypedDict class.
+    Under the hood, the typed dict is converted to a strict dataclass and validated using the `@strict` decorator.
+    Args:
+        schema (`type[TypedDictType]`):
+            The TypedDict class defining the expected structure and types.
+        data (`dict`):
+            The dictionary to validate.
+    Raises:
+        `StrictDataclassFieldValidationError`:
+            If any field in the dictionary does not conform to the expected type.
+    Example:
+    ```py
+    >>> from typing import Annotated, TypedDict
+    >>> from huggingface_hub.dataclasses import validate_typed_dict
+    >>> def positive_int(value: int):
+    ...     if not value >= 0:
+    ...         raise ValueError(f"Value must be positive, got {value}")
+    >>> class User(TypedDict):
+    ...     name: str
+    ...     age: Annotated[int, positive_int]
+    >>> # Valid data
+    >>> validate_typed_dict(User, {"name": "John", "age": 30})
+    >>> # Invalid type for age
+    >>> validate_typed_dict(User, {"name": "John", "age": "30"})
+    huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age':
+        TypeError: Field 'age' expected int, got str (value: '30')
+    >>> # Invalid value for age
+    >>> validate_typed_dict(User, {"name": "John", "age": -1})
+    huggingface_hub.errors.StrictDataclassFieldValidationError: Validation error for field 'age':
+        ValueError: Value must be positive, got -1
+    ```
+    """
+    # Convert typed dict to dataclass
+    strict_cls = _build_strict_cls_from_typed_dict(schema)
+    # Validate the data by instantiating the strict dataclass
+    strict_cls(**data)  # will raise if validation fails
+@lru_cache
+def _build_strict_cls_from_typed_dict(schema: type[TypedDictType]) -> Type:
+    # Extract type hints from the TypedDict class
+    type_hints = {
+        # We do not use `get_type_hints` here to avoid evaluating ForwardRefs (which might fail).
+        # ForwardRefs are not validated by @strict anyway.
+        name: value if value is not None else type(None)
+        for name, value in schema.__dict__.get("__annotations__", {}).items()
+    }
+    # If the TypedDict is not total, wrap fields as NotRequired (unless explicitly Required or NotRequired)
+    if not getattr(schema, "__total__", True):
+        for key, value in type_hints.items():
+            origin = get_origin(value)
+            if origin is Annotated:
+                base, *meta = get_args(value)
+                if not _is_required_or_notrequired(base):
+                    base = NotRequired[base]
+                type_hints[key] = Annotated[tuple([base] + list(meta))]
+            elif not _is_required_or_notrequired(value):
+                type_hints[key] = NotRequired[value]
+    # Convert type hints to dataclass fields
+    fields = []
+    for key, value in type_hints.items():
+        if get_origin(value) is Annotated:
+            base, *meta = get_args(value)
+            fields.append((key, base, field(default=_TYPED_DICT_DEFAULT_VALUE, metadata={"validator": meta[0]})))
+        else:
+            fields.append((key, value, field(default=_TYPED_DICT_DEFAULT_VALUE)))
+    # Create a strict dataclass from the TypedDict fields
+    return strict(make_dataclass(schema.__name__, fields))
 def validated_field(
-    validator: Union[List[Validator_T], Validator_T],
+    validator: Union[list[Validator_T], Validator_T],
     default: Union[Any, _MISSING_TYPE] = MISSING,
     default_factory: Union[Callable[[], Any], _MISSING_TYPE] = MISSING,
     init: bool = True,
     repr: bool = True,
     hash: Optional[bool] = None,
     compare: bool = True,
-    metadata: Optional[Dict] = None,
+    metadata: Optional[dict] = None,
     **kwargs: Any,
 ) -> Any:
     """
@@ -255,7 +355,7 @@ def validated_field(
     Useful to apply several checks to a field. If only applying one rule, check out the [`as_validated_field`] decorator.
     Args:
-        validator (`Callable` or `List[Callable]`):
+        validator (`Callable` or `list[Callable]`):
             A method that takes a value as input and raises ValueError/TypeError if the value is invalid.
             Can be a list of validators to apply multiple checks.
         **kwargs:
@@ -297,7 +397,7 @@ def as_validated_field(validator: Validator_T):
         repr: bool = True,
         hash: Optional[bool] = None,
         compare: bool = True,
-        metadata: Optional[Dict] = None,
+        metadata: Optional[dict] = None,
         **kwargs: Any,
     ):
         return validated_field(
@@ -328,11 +428,19 @@ def type_validator(name: str, value: Any, expected_type: Any) -> None:
         _validate_simple_type(name, value, expected_type)
     elif isinstance(expected_type, ForwardRef) or isinstance(expected_type, str):
         return
+    elif origin is Required:
+        if value is _TYPED_DICT_DEFAULT_VALUE:
+            raise TypeError(f"Field '{name}' is required but missing.")
+        type_validator(name, value, args[0])
+    elif origin is NotRequired:
+        if value is _TYPED_DICT_DEFAULT_VALUE:
+            return
+        type_validator(name, value, args[0])
     else:
         raise TypeError(f"Unsupported type for field '{name}': {expected_type}")
-def _validate_union(name: str, value: Any, args: Tuple[Any, ...]) -> None:
+def _validate_union(name: str, value: Any, args: tuple[Any, ...]) -> None:
     """Validate that value matches one of the types in a Union."""
     errors = []
     for t in args:
@@ -347,14 +455,14 @@ def _validate_union(name: str, value: Any, args: Tuple[Any, ...]) -> None:
     )
-def _validate_literal(name: str, value: Any, args: Tuple[Any, ...]) -> None:
+def _validate_literal(name: str, value: Any, args: tuple[Any, ...]) -> None:
     """Validate Literal type."""
     if value not in args:
         raise TypeError(f"Field '{name}' expected one of {args}, got {value}")
-def _validate_list(name: str, value: Any, args: Tuple[Any, ...]) -> None:
-    """Validate List[T] type."""
+def _validate_list(name: str, value: Any, args: tuple[Any, ...]) -> None:
+    """Validate list[T] type."""
     if not isinstance(value, list):
         raise TypeError(f"Field '{name}' expected a list, got {type(value).__name__}")
@@ -367,8 +475,8 @@ def _validate_list(name: str, value: Any, args: Tuple[Any, ...]) -> None:
             raise TypeError(f"Invalid item at index {i} in list '{name}'") from e
-def _validate_dict(name: str, value: Any, args: Tuple[Any, ...]) -> None:
-    """Validate Dict[K, V] type."""
+def _validate_dict(name: str, value: Any, args: tuple[Any, ...]) -> None:
+    """Validate dict[K, V] type."""
     if not isinstance(value, dict):
         raise TypeError(f"Field '{name}' expected a dict, got {type(value).__name__}")
@@ -382,19 +490,19 @@ def _validate_dict(name: str, value: Any, args: Tuple[Any, ...]) -> None:
             raise TypeError(f"Invalid key or value in dict '{name}'") from e
-def _validate_tuple(name: str, value: Any, args: Tuple[Any, ...]) -> None:
+def _validate_tuple(name: str, value: Any, args: tuple[Any, ...]) -> None:
     """Validate Tuple type."""
     if not isinstance(value, tuple):
         raise TypeError(f"Field '{name}' expected a tuple, got {type(value).__name__}")
-    # Handle variable-length tuples: Tuple[T, ...]
+    # Handle variable-length tuples: tuple[T, ...]
     if len(args) == 2 and args[1] is Ellipsis:
         for i, item in enumerate(value):
             try:
                 type_validator(f"{name}[{i}]", item, args[0])
             except TypeError as e:
                 raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e
-    # Handle fixed-length tuples: Tuple[T1, T2, ...]
+    # Handle fixed-length tuples: tuple[T1, T2, ...]
     elif len(args) != len(value):
         raise TypeError(f"Field '{name}' expected a tuple of length {len(args)}, got {len(value)}")
     else:
@@ -405,8 +513,8 @@ def _validate_tuple(name: str, value: Any, args: Tuple[Any, ...]) -> None:
                 raise TypeError(f"Invalid item at index {i} in tuple '{name}'") from e
-def _validate_set(name: str, value: Any, args: Tuple[Any, ...]) -> None:
-    """Validate Set[T] type."""
+def _validate_set(name: str, value: Any, args: tuple[Any, ...]) -> None:
+    """Validate set[T] type."""
     if not isinstance(value, set):
         raise TypeError(f"Field '{name}' expected a set, got {type(value).__name__}")
@@ -464,6 +572,11 @@ def _is_validator(validator: Any) -> bool:
     return True
+def _is_required_or_notrequired(type_hint: Any) -> bool:
+    """Helper to check if a type is Required/NotRequired."""
+    return type_hint in (Required, NotRequired) or (get_origin(type_hint) in (Required, NotRequired))
 _BASIC_TYPE_VALIDATORS = {
     Union: _validate_union,
     Literal: _validate_literal,
@@ -476,6 +589,7 @@ _BASIC_TYPE_VALIDATORS = {
 __all__ = [
     "strict",
+    "validate_typed_dict",
     "validated_field",
     "Validator_T",
     "StrictDataclassClassValidationError",

huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

Potentially problematic release.

huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl