PyPI - huggingface-hub - Versions diffs - 0.21.4__py3-none-any.whl → 0.22.0rc0__py3-none-any.whl - Mend

huggingface-hub 0.21.4__py3-none-any.whl → 0.22.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic. Click here for more details.

Files changed (96) hide show

huggingface_hub/__init__.py +217 -1
huggingface_hub/_commit_api.py +14 -15
huggingface_hub/_inference_endpoints.py +12 -11
huggingface_hub/_login.py +1 -0
huggingface_hub/_multi_commits.py +1 -0
huggingface_hub/_snapshot_download.py +9 -1
huggingface_hub/_tensorboard_logger.py +1 -0
huggingface_hub/_webhooks_payload.py +1 -0
huggingface_hub/_webhooks_server.py +1 -0
huggingface_hub/commands/_cli_utils.py +1 -0
huggingface_hub/commands/delete_cache.py +1 -0
huggingface_hub/commands/download.py +1 -0
huggingface_hub/commands/env.py +1 -0
huggingface_hub/commands/scan_cache.py +1 -0
huggingface_hub/commands/upload.py +1 -0
huggingface_hub/community.py +1 -0
huggingface_hub/constants.py +3 -1
huggingface_hub/errors.py +38 -0
huggingface_hub/file_download.py +24 -24
huggingface_hub/hf_api.py +47 -35
huggingface_hub/hub_mixin.py +210 -54
huggingface_hub/inference/_client.py +554 -239
huggingface_hub/inference/_common.py +195 -41
huggingface_hub/inference/_generated/_async_client.py +558 -239
huggingface_hub/inference/_generated/types/__init__.py +115 -0
huggingface_hub/inference/_generated/types/audio_classification.py +43 -0
huggingface_hub/inference/_generated/types/audio_to_audio.py +31 -0
huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +116 -0
huggingface_hub/inference/_generated/types/base.py +149 -0
huggingface_hub/inference/_generated/types/chat_completion.py +106 -0
huggingface_hub/inference/_generated/types/depth_estimation.py +29 -0
huggingface_hub/inference/_generated/types/document_question_answering.py +85 -0
huggingface_hub/inference/_generated/types/feature_extraction.py +19 -0
huggingface_hub/inference/_generated/types/fill_mask.py +50 -0
huggingface_hub/inference/_generated/types/image_classification.py +43 -0
huggingface_hub/inference/_generated/types/image_segmentation.py +52 -0
huggingface_hub/inference/_generated/types/image_to_image.py +55 -0
huggingface_hub/inference/_generated/types/image_to_text.py +105 -0
huggingface_hub/inference/_generated/types/object_detection.py +55 -0
huggingface_hub/inference/_generated/types/question_answering.py +77 -0
huggingface_hub/inference/_generated/types/sentence_similarity.py +28 -0
huggingface_hub/inference/_generated/types/summarization.py +46 -0
huggingface_hub/inference/_generated/types/table_question_answering.py +45 -0
huggingface_hub/inference/_generated/types/text2text_generation.py +45 -0
huggingface_hub/inference/_generated/types/text_classification.py +43 -0
huggingface_hub/inference/_generated/types/text_generation.py +161 -0
huggingface_hub/inference/_generated/types/text_to_audio.py +105 -0
huggingface_hub/inference/_generated/types/text_to_image.py +57 -0
huggingface_hub/inference/_generated/types/token_classification.py +53 -0
huggingface_hub/inference/_generated/types/translation.py +46 -0
huggingface_hub/inference/_generated/types/video_classification.py +47 -0
huggingface_hub/inference/_generated/types/visual_question_answering.py +53 -0
huggingface_hub/inference/_generated/types/zero_shot_classification.py +56 -0
huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +51 -0
huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +55 -0
huggingface_hub/inference/_templating.py +105 -0
huggingface_hub/inference/_types.py +4 -152
huggingface_hub/keras_mixin.py +39 -17
huggingface_hub/lfs.py +20 -8
huggingface_hub/repocard.py +11 -3
huggingface_hub/repocard_data.py +12 -2
huggingface_hub/serialization/__init__.py +1 -0
huggingface_hub/serialization/_base.py +1 -0
huggingface_hub/serialization/_numpy.py +1 -0
huggingface_hub/serialization/_tensorflow.py +1 -0
huggingface_hub/serialization/_torch.py +1 -0
huggingface_hub/utils/__init__.py +4 -1
huggingface_hub/utils/_cache_manager.py +7 -0
huggingface_hub/utils/_chunk_utils.py +1 -0
huggingface_hub/utils/_datetime.py +1 -0
huggingface_hub/utils/_errors.py +10 -1
huggingface_hub/utils/_experimental.py +1 -0
huggingface_hub/utils/_fixes.py +19 -3
huggingface_hub/utils/_git_credential.py +1 -0
huggingface_hub/utils/_headers.py +10 -3
huggingface_hub/utils/_hf_folder.py +1 -0
huggingface_hub/utils/_http.py +1 -0
huggingface_hub/utils/_pagination.py +1 -0
huggingface_hub/utils/_paths.py +1 -0
huggingface_hub/utils/_runtime.py +22 -0
huggingface_hub/utils/_subprocess.py +1 -0
huggingface_hub/utils/_token.py +1 -0
huggingface_hub/utils/_typing.py +29 -1
huggingface_hub/utils/_validators.py +1 -0
huggingface_hub/utils/endpoint_helpers.py +1 -0
huggingface_hub/utils/logging.py +1 -1
huggingface_hub/utils/sha.py +1 -0
huggingface_hub/utils/tqdm.py +1 -0
{huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/METADATA +14 -15
huggingface_hub-0.22.0rc0.dist-info/RECORD +113 -0
{huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/WHEEL +1 -1
huggingface_hub/inference/_text_generation.py +0 -551
huggingface_hub-0.21.4.dist-info/RECORD +0 -81
{huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/LICENSE +0 -0
{huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/entry_points.txt +0 -0
{huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/top_level.txt +0 -0

huggingface_hub/hub_mixin.py CHANGED Viewed

@@ -1,17 +1,19 @@
 import inspect
 import json
 import os
-from dataclasses import asdict, is_dataclass
+from dataclasses import asdict, dataclass, is_dataclass
 from pathlib import Path
-from typing import TYPE_CHECKING, Dict, List, Optional, Type, TypeVar, Union, get_args
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, TypeVar, Union, get_args
 from .constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME, SAFETENSORS_SINGLE_FILE
 from .file_download import hf_hub_download
 from .hf_api import HfApi
+from .repocard import ModelCard, ModelCardData
 from .utils import (
     EntryNotFoundError,
     HfHubHTTPError,
     SoftTemporaryDirectory,
+    is_jsonable,
     is_safetensors_available,
     is_torch_available,
     logging,
@@ -36,6 +38,26 @@ logger = logging.get_logger(__name__)
 # Generic variable that is either ModelHubMixin or a subclass thereof
 T = TypeVar("T", bound="ModelHubMixin")
+DEFAULT_MODEL_CARD = """
+---
+# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
+# Doc / guide: https://huggingface.co/docs/hub/model-cards
+{{ card_data }}
+---
+This model has been pushed to the Hub using **{{ library_name }}**:
+- Repo: {{ repo_url | default("[More Information Needed]", true) }}
+- Docs: {{ docs_url | default("[More Information Needed]", true) }}
+"""
+@dataclass
+class MixinInfo:
+    library_name: Optional[str] = None
+    tags: Optional[List[str]] = None
+    repo_url: Optional[str] = None
+    docs_url: Optional[str] = None
 class ModelHubMixin:
     """
@@ -45,21 +67,35 @@ class ModelHubMixin:
     have to be overwritten in  [`_from_pretrained`] and [`_save_pretrained`]. [`PyTorchModelHubMixin`] is a good example
     of mixin integration with the Hub. Check out our [integration guide](../guides/integrations) for more instructions.
+    When inheriting from [`ModelHubMixin`], you can define class-level attributes. These attributes are not passed to
+    `__init__` but to the class definition itself. This is useful to define metadata about the library integrating
+    [`ModelHubMixin`].
+    Args:
+        library_name (`str`, *optional*):
+            Name of the library integrating ModelHubMixin. Used to generate model card.
+        tags (`List[str]`, *optional*):
+            Tags to be added to the model card. Used to generate model card.
+        repo_url (`str`, *optional*):
+            URL of the library repository. Used to generate model card.
+        docs_url (`str`, *optional*):
+            URL of the library documentation. Used to generate model card.
     Example:
     ```python
-    >>> from dataclasses import dataclass
     >>> from huggingface_hub import ModelHubMixin
-    # Define your model configuration (optional)
-    >>> @dataclass
-    ... class Config:
-    ...     foo: int = 512
-    ...     bar: str = "cpu"
-    # Inherit from ModelHubMixin (and optionally from your framework's model class)
-    >>> class MyCustomModel(ModelHubMixin):
-    ...     def __init__(self, config: Config):
+    # Inherit from ModelHubMixin
+    >>> class MyCustomModel(
+    ...         ModelHubMixin,
+    ...         library_name="my-library",
+    ...         tags=["x-custom-tag"],
+    ...         repo_url="https://github.com/huggingface/my-cool-library",
+    ...         docs_url="https://huggingface.co/docs/my-cool-library",
+    ...         # ^ optional metadata to generate model card
+    ...     ):
+    ...     def __init__(self, size: int = 512, device: str = "cpu"):
     ...         # define how to initialize your model
     ...         super().__init__()
     ...         ...
@@ -85,7 +121,7 @@ class ModelHubMixin:
     ...         # define how to deserialize your model
     ...         ...
-    >>> model = MyCustomModel(config=Config(foo=256, bar="gpu"))
+    >>> model = MyCustomModel(size=256, device="gpu")
     # Save model weights to local directory
     >>> model.save_pretrained("my-awesome-model")
@@ -95,28 +131,107 @@ class ModelHubMixin:
     # Download and initialize weights from the Hub
     >>> reloaded_model = MyCustomModel.from_pretrained("username/my-awesome-model")
-    >>> reloaded_model.config
-    Config(foo=256, bar="gpu")
+    >>> reloaded_model._hub_mixin_config
+    {"size": 256, "device": "gpu"}
+    # Model card has been correctly populated
+    >>> from huggingface_hub import ModelCard
+    >>> card = ModelCard.load("username/my-awesome-model")
+    >>> card.data.tags
+    ["x-custom-tag", "pytorch_model_hub_mixin", "model_hub_mixin"]
+    >>> card.data.library_name
+    "my-library"
     ```
     """
-    config: Optional[Union[dict, "DataclassInstance"]] = None
-    # ^ optional config attribute automatically set in `from_pretrained` (if not already set by the subclass)
+    _hub_mixin_config: Optional[Union[dict, "DataclassInstance"]] = None
+    # ^ optional config attribute automatically set in `from_pretrained`
+    _hub_mixin_info: MixinInfo
+    # ^ information about the library integrating ModelHubMixin (used to generate model card)
+    _hub_mixin_init_parameters: Dict[str, inspect.Parameter]
+    _hub_mixin_jsonable_default_values: Dict[str, Any]
+    _hub_mixin_inject_config: bool
+    # ^ internal values to handle config
+    def __init_subclass__(
+        cls,
+        *,
+        library_name: Optional[str] = None,
+        tags: Optional[List[str]] = None,
+        repo_url: Optional[str] = None,
+        docs_url: Optional[str] = None,
+    ) -> None:
+        """Inspect __init__ signature only once when subclassing + handle modelcard."""
+        super().__init_subclass__()
+        # Will be reused when creating modelcard
+        tags = tags or []
+        tags.append("model_hub_mixin")
+        cls._hub_mixin_info = MixinInfo(
+            library_name=library_name,
+            tags=tags,
+            repo_url=repo_url,
+            docs_url=docs_url,
+        )
+        # Inspect __init__ signature to handle config
+        cls._hub_mixin_init_parameters = dict(inspect.signature(cls.__init__).parameters)
+        cls._hub_mixin_jsonable_default_values = {
+            param.name: param.default
+            for param in cls._hub_mixin_init_parameters.values()
+            if param.default is not inspect.Parameter.empty and is_jsonable(param.default)
+        }
+        cls._hub_mixin_inject_config = "config" in inspect.signature(cls._from_pretrained).parameters
     def __new__(cls, *args, **kwargs) -> "ModelHubMixin":
+        """Create a new instance of the class and handle config.
+        3 cases:
+        - If `self._hub_mixin_config` is already set, do nothing.
+        - If `config` is passed as a dataclass, set it as `self._hub_mixin_config`.
+        - Otherwise, build `self._hub_mixin_config` from default values and passed values.
+        """
         instance = super().__new__(cls)
-        # Set `config` attribute if not already set by the subclass
-        if instance.config is None:
-            if "config" in kwargs:
-                instance.config = kwargs["config"]
-            elif len(args) > 0:
-                sig = inspect.signature(cls.__init__)
-                parameters = list(sig.parameters)[1:]  # remove `self`
-                for key, value in zip(parameters, args):
-                    if key == "config":
-                        instance.config = value
-                        break
+        # If `config` is already set, return early
+        if instance._hub_mixin_config is not None:
+            return instance
+        # Infer passed values
+        passed_values = {
+            **{
+                key: value
+                for key, value in zip(
+                    # [1:] to skip `self` parameter
+                    list(cls._hub_mixin_init_parameters)[1:],
+                    args,
+                )
+            },
+            **kwargs,
+        }
+        # If config passed as dataclass => set it and return early
+        if is_dataclass(passed_values.get("config")):
+            instance._hub_mixin_config = passed_values["config"]
+            return instance
+        # Otherwise, build config from default + passed values
+        init_config = {
+            # default values
+            **cls._hub_mixin_jsonable_default_values,
+            # passed values
+            **{key: value for key, value in passed_values.items() if is_jsonable(value)},
+        }
+        init_config.pop("config", {})
+        # Populate `init_config` with provided config
+        provided_config = passed_values.get("config")
+        if isinstance(provided_config, dict):
+            init_config.update(provided_config)
+        # Set `config` attribute and return
+        if init_config != {}:
+            instance._hub_mixin_config = init_config
         return instance
     def save_pretrained(
@@ -150,13 +265,21 @@ class ModelHubMixin:
         # save model weights/files (framework-specific)
         self._save_pretrained(save_directory)
-        # save config (if provided)
+        # save config (if provided and if not serialized yet in `_save_pretrained`)
         if config is None:
-            config = self.config
+            config = self._hub_mixin_config
         if config is not None:
             if is_dataclass(config):
                 config = asdict(config)  # type: ignore[arg-type]
-            (save_directory / CONFIG_NAME).write_text(json.dumps(config, indent=2))
+            config_path = save_directory / CONFIG_NAME
+            if not config_path.exists():
+                config_str = json.dumps(config, sort_keys=True, indent=2)
+                config_path.write_text(config_str)
+        # save model card
+        model_card_path = save_directory / "README.md"
+        if not model_card_path.exists():  # do not overwrite if already exists
+            self.generate_model_card().save(save_directory / "README.md")
         # push to the Hub if required
         if push_to_hub:
@@ -246,32 +369,42 @@ class ModelHubMixin:
             except HfHubHTTPError as e:
                 logger.info(f"{CONFIG_NAME} not found on the HuggingFace Hub: {str(e)}")
+        # Read config
         config = None
         if config_file is not None:
-            # Read config
             with open(config_file, "r", encoding="utf-8") as f:
                 config = json.load(f)
-            # Check if class expect a `config` argument
-            init_parameters = inspect.signature(cls.__init__).parameters
-            if "config" in init_parameters:
+            # Populate model_kwargs from config
+            for param in cls._hub_mixin_init_parameters.values():
+                if param.name not in model_kwargs and param.name in config:
+                    model_kwargs[param.name] = config[param.name]
+            # Check if `config` argument was passed at init
+            if "config" in cls._hub_mixin_init_parameters:
                 # Check if `config` argument is a dataclass
-                config_annotation = init_parameters["config"].annotation
+                config_annotation = cls._hub_mixin_init_parameters["config"].annotation
                 if config_annotation is inspect.Parameter.empty:
                     pass  # no annotation
                 elif is_dataclass(config_annotation):
-                    config = config_annotation(**config)  # expect a dataclass
+                    config = _load_dataclass(config_annotation, config)
                 else:
                     # if Optional/Union annotation => check if a dataclass is in the Union
                     for _sub_annotation in get_args(config_annotation):
                         if is_dataclass(_sub_annotation):
-                            config = _sub_annotation(**config)
+                            config = _load_dataclass(_sub_annotation, config)
                             break
                 # Forward config to model initialization
                 model_kwargs["config"] = config
-            elif any(param.kind == inspect.Parameter.VAR_KEYWORD for param in init_parameters.values()):
-                # If __init__ accepts **kwargs, let's forward the config as well (as a dict)
+            if any(param.kind == inspect.Parameter.VAR_KEYWORD for param in cls._hub_mixin_init_parameters.values()):
+                for key, value in config.items():
+                    if key not in model_kwargs:
+                        model_kwargs[key] = value
+            # Finally, also inject if `_from_pretrained` expects it
+            if cls._hub_mixin_inject_config:
                 model_kwargs["config"] = config
         instance = cls._from_pretrained(
@@ -288,8 +421,8 @@ class ModelHubMixin:
         # Implicitly set the config as instance attribute if not already set by the class
         # This way `config` will be available when calling `save_pretrained` or `push_to_hub`.
-        if config is not None and instance.config is None:
-            instance.config = config
+        if config is not None and (getattr(instance, "_hub_mixin_config", None) in (None, {})):
+            instance._hub_mixin_config = config
         return instance
@@ -418,6 +551,13 @@ class ModelHubMixin:
                 delete_patterns=delete_patterns,
             )
+    def generate_model_card(self, *args, **kwargs) -> ModelCard:
+        card = ModelCard.from_template(
+            card_data=ModelCardData(**asdict(self._hub_mixin_info)),
+            template_str=DEFAULT_MODEL_CARD,
+        )
+        return card
 class PyTorchModelHubMixin(ModelHubMixin):
     """
@@ -428,26 +568,26 @@ class PyTorchModelHubMixin(ModelHubMixin):
     Example:
     ```python
-    >>> from dataclasses import dataclass
     >>> import torch
     >>> import torch.nn as nn
     >>> from huggingface_hub import PyTorchModelHubMixin
-    >>> @dataclass
-    ... class Config:
-    ...     hidden_size: int = 512
-    ...     vocab_size: int = 30000
-    ...     output_size: int = 4
-    >>> class MyModel(nn.Module, PyTorchModelHubMixin):
-    ...     def __init__(self, config: Config):
+    >>> class MyModel(
+    ...         nn.Module,
+    ...         PyTorchModelHubMixin,
+    ...         library_name="keras-nlp",
+    ...         repo_url="https://github.com/keras-team/keras-nlp",
+    ...         docs_url="https://keras.io/keras_nlp/",
+    ...         # ^ optional metadata to generate model card
+    ...     ):
+    ...     def __init__(self, hidden_size: int = 512, vocab_size: int = 30000, output_size: int = 4):
     ...         super().__init__()
-    ...         self.param = nn.Parameter(torch.rand(config.hidden_size, config.vocab_size))
-    ...         self.linear = nn.Linear(config.output_size, config.vocab_size)
+    ...         self.param = nn.Parameter(torch.rand(hidden_size, vocab_size))
+    ...         self.linear = nn.Linear(output_size, vocab_size)
     ...     def forward(self, x):
     ...         return self.linear(x + self.param)
-    >>> model = MyModel()
+    >>> model = MyModel(hidden_size=256)
     # Save model weights to local directory
     >>> model.save_pretrained("my-awesome-model")
@@ -457,9 +597,17 @@ class PyTorchModelHubMixin(ModelHubMixin):
     # Download and initialize weights from the Hub
     >>> model = MyModel.from_pretrained("username/my-awesome-model")
+    >>> model.hidden_size
+    256
     ```
     """
+    def __init_subclass__(cls, *args, tags: Optional[List[str]] = None, **kwargs) -> None:
+        tags = tags or []
+        tags.append("pytorch_model_hub_mixin")
+        kwargs["tags"] = tags
+        return super().__init_subclass__(*args, **kwargs)
     def _save_pretrained(self, save_directory: Path) -> None:
         """Save weights from a Pytorch model to a local directory."""
         model_to_save = self.module if hasattr(self, "module") else self  # type: ignore
@@ -536,3 +684,11 @@ class PyTorchModelHubMixin(ModelHubMixin):
             )
             model.to(map_location)  # type: ignore [attr-defined]
         return model
+def _load_dataclass(datacls: Type["DataclassInstance"], data: dict) -> "DataclassInstance":
+    """Load a dataclass instance from a dictionary.
+    Fields not expected by the dataclass are ignored.
+    """
+    return datacls(**{k: v for k, v in data.items() if k in datacls.__dataclass_fields__})

huggingface-hub 0.21.4__py3-none-any.whl → 0.22.0rc0__py3-none-any.whl

Potentially problematic release.

huggingface-hub 0.21.4__py3-none-any.whl → 0.22.0rc0__py3-none-any.whl