PyPI - huggingface-hub - Versions diffs - 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl - Mend

huggingface-hub 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

huggingface_hub/__init__.py +165 -127
huggingface_hub/_commit_api.py +25 -51
huggingface_hub/_login.py +4 -13
huggingface_hub/_snapshot_download.py +45 -23
huggingface_hub/_space_api.py +7 -0
huggingface_hub/commands/delete_cache.py +13 -39
huggingface_hub/commands/env.py +1 -3
huggingface_hub/commands/huggingface_cli.py +1 -3
huggingface_hub/commands/lfs.py +4 -8
huggingface_hub/commands/scan_cache.py +5 -16
huggingface_hub/commands/user.py +27 -45
huggingface_hub/community.py +4 -4
huggingface_hub/constants.py +22 -19
huggingface_hub/fastai_utils.py +14 -23
huggingface_hub/file_download.py +166 -108
huggingface_hub/hf_api.py +500 -255
huggingface_hub/hub_mixin.py +181 -176
huggingface_hub/inference_api.py +4 -10
huggingface_hub/keras_mixin.py +39 -71
huggingface_hub/lfs.py +8 -24
huggingface_hub/repocard.py +33 -48
huggingface_hub/repocard_data.py +141 -30
huggingface_hub/repository.py +41 -112
huggingface_hub/templates/modelcard_template.md +39 -34
huggingface_hub/utils/__init__.py +1 -0
huggingface_hub/utils/_cache_assets.py +1 -4
huggingface_hub/utils/_cache_manager.py +17 -39
huggingface_hub/utils/_deprecation.py +8 -12
huggingface_hub/utils/_errors.py +10 -57
huggingface_hub/utils/_fixes.py +2 -6
huggingface_hub/utils/_git_credential.py +5 -16
huggingface_hub/utils/_headers.py +22 -11
huggingface_hub/utils/_http.py +1 -4
huggingface_hub/utils/_paths.py +5 -12
huggingface_hub/utils/_runtime.py +2 -1
huggingface_hub/utils/_telemetry.py +120 -0
huggingface_hub/utils/_validators.py +5 -13
huggingface_hub/utils/endpoint_helpers.py +1 -3
huggingface_hub/utils/logging.py +10 -8
{huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/METADATA +7 -14
huggingface_hub-0.13.0rc0.dist-info/RECORD +56 -0
huggingface_hub/py.typed +0 -0
huggingface_hub-0.12.1.dist-info/RECORD +0 -56
{huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/LICENSE +0 -0
{huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/WHEEL +0 -0
{huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/entry_points.txt +0 -0
{huggingface_hub-0.12.1.dist-info → huggingface_hub-0.13.0rc0.dist-info}/top_level.txt +0 -0

huggingface_hub/keras_mixin.py CHANGED Viewed

@@ -5,10 +5,8 @@ import warnings
 from pathlib import Path
 from shutil import copytree
 from typing import Any, Dict, List, Optional, Union
-from urllib.parse import quote
-from huggingface_hub import CommitOperationDelete, ModelHubMixin, snapshot_download
-from huggingface_hub._commit_api import CommitOperation
+from huggingface_hub import ModelHubMixin, snapshot_download
 from huggingface_hub.utils import (
     get_tf_version,
     is_graphviz_available,
@@ -17,8 +15,8 @@ from huggingface_hub.utils import (
     yaml_dump,
 )
-from .constants import CONFIG_NAME, DEFAULT_REVISION
-from .hf_api import HfApi, _parse_revision_from_pr_url, _prepare_upload_folder_commit
+from .constants import CONFIG_NAME
+from .hf_api import HfApi
 from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args
@@ -36,7 +34,7 @@ def _flatten_dict(dictionary, parent_key=""):
         dictionary (`dict`):
             The nested dictionary to be flattened.
         parent_key (`str`):
-            The parent key to be prefixed to the childer keys.
+            The parent key to be prefixed to the children keys.
             Necessary for recursing over the nested dictionary.
     Returns:
@@ -63,9 +61,7 @@ def _create_hyperparameter_table(model):
         optimizer_params = model.optimizer.get_config()
         # flatten the configuration
         optimizer_params = _flatten_dict(optimizer_params)
-        optimizer_params[
-            "training_precision"
-        ] = tf.keras.mixed_precision.global_policy().name
+        optimizer_params["training_precision"] = tf.keras.mixed_precision.global_policy().name
         table = "| Hyperparameters | Value |\n| :-- | :-- |\n"
         for key, value in optimizer_params.items():
             table += f"| {key} | {value} |\n"
@@ -170,9 +166,7 @@ def save_pretrained_keras(
     if is_tf_available():
         import tensorflow as tf
     else:
-        raise ImportError(
-            "Called a Tensorflow-specific function but could not import it."
-        )
+        raise ImportError("Called a Tensorflow-specific function but could not import it.")
     if not model.built:
         raise ValueError("Model should be built before trying to save")
@@ -183,10 +177,7 @@ def save_pretrained_keras(
     # saving config
     if config:
         if not isinstance(config, dict):
-            raise RuntimeError(
-                "Provided config to save_pretrained_keras should be a dict. Got:"
-                f" '{type(config)}'"
-            )
+            raise RuntimeError(f"Provided config to save_pretrained_keras should be a dict. Got: '{type(config)}'")
         with (save_directory / CONFIG_NAME).open("w") as f:
             json.dump(config, f)
@@ -213,25 +204,22 @@ def save_pretrained_keras(
             path = save_directory / "history.json"
             if path.exists():
                 warnings.warn(
-                    "`history.json` file already exists, it will be overwritten by the"
-                    " history of this version.",
+                    "`history.json` file already exists, it will be overwritten by the history of this version.",
                     UserWarning,
                 )
             with path.open("w", encoding="utf-8") as f:
                 json.dump(model.history.history, f, indent=2, sort_keys=True)
     _create_model_card(model, save_directory, plot_model, metadata)
-    tf.keras.models.save_model(
-        model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs
-    )
+    tf.keras.models.save_model(model, save_directory, include_optimizer=include_optimizer, **model_save_kwargs)
-def from_pretrained_keras(*args, **kwargs):
+def from_pretrained_keras(*args, **kwargs) -> "KerasModelHubMixin":
     r"""
     Instantiate a pretrained Keras model from a pre-trained model from the Hub.
     The model is expected to be in `SavedModel` format.
-    Parameters:
+    Args:
         pretrained_model_name_or_path (`str` or `os.PathLike`):
             Can be either:
                 - A string, the `model id` of a pretrained model hosted inside a
@@ -299,6 +287,7 @@ def push_to_hub_keras(
     create_pr: Optional[bool] = None,
     allow_patterns: Optional[Union[List[str], str]] = None,
     ignore_patterns: Optional[Union[List[str], str]] = None,
+    delete_patterns: Optional[Union[List[str], str]] = None,
     log_dir: Optional[str] = None,
     include_optimizer: bool = False,
     tags: Optional[Union[list, str]] = None,
@@ -306,17 +295,16 @@ def push_to_hub_keras(
     **model_save_kwargs,
 ):
     """
-    Upload model checkpoint or tokenizer files to the Hub while synchronizing a
-    local clone of the repo in `repo_path_or_name`.
+    Upload model checkpoint to the Hub.
-    Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be
-    pushed to the hub. See [`upload_folder`] reference for more details.
+    Use `allow_patterns` and `ignore_patterns` to precisely filter which files should be pushed to the hub. Use
+    `delete_patterns` to delete existing remote files in the same commit. See [`upload_folder`] reference for more
+    details.
-    Parameters:
+    Args:
         model (`Keras.Model`):
-            The [Keras
-            model](`https://www.tensorflow.org/api_docs/python/tf/keras/Model`)
-            you'd like to push to the Hub. The model must be compiled and built.
+            The [Keras model](`https://www.tensorflow.org/api_docs/python/tf/keras/Model`) you'd like to push to the
+            Hub. The model must be compiled and built.
         repo_id (`str`):
             Repository name to which push
         commit_message (`str`, *optional*, defaults to "Add Keras model"):
@@ -342,6 +330,8 @@ def push_to_hub_keras(
             If provided, only files matching at least one pattern are pushed.
         ignore_patterns (`List[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not pushed.
+        delete_patterns (`List[str]` or `str`, *optional*):
+            If provided, remote files matching any of the patterns will be deleted from the repo.
         log_dir (`str`, *optional*):
             TensorBoard logging directory to be pushed. The Hub automatically
             hosts and displays a TensorBoard instance if log files are included
@@ -362,13 +352,7 @@ def push_to_hub_keras(
         The url of the commit of your model in the given repository.
     """
     api = HfApi(endpoint=api_endpoint)
-    api.create_repo(
-        repo_id=repo_id,
-        repo_type="model",
-        token=token,
-        private=private,
-        exist_ok=True,
-    )
+    repo_id = api.create_repo(repo_id=repo_id, token=token, private=private, exist_ok=True).repo_id
     # Push the files to the repo in a single commit
     with SoftTemporaryDirectory() as tmp:
@@ -383,46 +367,32 @@ def push_to_hub_keras(
             **model_save_kwargs,
         )
-        # If log dir is provided, delete old logs + add new ones
-        operations: List[CommitOperation] = []
+        # If `log_dir` provided, delete remote logs and upload new ones
         if log_dir is not None:
-            # Delete previous log files from Hub
-            operations += [
-                CommitOperationDelete(path_in_repo=file)
-                for file in api.list_repo_files(repo_id=repo_id, token=token)
-                if file.startswith("logs/")
-            ]
-            # Copy new log files
+            delete_patterns = (
+                []
+                if delete_patterns is None
+                else (
+                    [delete_patterns]  # convert `delete_patterns` to a list
+                    if isinstance(delete_patterns, str)
+                    else delete_patterns
+                )
+            )
+            delete_patterns.append("logs/*")
             copytree(log_dir, saved_path / "logs")
-        # NOTE: `_prepare_upload_folder_commit` and `create_commit` calls are
-        #       duplicate code from `upload_folder`. We are not directly using
-        #       `upload_folder` since we want to add delete operations to the
-        #       commit as well.
-        operations += _prepare_upload_folder_commit(
-            saved_path,
-            path_in_repo="",
-            allow_patterns=allow_patterns,
-            ignore_patterns=ignore_patterns,
-        )
-        commit_info = api.create_commit(
+        return api.upload_folder(
             repo_type="model",
             repo_id=repo_id,
-            operations=operations,
+            folder_path=saved_path,
             commit_message=commit_message,
             token=token,
             revision=branch,
             create_pr=create_pr,
+            allow_patterns=allow_patterns,
+            ignore_patterns=ignore_patterns,
+            delete_patterns=delete_patterns,
         )
-        revision = branch
-        if revision is None:
-            revision = (
-                quote(_parse_revision_from_pr_url(commit_info.pr_url), safe="")
-                if commit_info.pr_url is not None
-                else DEFAULT_REVISION
-            )
-        return f"{api.endpoint}/{repo_id}/tree/{revision}/"
 class KerasModelHubMixin(ModelHubMixin):
@@ -486,9 +456,7 @@ class KerasModelHubMixin(ModelHubMixin):
         if is_tf_available():
             import tensorflow as tf
         else:
-            raise ImportError(
-                "Called a TensorFlow-specific function but could not import it."
-            )
+            raise ImportError("Called a TensorFlow-specific function but could not import it.")
         # TODO - Figure out what to do about these config values. Config is not going to be needed to load model
         cfg = model_kwargs.pop("config", None)

huggingface_hub/lfs.py CHANGED Viewed

@@ -23,9 +23,10 @@ from os.path import getsize
 from typing import BinaryIO, Iterable, List, Optional, Tuple
 import requests
-from huggingface_hub.constants import ENDPOINT, REPO_TYPES_URL_PREFIXES
 from requests.auth import HTTPBasicAuth
+from huggingface_hub.constants import ENDPOINT, REPO_TYPES_URL_PREFIXES
 from .utils import (
     get_token_to_send,
     hf_raise_for_status,
@@ -92,10 +93,7 @@ def _validate_lfs_action(lfs_action: dict):
     """validates response from the LFS batch endpoint"""
     if not (
         isinstance(lfs_action.get("href"), str)
-        and (
-            lfs_action.get("header") is None
-            or isinstance(lfs_action.get("header"), dict)
-        )
+        and (lfs_action.get("header") is None or isinstance(lfs_action.get("header"), dict))
     ):
         raise ValueError("lfs_action is improperly formatted")
     return lfs_action
@@ -103,10 +101,7 @@ def _validate_lfs_action(lfs_action: dict):
 def _validate_batch_actions(lfs_batch_actions: dict):
     """validates response from the LFS batch endpoint"""
-    if not (
-        isinstance(lfs_batch_actions.get("oid"), str)
-        and isinstance(lfs_batch_actions.get("size"), int)
-    ):
+    if not (isinstance(lfs_batch_actions.get("oid"), str) and isinstance(lfs_batch_actions.get("size"), int)):
         raise ValueError("lfs_batch_actions is improperly formatted")
     upload_action = lfs_batch_actions.get("actions", {}).get("upload")
@@ -120,10 +115,7 @@ def _validate_batch_actions(lfs_batch_actions: dict):
 def _validate_batch_error(lfs_batch_error: dict):
     """validates response from the LFS batch endpoint"""
-    if not (
-        isinstance(lfs_batch_error.get("oid"), str)
-        and isinstance(lfs_batch_error.get("size"), int)
-    ):
+    if not (isinstance(lfs_batch_error.get("oid"), str) and isinstance(lfs_batch_error.get("size"), int)):
         raise ValueError("lfs_batch_error is improperly formatted")
     error_info = lfs_batch_error.get("error")
     if not (
@@ -258,10 +250,7 @@ def lfs_upload(
         if isinstance(chunk_size, str):
             chunk_size = int(chunk_size, 10)
         else:
-            raise ValueError(
-                "Malformed response from LFS batch endpoint: `chunk_size`"
-                " should be a string"
-            )
+            raise ValueError("Malformed response from LFS batch endpoint: `chunk_size` should be a string")
         _upload_multi_part(
             completion_url=upload_action["href"],
             fileobj=fileobj,
@@ -385,10 +374,7 @@ def _upload_multi_part(
             hf_raise_for_status(part_upload_res)
             etag = part_upload_res.headers.get("etag")
             if etag is None or etag == "":
-                raise ValueError(
-                    f"Invalid etag (`{etag}`) returned for part {part_idx +1} of"
-                    f" {num_parts}"
-                )
+                raise ValueError(f"Invalid etag (`{etag}`) returned for part {part_idx +1} of {num_parts}")
             completion_payload["parts"][part_idx]["etag"] = etag
     completion_res = requests.post(
@@ -466,9 +452,7 @@ class SliceFileObj(AbstractContextManager):
         if pos >= self._len:
             return b""
         remaining_amount = self._len - pos
-        data = self.fileobj.read(
-            remaining_amount if n < 0 else min(n, remaining_amount)
-        )
+        data = self.fileobj.read(remaining_amount if n < 0 else min(n, remaining_amount))
         return data
     def tell(self) -> int:

huggingface_hub/repocard.py CHANGED Viewed

@@ -5,6 +5,7 @@ from typing import Any, Dict, Optional, Type, Union
 import requests
 import yaml
 from huggingface_hub.file_download import hf_hub_download
 from huggingface_hub.hf_api import upload_file
 from huggingface_hub.repocard_data import (
@@ -12,6 +13,7 @@ from huggingface_hub.repocard_data import (
     DatasetCardData,
     EvalResult,
     ModelCardData,
+    SpaceCardData,
     eval_results_to_model_index,
     model_index_to_eval_results,
 )
@@ -24,9 +26,7 @@ from .utils.logging import get_logger
 TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
-TEMPLATE_DATASETCARD_PATH = (
-    Path(__file__).parent / "templates" / "datasetcard_template.md"
-)
+TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"
 # exact same regex as in the Hub server. Please keep in sync.
 # See https://github.com/huggingface/moon-landing/blob/main/server/lib/ViewMarkdown.ts#L18
@@ -40,7 +40,7 @@ class RepoCard:
     default_template_path = TEMPLATE_MODELCARD_PATH
     repo_type = "model"
-    def __init__(self, content: str):
+    def __init__(self, content: str, ignore_metadata_errors: bool = False):
         """Initialize a RepoCard from string content. The content should be a
         Markdown file with a YAML block at the beginning and a Markdown body.
@@ -76,6 +76,7 @@ class RepoCard:
         # Set the content of the RepoCard, as well as underlying .data and .text attributes.
         # See the `content` property setter for more details.
+        self.ignore_metadata_errors = ignore_metadata_errors
         self.content = content
     @property
@@ -101,13 +102,11 @@ class RepoCard:
                 raise ValueError("repo card metadata block should be a dict")
         else:
             # Model card without metadata... create empty metadata
-            logger.warning(
-                "Repo card metadata block was not found. Setting CardData to empty."
-            )
+            logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
             data_dict = {}
             self.text = content
-        self.data = self.card_data_class(**data_dict)
+        self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
     def __str__(self):
         return self.content
@@ -138,6 +137,7 @@ class RepoCard:
         repo_id_or_path: Union[str, Path],
         repo_type: Optional[str] = None,
         token: Optional[str] = None,
+        ignore_metadata_errors: bool = False,
     ):
         """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.
@@ -145,13 +145,14 @@ class RepoCard:
             repo_id_or_path (`Union[str, Path]`):
                 The repo ID associated with a Hugging Face Hub repo or a local filepath.
             repo_type (`str`, *optional*):
-                The type of Hugging Face repo to push to. Defaults to None, which will use
-                use "model". Other options are "dataset" and "space". Not used when loading from
-                a local filepath. If this is called from a child class, the default value will be
-                the child class's `repo_type`.
+                The type of Hugging Face repo to push to. Defaults to None, which will use use "model". Other options
+                are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
+                class, the default value will be the child class's `repo_type`.
             token (`str`, *optional*):
-                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
-                the stored token.
+                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
+            ignore_metadata_errors (`str`):
+                If True, errors while parsing the metadata section will be ignored. Some information might be lost during
+                the process. Use it at your own risk.
         Returns:
             [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
@@ -176,13 +177,11 @@ class RepoCard:
                 token=token,
             )
         else:
-            raise ValueError(
-                f"Cannot load RepoCard: path not found on disk ({repo_id_or_path})."
-            )
+            raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")
         # Preserve newlines in the existing file.
         with Path(card_path).open(mode="r", newline="", encoding="utf-8") as f:
-            return cls(f.read())
+            return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)
     def validate(self, repo_type: Optional[str] = None):
         """Validates card against Hugging Face Hub's card validation logic.
@@ -215,9 +214,7 @@ class RepoCard:
         headers = {"Accept": "text/plain"}
         try:
-            r = requests.post(
-                "https://huggingface.co/api/validate-yaml", body, headers=headers
-            )
+            r = requests.post("https://huggingface.co/api/validate-yaml", body, headers=headers)
             r.raise_for_status()
         except requests.exceptions.HTTPError as exc:
             if r.status_code == 400:
@@ -321,9 +318,7 @@ class RepoCard:
         kwargs = card_data.to_dict().copy()
         kwargs.update(template_kwargs)  # Template_kwargs have priority
-        template = jinja2.Template(
-            Path(template_path or cls.default_template_path).read_text()
-        )
+        template = jinja2.Template(Path(template_path or cls.default_template_path).read_text())
         content = template.render(card_data=card_data.to_yaml(), **kwargs)
         return cls(content)
@@ -367,7 +362,7 @@ class ModelCard(RepoCard):
             ...     license='mit',
             ...     library_name='timm',
             ...     tags=['image-classification', 'resnet'],
-            ...     datasets='beans',
+            ...     datasets=['beans'],
             ...     metrics=['accuracy'],
             ... )
             >>> card = ModelCard.from_template(
@@ -472,10 +467,16 @@ class DatasetCard(RepoCard):
         return super().from_template(card_data, template_path, **template_kwargs)
-def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:
+class SpaceCard(RepoCard):
+    card_data_class = SpaceCardData
+    default_template_path = TEMPLATE_MODELCARD_PATH
+    repo_type = "space"
+def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]:  # noqa: F722
     """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines.
-    Uses same implem as in Hub server, keep it in sync.
+    Uses same implementation as in Hub server, keep it in sync.
     Returns:
         str: The detected line ending of the string.
@@ -531,11 +532,7 @@ def metadata_save(local_path: Union[str, Path], data: Dict) -> None:
         # sort_keys: keep dict order
         match = REGEX_YAML_BLOCK.search(content)
         if match:
-            output = (
-                content[: match.start()]
-                + f"---{line_break}{data_yaml}---{line_break}"
-                + content[match.end() :]
-            )
+            output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
         else:
             output = f"---{line_break}{data_yaml}---{line_break}{content}"
@@ -739,11 +736,7 @@ def metadata_update(
         ```
     """
-    commit_message = (
-        commit_message
-        if commit_message is not None
-        else "Update metadata with huggingface_hub"
-    )
+    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"
     # Card class given repo_type
     card_class: Type[RepoCard]
@@ -762,10 +755,7 @@ def metadata_update(
         card = card_class.load(repo_id, token=token, repo_type=repo_type)
     except EntryNotFoundError:
         if repo_type == "space":
-            raise ValueError(
-                "Cannot update metadata on a Space that doesn't contain a `README.md`"
-                " file."
-            )
+            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")
         # Initialize a ModelCard or DatasetCard from default template and no data.
         card = card_class.from_template(CardData())
@@ -807,18 +797,13 @@ def metadata_update(
                         card.data.eval_results.append(new_result)
         else:
             # Any metadata that is not a result metric
-            if (
-                hasattr(card.data, key)
-                and getattr(card.data, key) is not None
-                and not overwrite
-                and getattr(card.data, key) != value
-            ):
+            if card.data.get(key) is not None and not overwrite and card.data.get(key) != value:
                 raise ValueError(
                     f"You passed a new value for the existing meta data field '{key}'."
                     " Set `overwrite=True` to overwrite existing metadata."
                 )
             else:
-                setattr(card.data, key, value)
+                card.data[key] = value
     return card.push_to_hub(
         repo_id,

huggingface-hub 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl

huggingface-hub 0.12.1__py3-none-any.whl → 0.13.0rc0__py3-none-any.whl