huggingface-hub 0.18.0__py3-none-any.whl → 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of huggingface-hub might be problematic.

Files changed (43)
  1. huggingface_hub/__init__.py +31 -5
  2. huggingface_hub/_inference_endpoints.py +348 -0
  3. huggingface_hub/_login.py +9 -7
  4. huggingface_hub/_multi_commits.py +1 -1
  5. huggingface_hub/_snapshot_download.py +6 -7
  6. huggingface_hub/_space_api.py +7 -4
  7. huggingface_hub/_tensorboard_logger.py +1 -0
  8. huggingface_hub/_webhooks_payload.py +7 -7
  9. huggingface_hub/commands/lfs.py +3 -6
  10. huggingface_hub/commands/user.py +1 -4
  11. huggingface_hub/constants.py +27 -0
  12. huggingface_hub/file_download.py +142 -134
  13. huggingface_hub/hf_api.py +1036 -501
  14. huggingface_hub/hf_file_system.py +57 -12
  15. huggingface_hub/hub_mixin.py +3 -5
  16. huggingface_hub/inference/_client.py +43 -8
  17. huggingface_hub/inference/_common.py +8 -16
  18. huggingface_hub/inference/_generated/_async_client.py +41 -8
  19. huggingface_hub/inference/_text_generation.py +43 -0
  20. huggingface_hub/inference_api.py +1 -1
  21. huggingface_hub/lfs.py +32 -14
  22. huggingface_hub/repocard_data.py +7 -0
  23. huggingface_hub/repository.py +19 -3
  24. huggingface_hub/templates/modelcard_template.md +1 -1
  25. huggingface_hub/utils/__init__.py +1 -1
  26. huggingface_hub/utils/_cache_assets.py +3 -3
  27. huggingface_hub/utils/_cache_manager.py +6 -7
  28. huggingface_hub/utils/_datetime.py +3 -1
  29. huggingface_hub/utils/_errors.py +10 -0
  30. huggingface_hub/utils/_hf_folder.py +4 -2
  31. huggingface_hub/utils/_http.py +10 -1
  32. huggingface_hub/utils/_runtime.py +4 -2
  33. huggingface_hub/utils/endpoint_helpers.py +27 -175
  34. huggingface_hub/utils/insecure_hashlib.py +34 -0
  35. huggingface_hub/utils/logging.py +4 -6
  36. huggingface_hub/utils/sha.py +2 -1
  37. {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/METADATA +16 -15
  38. huggingface_hub-0.19.0.dist-info/RECORD +74 -0
  39. {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/WHEEL +1 -1
  40. huggingface_hub-0.18.0.dist-info/RECORD +0 -72
  41. {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/LICENSE +0 -0
  42. {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/entry_points.txt +0 -0
  43. {huggingface_hub-0.18.0.dist-info → huggingface_hub-0.19.0.dist-info}/top_level.txt +0 -0
--- a/huggingface_hub/_webhooks_payload.py
+++ b/huggingface_hub/_webhooks_payload.py
@@ -55,7 +55,7 @@ class ObjectId(BaseModel):
 
 class WebhookPayloadUrl(BaseModel):
     web: str
-    api: Optional[str]
+    api: Optional[str] = None
 
 
 class WebhookPayloadMovedTo(BaseModel):
@@ -74,7 +74,7 @@ class WebhookPayloadEvent(BaseModel):
 
 class WebhookPayloadDiscussionChanges(BaseModel):
     base: str
-    mergeCommitId: Optional[str]
+    mergeCommitId: Optional[str] = None
 
 
 class WebhookPayloadComment(ObjectId):
@@ -92,16 +92,16 @@ class WebhookPayloadDiscussion(ObjectId):
     isPullRequest: bool
     status: DiscussionStatus_T
     changes: Optional[WebhookPayloadDiscussionChanges]
-    pinned: Optional[bool]
+    pinned: Optional[bool] = None
 
 
 class WebhookPayloadRepo(ObjectId):
     owner: ObjectId
-    head_sha: Optional[str]
+    head_sha: Optional[str] = None
     name: str
     private: bool
-    subdomain: Optional[str]
-    tags: Optional[List[str]]
+    subdomain: Optional[str] = None
+    tags: Optional[List[str]] = None
     type: Literal["dataset", "model", "space"]
     url: WebhookPayloadUrl
 
@@ -112,4 +112,4 @@ class WebhookPayload(BaseModel):
     discussion: Optional[WebhookPayloadDiscussion]
     comment: Optional[WebhookPayloadComment]
     webhook: WebhookPayloadWebhook
-    movedTo: Optional[WebhookPayloadMovedTo]
+    movedTo: Optional[WebhookPayloadMovedTo] = None
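In Pydantic v2, annotating a field as `Optional[X]` no longer makes it optional: it only widens the accepted type to `X | None`, and the field stays required unless a default is given. The `= None` defaults above restore the v1 behavior for payload fields the Hub may omit. A minimal illustration (the `Payload` model is made up for the example):

```python
from typing import Optional

from pydantic import BaseModel


class Payload(BaseModel):
    required: Optional[str]         # v2: value may be None, but must be provided
    optional: Optional[str] = None  # v2: may be omitted, defaults to None


Payload(required=None)  # ok
# Payload() would raise a ValidationError: field `required` is missing
```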
--- a/huggingface_hub/commands/lfs.py
+++ b/huggingface_hub/commands/lfs.py
@@ -56,16 +56,13 @@ class LfsCommands(BaseHuggingfaceCLICommand):
     @staticmethod
     def register_subcommand(parser: _SubParsersAction):
         enable_parser = parser.add_parser(
-            "lfs-enable-largefiles",
-            help="Configure your repository to enable upload of files > 5GB.",
+            "lfs-enable-largefiles", help="Configure your repository to enable upload of files > 5GB."
         )
         enable_parser.add_argument("path", type=str, help="Local path to repository you want to configure.")
         enable_parser.set_defaults(func=lambda args: LfsEnableCommand(args))
 
-        upload_parser = parser.add_parser(
-            LFS_MULTIPART_UPLOAD_COMMAND,
-            help="Command will get called by git-lfs, do not call it directly.",
-        )
+        # Command will get called by git-lfs, do not call it directly.
+        upload_parser = parser.add_parser(LFS_MULTIPART_UPLOAD_COMMAND, add_help=False)
         upload_parser.set_defaults(func=lambda args: LfsUploadCommand(args))
 
 
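Dropping the `help=` string and passing `add_help=False` keeps the git-lfs callback out of the user-facing CLI. A standalone sketch of that argparse pattern (command names are made up):

```python
import argparse

parser = argparse.ArgumentParser(prog="huggingface-cli")
subparsers = parser.add_subparsers()

# Documented command: the help string shows up in `huggingface-cli --help`.
subparsers.add_parser("visible-cmd", help="A user-facing command.")

# Internal command: without `help=` it gets no description in the listing,
# and add_help=False disables its automatic -h/--help flag as well.
subparsers.add_parser("internal-cmd", add_help=False)

parser.print_help()
```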
--- a/huggingface_hub/commands/user.py
+++ b/huggingface_hub/commands/user.py
@@ -58,10 +58,7 @@ class UserCommands(BaseHuggingfaceCLICommand):
         logout_parser.set_defaults(func=lambda args: LogoutCommand(args))
 
         # new system: git-based repo system
-        repo_parser = parser.add_parser(
-            "repo",
-            help="{create, ls-files} Commands to interact with your huggingface.co repos.",
-        )
+        repo_parser = parser.add_parser("repo", help="{create} Commands to interact with your huggingface.co repos.")
         repo_subparsers = repo_parser.add_subparsers(help="huggingface.co repos related commands")
         repo_create_parser = repo_subparsers.add_parser("create", help="Create a new repo on huggingface.co")
         repo_create_parser.add_argument(
--- a/huggingface_hub/constants.py
+++ b/huggingface_hub/constants.py
@@ -5,6 +5,7 @@ from typing import Optional
 
 # Possible values for env variables
 
+
 ENV_VARS_TRUE_VALUES = {"1", "ON", "YES", "TRUE"}
 ENV_VARS_TRUE_AND_AUTO_VALUES = ENV_VARS_TRUE_VALUES.union({"AUTO"})
 
@@ -29,6 +30,11 @@ TF_WEIGHTS_NAME = "model.ckpt"
 FLAX_WEIGHTS_NAME = "flax_model.msgpack"
 CONFIG_NAME = "config.json"
 REPOCARD_NAME = "README.md"
+DEFAULT_ETAG_TIMEOUT = 10
+DEFAULT_DOWNLOAD_TIMEOUT = 10
+DEFAULT_REQUEST_TIMEOUT = 10
+DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
+HF_TRANSFER_CONCURRENCY = 100
 
 # Git-related constants
 
@@ -48,6 +54,10 @@ HUGGINGFACE_HEADER_X_LINKED_SIZE = "X-Linked-Size"
 
 INFERENCE_ENDPOINT = os.environ.get("HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co")
 
+# See https://huggingface.co/docs/inference-endpoints/index
+INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
+
+
 REPO_ID_SEPARATOR = "--"
 # ^ this substring is not allowed in repo_ids on hf.co
 # and is the canonical one we use for serialization of repo ids elsewhere.
@@ -82,9 +92,14 @@ hf_cache_home = os.path.expanduser(
 default_cache_path = os.path.join(hf_cache_home, "hub")
 default_assets_cache_path = os.path.join(hf_cache_home, "assets")
 
+# Legacy env variables
 HUGGINGFACE_HUB_CACHE = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
 HUGGINGFACE_ASSETS_CACHE = os.getenv("HUGGINGFACE_ASSETS_CACHE", default_assets_cache_path)
 
+# New env variables
+HF_HUB_CACHE = os.getenv("HF_HUB_CACHE", HUGGINGFACE_HUB_CACHE)
+HF_ASSETS_CACHE = os.getenv("HF_ASSETS_CACHE", HUGGINGFACE_ASSETS_CACHE)
+
 HF_HUB_OFFLINE = _is_true(os.environ.get("HF_HUB_OFFLINE") or os.environ.get("TRANSFORMERS_OFFLINE"))
 
 # Opt-out from telemetry requests
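`HF_HUB_CACHE` is resolved after `HUGGINGFACE_HUB_CACHE`, so the precedence is: new variable if set, else legacy variable if set, else the default path. A quick way to see the chain (paths illustrative):

```python
import os

default_cache_path = os.path.join(os.path.expanduser("~"), ".cache", "huggingface", "hub")

os.environ["HUGGINGFACE_HUB_CACHE"] = "/tmp/legacy-cache"  # legacy variable set
# HF_HUB_CACHE is unset, so it falls back to the legacy value:
legacy = os.getenv("HUGGINGFACE_HUB_CACHE", default_cache_path)
print(os.getenv("HF_HUB_CACHE", legacy))  # /tmp/legacy-cache

os.environ["HF_HUB_CACHE"] = "/tmp/new-cache"  # new variable wins when set
print(os.getenv("HF_HUB_CACHE", legacy))  # /tmp/new-cache
```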
@@ -97,6 +112,12 @@ _OLD_HF_TOKEN_PATH = os.path.expanduser("~/.huggingface/token")
 HF_TOKEN_PATH = os.path.join(hf_cache_home, "token")
 
 
+if _staging_mode:
+    # In staging mode, we use a different cache to ensure we don't mix up production and staging data or tokens
+    _staging_home = os.path.join(os.path.expanduser("~"), ".cache", "huggingface_staging")
+    HUGGINGFACE_HUB_CACHE = os.path.join(_staging_home, "hub")
+    HF_TOKEN_PATH = os.path.join(_staging_home, "token")
+
 # Here, `True` will disable progress bars globally without possibility of enabling it
 # programmatically. `False` will enable them without possibility of disabling them.
 # If environment variable is not set (None), then the user is free to enable/disable
@@ -130,6 +151,12 @@ HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD: int = (
     _as_int(os.environ.get("HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD")) or 5 * 1024 * 1024
 )
 
+# Used to override the etag timeout on a system level
+HF_HUB_ETAG_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
+
+# Used to override the get request timeout on a system level
+HF_HUB_DOWNLOAD_TIMEOUT: int = _as_int(os.environ.get("HF_HUB_DOWNLOAD_TIMEOUT")) or DEFAULT_DOWNLOAD_TIMEOUT
+
 # List frameworks that are handled by the InferenceAPI service. Useful to scan endpoints and check which models are
 # deployed and running. Since 95% of the models are using the top 4 frameworks listed below, we scan only those by
 # default. We still keep the full list of supported frameworks in case we want to scan all of them.
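Both overrides follow the same `_as_int(os.environ.get(...)) or DEFAULT` pattern already used for `HF_HUB_LOCAL_DIR_AUTO_SYMLINK_THRESHOLD` above. A sketch of how it behaves, with a simplified stand-in for the library's `_as_int` helper:

```python
import os
from typing import Optional


def _as_int(value: Optional[str]) -> Optional[int]:
    # Simplified stand-in: None stays None, anything else is parsed as int.
    return int(value) if value is not None else None


DEFAULT_ETAG_TIMEOUT = 10

# Unset variable -> _as_int returns None -> `or` picks the default.
# Note the edge case: an explicit "0" is falsy too and also yields 10.
HF_HUB_ETAG_TIMEOUT = _as_int(os.environ.get("HF_HUB_ETAG_TIMEOUT")) or DEFAULT_ETAG_TIMEOUT
print(HF_HUB_ETAG_TIMEOUT)  # 10
```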
--- a/huggingface_hub/file_download.py
+++ b/huggingface_hub/file_download.py
@@ -1,5 +1,6 @@
 import copy
 import fnmatch
+import inspect
 import io
 import json
 import os
@@ -7,33 +8,39 @@ import re
 import shutil
 import stat
 import tempfile
+import time
 import uuid
 import warnings
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from hashlib import sha256
 from pathlib import Path
 from typing import Any, BinaryIO, Dict, Generator, Literal, Optional, Tuple, Union
 from urllib.parse import quote, urlparse
 
 import requests
 from filelock import FileLock
-from requests.exceptions import ProxyError, Timeout
 
 from huggingface_hub import constants
 
 from . import __version__  # noqa: F401 # for backward compatibility
 from .constants import (
+    DEFAULT_ETAG_TIMEOUT,
+    DEFAULT_REQUEST_TIMEOUT,
     DEFAULT_REVISION,
+    DOWNLOAD_CHUNK_SIZE,
     ENDPOINT,
+    HF_HUB_CACHE,
     HF_HUB_DISABLE_SYMLINKS_WARNING,
+    HF_HUB_DOWNLOAD_TIMEOUT,
     HF_HUB_ENABLE_HF_TRANSFER,
+    HF_HUB_ETAG_TIMEOUT,
+    HF_TRANSFER_CONCURRENCY,
     HUGGINGFACE_CO_URL_TEMPLATE,
    HUGGINGFACE_HEADER_X_LINKED_ETAG,
     HUGGINGFACE_HEADER_X_LINKED_SIZE,
     HUGGINGFACE_HEADER_X_REPO_COMMIT,
-    HUGGINGFACE_HUB_CACHE,
+    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
     REPO_ID_SEPARATOR,
     REPO_TYPES,
     REPO_TYPES_URL_PREFIXES,
@@ -52,10 +59,10 @@ from .utils import (
     get_graphviz_version,  # noqa: F401 # for backward compatibility
     get_jinja_version,  # noqa: F401 # for backward compatibility
     get_pydot_version,  # noqa: F401 # for backward compatibility
+    get_session,
     get_tf_version,  # noqa: F401 # for backward compatibility
     get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
-    http_backoff,
     is_fastai_available,  # noqa: F401 # for backward compatibility
     is_fastcore_available,  # noqa: F401 # for backward compatibility
     is_graphviz_available,  # noqa: F401 # for backward compatibility
@@ -64,12 +71,14 @@ from .utils import (
     is_tf_available,  # noqa: F401 # for backward compatibility
     is_torch_available,  # noqa: F401 # for backward compatibility
     logging,
+    reset_sessions,
     tqdm,
     validate_hf_hub_args,
 )
 from .utils._headers import _http_user_agent
 from .utils._runtime import _PY_VERSION  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
+from .utils.insecure_hashlib import sha256
 
 
 logger = logging.get_logger(__name__)
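`sha256` now comes from the new `utils/insecure_hashlib.py` module (item 34 in the file list). Its body is not shown in this diff; the usual shape of such a shim is to construct hashes with `usedforsecurity=False` so that non-cryptographic uses (etags, cache keys) keep working on FIPS-restricted builds. A hedged sketch of that pattern:

```python
# Sketch only: the actual contents of huggingface_hub/utils/insecure_hashlib.py
# are not part of this diff.
import functools
import hashlib
import sys

if sys.version_info >= (3, 9):
    # Python 3.9+ hash constructors accept usedforsecurity=False,
    # which keeps them usable on FIPS-enabled interpreters.
    sha256 = functools.partial(hashlib.new, "sha256", usedforsecurity=False)
else:
    sha256 = functools.partial(hashlib.new, "sha256")
```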
@@ -95,7 +104,7 @@ def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
     """
     # Defaults to HF cache
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     cache_dir = str(Path(cache_dir).expanduser().resolve())  # make it unique
 
     # Check symlink compatibility only once (per cache directory) at first time use
@@ -200,9 +209,6 @@ def hf_hub_url(
         revision (`str`, *optional*):
             An optional Git revision id which can be a branch name, a tag, or a
             commit hash.
-        endpoint (`str`, *optional*):
-            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
-            environment variable.
 
     Example:
 
@@ -319,7 +325,7 @@ def filename_to_url(
     )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
 
@@ -367,46 +373,24 @@ def _raise_if_offline_mode_is_enabled(msg: Optional[str] = None):
 
 
 def _request_wrapper(
-    method: HTTP_METHOD_T,
-    url: str,
-    *,
-    max_retries: int = 0,
-    base_wait_time: float = 0.5,
-    max_wait_time: float = 2,
-    timeout: Optional[float] = 10.0,
-    follow_relative_redirects: bool = False,
-    **params,
+    method: HTTP_METHOD_T, url: str, *, follow_relative_redirects: bool = False, **params
 ) -> requests.Response:
     """Wrapper around requests methods to add several features.
 
     What it does:
-    1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1).
-       If enabled, a `OfflineModeIsEnabled` exception is raised.
-    2. Follow relative redirections if `follow_relative_redirects=True` even when
-       `allow_redirection` kwarg is set to False.
-    3. Retry in case request fails with a `Timeout` or `ProxyError`, with exponential backoff.
+    1. Ensure offline mode is disabled (env variable `HF_HUB_OFFLINE` not set to 1). If enabled, a
+       `OfflineModeIsEnabled` exception is raised.
+    2. Follow relative redirects if `follow_relative_redirects=True` even when `allow_redirection=False`.
 
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
         url (`str`):
             The URL of the resource to fetch.
-        max_retries (`int`, *optional*, defaults to `0`):
-            Maximum number of retries, defaults to 0 (no retries).
-        base_wait_time (`float`, *optional*, defaults to `0.5`):
-            Duration (in seconds) to wait before retrying the first time.
-            Wait time between retries then grows exponentially, capped by
-            `max_wait_time`.
-        max_wait_time (`float`, *optional*, defaults to `2`):
-            Maximum amount of time between two retries, in seconds.
-        timeout (`float`, *optional*, defaults to `10`):
-            How many seconds to wait for the server to send data before
-            giving up which is passed to `requests.request`.
         follow_relative_redirects (`bool`, *optional*, defaults to `False`)
-            If True, relative redirection (redirection to the same site) will be
-            resolved even when `allow_redirection` kwarg is set to False. Useful when we
-            want to follow a redirection to a renamed repository without following
-            redirection to a CDN.
+            If True, relative redirection (redirection to the same site) will be resolved even when `allow_redirection`
+            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
+            following redirection to a CDN.
         **params (`dict`, *optional*):
             Params to pass to `requests.request`.
     """
@@ -418,10 +402,6 @@ def _request_wrapper(
         response = _request_wrapper(
             method=method,
             url=url,
-            max_retries=max_retries,
-            base_wait_time=base_wait_time,
-            max_wait_time=max_wait_time,
-            timeout=timeout,
             follow_relative_redirects=False,
             **params,
         )
@@ -437,38 +417,14 @@ def _request_wrapper(
             #
             # Highly inspired by `resolve_redirects` from requests library.
            # See https://github.com/psf/requests/blob/main/requests/sessions.py#L159
-            return _request_wrapper(
-                method=method,
-                url=urlparse(url)._replace(path=parsed_target.path).geturl(),
-                max_retries=max_retries,
-                base_wait_time=base_wait_time,
-                max_wait_time=max_wait_time,
-                timeout=timeout,
-                follow_relative_redirects=True,  # resolve recursively
-                **params,
-            )
+            next_url = urlparse(url)._replace(path=parsed_target.path).geturl()
+            return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
        return response
 
-    # 3. Exponential backoff
-    return http_backoff(
-        method=method,
-        url=url,
-        max_retries=max_retries,
-        base_wait_time=base_wait_time,
-        max_wait_time=max_wait_time,
-        retry_on_exceptions=(Timeout, ProxyError),
-        retry_on_status_codes=(),
-        timeout=timeout,
-        **params,
-    )
-
-
-def _request_with_retry(*args, **kwargs) -> requests.Response:
-    """Deprecated method. Please use `_request_wrapper` instead.
-
-    Alias to keep backward compatibility (used in Transformers).
-    """
-    return _request_wrapper(*args, **kwargs)
+    # Perform request and return if status_code is not in the retry list.
+    response = get_session().request(method=method, url=url, **params)
+    hf_raise_for_status(response)
+    return response
 
 
 def http_get(
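With retries gone, `_request_wrapper` reduces to the offline check, one recursion level for relative redirects, and a plain `get_session().request(...)` call; transport-level retrying now lives elsewhere. A self-contained sketch of the relative-redirect recursion, using a hypothetical `fetch` helper rather than the library's function:

```python
from urllib.parse import urlparse

import requests


def fetch(method: str, url: str, follow_relative_redirects: bool = False) -> requests.Response:
    # Hypothetical stand-in for _request_wrapper.
    if follow_relative_redirects:
        response = fetch(method, url, follow_relative_redirects=False)
        location = response.headers.get("Location", "")
        if 300 <= response.status_code < 400 and location and not urlparse(location).netloc:
            # Relative redirect (no host): same site, e.g. a renamed repo.
            # Resolve it ourselves instead of following an absolute CDN redirect.
            next_url = urlparse(url)._replace(path=location).geturl()
            return fetch(method, next_url, follow_relative_redirects=True)
        return response
    # Base case: a single request; redirects are left to the caller.
    return requests.request(method, url, allow_redirects=False)
```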
@@ -478,49 +434,39 @@ def http_get(
     proxies=None,
     resume_size: float = 0,
     headers: Optional[Dict[str, str]] = None,
-    timeout: Optional[float] = 10.0,
-    max_retries: int = 0,
     expected_size: Optional[int] = None,
+    _nb_retries: int = 5,
 ):
     """
     Download a remote file. Do not gobble up errors, and will return errors tailored to the Hugging Face Hub.
+
+    If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely a
+    transient error (network outage?). We log a warning message and try to resume the download a few times before
+    giving up. The method gives up after 5 attempts if no new data has been received from the server.
     """
-    if not resume_size:
-        if HF_HUB_ENABLE_HF_TRANSFER:
+    hf_transfer = None
+    if HF_HUB_ENABLE_HF_TRANSFER:
+        if resume_size != 0:
+            warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
+        elif proxies is not None:
+            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        else:
             try:
-                # Download file using an external Rust-based package. Download is faster
-                # (~2x speed-up) but support less features (no progress bars).
-                from hf_transfer import download
-
-                logger.debug(f"Download {url} using HF_TRANSFER.")
-                max_files = 100
-                chunk_size = 10 * 1024 * 1024  # 10 MB
-                download(url, temp_file.name, max_files, chunk_size, headers=headers)
-                return
+                import hf_transfer  # type: ignore[no-redef]
             except ImportError:
                 raise ValueError(
                     "Fast download using 'hf_transfer' is enabled"
                     " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
                     " available in your environment. Try `pip install hf_transfer`."
                 )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
 
+    initial_headers = headers
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = "bytes=%d-" % (resume_size,)
 
     r = _request_wrapper(
-        method="GET",
-        url=url,
-        stream=True,
-        proxies=proxies,
-        headers=headers,
-        timeout=timeout,
-        max_retries=max_retries,
+        method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=HF_HUB_DOWNLOAD_TIMEOUT
     )
     hf_raise_for_status(r)
     content_length = r.headers.get("Content-Length")
@@ -541,28 +487,90 @@ def http_get(
     if len(displayed_name) > 40:
         displayed_name = f"(…){displayed_name[-40:]}"
 
-    progress = tqdm(
+    consistency_error_message = (
+        f"Consistency check failed: file should be of size {expected_size} but has size"
+        f" {{actual_size}} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
+        " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
+        " know by opening an issue on https://github.com/huggingface/huggingface_hub."
+    )
+
+    # Stream file to buffer
+    with tqdm(
         unit="B",
         unit_scale=True,
         total=total,
         initial=resume_size,
         desc=displayed_name,
         disable=bool(logger.getEffectiveLevel() == logging.NOTSET),
-    )
-    for chunk in r.iter_content(chunk_size=10 * 1024 * 1024):
-        if chunk:  # filter out keep-alive new chunks
-            progress.update(len(chunk))
-            temp_file.write(chunk)
-
-    if expected_size is not None and expected_size != temp_file.tell():
-        raise EnvironmentError(
-            f"Consistency check failed: file should be of size {expected_size} but has size"
-            f" {temp_file.tell()} ({displayed_name}).\nWe are sorry for the inconvenience. Please retry download and"
-            " pass `force_download=True, resume_download=False` as argument.\nIf the issue persists, please let us"
-            " know by opening an issue on https://github.com/huggingface/huggingface_hub."
-        )
+    ) as progress:
+        if hf_transfer and total is not None and total > 5 * DOWNLOAD_CHUNK_SIZE:
+            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
+            if not supports_callback:
+                warnings.warn(
+                    "You are using an outdated version of `hf_transfer`. "
+                    "Consider upgrading to latest version to enable progress bars "
+                    "using `pip install -U hf_transfer`."
+                )
+            try:
+                hf_transfer.download(
+                    url=url,
+                    filename=temp_file.name,
+                    max_files=HF_TRANSFER_CONCURRENCY,
+                    chunk_size=DOWNLOAD_CHUNK_SIZE,
+                    headers=headers,
+                    parallel_failures=3,
+                    max_retries=5,
+                    **({"callback": progress.update} if supports_callback else {}),
+                )
+            except Exception as e:
+                raise RuntimeError(
+                    "An error occurred while downloading using `hf_transfer`. Consider"
+                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
+                ) from e
+            if not supports_callback:
+                progress.update(total)
+            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
+                raise EnvironmentError(
+                    consistency_error_message.format(
+                        actual_size=os.path.getsize(temp_file.name),
+                    )
+                )
+            return
+        new_resume_size = resume_size
+        try:
+            for chunk in r.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
+                if chunk:  # filter out keep-alive new chunks
+                    progress.update(len(chunk))
+                    temp_file.write(chunk)
+                    new_resume_size += len(chunk)
+                    # Some data has been downloaded from the server so we reset the number of retries.
+                    _nb_retries = 5
+        except (requests.ConnectionError, requests.ReadTimeout) as e:
+            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most
+            # likely a transient error (network outage?). We log a warning message and try to resume the download a
+            # few times before giving up. The retry mechanism is basic but should be enough in most cases.
+            if _nb_retries <= 0:
+                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                raise
+            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+            time.sleep(1)
+            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
+            return http_get(
+                url=url,
+                temp_file=temp_file,
+                proxies=proxies,
+                resume_size=new_resume_size,
+                headers=initial_headers,
+                expected_size=expected_size,
+                _nb_retries=_nb_retries - 1,
+            )
 
-    progress.close()
+    if expected_size is not None and expected_size != temp_file.tell():
+        raise EnvironmentError(
+            consistency_error_message.format(
+                actual_size=temp_file.tell(),
            )
+        )
 
 
 @validate_hf_hub_args
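The rewritten streaming loop resumes instead of restarting: every received chunk advances `new_resume_size` and refills the retry budget, and on a transient `ConnectionError`/`ReadTimeout` the function calls itself with the advanced offset so the `Range` header skips bytes already on disk. A self-contained sketch of the same pattern (the `download` helper is illustrative, not the library's `http_get`):

```python
import time
from typing import BinaryIO

import requests


def download(url: str, out: BinaryIO, resume_size: int = 0, retries_left: int = 5) -> None:
    headers = {"Range": f"bytes={resume_size}-"} if resume_size > 0 else {}
    r = requests.get(url, headers=headers, stream=True, timeout=10)
    r.raise_for_status()
    try:
        for chunk in r.iter_content(chunk_size=10 * 1024 * 1024):
            if chunk:
                out.write(chunk)
                resume_size += len(chunk)
                retries_left = 5  # progress was made: reset the budget
    except (requests.ConnectionError, requests.ReadTimeout):
        if retries_left <= 0:
            raise
        time.sleep(1)
        # Recurse with the new offset: the Range header resumes the stream.
        download(url, out, resume_size=resume_size, retries_left=retries_left - 1)
```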
@@ -576,7 +584,7 @@ def cached_download(
     force_download: bool = False,
     force_filename: Optional[str] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float = 10,
+    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
     resume_download: bool = False,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
@@ -656,6 +664,10 @@ def cached_download(
 
     </Tip>
     """
+    if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
+        # Respect environment variable above user value
+        etag_timeout = HF_HUB_ETAG_TIMEOUT
+
     if not legacy_cache_layout:
         warnings.warn(
             "'cached_download' is the legacy way to download files from the HF hub, please consider upgrading to"
@@ -664,7 +676,7 @@ def cached_download(
     )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
 
@@ -995,7 +1007,6 @@ def hf_hub_download(
     subfolder: Optional[str] = None,
     repo_type: Optional[str] = None,
     revision: Optional[str] = None,
-    endpoint: Optional[str] = None,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
@@ -1005,11 +1016,12 @@ def hf_hub_download(
     force_download: bool = False,
     force_filename: Optional[str] = None,
     proxies: Optional[Dict] = None,
-    etag_timeout: float = 10,
+    etag_timeout: float = DEFAULT_ETAG_TIMEOUT,
     resume_download: bool = False,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
     legacy_cache_layout: bool = False,
+    endpoint: Optional[str] = None,
 ) -> str:
     """Download a given file if it's not already present in the local cache.
 
@@ -1069,9 +1081,6 @@ def hf_hub_download(
         revision (`str`, *optional*):
             An optional Git revision id which can be a branch name, a tag, or a
             commit hash.
-        endpoint (`str`, *optional*):
-            Hugging Face Hub base url. Will default to https://huggingface.co/. Otherwise, one can set the `HF_ENDPOINT`
-            environment variable.
         library_name (`str`, *optional*):
             The name of the library to which the object corresponds.
         library_version (`str`, *optional*):
@@ -1138,6 +1147,10 @@ def hf_hub_download(
 
     </Tip>
     """
+    if HF_HUB_ETAG_TIMEOUT != DEFAULT_ETAG_TIMEOUT:
+        # Respect environment variable above user value
+        etag_timeout = HF_HUB_ETAG_TIMEOUT
+
     if force_filename is not None:
         warnings.warn(
             "The `force_filename` parameter is deprecated as a new caching system, "
@@ -1173,13 +1186,14 @@ def hf_hub_download(
     )
 
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
     if revision is None:
         revision = DEFAULT_REVISION
     if isinstance(cache_dir, Path):
         cache_dir = str(cache_dir)
     if isinstance(local_dir, Path):
         local_dir = str(local_dir)
+    locks_dir = os.path.join(cache_dir, ".locks")
 
     if subfolder == "":
         subfolder = None
@@ -1393,7 +1407,8 @@ def hf_hub_download(
         return pointer_path
 
     # Prevent parallel downloads of the same file with a lock.
-    lock_path = blob_path + ".lock"
+    # etag could be duplicated across repos,
+    lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
 
     # Some Windows versions do not allow for paths longer than 255 characters.
     # In this case, we must specify it is an extended path by using the "\\?\" prefix.
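Lock files move out of the blob directory into a per-repo `.locks/` tree keyed by etag; the per-repo scoping matters because the same etag can occur in several repos. A sketch of the resulting layout (repo name and etag are made up):

```python
import os

cache_dir = os.path.expanduser("~/.cache/huggingface/hub")
locks_dir = os.path.join(cache_dir, ".locks")

# e.g. ~/.cache/huggingface/hub/.locks/models--gpt2/<etag>.lock
lock_path = os.path.join(locks_dir, "models--gpt2", "0123abcd.lock")

# The parent directory must exist before FileLock can create the file,
# hence the mkdir(parents=True, exist_ok=True) added in the next hunk.
print(lock_path)
```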
@@ -1403,6 +1418,7 @@ def hf_hub_download(
     if os.name == "nt" and len(os.path.abspath(blob_path)) > 255:
         blob_path = "\\\\?\\" + os.path.abspath(blob_path)
 
+    Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
     with FileLock(lock_path):
         # If the download just completed while the lock was activated.
         if os.path.exists(pointer_path) and not force_download:
@@ -1477,11 +1493,6 @@ def hf_hub_download(
         _chmod_and_replace(temp_file.name, local_dir_filepath)
         pointer_path = local_dir_filepath  # for return value
 
-    try:
-        os.remove(lock_path)
-    except OSError:
-        pass
-
     return pointer_path
 
 
@@ -1542,7 +1553,7 @@ def try_to_load_from_cache(
     if repo_type not in REPO_TYPES:
         raise ValueError(f"Invalid repo type: {repo_type}. Accepted repo types are: {str(REPO_TYPES)}")
     if cache_dir is None:
-        cache_dir = HUGGINGFACE_HUB_CACHE
+        cache_dir = HF_HUB_CACHE
 
     object_id = repo_id.replace("/", "--")
     repo_cache = os.path.join(cache_dir, f"{repo_type}s--{object_id}")
@@ -1583,7 +1594,7 @@ def get_hf_file_metadata(
     url: str,
     token: Union[bool, str, None] = None,
     proxies: Optional[Dict] = None,
-    timeout: Optional[float] = 10.0,
+    timeout: Optional[float] = DEFAULT_REQUEST_TIMEOUT,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
 
@@ -1624,12 +1635,9 @@ def get_hf_file_metadata(
     # Return
     return HfFileMetadata(
         commit_hash=r.headers.get(HUGGINGFACE_HEADER_X_REPO_COMMIT),
-        etag=_normalize_etag(
-            # We favor a custom header indicating the etag of the linked resource, and
-            # we fallback to the regular etag header.
-            r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG)
-            or r.headers.get("ETag")
-        ),
+        # We favor a custom header indicating the etag of the linked resource, and
+        # we fallback to the regular etag header.
+        etag=_normalize_etag(r.headers.get(HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
         # Either from response headers (if redirected) or defaults to request url
         # Do not use directly `url`, as `_request_wrapper` might have followed relative
         # redirects.
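Behavior is unchanged here, only reflowed: the Hub's custom `X-Linked-Etag` header (for LFS-backed files) still wins over the standard `ETag`. A minimal sketch of the fallback, with an illustrative normalizer standing in for the library's `_normalize_etag`:

```python
from typing import Dict, Optional


def normalize_etag(etag: Optional[str]) -> Optional[str]:
    # Illustrative: drop the weak-validator prefix and surrounding quotes.
    if etag is None:
        return None
    return etag.removeprefix("W/").strip('"')


def resolve_etag(headers: Dict[str, str]) -> Optional[str]:
    # Prefer the custom header for the linked resource, fall back to ETag.
    return normalize_etag(headers.get("X-Linked-Etag") or headers.get("ETag"))


print(resolve_etag({"X-Linked-Etag": 'W/"abc123"', "ETag": '"def456"'}))  # abc123
```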