huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +160 -46
- huggingface_hub/_commit_api.py +277 -71
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +33 -22
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +241 -81
- huggingface_hub/_space_api.py +18 -10
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +196 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +15 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +83 -59
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +99 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +606 -346
- huggingface_hub/hf_api.py +2445 -1132
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +61 -66
- huggingface_hub/inference/_client.py +501 -630
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +536 -722
- huggingface_hub/inference/_generated/types/__init__.py +6 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
- huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +11 -11
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +149 -20
- huggingface_hub/inference/_providers/_common.py +160 -37
- huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
- huggingface_hub/inference/_providers/cerebras.py +6 -0
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +32 -0
- huggingface_hub/inference/_providers/fal_ai.py +231 -22
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +143 -33
- huggingface_hub/inference/_providers/hyperbolic.py +9 -5
- huggingface_hub/inference/_providers/nebius.py +47 -5
- huggingface_hub/inference/_providers/novita.py +48 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +25 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +46 -9
- huggingface_hub/inference/_providers/sambanova.py +37 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +34 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +79 -59
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +27 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +399 -237
- huggingface_hub/utils/_pagination.py +6 -6
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +74 -22
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +13 -11
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +235 -0
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +33 -4
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -428
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -299
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
- huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/file_download.py
CHANGED
@@ -1,7 +1,5 @@
-import contextlib
 import copy
 import errno
-import inspect
 import os
 import re
 import shutil
@@ -11,26 +9,20 @@ import uuid
 import warnings
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Any, BinaryIO,
+from typing import Any, BinaryIO, Literal, NoReturn, Optional, Union, overload
 from urllib.parse import quote, urlparse
 
-import
+import httpx
+from tqdm.auto import tqdm as base_tqdm
 
-from . import
-    __version__,  # noqa: F401 # for backward compatibility
-    constants,
-)
+from . import constants
 from ._local_folder import get_local_download_paths, read_download_metadata, write_download_metadata
-from .constants import (
-    HUGGINGFACE_CO_URL_TEMPLATE,  # noqa: F401 # for backward compatibility
-    HUGGINGFACE_HUB_CACHE,  # noqa: F401 # for backward compatibility
-)
 from .errors import (
-    EntryNotFoundError,
     FileMetadataError,
     GatedRepoError,
     HfHubHTTPError,
     LocalEntryNotFoundError,
+    RemoteEntryNotFoundError,
     RepositoryNotFoundError,
     RevisionNotFoundError,
 )
@@ -38,33 +30,20 @@ from .utils import (
     OfflineModeIsEnabled,
     SoftTemporaryDirectory,
     WeakFileLock,
+    XetFileData,
     build_hf_headers,
-    get_fastai_version,  # noqa: F401 # for backward compatibility
-    get_fastcore_version,  # noqa: F401 # for backward compatibility
-    get_graphviz_version,  # noqa: F401 # for backward compatibility
-    get_jinja_version,  # noqa: F401 # for backward compatibility
-    get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_session,
-    get_tf_version,  # noqa: F401 # for backward compatibility
-    get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
-    is_fastai_available,  # noqa: F401 # for backward compatibility
-    is_fastcore_available,  # noqa: F401 # for backward compatibility
-    is_graphviz_available,  # noqa: F401 # for backward compatibility
-    is_jinja_available,  # noqa: F401 # for backward compatibility
-    is_pydot_available,  # noqa: F401 # for backward compatibility
-    is_tf_available,  # noqa: F401 # for backward compatibility
-    is_torch_available,  # noqa: F401 # for backward compatibility
     logging,
-
+    parse_xet_file_data_from_response,
+    refresh_xet_connection_info,
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header
-from .utils._runtime import
+from .utils._http import _adjust_range_header, http_backoff, http_stream_backoff
+from .utils._runtime import is_xet_available
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
-from .utils.tqdm import
+from .utils.tqdm import _get_progress_bar_context
 
 
 logger = logging.get_logger(__name__)
@@ -82,7 +61,7 @@ REGEX_COMMIT_HASH = re.compile(r"^[0-9a-f]{40}$")
 # Regex to check if the file etag IS a valid sha256
 REGEX_SHA256 = re.compile(r"^[0-9a-f]{64}$")
 
-_are_symlinks_supported_in_dir:
+_are_symlinks_supported_in_dir: dict[str, bool] = {}
 
 
 def are_symlinks_supported(cache_dir: Union[str, Path, None] = None) -> bool:
@@ -160,12 +139,43 @@ class HfFileMetadata:
         size (`size`):
             Size of the file. In case of an LFS file, contains the size of the actual
             LFS file, not the pointer.
+        xet_file_data (`XetFileData`, *optional*):
+            Xet information for the file. This is only set if the file is stored using Xet storage.
     """
 
     commit_hash: Optional[str]
     etag: Optional[str]
     location: str
     size: Optional[int]
+    xet_file_data: Optional[XetFileData]
+
+
+@dataclass
+class DryRunFileInfo:
+    """Information returned when performing a dry run of a file download.
+
+    Returned by [`hf_hub_download`] when `dry_run=True`.
+
+    Args:
+        commit_hash (`str`):
+            The commit_hash related to the file.
+        file_size (`int`):
+            Size of the file. In case of an LFS file, contains the size of the actual LFS file, not the pointer.
+        filename (`str`):
+            Name of the file in the repo.
+        is_cached (`bool`):
+            Whether the file is already cached locally.
+        will_download (`bool`):
+            Whether the file will be downloaded if `hf_hub_download` is called with `dry_run=False`.
+            In practice, will_download is `True` if the file is not cached or if `force_download=True`.
+    """
+
+    commit_hash: str
+    file_size: int
+    filename: str
+    local_path: str
+    is_cached: bool
+    will_download: bool
 
 
 @validate_hf_hub_args
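The `DryRunFileInfo` dataclass above backs the new `dry_run` flag that this diff adds to `hf_hub_download` further down. A minimal usage sketch; the repo and filename are placeholder values, not taken from this diff:

```python
# Sketch only: assumes huggingface_hub 1.1.3 is installed; "gpt2"/"config.json" are placeholders.
from huggingface_hub import hf_hub_download

info = hf_hub_download("gpt2", "config.json", dry_run=True)
# Nothing is downloaded yet; `info` is a DryRunFileInfo describing what *would* happen.
print(info.filename, info.file_size, info.is_cached, info.will_download)

if info.will_download:
    # The same call without dry_run performs the download and returns the local path.
    local_path = hf_hub_download("gpt2", "config.json")
```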
@@ -210,26 +220,23 @@ def hf_hub_url(
     'https://huggingface.co/julien-c/EsperBERTo-small/resolve/main/pytorch_model.bin'
     ```
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            if stored in git, or its sha256 if stored in git-lfs.
-
-    </Tip>
+    > [!TIP]
+    > Notes:
+    >
+    > Cloudfront is replicated over the globe so downloads are way faster for
+    > the end user (and it also lowers our bandwidth costs).
+    >
+    > Cloudfront aggressively caches files by default (default TTL is 24
+    > hours), however this is not an issue here because we implement a
+    > git-based versioning system on huggingface.co, which means that we store
+    > the files on S3/Cloudfront in a content-addressable way (i.e., the file
+    > name is its hash). Using content-addressable filenames means cache can't
+    > ever be stale.
+    >
+    > In terms of client-side caching from this library, we base our caching
+    > on the objects' entity tag (`ETag`), which is an identifier of a
+    > specific version of a resource [1]_. An object's ETag is: its git-sha1
+    > if stored in git, or its sha256 if stored in git-lfs.
 
     References:
 
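The tip above describes the ETag-based client-side caching model (git-sha1 for regular files, sha256 for LFS files). As an illustrative sketch of that idea only, independent of the library's internals, a cached copy can be validated against the server's ETag with a plain HEAD request:

```python
# Illustrative sketch, not huggingface_hub code; assumes `httpx` is installed
# and `url`/`cached_etag` are values the caller already has.
import httpx

def is_cache_stale(url: str, cached_etag: str) -> bool:
    """Return True when the server advertises a different ETag than the one cached locally."""
    response = httpx.head(url, follow_redirects=True)
    response.raise_for_status()
    remote_etag = response.headers.get("ETag", "").removeprefix("W/").strip('"')
    return remote_etag != cached_etag.strip('"')
```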
@@ -248,7 +255,7 @@ def hf_hub_url(
 
     if revision is None:
         revision = constants.DEFAULT_REVISION
-    url = HUGGINGFACE_CO_URL_TEMPLATE.format(
+    url = constants.HUGGINGFACE_CO_URL_TEMPLATE.format(
         repo_id=repo_id, revision=quote(revision, safe=""), filename=quote(filename)
     )
     # Update endpoint if provided
@@ -257,63 +264,92 @@ def hf_hub_url(
     return url
 
 
-def
-
-
-
-
+def _httpx_follow_relative_redirects(method: HTTP_METHOD_T, url: str, **httpx_kwargs) -> httpx.Response:
+    """Perform an HTTP request with backoff and follow relative redirects only.
+
+    This is useful to follow a redirection to a renamed repository without following redirection to a CDN.
+
+    A backoff mechanism retries the HTTP call on 5xx errors and network errors.
 
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
         url (`str`):
             The URL of the resource to fetch.
-
-
-            kwarg is set to False. Useful when we want to follow a redirection to a renamed repository without
-            following redirection to a CDN.
-        **params (`dict`, *optional*):
-            Params to pass to `requests.request`.
+        **httpx_kwargs (`dict`, *optional*):
+            Params to pass to `httpx.request`.
     """
-
-
-    response =
+    while True:
+        # Make the request
+        response = http_backoff(
             method=method,
             url=url,
-
-
+            **httpx_kwargs,
+            follow_redirects=False,
+            retry_on_exceptions=(),
+            retry_on_status_codes=(429,),
         )
+        hf_raise_for_status(response)
 
-        #
-        # This is useful in case of a renamed repository.
+        # Check if response is a relative redirect
         if 300 <= response.status_code <= 399:
             parsed_target = urlparse(response.headers["Location"])
             if parsed_target.netloc == "":
-                #
-
-
-
-
-
-
-                return _request_wrapper(method=method, url=next_url, follow_relative_redirects=True, **params)
-        return response
-
-    # Perform request and return if status_code is not in the retry list.
-    response = get_session().request(method=method, url=url, **params)
-    hf_raise_for_status(response)
+                # Relative redirect -> update URL and retry
+                url = urlparse(url)._replace(path=parsed_target.path).geturl()
+                continue
+
+        # Break if no relative redirect
+        break
+
     return response
 
 
+def _get_file_length_from_http_response(response: httpx.Response) -> Optional[int]:
+    """
+    Get the length of the file from the HTTP response headers.
+
+    This function extracts the file size from the HTTP response headers, either from the
+    `Content-Range` or `Content-Length` header, if available (in that order).
+
+    Args:
+        response (`httpx.Response`):
+            The HTTP response object.
+
+    Returns:
+        `int` or `None`: The length of the file in bytes, or None if not available.
+    """
+
+    # If HTTP response contains compressed body (e.g. gzip), the `Content-Length` header will
+    # contain the length of the compressed body, not the uncompressed file size.
+    # And at the start of transmission there's no way to know the uncompressed file size for gzip,
+    # thus we return None in that case.
+    content_encoding = response.headers.get("Content-Encoding", "identity").lower()
+    if content_encoding != "identity":
+        # gzip/br/deflate/zstd etc
+        return None
+
+    content_range = response.headers.get("Content-Range")
+    if content_range is not None:
+        return int(content_range.rsplit("/")[-1])
+
+    content_length = response.headers.get("Content-Length")
+    if content_length is not None:
+        return int(content_length)
+
+    return None
+
+
+@validate_hf_hub_args
 def http_get(
     url: str,
     temp_file: BinaryIO,
     *,
-    proxies: Optional[Dict] = None,
     resume_size: int = 0,
-    headers: Optional[
+    headers: Optional[dict[str, Any]] = None,
     expected_size: Optional[int] = None,
     displayed_filename: Optional[str] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
     _nb_retries: int = 5,
     _tqdm_bar: Optional[tqdm] = None,
 ) -> None:
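The new `_get_file_length_from_http_response` helper above prefers `Content-Range` over `Content-Length` and gives up when the body is compressed. The same precedence rule, shown standalone on a plain header dict (a sketch, not the library function):

```python
# Standalone illustration of the header precedence described above.
from typing import Optional

def file_size_from_headers(headers: dict[str, str]) -> Optional[int]:
    if headers.get("Content-Encoding", "identity").lower() != "identity":
        return None  # compressed body: Content-Length is not the file size
    if "Content-Range" in headers:
        # e.g. "bytes 100-1023/1024" -> total size after the final "/"
        return int(headers["Content-Range"].rsplit("/", 1)[-1])
    if "Content-Length" in headers:
        return int(headers["Content-Length"])
    return None

assert file_size_from_headers({"Content-Range": "bytes 0-99/1024"}) == 1024
assert file_size_from_headers({"Content-Length": "512"}) == 512
assert file_size_from_headers({"Content-Encoding": "gzip", "Content-Length": "512"}) is None
```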
@@ -329,8 +365,6 @@ def http_get(
             The URL of the file to download.
         temp_file (`BinaryIO`):
             The file-like object where to save the file.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to `requests.request`.
         resume_size (`int`, *optional*):
             The number of bytes already downloaded. If set to 0 (default), the whole file is download. If set to a
             positive number, the download will resume at the given position.
@@ -347,137 +381,85 @@ def http_get(
         # If the file is already fully downloaded, we don't need to download it again.
         return
 
-    hf_transfer = None
-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        if resume_size != 0:
-            warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
-        elif proxies is not None:
-            warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
-        else:
-            try:
-                import hf_transfer  # type: ignore[no-redef]
-            except ImportError:
-                raise ValueError(
-                    "Fast download using 'hf_transfer' is enabled"
-                    " (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is not"
-                    " available in your environment. Try `pip install hf_transfer`."
-                )
-
     initial_headers = headers
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
+    elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
+        # Any files over 50GB will not be available through basic http requests.
+        raise ValueError(
+            "The file is too large to be downloaded using the regular download method. "
+            " Install `hf_xet` with `pip install hf_xet` for xet-powered downloads."
+        )
 
-
-        method="GET",
-
-
-
-
-
-
-
-
-
-        displayed_filename
-
-
-
-
-
-
-
-
-
-        displayed_filename
-
-
-
-
-
-
-
-
-
-
-        unit="B",
-        unit_scale=True,
+    with http_stream_backoff(
+        method="GET",
+        url=url,
+        headers=headers,
+        timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT,
+        retry_on_exceptions=(),
+        retry_on_status_codes=(429,),
+    ) as response:
+        hf_raise_for_status(response)
+        total: Optional[int] = _get_file_length_from_http_response(response)
+
+        if displayed_filename is None:
+            displayed_filename = url
+            content_disposition = response.headers.get("Content-Disposition")
+            if content_disposition is not None:
+                match = HEADER_FILENAME_PATTERN.search(content_disposition)
+                if match is not None:
+                    # Means file is on CDN
+                    displayed_filename = match.groupdict()["filename"]
+
+        # Truncate filename if too long to display
+        if len(displayed_filename) > 40:
+            displayed_filename = f"(…){displayed_filename[-40:]}"
+
+        consistency_error_message = (
+            f"Consistency check failed: file should be of size {expected_size} but has size"
+            f" {{actual_size}} ({displayed_filename}).\nThis is usually due to network issues while downloading the file."
+            " Please retry with `force_download=True`."
+        )
+        progress_cm = _get_progress_bar_context(
+            desc=displayed_filename,
+            log_level=logger.getEffectiveLevel(),
             total=total,
             initial=resume_size,
-            desc=displayed_filename,
-            disable=is_tqdm_disabled(logger.getEffectiveLevel()),
             name="huggingface_hub.http_get",
+            tqdm_class=tqdm_class,
+            _tqdm_bar=_tqdm_bar,
         )
-        if _tqdm_bar is None
-        else contextlib.nullcontext(_tqdm_bar)
-        # ^ `contextlib.nullcontext` mimics a context manager that does nothing
-        # Makes it easier to use the same code path for both cases but in the later
-        # case, the progress bar is not closed when exiting the context manager.
-    )
 
-
-
-            supports_callback = "callback" in inspect.signature(hf_transfer.download).parameters
-            if not supports_callback:
-                warnings.warn(
-                    "You are using an outdated version of `hf_transfer`. "
-                    "Consider upgrading to latest version to enable progress bars "
-                    "using `pip install -U hf_transfer`."
-                )
+        with progress_cm as progress:
+            new_resume_size = resume_size
             try:
-
+                for chunk in response.iter_bytes(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
+                    if chunk:  # filter out keep-alive new chunks
+                        progress.update(len(chunk))
+                        temp_file.write(chunk)
+                        new_resume_size += len(chunk)
+                        # Some data has been downloaded from the server so we reset the number of retries.
+                        _nb_retries = 5
+            except (httpx.ConnectError, httpx.TimeoutException) as e:
+                # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
+                # a transient error (network outage?). We log a warning message and try to resume the download a few times
+                # before giving up. Tre retry mechanism is basic but should be enough in most cases.
+                if _nb_retries <= 0:
+                    logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
+                    raise
+                logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
+                time.sleep(1)
+                return http_get(
                     url=url,
-
-
-
-
-
-
-
+                    temp_file=temp_file,
+                    resume_size=new_resume_size,
+                    headers=initial_headers,
+                    expected_size=expected_size,
+                    tqdm_class=tqdm_class,
+                    _nb_retries=_nb_retries - 1,
+                    _tqdm_bar=_tqdm_bar,
                 )
-            except Exception as e:
-                raise RuntimeError(
-                    "An error occurred while downloading using `hf_transfer`. Consider"
-                    " disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling."
-                ) from e
-            if not supports_callback:
-                progress.update(total)
-            if expected_size is not None and expected_size != os.path.getsize(temp_file.name):
-                raise EnvironmentError(
-                    consistency_error_message.format(
-                        actual_size=os.path.getsize(temp_file.name),
-                    )
-                )
-            return
-        new_resume_size = resume_size
-        try:
-            for chunk in r.iter_content(chunk_size=constants.DOWNLOAD_CHUNK_SIZE):
-                if chunk:  # filter out keep-alive new chunks
-                    progress.update(len(chunk))
-                    temp_file.write(chunk)
-                    new_resume_size += len(chunk)
-                    # Some data has been downloaded from the server so we reset the number of retries.
-                    _nb_retries = 5
-        except (requests.ConnectionError, requests.ReadTimeout) as e:
-            # If ConnectionError (SSLError) or ReadTimeout happen while streaming data from the server, it is most likely
-            # a transient error (network outage?). We log a warning message and try to resume the download a few times
-            # before giving up. Tre retry mechanism is basic but should be enough in most cases.
-            if _nb_retries <= 0:
-                logger.warning("Error while downloading from %s: %s\nMax retries exceeded.", url, str(e))
-                raise
-            logger.warning("Error while downloading from %s: %s\nTrying to resume download...", url, str(e))
-            time.sleep(1)
-            reset_sessions()  # In case of SSLError it's best to reset the shared requests.Session objects
-            return http_get(
-                url=url,
-                temp_file=temp_file,
-                proxies=proxies,
-                resume_size=new_resume_size,
-                headers=initial_headers,
-                expected_size=expected_size,
-                _nb_retries=_nb_retries - 1,
-                _tqdm_bar=_tqdm_bar,
-            )
 
     if expected_size is not None and expected_size != temp_file.tell():
         raise EnvironmentError(
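The reworked `http_get` above streams the body with httpx and resumes interrupted downloads by re-issuing the request with a `Range` header. A stripped-down sketch of that pattern against plain httpx; `url` and `dest` are placeholders, and the real function adds backoff, progress bars and a final size consistency check:

```python
import os
import httpx

def download_with_resume(url: str, dest: str, chunk_size: int = 1024 * 1024) -> None:
    # Resume from however many bytes are already on disk (a production version
    # should also confirm the server answered 206 Partial Content before appending).
    resume_size = os.path.getsize(dest) if os.path.exists(dest) else 0
    headers = {"Range": f"bytes={resume_size}-"} if resume_size else {}
    with httpx.stream("GET", url, headers=headers, follow_redirects=True) as response:
        response.raise_for_status()
        with open(dest, "ab" if resume_size else "wb") as f:
            for chunk in response.iter_bytes(chunk_size=chunk_size):
                if chunk:
                    f.write(chunk)
```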
@@ -487,6 +469,114 @@ def http_get(
         )
 
 
+def xet_get(
+    *,
+    incomplete_path: Path,
+    xet_file_data: XetFileData,
+    headers: dict[str, str],
+    expected_size: Optional[int] = None,
+    displayed_filename: Optional[str] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    _tqdm_bar: Optional[tqdm] = None,
+) -> None:
+    """
+    Download a file using Xet storage service.
+
+    Args:
+        incomplete_path (`Path`):
+            The path to the file to download.
+        xet_file_data (`XetFileData`):
+            The file metadata needed to make the request to the xet storage service.
+        headers (`dict[str, str]`):
+            The headers to send to the xet storage service.
+        expected_size (`int`, *optional*):
+            The expected size of the file to download. If set, the download will raise an error if the size of the
+            received content is different from the expected one.
+        displayed_filename (`str`, *optional*):
+            The filename of the file that is being downloaded. Value is used only to display a nice progress bar. If
+            not set, the filename is guessed from the URL or the `Content-Disposition` header.
+
+    **How it works:**
+        The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
+        for efficient storage and transfer.
+
+        `hf_xet.download_files` manages downloading files by:
+        - Taking a list of files to download (each with its unique content hash)
+        - Connecting to a storage server (CAS server) that knows how files are chunked
+        - Using authentication to ensure secure access
+        - Providing progress updates during download
+
+        Authentication works by regularly refreshing access tokens through `refresh_xet_connection_info` to maintain a valid
+        connection to the storage server.
+
+        The download process works like this:
+        1. Create a local cache folder at `~/.cache/huggingface/xet/chunk-cache` to store reusable file chunks
+        2. Download files in parallel:
+            2.1. Prepare to write the file to disk
+            2.2. Ask the server "how is this file split into chunks?" using the file's unique hash
+                The server responds with:
+                - Which chunks make up the complete file
+                - Where each chunk can be downloaded from
+            2.3. For each needed chunk:
+                - Checks if we already have it in our local cache
+                - If not, download it from cloud storage (S3)
+                - Save it to cache for future use
+                - Assemble the chunks in order to recreate the original file
+
+    """
+    try:
+        from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]
+    except ImportError:
+        raise ValueError(
+            "To use optimized download using Xet storage, you need to install the hf_xet package. "
+            'Try `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.'
+        )
+
+    connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
+
+    def token_refresher() -> tuple[str, int]:
+        connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
+        if connection_info is None:
+            raise ValueError("Failed to refresh token using xet metadata.")
+        return connection_info.access_token, connection_info.expiration_unix_epoch
+
+    xet_download_info = [
+        PyXetDownloadInfo(
+            destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size
+        )
+    ]
+
+    if not displayed_filename:
+        displayed_filename = incomplete_path.name
+
+    # Truncate filename if too long to display
+    if len(displayed_filename) > 40:
+        displayed_filename = f"{displayed_filename[:40]}(…)"
+
+    progress_cm = _get_progress_bar_context(
+        desc=displayed_filename,
+        log_level=logger.getEffectiveLevel(),
+        total=expected_size,
+        initial=0,
+        name="huggingface_hub.xet_get",
+        tqdm_class=tqdm_class,
+        _tqdm_bar=_tqdm_bar,
+    )
+
+    with progress_cm as progress:
+
+        def progress_updater(progress_bytes: float):
+            progress.update(progress_bytes)
+
+        download_files(
+            xet_download_info,
+            endpoint=connection_info.endpoint,
+            token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
+            token_refresher=token_refresher,
+            progress_updater=[progress_updater],
+        )
+
+
 def _normalize_etag(etag: Optional[str]) -> Optional[str]:
     """Normalize ETag HTTP header, so it can be used to create nice filepaths.
 
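The `xet_get` docstring above describes chunk-level downloads authenticated with short-lived tokens refreshed through a callback. A generic sketch of that refresher pattern; the names below are illustrative and are not the `hf_xet` API:

```python
import time
from typing import Callable, Optional, Tuple

def make_token_refresher(fetch_token: Callable[[], Tuple[str, int]]) -> Callable[[], Tuple[str, int]]:
    """Wrap a token fetcher so callers always receive a non-expired (token, expiry) pair."""
    cached: Optional[Tuple[str, int]] = None

    def refresher() -> Tuple[str, int]:
        nonlocal cached
        # Refresh slightly before the advertised expiry to avoid races.
        if cached is None or cached[1] <= int(time.time()) + 30:
            cached = fetch_token()
        return cached

    return refresher

# Example with a dummy fetcher that issues one-hour tokens:
refresher = make_token_refresher(lambda: ("dummy-token", int(time.time()) + 3600))
token, expires_at = refresher()
```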
@@ -601,10 +691,10 @@ def _create_symlink(src: str, dst: str, new_blob: bool = False) -> None:
 
     # Symlinks are not supported => let's move or copy the file.
     if new_blob:
-        logger.
+        logger.debug(f"Symlink not supported. Moving file from {abs_src} to {abs_dst}")
         shutil.move(abs_src, abs_dst, copy_function=_copy_no_matter_what)
     else:
-        logger.
+        logger.debug(f"Symlink not supported. Copying file from {abs_src} to {abs_dst}")
         shutil.copyfile(abs_src, abs_dst)
 
 
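The hunk above logs and falls back to moving or copying the blob when symlinks are unavailable. A hedged, standalone sketch of the same symlink-with-fallback idea, unrelated to the library's internal `_create_symlink`:

```python
import os
import shutil

def link_or_copy(src: str, dst: str) -> None:
    """Prefer a symlink; fall back to copying when the filesystem forbids symlinks."""
    try:
        os.symlink(os.path.abspath(src), dst)
    except (OSError, NotImplementedError):
        shutil.copyfile(src, dst)
```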
@@ -660,6 +750,78 @@ def _check_disk_space(expected_size: int, target_dir: Union[str, Path]) -> None:
     pass
 
 
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    dry_run: Literal[False] = False,
+) -> str: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    dry_run: Literal[True] = True,
+) -> DryRunFileInfo: ...
+
+
+@overload
+def hf_hub_download(
+    repo_id: str,
+    filename: str,
+    *,
+    subfolder: Optional[str] = None,
+    repo_type: Optional[str] = None,
+    revision: Optional[str] = None,
+    library_name: Optional[str] = None,
+    library_version: Optional[str] = None,
+    cache_dir: Union[str, Path, None] = None,
+    local_dir: Union[str, Path, None] = None,
+    user_agent: Union[dict, str, None] = None,
+    force_download: bool = False,
+    etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
+    token: Union[bool, str, None] = None,
+    local_files_only: bool = False,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]: ...
+
+
 @validate_hf_hub_args
 def hf_hub_download(
     repo_id: str,
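The three `@overload` declarations above let type checkers resolve the return type from the literal value of `dry_run`. The same pattern on a toy function, as a sketch:

```python
from typing import Literal, Union, overload

@overload
def fetch(name: str, *, dry_run: Literal[False] = False) -> str: ...
@overload
def fetch(name: str, *, dry_run: Literal[True]) -> dict: ...
@overload
def fetch(name: str, *, dry_run: bool = False) -> Union[str, dict]: ...

def fetch(name: str, *, dry_run: bool = False) -> Union[str, dict]:
    if dry_run:
        return {"name": name, "would_download": True}
    return f"/local/path/{name}"

path: str = fetch("config.json")                 # checker narrows the result to str
info: dict = fetch("config.json", dry_run=True)  # checker narrows the result to dict
```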
@@ -672,18 +834,16 @@ def hf_hub_download(
     library_version: Optional[str] = None,
     cache_dir: Union[str, Path, None] = None,
     local_dir: Union[str, Path, None] = None,
-    user_agent: Union[
+    user_agent: Union[dict, str, None] = None,
     force_download: bool = False,
-    proxies: Optional[Dict] = None,
     etag_timeout: float = constants.DEFAULT_ETAG_TIMEOUT,
     token: Union[bool, str, None] = None,
     local_files_only: bool = False,
-    headers: Optional[
+    headers: Optional[dict[str, str]] = None,
     endpoint: Optional[str] = None,
-
-
-
-) -> str:
+    tqdm_class: Optional[type[base_tqdm]] = None,
+    dry_run: bool = False,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file if it's not already present in the local cache.
 
     The new cache file layout looks like this:
@@ -745,9 +905,6 @@ def hf_hub_download(
         force_download (`bool`, *optional*, defaults to `False`):
             Whether the file should be downloaded even if it already exists in
             the local cache.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         etag_timeout (`float`, *optional*, defaults to `10`):
             When fetching ETag, how many seconds to wait for the server to send
             data before giving up which is passed to `requests.request`.
@@ -761,9 +918,19 @@ def hf_hub_download(
             local cached file if it exists.
         headers (`dict`, *optional*):
             Additional headers to be sent with the request.
+        tqdm_class (`tqdm`, *optional*):
+            If provided, overwrites the default behavior for the progress bar. Passed
+            argument must inherit from `tqdm.auto.tqdm` or at least mimic its behavior.
+            Defaults to the custom HF progress bar that can be disabled by setting
+            `HF_HUB_DISABLE_PROGRESS_BARS` environment variable.
+        dry_run (`bool`, *optional*, defaults to `False`):
+            If `True`, perform a dry run without actually downloading the file. Returns a
+            [`DryRunFileInfo`] object containing information about what would be downloaded.
 
     Returns:
-        `str
+        `str` or [`DryRunFileInfo`]:
+            - If `dry_run=False`: Local path of file or if networking is off, last version of file cached on disk.
+            - If `dry_run=True`: A [`DryRunFileInfo`] object containing download information.
 
     Raises:
         [`~utils.RepositoryNotFoundError`]
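The new `tqdm_class` argument documented above accepts any class that inherits from `tqdm.auto.tqdm`. A hedged sketch of passing a custom progress-bar class; the repo and filename are placeholders and the subclass simply silences the live bar:

```python
from tqdm.auto import tqdm as base_tqdm
from huggingface_hub import hf_hub_download

class NoBarTqdm(base_tqdm):
    """Disable live rendering while keeping tqdm's accounting intact."""
    def __init__(self, *args, **kwargs):
        kwargs["disable"] = True
        super().__init__(*args, **kwargs)

path = hf_hub_download("gpt2", "config.json", tqdm_class=NoBarTqdm)
```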
@@ -771,7 +938,7 @@ def hf_hub_download(
             or because it is set to `private` and you do not have access.
         [`~utils.RevisionNotFoundError`]
             If the revision to download from cannot be found.
-        [`~utils.
+        [`~utils.RemoteEntryNotFoundError`]
             If the file to download cannot be found.
         [`~utils.LocalEntryNotFoundError`]
             If network is disabled or unavailable and file is not found in cache.
@@ -787,20 +954,6 @@ def hf_hub_download(
         # Respect environment variable above user value
         etag_timeout = constants.HF_HUB_ETAG_TIMEOUT
 
-    if force_filename is not None:
-        warnings.warn(
-            "The `force_filename` parameter is deprecated as a new caching system, "
-            "which keeps the filenames as they are on the Hub, is now in place.",
-            FutureWarning,
-        )
-    if resume_download is not None:
-        warnings.warn(
-            "`resume_download` is deprecated and will be removed in version 1.0.0. "
-            "Downloads always resume when possible. "
-            "If you want to force a new download, use `force_download=True`.",
-            FutureWarning,
-        )
-
     if cache_dir is None:
         cache_dir = constants.HF_HUB_CACHE
     if revision is None:
@@ -830,15 +983,6 @@ def hf_hub_download(
     )
 
     if local_dir is not None:
-        if local_dir_use_symlinks != "auto":
-            warnings.warn(
-                "`local_dir_use_symlinks` parameter is deprecated and will be ignored. "
-                "The process to download files to a local folder has been updated and do "
-                "not rely on symlinks anymore. You only need to pass a destination folder "
-                "as`local_dir`.\n"
-                "For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder."
-            )
-
         return _hf_hub_download_to_local_dir(
             # Destination
             local_dir=local_dir,
@@ -851,12 +995,13 @@ def hf_hub_download(
             endpoint=endpoint,
             etag_timeout=etag_timeout,
             headers=hf_headers,
-            proxies=proxies,
             token=token,
             # Additional options
             cache_dir=cache_dir,
             force_download=force_download,
             local_files_only=local_files_only,
+            tqdm_class=tqdm_class,
+            dry_run=dry_run,
         )
     else:
         return _hf_hub_download_to_cache_dir(
@@ -871,11 +1016,12 @@ def hf_hub_download(
             endpoint=endpoint,
             etag_timeout=etag_timeout,
             headers=hf_headers,
-            proxies=proxies,
             token=token,
             # Additional options
             local_files_only=local_files_only,
             force_download=force_download,
+            tqdm_class=tqdm_class,
+            dry_run=dry_run,
         )
 
 
@@ -891,13 +1037,14 @@ def _hf_hub_download_to_cache_dir(
     # HTTP info
     endpoint: Optional[str],
     etag_timeout: float,
-    headers:
-    proxies: Optional[Dict],
+    headers: dict[str, str],
     token: Optional[Union[bool, str]],
     # Additional options
     local_files_only: bool,
     force_download: bool,
-
+    tqdm_class: Optional[type[base_tqdm]],
+    dry_run: bool,
+) -> Union[str, DryRunFileInfo]:
     """Download a given file to a cache folder, if not already present.
 
     Method should not be called directly. Please use `hf_hub_download` instead.
@@ -905,7 +1052,7 @@ def _hf_hub_download_to_cache_dir(
     locks_dir = os.path.join(cache_dir, ".locks")
     storage_folder = os.path.join(cache_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type))
 
-    # cross
+    # cross-platform transcription of filename, to be used as a local file path.
     relative_filename = os.path.join(*filename.split("/"))
     if os.name == "nt":
         if relative_filename.startswith("..\\") or "\\..\\" in relative_filename:
@@ -917,18 +1064,27 @@ def _hf_hub_download_to_cache_dir(
     # if user provides a commit_hash and they already have the file on disk, shortcut everything.
     if REGEX_COMMIT_HASH.match(revision):
         pointer_path = _get_pointer_path(storage_folder, revision, relative_filename)
-        if os.path.exists(pointer_path)
-
+        if os.path.exists(pointer_path):
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=revision,
+                    file_size=os.path.getsize(pointer_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=pointer_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return pointer_path
 
     # Try to get metadata (etag, commit_hash, url, size) from the server.
     # If we can't, a HEAD request error is returned.
-    (url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
+    (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
         repo_id=repo_id,
         filename=filename,
         repo_type=repo_type,
         revision=revision,
         endpoint=endpoint,
-        proxies=proxies,
         etag_timeout=etag_timeout,
         headers=headers,
         token=token,
@@ -962,8 +1118,18 @@ def _hf_hub_download_to_cache_dir(
     # Return pointer file if exists
     if commit_hash is not None:
         pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
-        if os.path.exists(pointer_path)
-
+        if os.path.exists(pointer_path):
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=os.path.getsize(pointer_path),
+                    filename=filename,
+                    is_cached=True,
+                    local_path=pointer_path,
+                    will_download=force_download,
+                )
+            if not force_download:
+                return pointer_path
 
     # Otherwise, raise appropriate error
     _raise_on_head_call_error(head_call_error, force_download, local_files_only)
@@ -976,6 +1142,17 @@ def _hf_hub_download_to_cache_dir(
     blob_path = os.path.join(storage_folder, "blobs", etag)
     pointer_path = _get_pointer_path(storage_folder, commit_hash, relative_filename)
 
+    if dry_run:
+        is_cached = os.path.exists(pointer_path) or os.path.exists(blob_path)
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=pointer_path,
+            will_download=force_download or not is_cached,
+        )
+
     os.makedirs(os.path.dirname(blob_path), exist_ok=True)
     os.makedirs(os.path.dirname(pointer_path), exist_ok=True)
 
@@ -984,39 +1161,53 @@ def _hf_hub_download_to_cache_dir(
     # In that case store a ref.
     _cache_commit_hash_for_specific_revision(storage_folder, revision, commit_hash)
 
-    # If file already exists, return it (except if force_download=True)
-    if not force_download:
-        if os.path.exists(pointer_path):
-            return pointer_path
-
-        if os.path.exists(blob_path):
-            # we have the blob already, but not the pointer
-            _create_symlink(blob_path, pointer_path, new_blob=False)
-            return pointer_path
-
     # Prevent parallel downloads of the same file with a lock.
     # etag could be duplicated across repos,
     lock_path = os.path.join(locks_dir, repo_folder_name(repo_id=repo_id, repo_type=repo_type), f"{etag}.lock")
 
     # Some Windows versions do not allow for paths longer than 255 characters.
     # In this case, we must specify it as an extended path by using the "\\?\" prefix.
-    if
+    if (
+        os.name == "nt"
+        and len(os.path.abspath(lock_path)) > 255
+        and not os.path.abspath(lock_path).startswith("\\\\?\\")
+    ):
         lock_path = "\\\\?\\" + os.path.abspath(lock_path)
 
-    if
+    if (
+        os.name == "nt"
+        and len(os.path.abspath(blob_path)) > 255
+        and not os.path.abspath(blob_path).startswith("\\\\?\\")
+    ):
         blob_path = "\\\\?\\" + os.path.abspath(blob_path)
 
     Path(lock_path).parent.mkdir(parents=True, exist_ok=True)
+
+    # pointer already exists -> immediate return
+    if not force_download and os.path.exists(pointer_path):
+        return pointer_path
+
+    # Blob exists but pointer must be (safely) created -> take the lock
+    if not force_download and os.path.exists(blob_path):
+        with WeakFileLock(lock_path):
+            if not os.path.exists(pointer_path):
+                _create_symlink(blob_path, pointer_path, new_blob=False)
+            return pointer_path
+
+    # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
+
     with WeakFileLock(lock_path):
         _download_to_tmp_and_move(
             incomplete_path=Path(blob_path + ".incomplete"),
            destination_path=Path(blob_path),
            url_to_download=url_to_download,
-            proxies=proxies,
            headers=headers,
            expected_size=expected_size,
            filename=filename,
            force_download=force_download,
+            etag=etag,
+            xet_file_data=xet_file_data,
+            tqdm_class=tqdm_class,
        )
        if not os.path.exists(pointer_path):
            _create_symlink(blob_path, pointer_path, new_blob=True)
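The hunk above expands the Windows long-path workaround: absolute paths longer than 255 characters get the extended-length prefix so Win32 file APIs accept them. A small standalone sketch of that rule (an illustrative helper, not library code):

```python
import os

def windows_long_path(path: str) -> str:
    """Prefix over-long absolute paths with the Windows extended-length marker when needed."""
    abs_path = os.path.abspath(path)
    if os.name == "nt" and len(abs_path) > 255 and not abs_path.startswith("\\\\?\\"):
        return "\\\\?\\" + abs_path
    return abs_path
```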
@@ -1036,14 +1227,15 @@ def _hf_hub_download_to_local_dir(
|
|
|
1036
1227
|
# HTTP info
|
|
1037
1228
|
endpoint: Optional[str],
|
|
1038
1229
|
etag_timeout: float,
|
|
1039
|
-
headers:
|
|
1040
|
-
proxies: Optional[Dict],
|
|
1230
|
+
headers: dict[str, str],
|
|
1041
1231
|
token: Union[bool, str, None],
|
|
1042
1232
|
# Additional options
|
|
1043
1233
|
cache_dir: str,
|
|
1044
1234
|
force_download: bool,
|
|
1045
1235
|
local_files_only: bool,
|
|
1046
|
-
|
|
1236
|
+
tqdm_class: Optional[type[base_tqdm]],
|
|
1237
|
+
dry_run: bool,
|
|
1238
|
+
) -> Union[str, DryRunFileInfo]:
|
|
1047
1239
|
"""Download a given file to a local folder, if not already present.
|
|
1048
1240
|
|
|
1049
1241
|
Method should not be called directly. Please use `hf_hub_download` instead.
|
|
@@ -1058,22 +1250,31 @@ def _hf_hub_download_to_local_dir(
|
|
|
1058
1250
|
|
|
1059
1251
|
# Local file exists + metadata exists + commit_hash matches => return file
|
|
1060
1252
|
if (
|
|
1061
|
-
|
|
1062
|
-
and REGEX_COMMIT_HASH.match(revision)
|
|
1253
|
+
REGEX_COMMIT_HASH.match(revision)
|
|
1063
1254
|
and paths.file_path.is_file()
|
|
1064
1255
|
and local_metadata is not None
|
|
1065
1256
|
and local_metadata.commit_hash == revision
|
|
1066
1257
|
):
|
|
1067
|
-
|
|
1258
|
+
local_file = str(paths.file_path)
|
|
1259
|
+
if dry_run:
|
|
1260
|
+
return DryRunFileInfo(
|
|
1261
|
+
commit_hash=revision,
|
|
1262
|
+
file_size=os.path.getsize(local_file),
|
|
1263
|
+
filename=filename,
|
|
1264
|
+
is_cached=True,
|
|
1265
|
+
local_path=local_file,
|
|
1266
|
+
will_download=force_download,
|
|
1267
|
+
)
|
|
1268
|
+
if not force_download:
|
|
1269
|
+
return local_file
|
|
1068
1270
|
|
|
1069
1271
|
# Local file doesn't exist or commit_hash doesn't match => we need the etag
|
|
1070
|
-
(url_to_download, etag, commit_hash, expected_size, head_call_error) = _get_metadata_or_catch_error(
|
|
1272
|
+
(url_to_download, etag, commit_hash, expected_size, xet_file_data, head_call_error) = _get_metadata_or_catch_error(
|
|
1071
1273
|
repo_id=repo_id,
|
|
1072
1274
|
filename=filename,
|
|
1073
1275
|
repo_type=repo_type,
|
|
1074
1276
|
revision=revision,
|
|
1075
1277
|
endpoint=endpoint,
|
|
1076
|
-
proxies=proxies,
|
|
1077
1278
|
etag_timeout=etag_timeout,
|
|
1078
1279
|
headers=headers,
|
|
1079
1280
|
token=token,
|
|
@@ -1082,11 +1283,24 @@ def _hf_hub_download_to_local_dir(
|
|
|
1082
1283
|
|
|
1083
1284
|
if head_call_error is not None:
|
|
1084
1285
|
# No HEAD call but local file exists => default to local file
|
|
1085
|
-
if
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1286
|
+
if paths.file_path.is_file():
|
|
1287
|
+
if dry_run or not force_download:
|
|
1288
|
+
logger.warning(
|
|
1289
|
+
f"Couldn't access the Hub to check for update but local file already exists. Defaulting to existing file. (error: {head_call_error})"
|
|
1290
|
+
)
|
|
1291
|
+
local_path = str(paths.file_path)
|
|
1292
|
+
if dry_run and local_metadata is not None:
|
|
1293
|
+
return DryRunFileInfo(
|
|
1294
|
+
commit_hash=local_metadata.commit_hash,
|
|
1295
|
+
file_size=os.path.getsize(local_path),
|
|
1296
|
+
filename=filename,
|
|
1297
|
+
is_cached=True,
|
|
1298
|
+
local_path=local_path,
|
|
1299
|
+
will_download=force_download,
|
|
1300
|
+
)
|
|
1301
|
+
if not force_download:
|
|
1302
|
+
return local_path
|
|
1303
|
+
|
|
1090
1304
|
# Otherwise => raise
|
|
1091
1305
|
_raise_on_head_call_error(head_call_error, force_download, local_files_only)
|
|
1092
1306
|
|
|
@@ -1101,6 +1315,15 @@ def _hf_hub_download_to_local_dir(
     # etag matches => update metadata and return file
     if local_metadata is not None and local_metadata.etag == etag:
         write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+        if dry_run:
+            return DryRunFileInfo(
+                commit_hash=commit_hash,
+                file_size=expected_size,
+                filename=filename,
+                is_cached=True,
+                local_path=str(paths.file_path),
+                will_download=False,
+            )
         return str(paths.file_path)
 
     # metadata is outdated + etag is a sha256
@@ -1112,6 +1335,15 @@ def _hf_hub_download_to_local_dir(
             file_hash = sha_fileobj(f).hex()
         if file_hash == etag:
             write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=expected_size,
+                    filename=filename,
+                    is_cached=True,
+                    local_path=str(paths.file_path),
+                    will_download=False,
+                )
             return str(paths.file_path)
 
     # Local file doesn't exist or etag isn't a match => retrieve file from remote (or cache)
@@ -1130,8 +1362,28 @@ def _hf_hub_download_to_local_dir(
                 paths.file_path.parent.mkdir(parents=True, exist_ok=True)
                 shutil.copyfile(cached_path, paths.file_path)
             write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
+            if dry_run:
+                return DryRunFileInfo(
+                    commit_hash=commit_hash,
+                    file_size=expected_size,
+                    filename=filename,
+                    is_cached=True,
+                    local_path=str(paths.file_path),
+                    will_download=False,
+                )
             return str(paths.file_path)
 
+    if dry_run:
+        is_cached = paths.file_path.is_file()
+        return DryRunFileInfo(
+            commit_hash=commit_hash,
+            file_size=expected_size,
+            filename=filename,
+            is_cached=is_cached,
+            local_path=str(paths.file_path),
+            will_download=force_download or not is_cached,
+        )
+
     # Otherwise, let's download the file!
     with WeakFileLock(paths.lock_path):
         paths.file_path.unlink(missing_ok=True)  # delete outdated file first
@@ -1139,11 +1391,13 @@ def _hf_hub_download_to_local_dir(
             incomplete_path=paths.incomplete_path(etag),
             destination_path=paths.file_path,
             url_to_download=url_to_download,
-            proxies=proxies,
             headers=headers,
             expected_size=expected_size,
             filename=filename,
             force_download=force_download,
+            etag=etag,
+            xet_file_data=xet_file_data,
+            tqdm_class=tqdm_class,
         )
 
     write_download_metadata(local_dir=local_dir, filename=filename, commit_hash=commit_hash, etag=etag)
@@ -1247,12 +1501,12 @@ def try_to_load_from_cache(
 def get_hf_file_metadata(
     url: str,
     token: Union[bool, str, None] = None,
-    proxies: Optional[Dict] = None,
     timeout: Optional[float] = constants.DEFAULT_REQUEST_TIMEOUT,
     library_name: Optional[str] = None,
     library_version: Optional[str] = None,
-    user_agent: Union[Dict, str, None] = None,
-    headers: Optional[Dict[str, str]] = None,
+    user_agent: Union[dict, str, None] = None,
+    headers: Optional[dict[str, str]] = None,
+    endpoint: Optional[str] = None,
 ) -> HfFileMetadata:
     """Fetch metadata of a file versioned on the Hub for a given url.
 
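With `proxies` dropped from the signature and `endpoint` added, a call against the updated `get_hf_file_metadata` could look like the sketch below. `hf_hub_url` is the existing helper for building file URLs; the repo and filename are placeholders, and proxy configuration is assumed to move to the HTTP-client level rather than per-call arguments:

```python
from huggingface_hub import get_hf_file_metadata, hf_hub_url

url = hf_hub_url(repo_id="gpt2", filename="config.json")  # illustrative repo/file

# `endpoint` is the new optional parameter; it defaults to https://huggingface.co.
metadata = get_hf_file_metadata(url, endpoint="https://huggingface.co")

print(metadata.commit_hash, metadata.etag, metadata.size)
print(metadata.location)       # final location after relative redirects
print(metadata.xet_file_data)  # None unless the file is backed by Xet Storage
```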
@@ -1265,9 +1519,6 @@ def get_hf_file_metadata(
                   folder.
                 - If `False` or `None`, no token is provided.
                 - If a string, it's used as the authentication token.
-        proxies (`dict`, *optional*):
-            Dictionary mapping protocol to the URL of the proxy passed to
-            `requests.request`.
         timeout (`float`, *optional*, defaults to 10):
             How many seconds to wait for the server to send metadata before giving up.
         library_name (`str`, *optional*):
@@ -1278,6 +1529,8 @@ def get_hf_file_metadata(
             The user-agent info in the form of a dictionary or a string.
         headers (`dict`, *optional*):
             Additional headers to be sent with the request.
+        endpoint (`str`, *optional*):
+            Endpoint of the Hub. Defaults to <https://huggingface.co>.
 
     Returns:
         A [`HfFileMetadata`] object containing metadata such as location, etag, size and
@@ -1293,30 +1546,23 @@ def get_hf_file_metadata(
     hf_headers["Accept-Encoding"] = "identity"  # prevent any compression => we want to know the real size of the file
 
     # Retrieve metadata
-    r = _request_wrapper(
-        method="HEAD",
-        url=url,
-        headers=hf_headers,
-        allow_redirects=False,
-        follow_relative_redirects=True,
-        proxies=proxies,
-        timeout=timeout,
-    )
-    hf_raise_for_status(r)
+    response = _httpx_follow_relative_redirects(method="HEAD", url=url, headers=hf_headers, timeout=timeout)
+    hf_raise_for_status(response)
 
     # Return
     return HfFileMetadata(
-        commit_hash=r.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
-        # We favor a custom header indicating the etag of the linked resource, and
-        # we fall back to the regular etag header.
-        etag=_normalize_etag(r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or r.headers.get("ETag")),
+        commit_hash=response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT),
+        # We favor a custom header indicating the etag of the linked resource, and we fall back to the regular etag header.
+        etag=_normalize_etag(
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_ETAG) or response.headers.get("ETag")
+        ),
         # Either from response headers (if redirected) or defaults to request url
-        # Do not use directly `url`, as `_request_wrapper` might have followed relative
-        # redirects.
-        location=r.headers.get("Location") or r.request.url,  # type: ignore
+        # Do not use directly `url` as we might have followed relative redirects.
+        location=response.headers.get("Location") or str(response.request.url),  # type: ignore
         size=_int_or_none(
-            r.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or r.headers.get("Content-Length")
+            response.headers.get(constants.HUGGINGFACE_HEADER_X_LINKED_SIZE) or response.headers.get("Content-Length")
         ),
+        xet_file_data=parse_xet_file_data_from_response(response, endpoint=endpoint),  # type: ignore
     )
 
 
@@ -1327,19 +1573,18 @@ def _get_metadata_or_catch_error(
     repo_type: str,
     revision: str,
    endpoint: Optional[str],
-    proxies: Optional[Dict],
     etag_timeout: Optional[float],
-    headers: Dict[str, str],  # mutated inplace!
+    headers: dict[str, str],  # mutated inplace!
     token: Union[bool, str, None],
     local_files_only: bool,
     relative_filename: Optional[str] = None,  # only used to store `.no_exists` in cache
     storage_folder: Optional[str] = None,  # only used to store `.no_exists` in cache
 ) -> Union[
     # Either an exception is caught and returned
-    Tuple[None, None, None, None, Exception],
+    tuple[None, None, None, None, None, Exception],
     # Or the metadata is returned as
-    # `(url_to_download, etag, commit_hash, expected_size, None)`
-    Tuple[str, str, str, int, None],
+    # `(url_to_download, etag, commit_hash, expected_size, xet_file_data, None)`
+    tuple[str, str, str, int, Optional[XetFileData], None],
 ]:
     """Get metadata for a file on the Hub, safely handling network issues.
 
@@ -1356,6 +1601,7 @@ def _get_metadata_or_catch_error(
             None,
             None,
             None,
+            None,
             OfflineModeIsEnabled(
                 f"Cannot access file since 'local_files_only=True' as been set. (repo_id: {repo_id}, repo_type: {repo_type}, revision: {revision}, filename: {filename})"
             ),
@@ -1367,6 +1613,7 @@ def _get_metadata_or_catch_error(
     commit_hash: Optional[str] = None
     expected_size: Optional[int] = None
     head_error_call: Optional[Exception] = None
+    xet_file_data: Optional[XetFileData] = None
 
     # Try to get metadata from the server.
     # Do not raise yet if the file is not found or not accessible.
@@ -1374,9 +1621,9 @@ def _get_metadata_or_catch_error(
     try:
         try:
             metadata = get_hf_file_metadata(
-                url=url, proxies=proxies, timeout=etag_timeout, headers=headers, token=token
+                url=url, timeout=etag_timeout, headers=headers, token=token, endpoint=endpoint
             )
-        except EntryNotFoundError as http_error:
+        except RemoteEntryNotFoundError as http_error:
             if storage_folder is not None and relative_filename is not None:
                 # Cache the non-existence of the file
                 commit_hash = http_error.response.headers.get(constants.HUGGINGFACE_HEADER_X_REPO_COMMIT)
@@ -1414,32 +1661,30 @@ def _get_metadata_or_catch_error(
         if expected_size is None:
             raise FileMetadataError("Distant resource does not have a Content-Length.")
 
+        xet_file_data = metadata.xet_file_data
+
         # In case of a redirect, save an extra redirect on the request.get call,
         # and ensure we download the exact atomic version even if it changed
         # between the HEAD and the GET (unlikely, but hey).
         #
         # If url domain is different => we are downloading from a CDN => url is signed => don't send auth
         # If url domain is the same => redirect due to repo rename AND downloading a regular file => keep auth
-        if url != metadata.location:
+        if xet_file_data is None and url != metadata.location:
             url_to_download = metadata.location
             if urlparse(url).netloc != urlparse(metadata.location).netloc:
                 # Remove authorization header when downloading a LFS blob
                 headers.pop("authorization", None)
-    except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
-        # Actually raise for those subclasses of ConnectionError
+    except httpx.ProxyError:
+        # Actually raise on proxy error
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ) as error:
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled) as error:
         # Otherwise, our Internet connection is down.
         # etag is None
         head_error_call = error
-    except (RevisionNotFoundError, EntryNotFoundError):
+    except (RevisionNotFoundError, RemoteEntryNotFoundError):
         # The repo was found but the revision or entry doesn't exist on the Hub (never existed or got deleted)
         raise
-    except requests.HTTPError as error:
+    except HfHubHTTPError as error:
         # Multiple reasons for an http error:
         # - Repository is private and invalid/missing token sent
         # - Repository is gated and invalid/missing token sent
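The exception surface in `_get_metadata_or_catch_error` moves from `requests` to `httpx`, and Hub-side failures are funnelled through `HfHubHTTPError`. A rough caller-side sketch under those assumptions (the error import path is assumed to stay under `huggingface_hub.errors`; the repo and file are placeholders):

```python
import httpx

from huggingface_hub import get_hf_file_metadata, hf_hub_url
from huggingface_hub.errors import HfHubHTTPError  # assumed import path

url = hf_hub_url(repo_id="gpt2", filename="config.json")  # illustrative repo/file

try:
    metadata = get_hf_file_metadata(url)
except (httpx.ConnectError, httpx.TimeoutException):
    # Transport-level failures are now httpx exceptions rather than
    # requests.exceptions.ConnectionError / requests.exceptions.Timeout.
    metadata = None
except HfHubHTTPError:
    # Hub-side HTTP errors (private/gated repo, missing token, server error, ...).
    raise
```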
@@ -1458,7 +1703,7 @@ def _get_metadata_or_catch_error(
     if not (local_files_only or etag is not None or head_error_call is not None):
         raise RuntimeError("etag is empty due to uncovered problems")
 
-    return (url_to_download, etag, commit_hash, expected_size, head_error_call)  # type: ignore [return-value]
+    return (url_to_download, etag, commit_hash, expected_size, xet_file_data, head_error_call)  # type: ignore [return-value]
 
 
 def _raise_on_head_call_error(head_call_error: Exception, force_download: bool, local_files_only: bool) -> NoReturn:
@@ -1497,18 +1742,20 @@ def _download_to_tmp_and_move(
     incomplete_path: Path,
     destination_path: Path,
     url_to_download: str,
-    proxies: Optional[Dict],
-    headers: Dict[str, str],
+    headers: dict[str, str],
     expected_size: Optional[int],
     filename: str,
     force_download: bool,
+    etag: Optional[str],
+    xet_file_data: Optional[XetFileData],
+    tqdm_class: Optional[type[base_tqdm]] = None,
 ) -> None:
     """Download content from a URL to a destination path.
 
     Internal logic:
     - return early if file is already downloaded
     - resume download if possible (from incomplete file)
-    - do not resume download if `force_download=True` or `hf_transfer` is enabled
+    - do not resume download if `force_download=True`
     - check disk space before downloading
     - download content to a temporary file
     - set correct permissions on temporary file
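The signature now threads an optional `tqdm_class` down to the transfer layer. A hedged sketch of a custom progress-bar class follows; whether the public download helpers expose the same `tqdm_class` argument is an assumption based on this new parameter, not something the diff shows directly:

```python
from tqdm.auto import tqdm as base_tqdm

from huggingface_hub import hf_hub_download


class QuietTqdm(base_tqdm):
    """Progress bar that refreshes at most once per second (illustrative)."""

    def __init__(self, *args, **kwargs):
        kwargs.setdefault("mininterval", 1.0)
        super().__init__(*args, **kwargs)


# Assumption: the public helper forwards `tqdm_class` down to _download_to_tmp_and_move().
path = hf_hub_download(
    repo_id="gpt2",           # illustrative repo
    filename="config.json",   # illustrative file
    tqdm_class=QuietTqdm,
)
```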
@@ -1520,16 +1767,11 @@ def _download_to_tmp_and_move(
         # Do nothing if already exists (except if force_download=True)
         return
 
-    if incomplete_path.exists() and (force_download or (constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies)):
+    if incomplete_path.exists() and force_download:
         # By default, we will try to resume the download if possible.
-        # However, if the user has set `force_download=True` or if `hf_transfer` is enabled, then we should
+        # However, if the user has set `force_download=True`, then we should
         # not resume the download => delete the incomplete file.
-        message = f"Removing incomplete file '{incomplete_path}'"
-        if force_download:
-            message += " (force_download=True)"
-        elif constants.HF_HUB_ENABLE_HF_TRANSFER and not proxies:
-            message += " (hf_transfer=True)"
-        logger.info(message)
+        logger.debug(f"Removing incomplete file '{incomplete_path}' (force_download=True)")
         incomplete_path.unlink(missing_ok=True)
 
     with incomplete_path.open("ab") as f:
@@ -1537,23 +1779,41 @@ def _download_to_tmp_and_move(
         message = f"Downloading '{filename}' to '{incomplete_path}'"
         if resume_size > 0 and expected_size is not None:
             message += f" (resume from {resume_size}/{expected_size})"
-        logger.info(message)
+        logger.debug(message)
 
         if expected_size is not None:  # might be None if HTTP header not set correctly
             # Check disk space in both tmp and destination path
             _check_disk_space(expected_size, incomplete_path.parent)
             _check_disk_space(expected_size, destination_path.parent)
 
-        http_get(
-            url_to_download,
-            f,
-            proxies=proxies,
-            resume_size=resume_size,
-            headers=headers,
-            expected_size=expected_size,
-        )
+        if xet_file_data is not None and is_xet_available():
+            logger.debug("Xet Storage is enabled for this repo. Downloading file from Xet Storage..")
+            xet_get(
+                incomplete_path=incomplete_path,
+                xet_file_data=xet_file_data,
+                headers=headers,
+                expected_size=expected_size,
+                displayed_filename=filename,
+                tqdm_class=tqdm_class,
+            )
+        else:
+            if xet_file_data is not None and not constants.HF_HUB_DISABLE_XET:
+                logger.warning(
+                    "Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. "
+                    "Falling back to regular HTTP download. "
+                    "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`"
+                )
+
+            http_get(
+                url_to_download,
+                f,
+                resume_size=resume_size,
+                headers=headers,
+                expected_size=expected_size,
+                tqdm_class=tqdm_class,
+            )
 
-    logger.info(f"Download complete. Moving file to {destination_path}")
+    logger.debug(f"Download complete. Moving file to {destination_path}")
     _chmod_and_move(incomplete_path, destination_path)
 
 
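The download body now prefers Xet Storage when the repo is Xet-enabled and the `hf_xet` package is available, falling back to plain HTTP (with a one-time warning) otherwise. A small sketch of checking for the accelerated path; the `is_xet_available` helper name is taken from the hunk above, and its exact import location is an assumption:

```python
from huggingface_hub import hf_hub_download

try:
    # Assumed import path; the diff only shows the helper being called internally.
    from huggingface_hub.utils import is_xet_available
except ImportError:  # older versions without Xet support
    def is_xet_available() -> bool:
        return False

print("Xet-accelerated downloads available:", is_xet_available())

# The call itself is unchanged: if the repo is Xet-enabled and `hf_xet` is installed,
# bytes come through xet_get(); otherwise the regular http_get() path is used.
# To enable the fast path: `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.
path = hf_hub_download(repo_id="gpt2", filename="config.json")  # illustrative repo/file
```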