huggingface-hub 1.0.0rc5__py3-none-any.whl → 1.0.0rc7__py3-none-any.whl
- huggingface_hub/__init__.py +12 -1
- huggingface_hub/_commit_api.py +1 -5
- huggingface_hub/_jobs_api.py +1 -1
- huggingface_hub/_login.py +3 -3
- huggingface_hub/_snapshot_download.py +4 -3
- huggingface_hub/_upload_large_folder.py +2 -15
- huggingface_hub/_webhooks_server.py +1 -1
- huggingface_hub/cli/_cli_utils.py +1 -1
- huggingface_hub/cli/auth.py +0 -20
- huggingface_hub/cli/cache.py +561 -304
- huggingface_hub/cli/download.py +2 -2
- huggingface_hub/cli/repo.py +0 -7
- huggingface_hub/cli/upload.py +0 -8
- huggingface_hub/community.py +16 -8
- huggingface_hub/constants.py +10 -11
- huggingface_hub/file_download.py +9 -61
- huggingface_hub/hf_api.py +170 -126
- huggingface_hub/hf_file_system.py +31 -6
- huggingface_hub/inference/_client.py +1 -1
- huggingface_hub/inference/_generated/_async_client.py +1 -1
- huggingface_hub/inference/_providers/__init__.py +15 -2
- huggingface_hub/inference/_providers/_common.py +39 -0
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/lfs.py +3 -65
- huggingface_hub/serialization/_torch.py +1 -1
- huggingface_hub/utils/__init__.py +0 -2
- huggingface_hub/utils/_cache_manager.py +17 -42
- huggingface_hub/utils/_http.py +25 -3
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_runtime.py +1 -14
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/METADATA +4 -14
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/RECORD +36 -34
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/LICENSE +0 -0
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/WHEEL +0 -0
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-1.0.0rc5.dist-info → huggingface_hub-1.0.0rc7.dist-info}/top_level.txt +0 -0
huggingface_hub/hf_file_system.py
CHANGED

@@ -102,18 +102,22 @@ class HfFileSystem(fsspec.AbstractFileSystem):
         *args,
         endpoint: Optional[str] = None,
         token: Union[bool, str, None] = None,
+        block_size: Optional[int] = None,
         **storage_options,
     ):
         super().__init__(*args, **storage_options)
         self.endpoint = endpoint or constants.ENDPOINT
         self.token = token
         self._api = HfApi(endpoint=endpoint, token=token)
+        self.block_size = block_size
         # Maps (repo_type, repo_id, revision) to a 2-tuple with:
         # * the 1st element indicating whether the repository and the revision exist
         # * the 2nd element being the exception raised if the repository or revision doesn't exist
         self._repo_and_revision_exists_cache: dict[
             tuple[str, str, Optional[str]], tuple[bool, Optional[Exception]]
         ] = {}
+        # Maps parent directory path to path infos
+        self.dircache: dict[str, list[dict[str, Any]]] = {}

     def _repo_and_revision_exist(
         self, repo_type: str, repo_id: str, revision: Optional[str]
@@ -265,12 +269,15 @@ class HfFileSystem(fsspec.AbstractFileSystem):
         block_size: Optional[int] = None,
         **kwargs,
     ) -> "HfFileSystemFile":
+        block_size = block_size if block_size is not None else self.block_size
+        if block_size is not None:
+            kwargs["block_size"] = block_size
         if "a" in mode:
             raise NotImplementedError("Appending to remote files is not yet supported.")
         if block_size == 0:
-            return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
+            return HfFileSystemStreamFile(self, path, mode=mode, revision=revision, **kwargs)
         else:
-            return HfFileSystemFile(self, path, mode=mode, revision=revision, block_size=block_size, **kwargs)
+            return HfFileSystemFile(self, path, mode=mode, revision=revision, **kwargs)

     def _rm(self, path: str, revision: Optional[str] = None, **kwargs) -> None:
         resolved_path = self.resolve_path(path, revision=revision)
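The new `block_size` constructor argument acts as a per-instance default that `_open` forwards to every file handle; `block_size=0` selects the streaming `HfFileSystemStreamFile` path. A minimal usage sketch (the repo path is illustrative):

from huggingface_hub import HfFileSystem

# block_size=0 makes every open() default to fast streaming mode
fs = HfFileSystem(block_size=0)
with fs.open("datasets/nyu-mll/glue/README.md", "rb") as f:
    header = f.read(512)  # an explicit block_size passed to open() would still override the default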
@@ -439,7 +446,7 @@ class HfFileSystem(fsspec.AbstractFileSystem):
             common_path_depth = common_path[len(path) :].count("/")
             maxdepth -= common_path_depth
         out = [o for o in out if not o["name"].startswith(common_path + "/")]
-        for cached_path in self.dircache:
+        for cached_path in list(self.dircache):
             if cached_path.startswith(common_path + "/"):
                 self.dircache.pop(cached_path, None)
         self.dircache.pop(common_path, None)
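Wrapping the iteration in `list(...)` snapshots the keys before popping, avoiding the `RuntimeError: dictionary changed size during iteration` that CPython raises when a dict is mutated while being iterated. The pattern in isolation:

cache = {"a/b": 1, "a/c": 2, "d": 3}
for key in list(cache):  # iterate over a snapshot of the keys
    if key.startswith("a/"):
        cache.pop(key, None)  # safe: the dict itself is no longer being iterated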
@@ -923,6 +930,18 @@ class HfFileSystem(fsspec.AbstractFileSystem):
         # See https://github.com/huggingface/huggingface_hub/issues/1733
         raise NotImplementedError("Transactional commits are not supported.")

+    def __reduce__(self):
+        # re-populate the instance cache at HfFileSystem._cache and re-populate the cache attributes of every instance
+        return make_instance, (
+            type(self),
+            self.storage_args,
+            self.storage_options,
+            {
+                "dircache": self.dircache,
+                "_repo_and_revision_exists_cache": self._repo_and_revision_exists_cache,
+            },
+        )
+

 class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
     def __init__(self, fs: HfFileSystem, path: str, revision: Optional[str] = None, **kwargs):
@@ -986,9 +1005,8 @@ class HfFileSystemFile(fsspec.spec.AbstractBufferedFile):
     def read(self, length=-1):
         """Read remote file.

-        If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems
-        the file is loaded in memory directly. Otherwise, the file is downloaded to a
-        temporary file and read from there.
+        If `length` is not provided or is -1, the entire file is downloaded and read. On POSIX systems the file is
+        loaded in memory directly. Otherwise, the file is downloaded to a temporary file and read from there.
         """
         if self.mode == "rb" and (length is None or length == -1) and self.loc == 0:
             with self.fs.open(self.path, "rb", block_size=0) as f:  # block_size=0 enables fast streaming
@@ -1158,3 +1176,10 @@ def _partial_read(response: httpx.Response, length: int = -1) -> bytes:
         return bytes(buf[:length])

     return bytes(buf)  # may be < length if response ended
+
+
+def make_instance(cls, args, kwargs, instance_cache_attributes_dict):
+    fs = cls(*args, **kwargs)
+    for attr, cached_value in instance_cache_attributes_dict.items():
+        setattr(fs, attr, cached_value)
+    return fs
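Together, `__reduce__` and the module-level `make_instance` helper make `HfFileSystem` instances picklable without losing their directory and revision caches. A round-trip sketch (the dataset ID is illustrative):

import pickle
from huggingface_hub import HfFileSystem

fs = HfFileSystem()
fs.ls("datasets/nyu-mll/glue")             # populates fs.dircache
restored = pickle.loads(pickle.dumps(fs))  # goes through __reduce__ / make_instance
assert restored.dircache == fs.dircache    # cached listings survive the round-trip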
huggingface_hub/inference/_client.py
CHANGED

@@ -135,7 +135,7 @@ class InferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"` or `"zai-org"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"` or `"zai-org"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):
huggingface_hub/inference/_generated/_async_client.py
CHANGED

@@ -126,7 +126,7 @@ class AsyncInferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"` or `"zai-org"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"` or `"zai-org"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):
huggingface_hub/inference/_providers/__init__.py
CHANGED

@@ -6,9 +6,10 @@ from huggingface_hub.inference._providers.featherless_ai import (
 )
 from huggingface_hub.utils import logging

-from ._common import TaskProviderHelper, _fetch_inference_provider_mapping
+from ._common import AutoRouterConversationalTask, TaskProviderHelper, _fetch_inference_provider_mapping
 from .black_forest_labs import BlackForestLabsTextToImageTask
 from .cerebras import CerebrasConversationalTask
+from .clarifai import ClarifaiConversationalTask
 from .cohere import CohereConversationalTask
 from .fal_ai import (
     FalAIAutomaticSpeechRecognitionTask,
@@ -50,6 +51,7 @@ logger = logging.get_logger(__name__)
 PROVIDER_T = Literal[
     "black-forest-labs",
     "cerebras",
+    "clarifai",
     "cohere",
     "fal-ai",
     "featherless-ai",
@@ -71,6 +73,8 @@ PROVIDER_T = Literal[

 PROVIDER_OR_POLICY_T = Union[PROVIDER_T, Literal["auto"]]

+CONVERSATIONAL_AUTO_ROUTER = AutoRouterConversationalTask()
+
 PROVIDERS: dict[PROVIDER_T, dict[str, TaskProviderHelper]] = {
     "black-forest-labs": {
         "text-to-image": BlackForestLabsTextToImageTask(),
@@ -78,6 +82,9 @@ PROVIDERS: dict[PROVIDER_T, dict[str, TaskProviderHelper]] = {
     "cerebras": {
         "conversational": CerebrasConversationalTask(),
     },
+    "clarifai": {
+        "conversational": ClarifaiConversationalTask(),
+    },
     "cohere": {
         "conversational": CohereConversationalTask(),
     },
@@ -201,13 +208,19 @@ def get_provider_helper(

     if provider is None:
         logger.info(
-            "Defaulting to 'auto' which will select the first provider available for the model, sorted by the user's order in https://hf.co/settings/inference-providers."
+            "No provider specified for task `conversational`. Defaulting to server-side auto routing."
+            if task == "conversational"
+            else "Defaulting to 'auto' which will select the first provider available for the model, sorted by the user's order in https://hf.co/settings/inference-providers."
         )
         provider = "auto"

     if provider == "auto":
         if model is None:
             raise ValueError("Specifying a model is required when provider is 'auto'")
+        if task == "conversational":
+            # Special case: we have a dedicated auto-router for conversational models. No need to fetch provider mapping.
+            return CONVERSATIONAL_AUTO_ROUTER
+
         provider_mapping = _fetch_inference_provider_mapping(model)
         provider = next(iter(provider_mapping)).provider

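With these changes, a conversational call with no explicit provider skips the provider-mapping API call entirely and lets the Hugging Face router pick server-side. A sketch of the resulting behavior (the model ID is illustrative; an `hf_` token is required, as enforced by `_prepare_base_url`):

from huggingface_hub import InferenceClient

client = InferenceClient()  # no provider: conversational tasks use the server-side auto-router
out = client.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)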
huggingface_hub/inference/_providers/_common.py
CHANGED

@@ -24,6 +24,7 @@ HARDCODED_MODEL_INFERENCE_MAPPING: dict[str, dict[str, InferenceProviderMapping]] = {
     # status="live")
     "cerebras": {},
     "cohere": {},
+    "clarifai": {},
     "fal-ai": {},
     "fireworks-ai": {},
     "groq": {},
@@ -278,6 +279,44 @@ class BaseConversationalTask(TaskProviderHelper):
         return filter_none({"messages": inputs, **parameters, "model": provider_mapping_info.provider_id})


+class AutoRouterConversationalTask(BaseConversationalTask):
+    """
+    Auto-router for conversational tasks.
+
+    We let the Hugging Face router select the best provider for the model, based on availability and user preferences.
+    This is a special case since the selection is done server-side (avoid 1 API call to fetch provider mapping).
+    """
+
+    def __init__(self):
+        super().__init__(provider="auto", base_url="https://router.huggingface.co")
+
+    def _prepare_base_url(self, api_key: str) -> str:
+        """Return the base URL to use for the request.
+
+        Usually not overwritten in subclasses."""
+        # Route to the proxy if the api_key is a HF TOKEN
+        if not api_key.startswith("hf_"):
+            raise ValueError("Cannot select auto-router when using non-Hugging Face API key.")
+        else:
+            return self.base_url  # No `/auto` suffix in the URL
+
+    def _prepare_mapping_info(self, model: Optional[str]) -> InferenceProviderMapping:
+        """
+        In auto-router, we don't need to fetch provider mapping info.
+        We just return a dummy mapping info with provider_id set to the HF model ID.
+        """
+        if model is None:
+            raise ValueError("Please provide an HF model ID.")
+
+        return InferenceProviderMapping(
+            provider="auto",
+            hf_model_id=model,
+            providerId=model,
+            status="live",
+            task="conversational",
+        )
+
+
 class BaseTextGenerationTask(TaskProviderHelper):
     """
     Base class for text-generation (completion) tasks.
huggingface_hub/inference/_providers/clarifai.py
ADDED

@@ -0,0 +1,13 @@
+from ._common import BaseConversationalTask
+
+
+_PROVIDER = "clarifai"
+_BASE_URL = "https://api.clarifai.com"
+
+
+class ClarifaiConversationalTask(BaseConversationalTask):
+    def __init__(self):
+        super().__init__(provider=_PROVIDER, base_url=_BASE_URL)
+
+    def _prepare_route(self, mapped_model: str, api_key: str) -> str:
+        return "/v2/ext/openai/v1/chat/completions"
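The new provider plugs into the existing OpenAI-compatible conversational flow; only the base URL and route differ. An illustrative call (the model ID is an assumption, and valid credentials must be configured):

from huggingface_hub import InferenceClient

client = InferenceClient(provider="clarifai")  # requests go to api.clarifai.com/v2/ext/openai/v1/chat/completions
out = client.chat_completion(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello!"}],
)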
huggingface_hub/lfs.py
CHANGED
@@ -16,11 +16,9 @@

 import io
 import re
-import warnings
 from dataclasses import dataclass
 from math import ceil
 from os.path import getsize
-from pathlib import Path
 from typing import TYPE_CHECKING, BinaryIO, Iterable, Optional, TypedDict
 from urllib.parse import unquote

@@ -33,12 +31,10 @@ from .utils import (
     hf_raise_for_status,
     http_backoff,
     logging,
-    tqdm,
     validate_hf_hub_args,
 )
 from .utils._lfs import SliceFileObj
 from .utils.sha import sha256, sha_fileobj
-from .utils.tqdm import is_tqdm_disabled


 if TYPE_CHECKING:
@@ -332,23 +328,9 @@ def _upload_multi_part(operation: "CommitOperationAdd", header: dict, chunk_size
     # 1. Get upload URLs for each part
     sorted_parts_urls = _get_sorted_parts_urls(header=header, upload_info=operation.upload_info, chunk_size=chunk_size)

-    # 2. Upload parts (either with hf_transfer or in pure Python)
-    use_hf_transfer = constants.HF_HUB_ENABLE_HF_TRANSFER
-    if (
-        constants.HF_HUB_ENABLE_HF_TRANSFER
-        and not isinstance(operation.path_or_fileobj, str)
-        and not isinstance(operation.path_or_fileobj, Path)
-    ):
-        warnings.warn(
-            "hf_transfer is enabled but does not support uploading from bytes or BinaryIO, falling back to regular"
-            " upload"
-        )
-        use_hf_transfer = False
-
-    response_headers = (
-        _upload_parts_hf_transfer(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size)
-        if use_hf_transfer
-        else _upload_parts_iteratively(operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size)
+    # 2. Upload parts (pure Python)
+    response_headers = _upload_parts_iteratively(
+        operation=operation, sorted_parts_urls=sorted_parts_urls, chunk_size=chunk_size
     )

     # 3. Send completion request
@@ -409,47 +391,3 @@ def _upload_parts_iteratively(
         hf_raise_for_status(part_upload_res)
         headers.append(part_upload_res.headers)
     return headers  # type: ignore
-
-
-def _upload_parts_hf_transfer(
-    operation: "CommitOperationAdd", sorted_parts_urls: list[str], chunk_size: int
-) -> list[dict]:
-    # Upload file using an external Rust-based package. Upload is faster but support less features (no progress bars).
-    try:
-        from hf_transfer import multipart_upload
-    except ImportError:
-        raise ValueError(
-            "Fast uploading using 'hf_transfer' is enabled (HF_HUB_ENABLE_HF_TRANSFER=1) but 'hf_transfer' package is"
-            " not available in your environment. Try `pip install hf_transfer`."
-        )
-
-    total = operation.upload_info.size
-    desc = operation.path_in_repo
-    if len(desc) > 40:
-        desc = f"(…){desc[-40:]}"
-
-    with tqdm(
-        unit="B",
-        unit_scale=True,
-        total=total,
-        initial=0,
-        desc=desc,
-        disable=is_tqdm_disabled(logger.getEffectiveLevel()),
-        name="huggingface_hub.lfs_upload",
-    ) as progress:
-        try:
-            output = multipart_upload(
-                file_path=operation.path_or_fileobj,
-                parts_urls=sorted_parts_urls,
-                chunk_size=chunk_size,
-                max_files=128,
-                parallel_failures=127,  # could be removed
-                max_retries=5,
-                callback=progress.update,
-            )
-        except Exception as e:
-            raise RuntimeError(
-                "An error occurred while uploading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for"
-                " better error handling."
-            ) from e
-        return output
huggingface_hub/serialization/_torch.py
CHANGED

@@ -266,7 +266,7 @@ def save_torch_state_dict(
     safe_file_kwargs = {"metadata": per_file_metadata} if safe_serialization else {}
     for filename, tensors in state_dict_split.filename_to_tensors.items():
         shard = {tensor: state_dict[tensor] for tensor in tensors}
-        save_file_fn(shard, os.path.join(save_directory, filename), **safe_file_kwargs)
+        save_file_fn(shard, os.path.join(save_directory, filename), **safe_file_kwargs)  # ty: ignore[invalid-argument-type]
         logger.debug(f"Shard saved to {filename}")

     # Save the index (if any)
huggingface_hub/utils/__init__.py
CHANGED

@@ -75,7 +75,6 @@ from ._runtime import (
     get_gradio_version,
     get_graphviz_version,
     get_hf_hub_version,
-    get_hf_transfer_version,
     get_jinja_version,
     get_numpy_version,
     get_pillow_version,
@@ -94,7 +93,6 @@ from ._runtime import (
     is_google_colab,
     is_gradio_available,
     is_graphviz_available,
-    is_hf_transfer_available,
     is_jinja_available,
     is_notebook,
     is_numpy_available,
huggingface_hub/utils/_cache_manager.py
CHANGED

@@ -16,7 +16,6 @@

 import os
 import shutil
-import time
 from collections import defaultdict
 from dataclasses import dataclass
 from pathlib import Path
@@ -26,6 +25,7 @@ from huggingface_hub.errors import CacheNotFound, CorruptedCacheException

 from ..constants import HF_HUB_CACHE
 from . import logging
+from ._parsing import format_timesince
 from ._terminal import tabulate


@@ -79,7 +79,7 @@ class CachedFileInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.blob_last_accessed)
+        return format_timesince(self.blob_last_accessed)

     @property
     def blob_last_modified_str(self) -> str:
@@ -89,7 +89,7 @@ class CachedFileInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.blob_last_modified)
+        return format_timesince(self.blob_last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -153,7 +153,7 @@ class CachedRevisionInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_modified)
+        return format_timesince(self.last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -223,7 +223,7 @@ class CachedRepoInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_accessed)
+        return format_timesince(self.last_accessed)

     @property
     def last_modified_str(self) -> str:
@@ -233,7 +233,7 @@ class CachedRepoInfo:

         Example: "2 weeks ago".
         """
-        return _format_timesince(self.last_modified)
+        return format_timesince(self.last_modified)

     @property
     def size_on_disk_str(self) -> str:
@@ -244,6 +244,11 @@ class CachedRepoInfo:
         """
         return _format_size(self.size_on_disk)

+    @property
+    def cache_id(self) -> str:
+        """Canonical `type/id` identifier used across cache tooling."""
+        return f"{self.repo_type}/{self.repo_id}"
+
     @property
     def refs(self) -> dict[str, CachedRevisionInfo]:
         """
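The new `cache_id` property gives each cached repo the same `type/id` key the revamped `hf cache` CLI prints. A short sketch against the public API:

from huggingface_hub import scan_cache_dir

for repo in scan_cache_dir().repos:
    # prints e.g. "model/microsoft/UserLM-8b 32.1G 4 days ago"
    print(repo.cache_id, repo.size_on_disk_str, repo.last_accessed_str)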
@@ -607,15 +612,12 @@ def scan_cache_dir(cache_dir: Optional[Union[str, Path]] = None) -> HFCacheInfo:

     You can also print a detailed report directly from the `hf` command line using:
     ```text
-    > hf cache
-
-    ---------------------------
-    glue
-
-
-    bert-base-cased model 1.9G 13 main /Users/lucain/.cache/huggingface/hub/models--bert-base-cased
-    t5-base model 10.1K 3 main /Users/lucain/.cache/huggingface/hub/models--t5-base
-    t5-small model 970.7M 11 refs/pr/1, main /Users/lucain/.cache/huggingface/hub/models--t5-small
+    > hf cache ls
+    ID                          SIZE     LAST_ACCESSED LAST_MODIFIED REFS
+    --------------------------- -------- ------------- ------------- -----------
+    dataset/nyu-mll/glue        157.4M   2 days ago    2 days ago    main script
+    model/LiquidAI/LFM2-VL-1.6B 3.2G     4 days ago    4 days ago    main
+    model/microsoft/UserLM-8b   32.1G    4 days ago    4 days ago    main

     Done in 0.0s. Scanned 6 repo(s) for a total of 3.4G.
     Got 1 warning(s) while scanning. Use -vvv to print details.
@@ -816,33 +818,6 @@ def _format_size(num: int) -> str:
     return f"{num_f:.1f}Y"


-_TIMESINCE_CHUNKS = (
-    # Label, divider, max value
-    ("second", 1, 60),
-    ("minute", 60, 60),
-    ("hour", 60 * 60, 24),
-    ("day", 60 * 60 * 24, 6),
-    ("week", 60 * 60 * 24 * 7, 6),
-    ("month", 60 * 60 * 24 * 30, 11),
-    ("year", 60 * 60 * 24 * 365, None),
-)
-
-
-def _format_timesince(ts: float) -> str:
-    """Format timestamp in seconds into a human-readable string, relative to now.
-
-    Vaguely inspired by Django's `timesince` formatter.
-    """
-    delta = time.time() - ts
-    if delta < 20:
-        return "a few seconds ago"
-    for label, divider, max_value in _TIMESINCE_CHUNKS:  # noqa: B007
-        value = round(delta / divider)
-        if max_value is not None and value <= max_value:
-            break
-    return f"{value} {label}{'s' if value > 1 else ''} ago"
-
-
 def _try_delete_path(path: Path, path_type: str) -> None:
     """Try to delete a local file or folder.

huggingface_hub/utils/_http.py
CHANGED
@@ -109,6 +109,20 @@ async def async_hf_request_event_hook(request: httpx.Request) -> None:
     return hf_request_event_hook(request)


+async def async_hf_response_event_hook(response: httpx.Response) -> None:
+    if response.status_code >= 400:
+        # If response will raise, read content from stream to have it available when raising the exception
+        # If content-length is not set or is too large, skip reading the content to avoid OOM
+        if "Content-length" in response.headers:
+            try:
+                length = int(response.headers["Content-length"])
+            except ValueError:
+                return
+
+            if length < 1_000_000:
+                await response.aread()
+
+
 def default_client_factory() -> httpx.Client:
     """
     Factory function to create a `httpx.Client` with the default transport.
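The hook pre-reads small error bodies so that exception formatting can still access them after the stream is closed. Equivalent manual wiring on a bare client, as a sketch (the default factory shown in the next hunk already registers it):

import httpx
from huggingface_hub.utils._http import async_hf_response_event_hook

# Mirrors what default_async_client_factory() does for the "response" hook
client = httpx.AsyncClient(event_hooks={"response": [async_hf_response_event_hook]})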
@@ -125,7 +139,7 @@ def default_async_client_factory() -> httpx.AsyncClient:
     Factory function to create a `httpx.AsyncClient` with the default transport.
     """
     return httpx.AsyncClient(
-        event_hooks={"request": [async_hf_request_event_hook]},
+        event_hooks={"request": [async_hf_request_event_hook], "response": [async_hf_response_event_hook]},
         follow_redirects=True,
         timeout=httpx.Timeout(constants.DEFAULT_REQUEST_TIMEOUT, write=60.0),
     )
@@ -626,8 +640,16 @@ def _format(error_type: type[HfHubHTTPError], custom_message: str, response: httpx.Response) -> HfHubHTTPError:
     try:
         data = response.json()
     except httpx.ResponseNotRead:
-        response.read()  # In case of streaming response, we need to read the response first
-        data = response.json()
+        try:
+            response.read()  # In case of streaming response, we need to read the response first
+            data = response.json()
+        except RuntimeError:
+            # In case of async streaming response, we can't read the stream here.
+            # In practice if user is using the default async client from `get_async_client`, the stream will have
+            # already been read in the async event hook `async_hf_response_event_hook`.
+            #
+            # Here, we are skipping reading the response to avoid RuntimeError but it happens only if async + stream + used httpx.AsyncClient directly.
+            data = {}

     error = data.get("error")
     if error is not None:
huggingface_hub/utils/_parsing.py
ADDED

@@ -0,0 +1,98 @@
+# coding=utf-8
+# Copyright 2025-present, the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Parsing helpers shared across modules."""
+
+import re
+import time
+from typing import Dict
+
+
+RE_NUMBER_WITH_UNIT = re.compile(r"(\d+)([a-z]+)", re.IGNORECASE)
+
+BYTE_UNITS: Dict[str, int] = {
+    "k": 1_000,
+    "m": 1_000_000,
+    "g": 1_000_000_000,
+    "t": 1_000_000_000_000,
+    "p": 1_000_000_000_000_000,
+}
+
+TIME_UNITS: Dict[str, int] = {
+    "s": 1,
+    "m": 60,
+    "h": 60 * 60,
+    "d": 24 * 60 * 60,
+    "w": 7 * 24 * 60 * 60,
+    "mo": 30 * 24 * 60 * 60,
+    "y": 365 * 24 * 60 * 60,
+}
+
+
+def parse_size(value: str) -> int:
+    """Parse a size expressed as a string with digits and unit (like `"10MB"`) to an integer (in bytes)."""
+    return _parse_with_unit(value, BYTE_UNITS)
+
+
+def parse_duration(value: str) -> int:
+    """Parse a duration expressed as a string with digits and unit (like `"10s"`) to an integer (in seconds)."""
+    return _parse_with_unit(value, TIME_UNITS)
+
+
+def _parse_with_unit(value: str, units: Dict[str, int]) -> int:
+    """Parse a numeric value with optional unit."""
+    stripped = value.strip()
+    if not stripped:
+        raise ValueError("Value cannot be empty.")
+    try:
+        return int(value)
+    except ValueError:
+        pass
+
+    match = RE_NUMBER_WITH_UNIT.fullmatch(stripped)
+    if not match:
+        raise ValueError(f"Invalid value '{value}'. Must match pattern '\\d+[a-z]+' or be a plain number.")
+
+    number = int(match.group(1))
+    unit = match.group(2).lower()
+
+    if unit not in units:
+        raise ValueError(f"Unknown unit '{unit}'. Must be one of {list(units.keys())}.")
+
+    return number * units[unit]
+
+
+def format_timesince(ts: float) -> str:
+    """Format timestamp in seconds into a human-readable string, relative to now.
+
+    Vaguely inspired by Django's `timesince` formatter.
+    """
+    _TIMESINCE_CHUNKS = (
+        # Label, divider, max value
+        ("second", 1, 60),
+        ("minute", 60, 60),
+        ("hour", 60 * 60, 24),
+        ("day", 60 * 60 * 24, 6),
+        ("week", 60 * 60 * 24 * 7, 6),
+        ("month", 60 * 60 * 24 * 30, 11),
+        ("year", 60 * 60 * 24 * 365, None),
+    )
+    delta = time.time() - ts
+    if delta < 20:
+        return "a few seconds ago"
+    for label, divider, max_value in _TIMESINCE_CHUNKS:  # noqa: B007
+        value = round(delta / divider)
+        if max_value is not None and value <= max_value:
+            break
+    return f"{value} {label}{'s' if value > 1 else ''} ago"
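Round-trip behavior of the new helpers, as a quick sketch (values follow the decimal byte units and calendar approximations defined above):

import time
from huggingface_hub.utils._parsing import format_timesince, parse_duration, parse_size

assert parse_size("10m") == 10_000_000            # "m" = megabytes (decimal)
assert parse_size("512") == 512                   # plain numbers pass through as-is
assert parse_duration("2w") == 1_209_600          # 2 weeks in seconds
print(format_timesince(time.time() - 3 * 86_400))  # "3 days ago"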