huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
Files changed (150)
  1. huggingface_hub/__init__.py +145 -46
  2. huggingface_hub/_commit_api.py +168 -119
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +15 -12
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +239 -80
  10. huggingface_hub/_space_api.py +5 -5
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +172 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +13 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +38 -53
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +80 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +435 -351
  35. huggingface_hub/hf_api.py +2050 -1124
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +43 -63
  38. huggingface_hub/inference/_client.py +347 -434
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +397 -541
  41. huggingface_hub/inference/_generated/types/__init__.py +5 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  50. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  51. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  52. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  53. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  54. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  55. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  56. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  57. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  58. huggingface_hub/inference/_generated/types/translation.py +2 -2
  59. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  62. huggingface_hub/inference/_mcp/__init__.py +0 -0
  63. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  64. huggingface_hub/inference/_mcp/agent.py +100 -0
  65. huggingface_hub/inference/_mcp/cli.py +247 -0
  66. huggingface_hub/inference/_mcp/constants.py +81 -0
  67. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  68. huggingface_hub/inference/_mcp/types.py +45 -0
  69. huggingface_hub/inference/_mcp/utils.py +128 -0
  70. huggingface_hub/inference/_providers/__init__.py +82 -7
  71. huggingface_hub/inference/_providers/_common.py +129 -27
  72. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  73. huggingface_hub/inference/_providers/cerebras.py +1 -1
  74. huggingface_hub/inference/_providers/clarifai.py +13 -0
  75. huggingface_hub/inference/_providers/cohere.py +20 -3
  76. huggingface_hub/inference/_providers/fal_ai.py +183 -56
  77. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  78. huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
  79. huggingface_hub/inference/_providers/groq.py +9 -0
  80. huggingface_hub/inference/_providers/hf_inference.py +69 -30
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +33 -5
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +44 -0
  85. huggingface_hub/inference/_providers/openai.py +3 -1
  86. huggingface_hub/inference/_providers/publicai.py +6 -0
  87. huggingface_hub/inference/_providers/replicate.py +31 -13
  88. huggingface_hub/inference/_providers/sambanova.py +18 -4
  89. huggingface_hub/inference/_providers/scaleway.py +28 -0
  90. huggingface_hub/inference/_providers/together.py +20 -5
  91. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  92. huggingface_hub/inference/_providers/zai_org.py +17 -0
  93. huggingface_hub/lfs.py +33 -100
  94. huggingface_hub/repocard.py +34 -38
  95. huggingface_hub/repocard_data.py +57 -57
  96. huggingface_hub/serialization/__init__.py +0 -1
  97. huggingface_hub/serialization/_base.py +12 -15
  98. huggingface_hub/serialization/_dduf.py +8 -8
  99. huggingface_hub/serialization/_torch.py +69 -69
  100. huggingface_hub/utils/__init__.py +19 -8
  101. huggingface_hub/utils/_auth.py +7 -7
  102. huggingface_hub/utils/_cache_manager.py +92 -147
  103. huggingface_hub/utils/_chunk_utils.py +2 -3
  104. huggingface_hub/utils/_deprecation.py +1 -1
  105. huggingface_hub/utils/_dotenv.py +55 -0
  106. huggingface_hub/utils/_experimental.py +7 -5
  107. huggingface_hub/utils/_fixes.py +0 -10
  108. huggingface_hub/utils/_git_credential.py +5 -5
  109. huggingface_hub/utils/_headers.py +8 -30
  110. huggingface_hub/utils/_http.py +398 -239
  111. huggingface_hub/utils/_pagination.py +4 -4
  112. huggingface_hub/utils/_parsing.py +98 -0
  113. huggingface_hub/utils/_paths.py +5 -5
  114. huggingface_hub/utils/_runtime.py +61 -24
  115. huggingface_hub/utils/_safetensors.py +21 -21
  116. huggingface_hub/utils/_subprocess.py +9 -9
  117. huggingface_hub/utils/_telemetry.py +4 -4
  118. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  119. huggingface_hub/utils/_typing.py +25 -5
  120. huggingface_hub/utils/_validators.py +55 -74
  121. huggingface_hub/utils/_verification.py +167 -0
  122. huggingface_hub/utils/_xet.py +64 -17
  123. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  124. huggingface_hub/utils/insecure_hashlib.py +3 -5
  125. huggingface_hub/utils/logging.py +8 -11
  126. huggingface_hub/utils/tqdm.py +5 -4
  127. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
  128. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  129. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  130. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  131. huggingface_hub/commands/delete_cache.py +0 -474
  132. huggingface_hub/commands/download.py +0 -200
  133. huggingface_hub/commands/huggingface_cli.py +0 -61
  134. huggingface_hub/commands/lfs.py +0 -200
  135. huggingface_hub/commands/repo_files.py +0 -128
  136. huggingface_hub/commands/scan_cache.py +0 -181
  137. huggingface_hub/commands/tag.py +0 -159
  138. huggingface_hub/commands/upload.py +0 -314
  139. huggingface_hub/commands/upload_large_folder.py +0 -129
  140. huggingface_hub/commands/user.py +0 -304
  141. huggingface_hub/commands/version.py +0 -37
  142. huggingface_hub/inference_api.py +0 -217
  143. huggingface_hub/keras_mixin.py +0 -500
  144. huggingface_hub/repository.py +0 -1477
  145. huggingface_hub/serialization/_tensorflow.py +0 -95
  146. huggingface_hub/utils/_hf_folder.py +0 -68
  147. huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
  148. huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
  149. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  150. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
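
Two mechanical migrations account for much of the churn above: the CLI moved from `huggingface_hub/commands/` to `huggingface_hub/cli/`, and type annotations moved from `typing.List`/`typing.Dict` to the built-in generics. A minimal sketch of the typing change (illustrative only, not code from the package):

```python
# Pre-1.0 style: container generics imported from `typing`.
from typing import Dict, List, Optional

def old_style(paths: List[str], headers: Dict[str, str], revision: Optional[str] = None) -> None:
    pass

# 1.x style: built-in `list`/`dict` generics; `Optional`/`Union` stay in `typing`.
from typing import Optional

def new_style(paths: list[str], headers: dict[str, str], revision: Optional[str] = None) -> None:
    pass
```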
huggingface_hub/_commit_api.py

@@ -4,7 +4,6 @@ Type definitions and utilities for the `create_commit` API
 
 import base64
 import io
-import math
 import os
 import warnings
 from collections import defaultdict
@@ -12,7 +11,7 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from itertools import groupby
 from pathlib import Path, PurePosixPath
-from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, Literal, Optional, Union
 
 from tqdm.contrib.concurrent import thread_map
 
@@ -23,6 +22,7 @@ from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
     XetTokenType,
+    are_progress_bars_disabled,
     chunk_iterable,
     fetch_xet_connection_info_from_repo_info,
     get_session,
@@ -33,7 +33,7 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
-from .utils.tqdm import _get_progress_bar_context
+from .utils._runtime import is_xet_available
 
 
 if TYPE_CHECKING:
@@ -236,7 +236,7 @@ class CommitOperationAdd:
     config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
 
     >>> with operation.as_file(with_tqdm=True) as file:
-    ...     requests.put(..., data=file)
+    ...     httpx.put(..., data=file)
     config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
     ```
     """
@@ -307,7 +307,7 @@ def _validate_path_in_repo(path_in_repo: str) -> str:
 CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
 
 
-def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
+def _warn_on_overwriting_operations(operations: list[CommitOperation]) -> None:
     """
     Warn user when a list of operations is expected to overwrite itself in a single
     commit.
@@ -322,7 +322,7 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
     delete before upload) but can happen if a user deletes an entire folder and then
     add new files to it.
     """
-    nb_additions_per_path: Dict[str, int] = defaultdict(int)
+    nb_additions_per_path: dict[str, int] = defaultdict(int)
     for operation in operations:
         path_in_repo = operation.path_in_repo
         if isinstance(operation, CommitOperationAdd):
@@ -354,15 +354,95 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
 
 
 @validate_hf_hub_args
-def _upload_lfs_files(
+def _upload_files(
     *,
-    additions: List[CommitOperationAdd],
+    additions: list[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     endpoint: Optional[str] = None,
     num_threads: int = 5,
     revision: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+):
+    """
+    Negotiates per-file transfer (LFS vs Xet) and uploads in batches.
+    """
+    xet_additions: list[CommitOperationAdd] = []
+    lfs_actions: list[dict[str, Any]] = []
+    lfs_oid2addop: dict[str, CommitOperationAdd] = {}
+
+    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
+        chunk_list = [op for op in chunk]
+
+        transfers: list[str] = ["basic", "multipart"]
+        has_buffered_io_data = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in chunk_list)
+        if is_xet_available():
+            if not has_buffered_io_data:
+                transfers.append("xet")
+            else:
+                logger.warning(
+                    "Uploading files as a binary IO buffer is not supported by Xet Storage. "
+                    "Falling back to HTTP upload."
+                )
+
+        actions_chunk, errors_chunk, chosen_transfer = post_lfs_batch_info(
+            upload_infos=[op.upload_info for op in chunk_list],
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            endpoint=endpoint,
+            headers=headers,
+            token=None,  # already passed in 'headers'
+            transfers=transfers,
+        )
+        if errors_chunk:
+            message = "\n".join(
+                [
+                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
+                    for err in errors_chunk
+                ]
+            )
+            raise ValueError(f"LFS batch API returned errors:\n{message}")
+
+        # If server returns a transfer we didn't offer (e.g "xet" while uploading from BytesIO),
+        # fall back to LFS for this chunk.
+        if chosen_transfer == "xet" and ("xet" in transfers):
+            xet_additions.extend(chunk_list)
+        else:
+            lfs_actions.extend(actions_chunk)
+            for op in chunk_list:
+                lfs_oid2addop[op.upload_info.sha256.hex()] = op
+
+    if len(lfs_actions) > 0:
+        _upload_lfs_files(
+            actions=lfs_actions,
+            oid2addop=lfs_oid2addop,
+            headers=headers,
+            endpoint=endpoint,
+            num_threads=num_threads,
+        )
+
+    if len(xet_additions) > 0:
+        _upload_xet_files(
+            additions=xet_additions,
+            repo_type=repo_type,
+            repo_id=repo_id,
+            headers=headers,
+            endpoint=endpoint,
+            revision=revision,
+            create_pr=create_pr,
+        )
+
+
+@validate_hf_hub_args
+def _upload_lfs_files(
+    *,
+    actions: list[dict[str, Any]],
+    oid2addop: dict[str, CommitOperationAdd],
+    headers: dict[str, str],
+    endpoint: Optional[str] = None,
+    num_threads: int = 5,
 ):
     """
     Uploads the content of `additions` to the Hub using the large file storage protocol.
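
Editor's note: the new `_upload_files` above is a negotiation layer: the client advertises the transfers it supports, the server picks one per batch, and each batch is routed accordingly. A self-contained sketch of that idea, with hypothetical helper names:

```python
def offered_transfers(xet_available: bool, has_buffered_io: bool) -> list[str]:
    """Transfers the client advertises to the LFS batch endpoint."""
    transfers = ["basic", "multipart"]  # plain HTTP uploads, always supported
    if xet_available and not has_buffered_io:
        transfers.append("xet")  # Xet handles paths/bytes, not raw IO buffers
    return transfers

def route_batch(chosen_transfer: str, offered: list[str]) -> str:
    """Route a batch to Xet only if the server chose it AND we offered it."""
    return "xet" if chosen_transfer == "xet" and "xet" in offered else "lfs"

assert route_batch("xet", offered_transfers(True, False)) == "xet"
assert route_batch("xet", offered_transfers(True, True)) == "lfs"  # buffered IO falls back
```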
@@ -371,14 +451,26 @@ def _upload_lfs_files(
     - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
 
     Args:
-        additions (`List` of `CommitOperationAdd`):
-            The files to be uploaded
-        repo_type (`str`):
+        actions (`list[dict[str, Any]]`):
+            LFS batch actions returned by the server.
+        oid2addop (`dict[str, CommitOperationAdd]`):
+            A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object.
+        headers (`dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        endpoint (`str`, *optional*):
+            The endpoint to use for the request. Defaults to `constants.ENDPOINT`.
+        num_threads (`int`, *optional*):
+            The number of concurrent threads to use when uploading. Defaults to 5.
+
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         num_threads (`int`, *optional*):
             The number of concurrent threads to use when uploading. Defaults to 5.
@@ -390,53 +482,20 @@ def _upload_lfs_files(
             If an upload failed for any reason
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If the server returns malformed responses
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
             If the LFS batch endpoint returned an HTTP error.
     """
-    # Step 1: retrieve upload instructions from the LFS batch endpoint.
-    #         Upload instructions are retrieved by chunk of 256 files to avoid reaching
-    #         the payload limit.
-    batch_actions: List[Dict] = []
-    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
-        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
-            upload_infos=[op.upload_info for op in chunk],
-            repo_id=repo_id,
-            repo_type=repo_type,
-            revision=revision,
-            endpoint=endpoint,
-            headers=headers,
-            token=None,  # already passed in 'headers'
-        )
-
-        # If at least 1 error, we do not retrieve information for other chunks
-        if batch_errors_chunk:
-            message = "\n".join(
-                [
-                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
-                    for err in batch_errors_chunk
-                ]
-            )
-            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
-
-        batch_actions += batch_actions_chunk
-    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
-
-    # Step 2: ignore files that have already been uploaded
+    # Filter out files already present upstream
     filtered_actions = []
-    for action in batch_actions:
+    for action in actions:
         if action.get("actions") is None:
             logger.debug(
-                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
-                " present upstream - skipping upload."
+                f"Content of file {oid2addop[action['oid']].path_in_repo} is already present upstream - skipping upload."
             )
         else:
             filtered_actions.append(action)
 
-    if len(filtered_actions) == 0:
-        logger.debug("No LFS files to upload.")
-        return
-
-    # Step 3: upload files concurrently according to these instructions
+    # Upload according to server-provided actions
     def _wrapped_lfs_upload(batch_action) -> None:
         try:
             operation = oid2addop[batch_action["oid"]]
@@ -444,11 +503,7 @@ def _upload_lfs_files(
         except Exception as exc:
             raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
 
-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
-        for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
-            _wrapped_lfs_upload(action)
-    elif len(filtered_actions) == 1:
+    if len(filtered_actions) == 1:
         logger.debug("Uploading 1 LFS file to the Hub")
         _wrapped_lfs_upload(filtered_actions[0])
     else:
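
Editor's note: with the `hf_transfer` branch removed, multi-file uploads always go through `thread_map` (imported at the top of this file). A hedged, self-contained sketch of that fan-out pattern; the action dicts are made up:

```python
from tqdm.contrib.concurrent import thread_map

def upload_one(action: dict) -> None:
    print(f"uploading OID {action['oid']}")  # stand-in for the real lfs_upload(...)

actions = [{"oid": "aaa"}, {"oid": "bbb"}, {"oid": "ccc"}]
if len(actions) == 1:
    upload_one(actions[0])  # skip the thread pool for a single file
else:
    # thread_map = ThreadPoolExecutor + a tqdm progress bar over the results
    thread_map(upload_one, actions, max_workers=5, desc="Upload LFS files")
```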
@@ -467,10 +522,10 @@
 @validate_hf_hub_args
 def _upload_xet_files(
     *,
-    additions: List[CommitOperationAdd],
+    additions: list[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     endpoint: Optional[str] = None,
     revision: Optional[str] = None,
     create_pr: Optional[bool] = None,
@@ -480,14 +535,14 @@
     This chunks the files and deduplicates the chunks before uploading them to xetcas storage.
 
     Args:
-        additions (`List` of `CommitOperationAdd`):
+        additions (`list` of `CommitOperationAdd`):
            The files to be uploaded.
        repo_type (`str`):
            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
        repo_id (`str`):
            A namespace (user or an organization) and a repo name separated
            by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
            Headers to use for the request, including authorization headers and user agent.
        endpoint: (`str`, *optional*):
            The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`.
@@ -501,7 +556,7 @@
         If an upload failed for any reason.
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
             If the LFS batch endpoint returned an HTTP error.
 
     **How it works:**
@@ -529,9 +584,12 @@
     """
     if len(additions) == 0:
         return
+
     # at this point, we know that hf_xet is installed
     from hf_xet import upload_bytes, upload_files
 
+    from .utils._xet_progress_reporting import XetProgressReporter
+
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
@@ -553,7 +611,7 @@
     xet_endpoint = xet_connection_info.endpoint
     access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)
 
-    def token_refresher() -> Tuple[str, int]:
+    def token_refresher() -> tuple[str, int]:
         new_xet_connection = fetch_xet_connection_info_from_repo_info(
             token_type=XetTokenType.WRITE,
             repo_id=repo_id,
@@ -567,51 +625,42 @@
             raise XetRefreshTokenError("Failed to refresh xet token")
         return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
 
-    num_chunks = math.ceil(len(additions) / UPLOAD_BATCH_MAX_NUM_FILES)
-    num_chunks_num_digits = int(math.log10(num_chunks)) + 1
-    for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
-        _chunk = [op for op in chunk]
+    if not are_progress_bars_disabled():
+        progress = XetProgressReporter()
+        progress_callback = progress.update_progress
+    else:
+        progress, progress_callback = None, None
 
-        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
-        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
-        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
+    try:
+        all_bytes_ops = [op for op in additions if isinstance(op.path_or_fileobj, bytes)]
+        all_paths_ops = [op for op in additions if isinstance(op.path_or_fileobj, (str, Path))]
+
+        if len(all_paths_ops) > 0:
+            all_paths = [str(op.path_or_fileobj) for op in all_paths_ops]
+            upload_files(
+                all_paths,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )
+
+        if len(all_bytes_ops) > 0:
+            all_bytes = [op.path_or_fileobj for op in all_bytes_ops]
+            upload_bytes(
+                all_bytes,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )
+
+    finally:
+        if progress is not None:
+            progress.close(False)
 
-        if num_chunks > 1:
-            description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
-        else:
-            description = "Uploading..."
-        progress_cm = _get_progress_bar_context(
-            desc=description,
-            total=expected_size,
-            initial=0,
-            unit="B",
-            unit_scale=True,
-            name="huggingface_hub.xet_put",
-            log_level=logger.getEffectiveLevel(),
-        )
-        with progress_cm as progress:
-
-            def update_progress(increment: int):
-                progress.update(increment)
-
-            if len(paths_ops) > 0:
-                upload_files(
-                    [str(op.path_or_fileobj) for op in paths_ops],
-                    xet_endpoint,
-                    access_token_info,
-                    token_refresher,
-                    update_progress,
-                    repo_type,
-                )
-            if len(bytes_ops) > 0:
-                upload_bytes(
-                    [op.path_or_fileobj for op in bytes_ops],
-                    xet_endpoint,
-                    access_token_info,
-                    token_refresher,
-                    update_progress,
-                    repo_type,
-                )
     return
 
 
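Editor's note: the rework above replaces per-batch progress bars with a single global pass: all additions are split once by payload type and handed to `hf_xet` in one `upload_files` and one `upload_bytes` call sharing a progress callback. A small sketch of the split, using a hypothetical helper:

```python
from pathlib import Path
from typing import Union

Payload = Union[bytes, str, Path]

def split_by_payload(payloads: list[Payload]) -> tuple[list[bytes], list[str]]:
    """Group payloads the way `_upload_xet_files` does: raw bytes vs. file paths."""
    as_bytes = [p for p in payloads if isinstance(p, bytes)]
    as_paths = [str(p) for p in payloads if isinstance(p, (str, Path))]
    return as_bytes, as_paths

blobs, paths = split_by_payload([b"raw", "model.safetensors", Path("config.json")])
assert blobs == [b"raw"]
assert paths == ["model.safetensors", "config.json"]
```
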
@@ -635,7 +684,7 @@ def _fetch_upload_modes(
     additions: Iterable[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
     create_pr: bool = False,
@@ -654,7 +703,7 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
@@ -672,12 +721,12 @@
     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
 
     # Fetch upload mode (LFS or regular) chunk by chunk.
-    upload_modes: Dict[str, UploadMode] = {}
-    should_ignore_info: Dict[str, bool] = {}
-    oid_info: Dict[str, Optional[str]] = {}
+    upload_modes: dict[str, UploadMode] = {}
+    should_ignore_info: dict[str, bool] = {}
+    oid_info: dict[str, Optional[str]] = {}
 
     for chunk in chunk_iterable(additions, 256):
-        payload: Dict = {
+        payload: dict = {
             "files": [
                 {
                     "path": op.path_in_repo,
@@ -720,10 +769,10 @@ def _fetch_files_to_copy(
     copies: Iterable[CommitOperationCopy],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
-) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+) -> dict[tuple[str, Optional[str]], Union["RepoFile", bytes]]:
     """
     Fetch information about the files to copy.
 
@@ -739,12 +788,12 @@
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
 
-    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
+    Returns: `dict[tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
         Key is the file path and revision of the file to copy.
         Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
 
@@ -757,9 +806,9 @@
     from .hf_api import HfApi, RepoFolder
 
     hf_api = HfApi(endpoint=endpoint, headers=headers)
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
     # Store (path, revision) -> oid mapping
-    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}
+    oid_info: dict[tuple[str, Optional[str]], Optional[str]] = {}
     # 1. Fetch OIDs for destination paths in batches.
     dest_paths = [op.path_in_repo for op in copies]
     for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE):
@@ -819,11 +868,11 @@
 
 def _prepare_commit_payload(
     operations: Iterable[CommitOperation],
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]],
     commit_message: str,
     commit_description: Optional[str] = None,
     parent_commit: Optional[str] = None,
-) -> Iterable[Dict[str, Any]]:
+) -> Iterable[dict[str, Any]]:
     """
     Builds the payload to POST to the `/commit` API of the Hub.
 
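Editor's note: the body of `_prepare_commit_payload` is not shown in this diff; it yields one JSON-serializable dict per line of the request body. The field names below are an assumption about the `/commit` endpoint's newline-delimited JSON format, shown only to illustrate the shape:

```python
import base64
import json

# Hypothetical payload lines: a header line followed by one line per operation.
payload = [
    {"key": "header", "value": {"summary": "Upload config", "description": ""}},
    {
        "key": "file",
        "value": {"path": "config.json", "encoding": "base64",
                  "content": base64.b64encode(b"{}").decode()},
    },
    {"key": "deletedFile", "value": {"path": "old.bin"}},
]
body = "\n".join(json.dumps(line) for line in payload)  # NDJSON request body
```
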
huggingface_hub/_commit_scheduler.py

@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from io import SEEK_END, SEEK_SET, BytesIO
 from pathlib import Path
 from threading import Lock, Thread
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi
 from .utils import filter_repo_objects
@@ -53,9 +53,9 @@ class CommitScheduler:
             Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
         token (`str`, *optional*):
             The token to use to commit to the repo. Defaults to the token saved on the machine.
-        allow_patterns (`List[str]` or `str`, *optional*):
+        allow_patterns (`list[str]` or `str`, *optional*):
             If provided, only files matching at least one pattern are uploaded.
-        ignore_patterns (`List[str]` or `str`, *optional*):
+        ignore_patterns (`list[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not uploaded.
         squash_history (`bool`, *optional*):
             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
@@ -108,8 +108,8 @@ class CommitScheduler:
         revision: Optional[str] = None,
         private: Optional[bool] = None,
         token: Optional[str] = None,
-        allow_patterns: Optional[Union[List[str], str]] = None,
-        ignore_patterns: Optional[Union[List[str], str]] = None,
+        allow_patterns: Optional[Union[list[str], str]] = None,
+        ignore_patterns: Optional[Union[list[str], str]] = None,
         squash_history: bool = False,
         hf_api: Optional["HfApi"] = None,
     ) -> None:
@@ -138,7 +138,7 @@ class CommitScheduler:
         self.token = token
 
         # Keep track of already uploaded files
-        self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp
+        self.last_uploaded: dict[Path, float] = {}  # key is local path, value is timestamp
 
         # Scheduler
         if not every > 0:
@@ -205,13 +205,10 @@ class CommitScheduler:
         """
         Push folder to the Hub and return the commit info.
 
-        <Tip warning={true}>
-
-        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
-        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
-        issues.
-
-        </Tip>
+        > [!WARNING]
+        > This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+        > queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+        > issues.
 
         The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
         uploads only changed files. If no changes are found, the method returns without committing anything. If you want
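
Editor's note: typical `CommitScheduler` usage, sketched from the parameters visible in these hunks plus the documented `folder_path`/`every` arguments; the repo name is hypothetical:

```python
from huggingface_hub import CommitScheduler

scheduler = CommitScheduler(
    repo_id="username/my-dataset",      # hypothetical repo
    repo_type="dataset",
    folder_path="data/",                # local folder watched by the scheduler
    every=10,                           # push at most every 10 minutes
    allow_patterns=["*.json"],          # only upload JSON files
    ignore_patterns=["*.tmp"],          # skip temp files
)
# ... append to files under data/; a background thread commits changed files.
scheduler.trigger()  # force an immediate (queued) push
scheduler.stop()     # stop the background thread
```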
@@ -232,7 +229,7 @@ class CommitScheduler:
         prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
 
         # Filter with pattern + filter out unchanged files + retrieve current file size
-        files_to_upload: List[_FileToUpload] = []
+        files_to_upload: list[_FileToUpload] = []
         for relpath in filter_repo_objects(
             relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns
         ):
@@ -315,10 +312,13 @@ class PartialFileIO(BytesIO):
         return self._size_limit
 
     def __getattribute__(self, name: str):
-        if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
+        if name.startswith("_") or name in ("read", "tell", "seek", "fileno"):  # only 4 public methods supported
             return super().__getattribute__(name)
         raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
 
+    def fileno(self):
+        raise AttributeError("PartialFileIO does not have a fileno.")
+
     def tell(self) -> int:
         """Return the current file position."""
         return self._file.tell()
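
Editor's note on the `fileno` addition above: HTTP clients commonly probe a body for a real OS file descriptor in order to `stat` its size; raising `AttributeError` makes that probe fail cleanly, so callers fall back to streaming reads. A hedged sketch of such a probe (not huggingface_hub code):

```python
import os
from typing import Optional

def body_length(f) -> Optional[int]:
    """Return the on-disk size of a file-like body, or None if it has no real fd."""
    try:
        return os.fstat(f.fileno()).st_size  # works for objects backed by a real file
    except (AttributeError, OSError):
        return None  # PartialFileIO lands here; the caller streams instead

with open(__file__, "rb") as fh:
    assert body_length(fh) is not None
```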
huggingface_hub/_inference_endpoints.py

@@ -2,7 +2,7 @@ import time
 from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum
-from typing import TYPE_CHECKING, Dict, Optional, Union
+from typing import TYPE_CHECKING, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
@@ -62,7 +62,7 @@ class InferenceEndpoint:
            The timestamp of the last update of the Inference Endpoint.
        type ([`InferenceEndpointType`]):
            The type of the Inference Endpoint (public, protected, private).
-        raw (`Dict`):
+        raw (`dict`):
            The raw dictionary data returned from the API.
        token (`str` or `bool`, *optional*):
            Authentication token for the Inference Endpoint, if set when requesting the API. Will default to the
@@ -100,6 +100,7 @@ class InferenceEndpoint:
     namespace: str
     repository: str = field(init=False)
     status: InferenceEndpointStatus = field(init=False)
+    health_route: str = field(init=False)
     url: Optional[str] = field(init=False)
 
     # Other fields
@@ -111,7 +112,7 @@ class InferenceEndpoint:
     type: InferenceEndpointType = field(repr=False, init=False)
 
     # Raw dict from the API
-    raw: Dict = field(repr=False)
+    raw: dict = field(repr=False)
 
     # Internal fields
     _token: Union[str, bool, None] = field(repr=False, compare=False)
@@ -119,7 +120,7 @@
 
     @classmethod
     def from_raw(
-        cls, raw: Dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
+        cls, raw: dict, namespace: str, token: Union[str, bool, None] = None, api: Optional["HfApi"] = None
     ) -> "InferenceEndpoint":
         """Initialize object from raw dictionary."""
         if api is None:
@@ -220,7 +221,8 @@
             )
         if self.status == InferenceEndpointStatus.RUNNING and self.url is not None:
             # Verify the endpoint is actually reachable
-            response = get_session().get(self.url, headers=self._api._build_hf_headers(token=self._token))
+            _health_url = f"{self.url.rstrip('/')}/{self.health_route.lstrip('/')}"
+            response = get_session().get(_health_url, headers=self._api._build_hf_headers(token=self._token))
             if response.status_code == 200:
                 logger.info("Inference Endpoint is ready to be used.")
                 return self
@@ -258,8 +260,8 @@
         framework: Optional[str] = None,
         revision: Optional[str] = None,
         task: Optional[str] = None,
-        custom_image: Optional[Dict] = None,
-        secrets: Optional[Dict[str, str]] = None,
+        custom_image: Optional[dict] = None,
+        secrets: Optional[dict[str, str]] = None,
     ) -> "InferenceEndpoint":
         """Update the Inference Endpoint.
 
@@ -291,10 +293,10 @@
                The specific model revision to deploy on the Inference Endpoint (e.g. `"6c0e6080953db56375760c0471a8c5f2929baf11"`).
            task (`str`, *optional*):
                The task on which to deploy the model (e.g. `"text-classification"`).
-            custom_image (`Dict`, *optional*):
+            custom_image (`dict`, *optional*):
                A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
-            secrets (`Dict[str, str]`, *optional*):
+            secrets (`dict[str, str]`, *optional*):
                Secret values to inject in the container environment.
        Returns:
            [`InferenceEndpoint`]: the same Inference Endpoint, mutated in place with the latest data.
@@ -327,7 +329,7 @@
         """Pause the Inference Endpoint.
 
         A paused Inference Endpoint will not be charged. It can be resumed at any time using [`InferenceEndpoint.resume`].
-        This is different than scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
+        This is different from scaling the Inference Endpoint to zero with [`InferenceEndpoint.scale_to_zero`], which
         would be automatically restarted when a request is made to it.
 
         This is an alias for [`HfApi.pause_inference_endpoint`]. The current object is mutated in place with the
@@ -365,8 +367,8 @@
     def scale_to_zero(self) -> "InferenceEndpoint":
         """Scale Inference Endpoint to zero.
 
-        An Inference Endpoint scaled to zero will not be charged. It will be resume on the next request to it, with a
-        cold start delay. This is different than pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
+        An Inference Endpoint scaled to zero will not be charged. It will be resumed on the next request to it, with a
+        cold start delay. This is different from pausing the Inference Endpoint with [`InferenceEndpoint.pause`], which
         would require a manual resume with [`InferenceEndpoint.resume`].
 
         This is an alias for [`HfApi.scale_to_zero_inference_endpoint`]. The current object is mutated in place with the
@@ -400,6 +402,7 @@
         self.repository = self.raw["model"]["repository"]
         self.status = self.raw["status"]["state"]
         self.url = self.raw["status"].get("url")
+        self.health_route = self.raw["healthRoute"]
 
         # Other fields
         self.framework = self.raw["model"]["framework"]
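
Editor's note: the `rstrip`/`lstrip` join used for `_health_url` above is slash-safe regardless of how the two parts are stored; the values below are hypothetical:

```python
url = "https://my-endpoint.example/"   # may or may not end with a slash
health_route = "/health"               # may or may not start with one

health_url = f"{url.rstrip('/')}/{health_route.lstrip('/')}"
assert health_url == "https://my-endpoint.example/health"
```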