huggingface-hub 0.29.0rc2__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +160 -46
- huggingface_hub/_commit_api.py +277 -71
- huggingface_hub/_commit_scheduler.py +15 -15
- huggingface_hub/_inference_endpoints.py +33 -22
- huggingface_hub/_jobs_api.py +301 -0
- huggingface_hub/_local_folder.py +18 -3
- huggingface_hub/_login.py +31 -63
- huggingface_hub/_oauth.py +460 -0
- huggingface_hub/_snapshot_download.py +241 -81
- huggingface_hub/_space_api.py +18 -10
- huggingface_hub/_tensorboard_logger.py +15 -19
- huggingface_hub/_upload_large_folder.py +196 -76
- huggingface_hub/_webhooks_payload.py +3 -3
- huggingface_hub/_webhooks_server.py +15 -25
- huggingface_hub/{commands → cli}/__init__.py +1 -15
- huggingface_hub/cli/_cli_utils.py +173 -0
- huggingface_hub/cli/auth.py +147 -0
- huggingface_hub/cli/cache.py +841 -0
- huggingface_hub/cli/download.py +189 -0
- huggingface_hub/cli/hf.py +60 -0
- huggingface_hub/cli/inference_endpoints.py +377 -0
- huggingface_hub/cli/jobs.py +772 -0
- huggingface_hub/cli/lfs.py +175 -0
- huggingface_hub/cli/repo.py +315 -0
- huggingface_hub/cli/repo_files.py +94 -0
- huggingface_hub/{commands/env.py → cli/system.py} +10 -13
- huggingface_hub/cli/upload.py +294 -0
- huggingface_hub/cli/upload_large_folder.py +117 -0
- huggingface_hub/community.py +20 -12
- huggingface_hub/constants.py +83 -59
- huggingface_hub/dataclasses.py +609 -0
- huggingface_hub/errors.py +99 -30
- huggingface_hub/fastai_utils.py +30 -41
- huggingface_hub/file_download.py +606 -346
- huggingface_hub/hf_api.py +2445 -1132
- huggingface_hub/hf_file_system.py +269 -152
- huggingface_hub/hub_mixin.py +61 -66
- huggingface_hub/inference/_client.py +501 -630
- huggingface_hub/inference/_common.py +133 -121
- huggingface_hub/inference/_generated/_async_client.py +536 -722
- huggingface_hub/inference/_generated/types/__init__.py +6 -1
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +5 -6
- huggingface_hub/inference/_generated/types/base.py +10 -7
- huggingface_hub/inference/_generated/types/chat_completion.py +77 -31
- huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
- huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
- huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
- huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
- huggingface_hub/inference/_generated/types/image_to_image.py +8 -2
- huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
- huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
- huggingface_hub/inference/_generated/types/summarization.py +2 -2
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
- huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
- huggingface_hub/inference/_generated/types/text_generation.py +11 -11
- huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
- huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
- huggingface_hub/inference/_generated/types/token_classification.py +2 -2
- huggingface_hub/inference/_generated/types/translation.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
- huggingface_hub/inference/_mcp/__init__.py +0 -0
- huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
- huggingface_hub/inference/_mcp/agent.py +100 -0
- huggingface_hub/inference/_mcp/cli.py +247 -0
- huggingface_hub/inference/_mcp/constants.py +81 -0
- huggingface_hub/inference/_mcp/mcp_client.py +395 -0
- huggingface_hub/inference/_mcp/types.py +45 -0
- huggingface_hub/inference/_mcp/utils.py +128 -0
- huggingface_hub/inference/_providers/__init__.py +149 -20
- huggingface_hub/inference/_providers/_common.py +160 -37
- huggingface_hub/inference/_providers/black_forest_labs.py +12 -9
- huggingface_hub/inference/_providers/cerebras.py +6 -0
- huggingface_hub/inference/_providers/clarifai.py +13 -0
- huggingface_hub/inference/_providers/cohere.py +32 -0
- huggingface_hub/inference/_providers/fal_ai.py +231 -22
- huggingface_hub/inference/_providers/featherless_ai.py +38 -0
- huggingface_hub/inference/_providers/fireworks_ai.py +22 -1
- huggingface_hub/inference/_providers/groq.py +9 -0
- huggingface_hub/inference/_providers/hf_inference.py +143 -33
- huggingface_hub/inference/_providers/hyperbolic.py +9 -5
- huggingface_hub/inference/_providers/nebius.py +47 -5
- huggingface_hub/inference/_providers/novita.py +48 -5
- huggingface_hub/inference/_providers/nscale.py +44 -0
- huggingface_hub/inference/_providers/openai.py +25 -0
- huggingface_hub/inference/_providers/publicai.py +6 -0
- huggingface_hub/inference/_providers/replicate.py +46 -9
- huggingface_hub/inference/_providers/sambanova.py +37 -1
- huggingface_hub/inference/_providers/scaleway.py +28 -0
- huggingface_hub/inference/_providers/together.py +34 -5
- huggingface_hub/inference/_providers/wavespeed.py +138 -0
- huggingface_hub/inference/_providers/zai_org.py +17 -0
- huggingface_hub/lfs.py +33 -100
- huggingface_hub/repocard.py +34 -38
- huggingface_hub/repocard_data.py +79 -59
- huggingface_hub/serialization/__init__.py +0 -1
- huggingface_hub/serialization/_base.py +12 -15
- huggingface_hub/serialization/_dduf.py +8 -8
- huggingface_hub/serialization/_torch.py +69 -69
- huggingface_hub/utils/__init__.py +27 -8
- huggingface_hub/utils/_auth.py +7 -7
- huggingface_hub/utils/_cache_manager.py +92 -147
- huggingface_hub/utils/_chunk_utils.py +2 -3
- huggingface_hub/utils/_deprecation.py +1 -1
- huggingface_hub/utils/_dotenv.py +55 -0
- huggingface_hub/utils/_experimental.py +7 -5
- huggingface_hub/utils/_fixes.py +0 -10
- huggingface_hub/utils/_git_credential.py +5 -5
- huggingface_hub/utils/_headers.py +8 -30
- huggingface_hub/utils/_http.py +399 -237
- huggingface_hub/utils/_pagination.py +6 -6
- huggingface_hub/utils/_parsing.py +98 -0
- huggingface_hub/utils/_paths.py +5 -5
- huggingface_hub/utils/_runtime.py +74 -22
- huggingface_hub/utils/_safetensors.py +21 -21
- huggingface_hub/utils/_subprocess.py +13 -11
- huggingface_hub/utils/_telemetry.py +4 -4
- huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
- huggingface_hub/utils/_typing.py +25 -5
- huggingface_hub/utils/_validators.py +55 -74
- huggingface_hub/utils/_verification.py +167 -0
- huggingface_hub/utils/_xet.py +235 -0
- huggingface_hub/utils/_xet_progress_reporting.py +162 -0
- huggingface_hub/utils/insecure_hashlib.py +3 -5
- huggingface_hub/utils/logging.py +8 -11
- huggingface_hub/utils/tqdm.py +33 -4
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -82
- huggingface_hub-1.1.3.dist-info/RECORD +155 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
- huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
- huggingface_hub/commands/delete_cache.py +0 -428
- huggingface_hub/commands/download.py +0 -200
- huggingface_hub/commands/huggingface_cli.py +0 -61
- huggingface_hub/commands/lfs.py +0 -200
- huggingface_hub/commands/repo_files.py +0 -128
- huggingface_hub/commands/scan_cache.py +0 -181
- huggingface_hub/commands/tag.py +0 -159
- huggingface_hub/commands/upload.py +0 -299
- huggingface_hub/commands/upload_large_folder.py +0 -129
- huggingface_hub/commands/user.py +0 -304
- huggingface_hub/commands/version.py +0 -37
- huggingface_hub/inference_api.py +0 -217
- huggingface_hub/keras_mixin.py +0 -500
- huggingface_hub/repository.py +0 -1477
- huggingface_hub/serialization/_tensorflow.py +0 -95
- huggingface_hub/utils/_hf_folder.py +0 -68
- huggingface_hub-0.29.0rc2.dist-info/RECORD +0 -131
- huggingface_hub-0.29.0rc2.dist-info/entry_points.txt +0 -6
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
- {huggingface_hub-0.29.0rc2.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/_commit_api.py CHANGED

````diff
@@ -11,17 +11,20 @@ from contextlib import contextmanager
 from dataclasses import dataclass, field
 from itertools import groupby
 from pathlib import Path, PurePosixPath
-from typing import TYPE_CHECKING, Any, BinaryIO, Dict, Iterable, Iterator, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Iterator, Literal, Optional, Union
 
 from tqdm.contrib.concurrent import thread_map
 
 from . import constants
-from .errors import EntryNotFoundError
+from .errors import EntryNotFoundError, HfHubHTTPError, XetAuthorizationError, XetRefreshTokenError
 from .file_download import hf_hub_url
 from .lfs import UploadInfo, lfs_upload, post_lfs_batch_info
 from .utils import (
     FORBIDDEN_FOLDERS,
+    XetTokenType,
+    are_progress_bars_disabled,
     chunk_iterable,
+    fetch_xet_connection_info_from_repo_info,
     get_session,
     hf_raise_for_status,
     logging,
@@ -30,6 +33,7 @@ from .utils import (
     validate_hf_hub_args,
 )
 from .utils import tqdm as hf_tqdm
+from .utils._runtime import is_xet_available
 
 
 if TYPE_CHECKING:
@@ -47,6 +51,8 @@ UploadMode = Literal["lfs", "regular"]
 # See https://github.com/huggingface/huggingface_hub/issues/1503
 FETCH_LFS_BATCH_SIZE = 500
 
+UPLOAD_BATCH_MAX_NUM_FILES = 256
+
 
 @dataclass
 class CommitOperationDelete:
````
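The new module-level constant formalizes a batch size that was previously hard-coded inline: upload negotiation requests are sent in groups of at most 256 files so that a single huge commit cannot exceed the server's payload limit. A minimal sketch of the pattern, using the `chunk_iterable` helper the diff imports from `.utils` (the `files` list is made up):

```python
from huggingface_hub.utils import chunk_iterable

UPLOAD_BATCH_MAX_NUM_FILES = 256  # same cap as in _commit_api.py

# Hypothetical list of 600 files to commit.
files = [f"file_{i}.bin" for i in range(600)]

# chunk_iterable yields lazy chunks, so each one is materialized into a
# list first -- the same `[op for op in chunk]` pattern the diff uses.
for chunk in chunk_iterable(files, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
    batch = list(chunk)
    print(f"negotiating upload for {len(batch)} files")  # 256, 256, 88
```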
````diff
@@ -230,7 +236,7 @@ class CommitOperationAdd:
     config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
 
     >>> with operation.as_file(with_tqdm=True) as file:
-    ...     requests.put(..., data=file)
+    ...     httpx.put(..., data=file)
     config.json: 100%|█████████████████████████| 8.19k/8.19k [00:02<00:00, 3.72kB/s]
     ```
     """
@@ -301,7 +307,7 @@ def _validate_path_in_repo(path_in_repo: str) -> str:
 CommitOperation = Union[CommitOperationAdd, CommitOperationCopy, CommitOperationDelete]
 
 
-def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
+def _warn_on_overwriting_operations(operations: list[CommitOperation]) -> None:
     """
     Warn user when a list of operations is expected to overwrite itself in a single
     commit.
@@ -316,7 +322,7 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
     delete before upload) but can happen if a user deletes an entire folder and then
     add new files to it.
     """
-    nb_additions_per_path: Dict[str, int] = defaultdict(int)
+    nb_additions_per_path: dict[str, int] = defaultdict(int)
     for operation in operations:
         path_in_repo = operation.path_in_repo
         if isinstance(operation, CommitOperationAdd):
@@ -348,15 +354,95 @@ def _warn_on_overwriting_operations(operations: List[CommitOperation]) -> None:
 
 
 @validate_hf_hub_args
-def _upload_lfs_files(
+def _upload_files(
     *,
-    additions: List[CommitOperationAdd],
+    additions: list[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     endpoint: Optional[str] = None,
     num_threads: int = 5,
     revision: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+):
+    """
+    Negotiates per-file transfer (LFS vs Xet) and uploads in batches.
+    """
+    xet_additions: list[CommitOperationAdd] = []
+    lfs_actions: list[dict[str, Any]] = []
+    lfs_oid2addop: dict[str, CommitOperationAdd] = {}
+
+    for chunk in chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES):
+        chunk_list = [op for op in chunk]
+
+        transfers: list[str] = ["basic", "multipart"]
+        has_buffered_io_data = any(isinstance(op.path_or_fileobj, io.BufferedIOBase) for op in chunk_list)
+        if is_xet_available():
+            if not has_buffered_io_data:
+                transfers.append("xet")
+            else:
+                logger.warning(
+                    "Uploading files as a binary IO buffer is not supported by Xet Storage. "
+                    "Falling back to HTTP upload."
+                )
+
+        actions_chunk, errors_chunk, chosen_transfer = post_lfs_batch_info(
+            upload_infos=[op.upload_info for op in chunk_list],
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            endpoint=endpoint,
+            headers=headers,
+            token=None,  # already passed in 'headers'
+            transfers=transfers,
+        )
+        if errors_chunk:
+            message = "\n".join(
+                [
+                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
+                    for err in errors_chunk
+                ]
+            )
+            raise ValueError(f"LFS batch API returned errors:\n{message}")
+
+        # If server returns a transfer we didn't offer (e.g "xet" while uploading from BytesIO),
+        # fall back to LFS for this chunk.
+        if chosen_transfer == "xet" and ("xet" in transfers):
+            xet_additions.extend(chunk_list)
+        else:
+            lfs_actions.extend(actions_chunk)
+            for op in chunk_list:
+                lfs_oid2addop[op.upload_info.sha256.hex()] = op
+
+    if len(lfs_actions) > 0:
+        _upload_lfs_files(
+            actions=lfs_actions,
+            oid2addop=lfs_oid2addop,
+            headers=headers,
+            endpoint=endpoint,
+            num_threads=num_threads,
+        )
+
+    if len(xet_additions) > 0:
+        _upload_xet_files(
+            additions=xet_additions,
+            repo_type=repo_type,
+            repo_id=repo_id,
+            headers=headers,
+            endpoint=endpoint,
+            revision=revision,
+            create_pr=create_pr,
+        )
+
+
+@validate_hf_hub_args
+def _upload_lfs_files(
+    *,
+    actions: list[dict[str, Any]],
+    oid2addop: dict[str, CommitOperationAdd],
+    headers: dict[str, str],
+    endpoint: Optional[str] = None,
+    num_threads: int = 5,
 ):
     """
     Uploads the content of `additions` to the Hub using the large file storage protocol.
````
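The heart of the new `_upload_files` helper is a capability negotiation: the client advertises the transfer protocols it can perform for each batch, the LFS batch endpoint answers with the one to use, and the client defensively ignores a `"xet"` answer it never offered. A condensed, self-contained sketch of that decision logic, with batching and HTTP stripped away (`offer_transfers` and `route` are illustrative names, not library API):

```python
import io
from typing import Union

def offer_transfers(payloads: list[Union[bytes, str, io.BufferedIOBase]], xet_installed: bool) -> list[str]:
    """Transfers the client advertises to the LFS batch endpoint for one chunk. (Illustrative sketch.)"""
    transfers = ["basic", "multipart"]
    has_buffer = any(isinstance(p, io.BufferedIOBase) for p in payloads)
    if xet_installed and not has_buffer:
        transfers.append("xet")  # only offered when every payload is raw bytes or a path
    return transfers

def route(chosen_transfer: str, offered: list[str]) -> str:
    """Map the server's answer to an upload backend."""
    # Honor "xet" only if we actually offered it; otherwise stay on the LFS path.
    return "xet" if chosen_transfer == "xet" and "xet" in offered else "lfs"

assert offer_transfers([b"raw", "weights.safetensors"], xet_installed=True) == ["basic", "multipart", "xet"]
assert offer_transfers([io.BytesIO(b"x")], xet_installed=True) == ["basic", "multipart"]  # buffers disable xet
assert route("xet", ["basic", "multipart", "xet"]) == "xet"
assert route("xet", ["basic", "multipart"]) == "lfs"  # defensive fallback
assert route("basic", ["basic", "multipart", "xet"]) == "lfs"
```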
````diff
@@ -365,14 +451,26 @@ def _upload_lfs_files(
         - LFS Batch API: https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md
 
     Args:
-        additions (`List` of `CommitOperationAdd`):
-            The files to be uploaded
-        repo_type (`str`):
+        actions (`list[dict[str, Any]]`):
+            LFS batch actions returned by the server.
+        oid2addop (`dict[str, CommitOperationAdd]`):
+            A dictionary mapping the OID of the file to the corresponding `CommitOperationAdd` object.
+        headers (`dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        endpoint (`str`, *optional*):
+            The endpoint to use for the request. Defaults to `constants.ENDPOINT`.
+        num_threads (`int`, *optional*):
+            The number of concurrent threads to use when uploading. Defaults to 5.
+
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         num_threads (`int`, *optional*):
             The number of concurrent threads to use when uploading. Defaults to 5.
@@ -384,53 +482,20 @@ def _upload_lfs_files(
             If an upload failed for any reason
         [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
             If the server returns malformed responses
-        [`HTTPError`](https://requests.readthedocs.io/en/latest/api/#requests.HTTPError)
+        [`HfHubHTTPError`]
             If the LFS batch endpoint returned an HTTP error.
     """
-    # Step 1: retrieve upload instructions from the LFS batch endpoint.
-    #         Upload instructions are retrieved by chunk of 256 files to avoid reaching
-    #         the payload limit.
-    batch_actions: List[Dict] = []
-    for chunk in chunk_iterable(additions, chunk_size=256):
-        batch_actions_chunk, batch_errors_chunk = post_lfs_batch_info(
-            upload_infos=[op.upload_info for op in chunk],
-            repo_id=repo_id,
-            repo_type=repo_type,
-            revision=revision,
-            endpoint=endpoint,
-            headers=headers,
-            token=None,  # already passed in 'headers'
-        )
-
-        # If at least 1 error, we do not retrieve information for other chunks
-        if batch_errors_chunk:
-            message = "\n".join(
-                [
-                    f"Encountered error for file with OID {err.get('oid')}: `{err.get('error', {}).get('message')}"
-                    for err in batch_errors_chunk
-                ]
-            )
-            raise ValueError(f"LFS batch endpoint returned errors:\n{message}")
-
-        batch_actions += batch_actions_chunk
-    oid2addop = {add_op.upload_info.sha256.hex(): add_op for add_op in additions}
-
-    # Step 2: ignore files that have already been uploaded
+    # Filter out files already present upstream
     filtered_actions = []
-    for action in batch_actions:
+    for action in actions:
         if action.get("actions") is None:
             logger.debug(
-                f"Content of file {oid2addop[action['oid']].path_in_repo} is already"
-                " present upstream - skipping upload."
+                f"Content of file {oid2addop[action['oid']].path_in_repo} is already present upstream - skipping upload."
             )
         else:
             filtered_actions.append(action)
 
-    if len(filtered_actions) == 0:
-        logger.debug("No LFS files to upload.")
-        return
-
-    # Step 3: upload files concurrently according to these instructions
+    # Upload according to server-provided actions
    def _wrapped_lfs_upload(batch_action) -> None:
        try:
            operation = oid2addop[batch_action["oid"]]
@@ -438,11 +503,7 @@ def _upload_lfs_files(
         except Exception as exc:
             raise RuntimeError(f"Error while uploading '{operation.path_in_repo}' to the Hub.") from exc
 
-    if constants.HF_HUB_ENABLE_HF_TRANSFER:
-        logger.debug(f"Uploading {len(filtered_actions)} LFS files to the Hub using `hf_transfer`.")
-        for action in hf_tqdm(filtered_actions, name="huggingface_hub.lfs_upload"):
-            _wrapped_lfs_upload(action)
-    elif len(filtered_actions) == 1:
+    if len(filtered_actions) == 1:
         logger.debug("Uploading 1 LFS file to the Hub")
         _wrapped_lfs_upload(filtered_actions[0])
     else:
````
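The refactored `_upload_lfs_files` no longer talks to the batch endpoint itself; it receives the server's `actions` and the `oid2addop` map from `_upload_files` and only filters and executes. The filter relies on an LFS protocol convention: a response object without an `actions` field means the blob with that SHA-256 `oid` already exists upstream. A toy illustration of that convention (the `response_objects` payload is made up):

```python
import hashlib

# Hypothetical local files, keyed by the sha256 of their content ("oid" in LFS terms).
contents = {"a.bin": b"hello", "b.bin": b"world"}
oid2path = {hashlib.sha256(data).hexdigest(): path for path, data in contents.items()}

# Made-up LFS batch response: the first object has no "actions" field,
# which the protocol uses to say "blob already present, nothing to do".
response_objects = [
    {"oid": hashlib.sha256(b"hello").hexdigest()},
    {"oid": hashlib.sha256(b"world").hexdigest(), "actions": {"upload": {"href": "https://example.com/put"}}},
]

to_upload = [obj for obj in response_objects if obj.get("actions") is not None]
for obj in to_upload:
    print(f"uploading {oid2path[obj['oid']]}")  # -> uploading b.bin
```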
````diff
@@ -458,6 +519,151 @@ def _upload_lfs_files(
     )
 
 
+@validate_hf_hub_args
+def _upload_xet_files(
+    *,
+    additions: list[CommitOperationAdd],
+    repo_type: str,
+    repo_id: str,
+    headers: dict[str, str],
+    endpoint: Optional[str] = None,
+    revision: Optional[str] = None,
+    create_pr: Optional[bool] = None,
+):
+    """
+    Uploads the content of `additions` to the Hub using the xet storage protocol.
+    This chunks the files and deduplicates the chunks before uploading them to xetcas storage.
+
+    Args:
+        additions (`list` of `CommitOperationAdd`):
+            The files to be uploaded.
+        repo_type (`str`):
+            Type of the repo to upload to: `"model"`, `"dataset"` or `"space"`.
+        repo_id (`str`):
+            A namespace (user or an organization) and a repo name separated
+            by a `/`.
+        headers (`dict[str, str]`):
+            Headers to use for the request, including authorization headers and user agent.
+        endpoint: (`str`, *optional*):
+            The endpoint to use for the xetcas service. Defaults to `constants.ENDPOINT`.
+        revision (`str`, *optional*):
+            The git revision to upload to.
+        create_pr (`bool`, *optional*):
+            Whether or not to create a Pull Request with that commit.
+
+    Raises:
+        [`EnvironmentError`](https://docs.python.org/3/library/exceptions.html#EnvironmentError)
+            If an upload failed for any reason.
+        [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
+            If the server returns malformed responses or if the user is unauthorized to upload to xet storage.
+        [`HfHubHTTPError`]
+            If the LFS batch endpoint returned an HTTP error.
+
+    **How it works:**
+        The file download system uses Xet storage, which is a content-addressable storage system that breaks files into chunks
+        for efficient storage and transfer.
+
+        `hf_xet.upload_files` manages uploading files by:
+            - Taking a list of file paths to upload
+            - Breaking files into smaller chunks for efficient storage
+            - Avoiding duplicate storage by recognizing identical chunks across files
+            - Connecting to a storage server (CAS server) that manages these chunks
+
+        The upload process works like this:
+        1. Create a local folder at ~/.cache/huggingface/xet/chunk-cache to store file chunks for reuse.
+        2. Process files in parallel (up to 8 files at once):
+            2.1. Read the file content.
+            2.2. Split the file content into smaller chunks based on content patterns: each chunk gets a unique ID based on what's in it.
+            2.3. For each chunk:
+                - Check if it already exists in storage.
+                - Skip uploading chunks that already exist.
+            2.4. Group chunks into larger blocks for efficient transfer.
+            2.5. Upload these blocks to the storage server.
+            2.6. Create and upload information about how the file is structured.
+        3. Return reference files that contain information about the uploaded files, which can be used later to download them.
+    """
+    if len(additions) == 0:
+        return
+
+    # at this point, we know that hf_xet is installed
+    from hf_xet import upload_bytes, upload_files
+
+    from .utils._xet_progress_reporting import XetProgressReporter
+
+    try:
+        xet_connection_info = fetch_xet_connection_info_from_repo_info(
+            token_type=XetTokenType.WRITE,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            headers=headers,
+            endpoint=endpoint,
+            params={"create_pr": "1"} if create_pr else None,
+        )
+    except HfHubHTTPError as e:
+        if e.response.status_code == 401:
+            raise XetAuthorizationError(
+                f"You are unauthorized to upload to xet storage for {repo_type}/{repo_id}. "
+                f"Please check that you have configured your access token with write access to the repo."
+            ) from e
+        raise
+
+    xet_endpoint = xet_connection_info.endpoint
+    access_token_info = (xet_connection_info.access_token, xet_connection_info.expiration_unix_epoch)
+
+    def token_refresher() -> tuple[str, int]:
+        new_xet_connection = fetch_xet_connection_info_from_repo_info(
+            token_type=XetTokenType.WRITE,
+            repo_id=repo_id,
+            repo_type=repo_type,
+            revision=revision,
+            headers=headers,
+            endpoint=endpoint,
+            params={"create_pr": "1"} if create_pr else None,
+        )
+        if new_xet_connection is None:
+            raise XetRefreshTokenError("Failed to refresh xet token")
+        return new_xet_connection.access_token, new_xet_connection.expiration_unix_epoch
+
+    if not are_progress_bars_disabled():
+        progress = XetProgressReporter()
+        progress_callback = progress.update_progress
+    else:
+        progress, progress_callback = None, None
+
+    try:
+        all_bytes_ops = [op for op in additions if isinstance(op.path_or_fileobj, bytes)]
+        all_paths_ops = [op for op in additions if isinstance(op.path_or_fileobj, (str, Path))]
+
+        if len(all_paths_ops) > 0:
+            all_paths = [str(op.path_or_fileobj) for op in all_paths_ops]
+            upload_files(
+                all_paths,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )
+
+        if len(all_bytes_ops) > 0:
+            all_bytes = [op.path_or_fileobj for op in all_bytes_ops]
+            upload_bytes(
+                all_bytes,
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                progress_callback,
+                repo_type,
+            )
+
+    finally:
+        if progress is not None:
+            progress.close(False)
+
+    return
+
+
 def _validate_preupload_info(preupload_info: dict):
     files = preupload_info.get("files")
     if not isinstance(files, list):
````
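The `token_refresher` closure above is a callback contract: `hf_xet` calls it whenever the short-lived CAS token expires mid-upload and expects a fresh `(access_token, expiration_unix_epoch)` pair back. A minimal sketch of the same pattern, independent of `hf_xet` (all names here are illustrative):

```python
import time
from typing import Callable

# A refresher returns (access_token, expiration_unix_epoch).
TokenRefresher = Callable[[], tuple[str, int]]

def make_refresher(fetch_token: Callable[[], tuple[str, int]]) -> TokenRefresher:
    def refresher() -> tuple[str, int]:
        token, expires_at = fetch_token()
        if not token:
            raise RuntimeError("Failed to refresh token")
        return token, expires_at
    return refresher

class Client:
    """Toy long-running client that re-auths through the callback."""
    def __init__(self, token: str, expires_at: int, refresher: TokenRefresher):
        self.token, self.expires_at, self.refresher = token, expires_at, refresher

    def ensure_token(self) -> str:
        if time.time() >= self.expires_at:                   # token expired mid-run:
            self.token, self.expires_at = self.refresher()   # ask the callback for a new one
        return self.token

client = Client("tok-1", expires_at=0, refresher=make_refresher(lambda: ("tok-2", int(time.time()) + 3600)))
print(client.ensure_token())  # -> tok-2
```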
````diff
@@ -478,15 +684,15 @@ def _fetch_upload_modes(
     additions: Iterable[CommitOperationAdd],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
     create_pr: bool = False,
     gitignore_content: Optional[str] = None,
 ) -> None:
     """
-    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob
-    or as a git LFS blob. Input `additions` are mutated in-place with the upload mode.
+    Requests the Hub "preupload" endpoint to determine whether each input file should be uploaded as a regular git blob,
+    as a git LFS blob, or as a XET file. Input `additions` are mutated in-place with the upload mode.
 
     Args:
         additions (`Iterable` of :class:`CommitOperationAdd`):
@@ -497,7 +703,7 @@ def _fetch_upload_modes(
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
@@ -515,12 +721,12 @@ def _fetch_upload_modes(
     endpoint = endpoint if endpoint is not None else constants.ENDPOINT
 
     # Fetch upload mode (LFS or regular) chunk by chunk.
-    upload_modes: Dict[str, UploadMode] = {}
-    should_ignore_info: Dict[str, bool] = {}
-    oid_info: Dict[str, Optional[str]] = {}
+    upload_modes: dict[str, UploadMode] = {}
+    should_ignore_info: dict[str, bool] = {}
+    oid_info: dict[str, Optional[str]] = {}
 
     for chunk in chunk_iterable(additions, 256):
-        payload: Dict = {
+        payload: dict = {
             "files": [
                 {
                     "path": op.path_in_repo,
@@ -563,10 +769,10 @@ def _fetch_files_to_copy(
     copies: Iterable[CommitOperationCopy],
     repo_type: str,
     repo_id: str,
-    headers: Dict[str, str],
+    headers: dict[str, str],
     revision: str,
     endpoint: Optional[str] = None,
-) -> Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]]:
+) -> dict[tuple[str, Optional[str]], Union["RepoFile", bytes]]:
     """
     Fetch information about the files to copy.
 
@@ -582,12 +788,12 @@ def _fetch_files_to_copy(
         repo_id (`str`):
             A namespace (user or an organization) and a repo name separated
             by a `/`.
-        headers (`Dict[str, str]`):
+        headers (`dict[str, str]`):
             Headers to use for the request, including authorization headers and user agent.
         revision (`str`):
             The git revision to upload the files to. Can be any valid git revision.
 
-    Returns: `Dict[Tuple[str, Optional[str]], Union[RepoFile, bytes]]`
+    Returns: `dict[tuple[str, Optional[str]], Union[RepoFile, bytes]]]`
         Key is the file path and revision of the file to copy.
         Value is the raw content as bytes (for regular files) or the file information as a RepoFile (for LFS files).
 
@@ -600,9 +806,9 @@ def _fetch_files_to_copy(
     from .hf_api import HfApi, RepoFolder
 
     hf_api = HfApi(endpoint=endpoint, headers=headers)
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]] = {}
     # Store (path, revision) -> oid mapping
-    oid_info: Dict[Tuple[str, Optional[str]], Optional[str]] = {}
+    oid_info: dict[tuple[str, Optional[str]], Optional[str]] = {}
     # 1. Fetch OIDs for destination paths in batches.
     dest_paths = [op.path_in_repo for op in copies]
     for offset in range(0, len(dest_paths), FETCH_LFS_BATCH_SIZE):
@@ -662,11 +868,11 @@ def _fetch_files_to_copy(
 
 def _prepare_commit_payload(
     operations: Iterable[CommitOperation],
-    files_to_copy: Dict[Tuple[str, Optional[str]], Union["RepoFile", bytes]],
+    files_to_copy: dict[tuple[str, Optional[str]], Union["RepoFile", bytes]],
     commit_message: str,
     commit_description: Optional[str] = None,
     parent_commit: Optional[str] = None,
-) -> Iterable[Dict[str, Any]]:
+) -> Iterable[dict[str, Any]]:
     """
     Builds the payload to POST to the `/commit` API of the Hub.
 
````
huggingface_hub/_commit_scheduler.py CHANGED

````diff
@@ -7,7 +7,7 @@ from dataclasses import dataclass
 from io import SEEK_END, SEEK_SET, BytesIO
 from pathlib import Path
 from threading import Lock, Thread
-from typing import Dict, List, Optional, Union
+from typing import Optional, Union
 
 from .hf_api import DEFAULT_IGNORE_PATTERNS, CommitInfo, CommitOperationAdd, HfApi
 from .utils import filter_repo_objects
@@ -53,9 +53,9 @@ class CommitScheduler:
             Whether to make the repo private. If `None` (default), the repo will be public unless the organization's default is private. This value is ignored if the repo already exists.
         token (`str`, *optional*):
             The token to use to commit to the repo. Defaults to the token saved on the machine.
-        allow_patterns (`List[str]` or `str`, *optional*):
+        allow_patterns (`list[str]` or `str`, *optional*):
             If provided, only files matching at least one pattern are uploaded.
-        ignore_patterns (`List[str]` or `str`, *optional*):
+        ignore_patterns (`list[str]` or `str`, *optional*):
             If provided, files matching any of the patterns are not uploaded.
         squash_history (`bool`, *optional*):
             Whether to squash the history of the repo after each commit. Defaults to `False`. Squashing commits is
@@ -108,8 +108,8 @@ class CommitScheduler:
         revision: Optional[str] = None,
         private: Optional[bool] = None,
         token: Optional[str] = None,
-        allow_patterns: Optional[Union[List[str], str]] = None,
-        ignore_patterns: Optional[Union[List[str], str]] = None,
+        allow_patterns: Optional[Union[list[str], str]] = None,
+        ignore_patterns: Optional[Union[list[str], str]] = None,
         squash_history: bool = False,
         hf_api: Optional["HfApi"] = None,
     ) -> None:
@@ -138,7 +138,7 @@ class CommitScheduler:
         self.token = token
 
         # Keep track of already uploaded files
-        self.last_uploaded: Dict[Path, float] = {}  # key is local path, value is timestamp
+        self.last_uploaded: dict[Path, float] = {}  # key is local path, value is timestamp
 
         # Scheduler
         if not every > 0:
@@ -205,13 +205,10 @@ class CommitScheduler:
         """
         Push folder to the Hub and return the commit info.
 
-        <Tip warning={true}>
-
-        This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
-        queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
-        issues.
-
-        </Tip>
+        > [!WARNING]
+        > This method is not meant to be called directly. It is run in the background by the scheduler, respecting a
+        > queue mechanism to avoid concurrent commits. Making a direct call to the method might lead to concurrency
+        > issues.
 
         The default behavior of `push_to_hub` is to assume an append-only folder. It lists all files in the folder and
         uploads only changed files. If no changes are found, the method returns without committing anything. If you want
@@ -232,7 +229,7 @@ class CommitScheduler:
         prefix = f"{self.path_in_repo.strip('/')}/" if self.path_in_repo else ""
 
         # Filter with pattern + filter out unchanged files + retrieve current file size
-        files_to_upload: List[_FileToUpload] = []
+        files_to_upload: list[_FileToUpload] = []
         for relpath in filter_repo_objects(
             relpath_to_abspath.keys(), allow_patterns=self.allow_patterns, ignore_patterns=self.ignore_patterns
         ):
````
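The `last_uploaded` mapping shown earlier (`dict[Path, float]`, path to mtime) is what keeps this filtering step cheap to run on a timer: a file is queued only if it is new or its modification time moved past the recorded one. A toy version of the idea, not the actual `_FileToUpload` logic:

```python
from pathlib import Path

# Toy change-detection in the spirit of CommitScheduler.last_uploaded;
# not the class's exact logic.
last_uploaded: dict[Path, float] = {}  # local path -> mtime at last commit

def changed_files(folder: Path) -> list[Path]:
    """Return files created or modified since the previous scheduled run."""
    to_upload: list[Path] = []
    for path in sorted(folder.rglob("*")):
        if not path.is_file():
            continue
        mtime = path.stat().st_mtime
        last = last_uploaded.get(path)
        if last is None or last < mtime:  # new file, or touched since last run
            to_upload.append(path)
            last_uploaded[path] = mtime   # record so the next run skips it
    return to_upload
```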
````diff
@@ -315,10 +312,13 @@ class PartialFileIO(BytesIO):
         return self._size_limit
 
     def __getattribute__(self, name: str):
-        if name.startswith("_") or name in ("read", "tell", "seek"):  # only 3 public methods supported
+        if name.startswith("_") or name in ("read", "tell", "seek", "fileno"):  # only 4 public methods supported
             return super().__getattribute__(name)
         raise NotImplementedError(f"PartialFileIO does not support '{name}'.")
 
+    def fileno(self):
+        raise AttributeError("PartialFileIO does not have a fileno.")
+
     def tell(self) -> int:
         """Return the current file position."""
         return self._file.tell()
````