huggingface-hub 0.30.1.tar.gz → 0.31.0.tar.gz

This diff compares the contents of two package versions as published to a public registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in that registry.

Files changed (143)
  1. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/PKG-INFO +1 -1
  2. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.py +2 -1
  3. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/__init__.py +1 -1
  4. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_api.py +23 -4
  5. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_inference_endpoints.py +8 -5
  6. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_snapshot_download.py +2 -1
  7. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_space_api.py +0 -5
  8. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_upload_large_folder.py +26 -3
  9. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload.py +2 -1
  10. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/constants.py +1 -0
  11. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/file_download.py +58 -10
  12. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_api.py +81 -15
  13. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_client.py +105 -150
  14. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/_async_client.py +105 -150
  15. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +2 -3
  16. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/chat_completion.py +3 -3
  17. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_text.py +2 -3
  18. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_generation.py +1 -1
  19. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_audio.py +1 -2
  20. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_speech.py +1 -2
  21. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/__init__.py +55 -17
  22. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/_common.py +34 -19
  23. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/black_forest_labs.py +4 -1
  24. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fal_ai.py +36 -11
  25. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hf_inference.py +33 -11
  26. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/hyperbolic.py +5 -1
  27. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/nebius.py +15 -1
  28. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/novita.py +14 -1
  29. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/openai.py +3 -2
  30. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/replicate.py +22 -3
  31. huggingface_hub-0.31.0/src/huggingface_hub/inference/_providers/sambanova.py +28 -0
  32. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/together.py +15 -1
  33. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard_data.py +24 -4
  34. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_pagination.py +2 -2
  35. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_runtime.py +4 -0
  36. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_xet.py +1 -12
  37. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/PKG-INFO +1 -1
  38. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/requires.txt +4 -1
  39. huggingface_hub-0.30.1/src/huggingface_hub/inference/_providers/sambanova.py +0 -6
  40. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/LICENSE +0 -0
  41. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/MANIFEST.in +0 -0
  42. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/README.md +0 -0
  43. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/pyproject.toml +0 -0
  44. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/setup.cfg +0 -0
  45. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_commit_scheduler.py +0 -0
  46. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_local_folder.py +0 -0
  47. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_login.py +0 -0
  48. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_tensorboard_logger.py +0 -0
  49. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_payload.py +0 -0
  50. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/_webhooks_server.py +0 -0
  51. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/__init__.py +0 -0
  52. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/_cli_utils.py +0 -0
  53. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/delete_cache.py +0 -0
  54. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/download.py +0 -0
  55. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/env.py +0 -0
  56. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/huggingface_cli.py +0 -0
  57. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/lfs.py +0 -0
  58. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/repo_files.py +0 -0
  59. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/scan_cache.py +0 -0
  60. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/tag.py +0 -0
  61. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/upload_large_folder.py +0 -0
  62. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/user.py +0 -0
  63. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/commands/version.py +0 -0
  64. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/community.py +0 -0
  65. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/errors.py +0 -0
  66. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/fastai_utils.py +0 -0
  67. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hf_file_system.py +0 -0
  68. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/hub_mixin.py +0 -0
  69. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/__init__.py +0 -0
  70. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_common.py +0 -0
  71. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/__init__.py +0 -0
  72. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/__init__.py +0 -0
  73. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_classification.py +0 -0
  74. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/audio_to_audio.py +0 -0
  75. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/base.py +0 -0
  76. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/depth_estimation.py +0 -0
  77. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/document_question_answering.py +0 -0
  78. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/feature_extraction.py +0 -0
  79. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/fill_mask.py +0 -0
  80. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_classification.py +0 -0
  81. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_segmentation.py +0 -0
  82. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/image_to_image.py +0 -0
  83. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/object_detection.py +0 -0
  84. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/question_answering.py +0 -0
  85. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/sentence_similarity.py +0 -0
  86. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/summarization.py +0 -0
  87. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/table_question_answering.py +0 -0
  88. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text2text_generation.py +0 -0
  89. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_classification.py +0 -0
  90. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_image.py +0 -0
  91. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/text_to_video.py +0 -0
  92. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/token_classification.py +0 -0
  93. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/translation.py +0 -0
  94. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/video_classification.py +0 -0
  95. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/visual_question_answering.py +0 -0
  96. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_classification.py +0 -0
  97. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +0 -0
  98. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +0 -0
  99. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cerebras.py +0 -0
  100. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/cohere.py +0 -0
  101. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference/_providers/fireworks_ai.py +0 -0
  102. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/inference_api.py +0 -0
  103. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/keras_mixin.py +0 -0
  104. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/lfs.py +0 -0
  105. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/py.typed +0 -0
  106. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repocard.py +0 -0
  107. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/repository.py +0 -0
  108. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/__init__.py +0 -0
  109. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_base.py +0 -0
  110. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_dduf.py +0 -0
  111. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_tensorflow.py +0 -0
  112. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/serialization/_torch.py +0 -0
  113. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/datasetcard_template.md +0 -0
  114. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/templates/modelcard_template.md +0 -0
  115. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/__init__.py +0 -0
  116. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_auth.py +0 -0
  117. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_assets.py +0 -0
  118. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_cache_manager.py +0 -0
  119. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_chunk_utils.py +0 -0
  120. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_datetime.py +0 -0
  121. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_deprecation.py +0 -0
  122. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_experimental.py +0 -0
  123. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_fixes.py +0 -0
  124. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_git_credential.py +0 -0
  125. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_headers.py +0 -0
  126. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_hf_folder.py +0 -0
  127. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_http.py +0 -0
  128. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_lfs.py +0 -0
  129. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_paths.py +0 -0
  130. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_safetensors.py +0 -0
  131. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_subprocess.py +0 -0
  132. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_telemetry.py +0 -0
  133. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_typing.py +0 -0
  134. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/_validators.py +0 -0
  135. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/endpoint_helpers.py +0 -0
  136. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/insecure_hashlib.py +0 -0
  137. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/logging.py +0 -0
  138. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/sha.py +0 -0
  139. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub/utils/tqdm.py +0 -0
  140. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/SOURCES.txt +0 -0
  141. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/dependency_links.txt +0 -0
  142. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/entry_points.txt +0 -0
  143. {huggingface_hub-0.30.1 → huggingface_hub-0.31.0}/src/huggingface_hub.egg-info/top_level.txt +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface_hub
-Version: 0.30.1
+Version: 0.31.0
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
setup.py

@@ -14,6 +14,7 @@ def get_version() -> str:
 install_requires = [
     "filelock",
     "fsspec>=2023.5.0",
+    "hf-xet>=1.1.0,<2.0.0; platform_machine=='x86_64' or platform_machine=='amd64' or platform_machine=='arm64' or platform_machine=='aarch64'",
     "packaging>=20.9",
     "pyyaml>=5.1",
     "requests",
@@ -55,7 +56,7 @@ extras["tensorflow-testing"] = [
     "keras<3.0",
 ]
 
-extras["hf_xet"] = ["hf_xet>=0.1.4"]
+extras["hf_xet"] = ["hf_xet>=1.1.0,<2.0.0"]
 
 extras["testing"] = (
     extras["cli"]
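The new hf-xet requirement is gated behind a PEP 508 environment marker, so it is only pulled in on 64-bit x86 and ARM machines. As a quick illustration of how such a marker evaluates, here is a minimal sketch using the `packaging` library (already a dependency of huggingface_hub):

    from packaging.markers import Marker

    # The same marker string as in install_requires above.
    marker = Marker(
        "platform_machine=='x86_64' or platform_machine=='amd64' "
        "or platform_machine=='arm64' or platform_machine=='aarch64'"
    )
    # True on most 64-bit x86/ARM machines, False elsewhere (e.g. 32-bit ARM).
    print(marker.evaluate())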
src/huggingface_hub/__init__.py

@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.30.1"
+__version__ = "0.31.0"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
src/huggingface_hub/_commit_api.py

@@ -530,7 +530,7 @@ def _upload_xet_files(
     if len(additions) == 0:
         return
     # at this point, we know that hf_xet is installed
-    from hf_xet import upload_files
+    from hf_xet import upload_bytes, upload_files
 
     try:
         xet_connection_info = fetch_xet_connection_info_from_repo_info(
@@ -571,8 +571,10 @@ def _upload_xet_files(
     num_chunks_num_digits = int(math.log10(num_chunks)) + 1
     for i, chunk in enumerate(chunk_iterable(additions, chunk_size=UPLOAD_BATCH_MAX_NUM_FILES)):
         _chunk = [op for op in chunk]
-        paths = [str(op.path_or_fileobj) for op in _chunk]
-        expected_size = sum([os.path.getsize(path) for path in paths])
+
+        bytes_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, bytes)]
+        paths_ops = [op for op in _chunk if isinstance(op.path_or_fileobj, (str, Path))]
+        expected_size = sum(op.upload_info.size for op in bytes_ops + paths_ops)
 
         if num_chunks > 1:
             description = f"Uploading Batch [{str(i + 1).zfill(num_chunks_num_digits)}/{num_chunks}]..."
@@ -592,7 +594,24 @@ def _upload_xet_files(
         def update_progress(increment: int):
             progress.update(increment)
 
-        upload_files(paths, xet_endpoint, access_token_info, token_refresher, update_progress, repo_type)
+        if len(paths_ops) > 0:
+            upload_files(
+                [str(op.path_or_fileobj) for op in paths_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
+        if len(bytes_ops) > 0:
+            upload_bytes(
+                [op.path_or_fileobj for op in bytes_ops],
+                xet_endpoint,
+                access_token_info,
+                token_refresher,
+                update_progress,
+                repo_type,
+            )
     return
 
 
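With `upload_bytes` available, in-memory `bytes` payloads can now take the Xet path instead of falling back to HTTP upload. A hedged usage sketch (repo id and token are placeholders; assumes the target repo is Xet-enabled and `hf_xet` is installed):

    from huggingface_hub import HfApi

    api = HfApi(token="hf_xxx")  # placeholder token

    # In 0.31.0 this bytes payload is routed through hf_xet.upload_bytes;
    # in 0.30.x it would have gone through the regular HTTP upload instead.
    api.upload_file(
        path_or_fileobj=b"hello from memory",
        path_in_repo="data/hello.txt",
        repo_id="user/my-repo",  # placeholder repo
    )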
src/huggingface_hub/_inference_endpoints.py

@@ -6,14 +6,13 @@ from typing import TYPE_CHECKING, Dict, Optional, Union
 
 from huggingface_hub.errors import InferenceEndpointError, InferenceEndpointTimeoutError
 
-from .inference._client import InferenceClient
-from .inference._generated._async_client import AsyncInferenceClient
 from .utils import get_session, logging, parse_datetime
 
 
 if TYPE_CHECKING:
     from .hf_api import HfApi
-
+    from .inference._client import InferenceClient
+    from .inference._generated._async_client import AsyncInferenceClient
 
 logger = logging.get_logger(__name__)
 
@@ -138,7 +137,7 @@ class InferenceEndpoint:
         self._populate_from_raw()
 
     @property
-    def client(self) -> InferenceClient:
+    def client(self) -> "InferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -152,13 +151,15 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
             )
+        from .inference._client import InferenceClient
+
         return InferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
         )
 
     @property
-    def async_client(self) -> AsyncInferenceClient:
+    def async_client(self) -> "AsyncInferenceClient":
         """Returns a client to make predictions on this Inference Endpoint.
 
         Returns:
@@ -172,6 +173,8 @@ class InferenceEndpoint:
                 "Cannot create a client for this Inference Endpoint as it is not yet deployed. "
                 "Please wait for the Inference Endpoint to be deployed using `endpoint.wait()` and try again."
             )
+        from .inference._generated._async_client import AsyncInferenceClient
+
         return AsyncInferenceClient(
             model=self.url,
             token=self._token,  # type: ignore[arg-type] # boolean token shouldn't be possible. In practice it's ok.
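The change above is the standard lazy-import pattern: annotations reference the client classes as strings under `TYPE_CHECKING`, and the real import is deferred until a property is first accessed, keeping module import time down. A minimal self-contained sketch of the pattern (module and class names are made up):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen by static type checkers only, never executed at runtime.
        from expensive_module import ExpensiveClient  # hypothetical module


    class Endpoint:
        @property
        def client(self) -> "ExpensiveClient":
            # Deferred import: the cost is paid on first access, not at import time.
            from expensive_module import ExpensiveClient

            return ExpensiveClient()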
src/huggingface_hub/_snapshot_download.py

@@ -200,12 +200,13 @@ def snapshot_download(
             commit_hash = f.read()
 
     # Try to locate snapshot folder for this commit hash
-    if commit_hash is not None:
+    if commit_hash is not None and local_dir is None:
         snapshot_folder = os.path.join(storage_folder, "snapshots", commit_hash)
         if os.path.exists(snapshot_folder):
             # Snapshot folder exists => let's return it
             # (but we can't check if all the files are actually there)
             return snapshot_folder
+
     # If local_dir is not None, return it if it exists and is not empty
     if local_dir is not None:
         local_dir = Path(local_dir)
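A plausible reading of this fix: when `local_dir` is passed, a locally cached commit hash no longer short-circuits `snapshot_download` into returning a cache snapshot folder; resolution proceeds against the local directory instead. A hedged usage sketch (repo id is a placeholder):

    from huggingface_hub import snapshot_download

    # With 0.31.0, passing local_dir means the returned path points at
    # ./my-model rather than at a snapshot folder inside the HF cache.
    path = snapshot_download(repo_id="user/my-model", local_dir="./my-model")
    print(path)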
src/huggingface_hub/_space_api.py

@@ -81,11 +81,6 @@ class SpaceHardware(str, Enum):
     H100 = "h100"
     H100X8 = "h100x8"
 
-    # TPU
-    V5E_1X1 = "v5e-1x1"
-    V5E_2X2 = "v5e-2x2"
-    V5E_2X4 = "v5e-2x4"
-
 
 class SpaceStorage(str, Enum):
     """
src/huggingface_hub/_upload_large_folder.py

@@ -44,6 +44,7 @@ logger = logging.getLogger(__name__)
 WAITING_TIME_IF_NO_TASKS = 10  # seconds
 MAX_NB_REGULAR_FILES_PER_COMMIT = 75
 MAX_NB_LFS_FILES_PER_COMMIT = 150
+COMMIT_SIZE_SCALE: List[int] = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]
 
 
 def upload_large_folder_internal(
@@ -184,6 +185,8 @@ class LargeUploadStatus:
         self.last_commit_attempt: Optional[float] = None
 
         self._started_at = datetime.now()
+        self._chunk_idx: int = 1
+        self._chunk_lock: Lock = Lock()
 
         # Setup queues
         for item in self.items:
@@ -199,6 +202,21 @@ class LargeUploadStatus:
             else:
                 logger.debug(f"Skipping file {paths.path_in_repo} (already uploaded and committed)")
 
+    def target_chunk(self) -> int:
+        with self._chunk_lock:
+            return COMMIT_SIZE_SCALE[self._chunk_idx]
+
+    def update_chunk(self, success: bool, nb_items: int, duration: float) -> None:
+        with self._chunk_lock:
+            if not success:
+                logger.warning(f"Failed to commit {nb_items} files at once. Will retry with less files in next batch.")
+                self._chunk_idx -= 1
+            elif nb_items >= COMMIT_SIZE_SCALE[self._chunk_idx] and duration < 40:
+                logger.info(f"Successfully committed {nb_items} at once. Increasing the limit for next batch.")
+                self._chunk_idx += 1
+
+            self._chunk_idx = max(0, min(self._chunk_idx, len(COMMIT_SIZE_SCALE) - 1))
+
     def current_report(self) -> str:
         """Generate a report of the current status of the large upload."""
         nb_hashed = 0
@@ -351,6 +369,8 @@ def _worker_job(
             status.nb_workers_preupload_lfs -= 1
 
         elif job == WorkerJob.COMMIT:
+            start_ts = time.time()
+            success = True
             try:
                 _commit(items, api=api, repo_id=repo_id, repo_type=repo_type, revision=revision)
             except KeyboardInterrupt:
@@ -360,6 +380,9 @@ def _worker_job(
                 traceback.format_exc()
                 for item in items:
                     status.queue_commit.put(item)
+                success = False
+            duration = time.time() - start_ts
+            status.update_chunk(success, len(items), duration)
             with status.lock:
                 status.last_commit_attempt = time.time()
                 status.nb_workers_commit -= 1
@@ -393,7 +416,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() >= 10:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (>10 files ready)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 4. Preupload LFS file if at least 1 file and no worker is preuploading LFS
     elif status.queue_preupload_lfs.qsize() > 0 and status.nb_workers_preupload_lfs == 0:
@@ -411,7 +434,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0 and status.nb_workers_get_upload_mode == 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode (no other worker getting upload mode)")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 7. Preupload LFS file if at least 1 file
     # Skip if hf_transfer is enabled and there is already a worker preuploading LFS
@@ -432,7 +455,7 @@ def _determine_next_job(status: LargeUploadStatus) -> Optional[Tuple[WorkerJob,
     elif status.queue_get_upload_mode.qsize() > 0:
         status.nb_workers_get_upload_mode += 1
         logger.debug("Job: get upload mode")
-        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, 50))
+        return (WorkerJob.GET_UPLOAD_MODE, _get_n(status.queue_get_upload_mode, status.target_chunk()))
 
     # 10. Commit if at least 1 file and 1 min since last commit attempt
     elif (
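Taken together, the commit batch size now follows an increase/back-off ladder over `COMMIT_SIZE_SCALE`: a full batch committed in under 40 seconds moves the index up one step, a failed commit moves it down, and the index is clamped to the table. A self-contained sketch of that behavior (the thread lock from the real class is omitted for brevity):

    COMMIT_SIZE_SCALE = [20, 50, 75, 100, 125, 200, 250, 400, 600, 1000]


    class ChunkLadder:
        def __init__(self) -> None:
            self.idx = 1  # start at 50 files per commit, as above

        def target(self) -> int:
            return COMMIT_SIZE_SCALE[self.idx]

        def update(self, success: bool, nb_items: int, duration: float) -> None:
            if not success:
                self.idx -= 1  # back off after a failed commit
            elif nb_items >= COMMIT_SIZE_SCALE[self.idx] and duration < 40:
                self.idx += 1  # full, fast batch => try a bigger one next time
            self.idx = max(0, min(self.idx, len(COMMIT_SIZE_SCALE) - 1))


    ladder = ChunkLadder()
    ladder.update(success=True, nb_items=50, duration=12.0)
    print(ladder.target())  # 75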
src/huggingface_hub/commands/upload.py

@@ -59,6 +59,7 @@ from huggingface_hub.constants import HF_HUB_ENABLE_HF_TRANSFER
 from huggingface_hub.errors import RevisionNotFoundError
 from huggingface_hub.hf_api import HfApi
 from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
+from huggingface_hub.utils._runtime import is_xet_available
 
 
 logger = logging.get_logger(__name__)
@@ -215,7 +216,7 @@ class UploadCommand(BaseHuggingfaceCLICommand):
         if self.delete is not None and len(self.delete) > 0:
             warnings.warn("Ignoring `--delete` since a single file is uploaded.")
 
-        if not HF_HUB_ENABLE_HF_TRANSFER:
+        if not is_xet_available() and not HF_HUB_ENABLE_HF_TRANSFER:
             logger.info(
                 "Consider using `hf_transfer` for faster uploads. This solution comes with some limitations. See"
                 " https://huggingface.co/docs/huggingface_hub/hf_transfer for more details."
src/huggingface_hub/constants.py

@@ -36,6 +36,7 @@ DEFAULT_DOWNLOAD_TIMEOUT = 10
 DEFAULT_REQUEST_TIMEOUT = 10
 DOWNLOAD_CHUNK_SIZE = 10 * 1024 * 1024
 HF_TRANSFER_CONCURRENCY = 100
+MAX_HTTP_DOWNLOAD_SIZE = 50 * 1000 * 1000 * 1000  # 50 GB
 
 # Constants for serialization
src/huggingface_hub/file_download.py

@@ -44,7 +44,6 @@ from .utils import (
     get_graphviz_version,  # noqa: F401 # for backward compatibility
     get_jinja_version,  # noqa: F401 # for backward compatibility
     get_pydot_version,  # noqa: F401 # for backward compatibility
-    get_session,
     get_tf_version,  # noqa: F401 # for backward compatibility
     get_torch_version,  # noqa: F401 # for backward compatibility
     hf_raise_for_status,
@@ -62,7 +61,7 @@ from .utils import (
     tqdm,
     validate_hf_hub_args,
 )
-from .utils._http import _adjust_range_header
+from .utils._http import _adjust_range_header, http_backoff
 from .utils._runtime import _PY_VERSION, is_xet_available  # noqa: F401 # for backward compatibility
 from .utils._typing import HTTP_METHOD_T
 from .utils.sha import sha_fileobj
@@ -268,6 +267,8 @@ def _request_wrapper(
     """Wrapper around requests methods to follow relative redirects if `follow_relative_redirects=True` even when
     `allow_redirection=False`.
 
+    A backoff mechanism retries the HTTP call on 429, 503 and 504 errors.
+
     Args:
         method (`str`):
             HTTP method, such as 'GET' or 'HEAD'.
@@ -305,11 +306,40 @@ def _request_wrapper(
         return response
 
     # Perform request and return if status_code is not in the retry list.
-    response = get_session().request(method=method, url=url, **params)
+    response = http_backoff(method=method, url=url, **params, retry_on_exceptions=(), retry_on_status_codes=(429,))
     hf_raise_for_status(response)
     return response
 
 
+def _get_file_length_from_http_response(response: requests.Response) -> Optional[int]:
+    """
+    Get the length of the file from the HTTP response headers.
+
+    This function extracts the file size from the HTTP response headers, either from the
+    `Content-Range` or `Content-Length` header, if available (in that order).
+
+    Args:
+        response (`requests.Response`):
+            The HTTP response object.
+
+    Returns:
+        `int` or `None`: The length of the file in bytes if the information is available, otherwise `None`.
+    """
+    content_range = response.headers.get("Content-Range")
+    if content_range is not None:
+        return int(content_range.rsplit("/")[-1])
+
+    content_length = response.headers.get("Content-Length")
+    if content_length is not None:
+        return int(content_length)
+
+    return None
+
+
 def http_get(
     url: str,
     temp_file: BinaryIO,
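Checking `Content-Range` before `Content-Length` matters for ranged requests: the total file size sits after the `/` in `Content-Range`, while `Content-Length` only reports the size of the returned slice. A hedged sketch with plain `requests` (placeholder URL):

    import requests

    # Ask for a single byte; a range-aware server replies with e.g.
    # "Content-Range: bytes 0-0/134217728" while Content-Length is just 1.
    resp = requests.get(
        "https://example.com/big-file.bin",  # placeholder URL
        headers={"Range": "bytes=0-0"},
    )
    content_range = resp.headers.get("Content-Range")
    if content_range is not None:
        print(int(content_range.rsplit("/")[-1]))  # total size in bytes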
@@ -352,12 +382,15 @@ def http_get(
         # If the file is already fully downloaded, we don't need to download it again.
         return
 
+    has_custom_range_header = headers is not None and any(h.lower() == "range" for h in headers)
     hf_transfer = None
     if constants.HF_HUB_ENABLE_HF_TRANSFER:
         if resume_size != 0:
             warnings.warn("'hf_transfer' does not support `resume_size`: falling back to regular download method")
         elif proxies is not None:
             warnings.warn("'hf_transfer' does not support `proxies`: falling back to regular download method")
+        elif has_custom_range_header:
+            warnings.warn("'hf_transfer' ignores custom 'Range' headers; falling back to regular download method")
         else:
             try:
                 import hf_transfer  # type: ignore[no-redef]
@@ -372,12 +405,24 @@ def http_get(
     headers = copy.deepcopy(headers) or {}
     if resume_size > 0:
         headers["Range"] = _adjust_range_header(headers.get("Range"), resume_size)
+    elif expected_size and expected_size > constants.MAX_HTTP_DOWNLOAD_SIZE:
+        # Any files over 50GB will not be available through basic http request.
+        # Setting the range header to 0-0 will force the server to return the file size in the Content-Range header.
+        # Since hf_transfer splits the download into chunks, the process will succeed afterwards.
+        if hf_transfer:
+            headers["Range"] = "bytes=0-0"
+        else:
+            raise ValueError(
+                "The file is too large to be downloaded using the regular download method. Use `hf_transfer` or `hf_xet` instead."
+                " Try `pip install hf_transfer` or `pip install hf_xet`."
+            )
 
     r = _request_wrapper(
         method="GET", url=url, stream=True, proxies=proxies, headers=headers, timeout=constants.HF_HUB_DOWNLOAD_TIMEOUT
     )
+
     hf_raise_for_status(r)
-    content_length = r.headers.get("Content-Length")
+    content_length = _get_file_length_from_http_response(r)
 
     # NOTE: 'total' is the total number of bytes to download, not the number of bytes in the file.
     # If the file is compressed, the number of bytes in the saved file will be higher than 'total'.
@@ -425,7 +470,7 @@ def http_get(
                 filename=temp_file.name,
                 max_files=constants.HF_TRANSFER_CONCURRENCY,
                 chunk_size=constants.DOWNLOAD_CHUNK_SIZE,
-                headers=headers,
+                headers=initial_headers,
                 parallel_failures=3,
                 max_retries=5,
                 **({"callback": progress.update} if supports_callback else {}),
@@ -537,11 +582,11 @@ def xet_get(
 
     """
     try:
-        from hf_xet import PyPointerFile, download_files  # type: ignore[no-redef]
+        from hf_xet import PyXetDownloadInfo, download_files  # type: ignore[no-redef]
     except ImportError:
         raise ValueError(
             "To use optimized download using Xet storage, you need to install the hf_xet package. "
-            "Try `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`."
+            'Try `pip install "huggingface_hub[hf_xet]"` or `pip install hf_xet`.'
         )
 
     connection_info = refresh_xet_connection_info(file_data=xet_file_data, headers=headers)
@@ -552,8 +597,10 @@ def xet_get(
             raise ValueError("Failed to refresh token using xet metadata.")
         return connection_info.access_token, connection_info.expiration_unix_epoch
 
-    pointer_files = [
-        PyPointerFile(path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, filesize=expected_size)
+    xet_download_info = [
+        PyXetDownloadInfo(
+            destination_path=str(incomplete_path.absolute()), hash=xet_file_data.file_hash, file_size=expected_size
+        )
     ]
 
     if not displayed_filename:
@@ -578,7 +625,7 @@ def xet_get(
         progress.update(progress_bytes)
 
     download_files(
-        pointer_files,
+        xet_download_info,
         endpoint=connection_info.endpoint,
         token_info=(connection_info.access_token, connection_info.expiration_unix_epoch),
         token_refresher=token_refresher,
@@ -1672,6 +1719,7 @@ def _download_to_tmp_and_move(
             "Falling back to regular HTTP download. "
             "For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`"
         )
+
     http_get(
         url_to_download,
         f,
src/huggingface_hub/hf_api.py

@@ -708,14 +708,21 @@ class RepoFolder:
 
 @dataclass
 class InferenceProviderMapping:
+    hf_model_id: str
     status: Literal["live", "staging"]
     provider_id: str
     task: str
 
+    adapter: Optional[str] = None
+    adapter_weights_path: Optional[str] = None
+
     def __init__(self, **kwargs):
+        self.hf_model_id = kwargs.pop("hf_model_id")
         self.status = kwargs.pop("status")
         self.provider_id = kwargs.pop("providerId")
         self.task = kwargs.pop("task")
+        self.adapter = kwargs.pop("adapter", None)
+        self.adapter_weights_path = kwargs.pop("adapterWeightsPath", None)
         self.__dict__.update(**kwargs)
 
 
@@ -847,7 +854,9 @@ class ModelInfo:
         self.inference_provider_mapping = kwargs.pop("inferenceProviderMapping", None)
         if self.inference_provider_mapping:
             self.inference_provider_mapping = {
-                provider: InferenceProviderMapping(**value)
+                provider: InferenceProviderMapping(
+                    **{**value, "hf_model_id": self.id}
+                )  # little hack to simplify Inference Providers logic
                 for provider, value in self.inference_provider_mapping.items()
            }
 
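For illustration, this is roughly how a raw mapping entry from the Hub API is now enriched with the model id before parsing (all values below are made up; the camelCase keys match those popped in `__init__`):

    from huggingface_hub.hf_api import InferenceProviderMapping

    raw = {
        "status": "live",
        "providerId": "some-provider/model-slug",
        "task": "text-generation",
        "adapter": "lora",
        "adapterWeightsPath": "adapter_model.safetensors",
    }

    # hf_model_id is injected from ModelInfo.id rather than returned by the API:
    mapping = InferenceProviderMapping(**{**raw, "hf_model_id": "user/my-model"})
    print(mapping.hf_model_id, mapping.provider_id, mapping.adapter)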
@@ -4466,18 +4475,17 @@ class HfApi:
             expand="xetEnabled",
             token=token,
         ).xet_enabled
-        has_binary_data = any(
-            isinstance(addition.path_or_fileobj, (bytes, io.BufferedIOBase))
-            for addition in new_lfs_additions_to_upload
+        has_buffered_io_data = any(
+            isinstance(addition.path_or_fileobj, io.BufferedIOBase) for addition in new_lfs_additions_to_upload
         )
-        if xet_enabled and not has_binary_data and is_xet_available():
+        if xet_enabled and not has_buffered_io_data and is_xet_available():
             logger.info("Uploading files using Xet Storage..")
             _upload_xet_files(**upload_kwargs, create_pr=create_pr)  # type: ignore [arg-type]
         else:
             if xet_enabled and is_xet_available():
-                if has_binary_data:
+                if has_buffered_io_data:
                     logger.warning(
-                        "Uploading files as bytes or binary IO objects is not supported by Xet Storage. "
+                        "Uploading files as a binary IO buffer is not supported by Xet Storage. "
                         "Falling back to HTTP upload."
                     )
             _upload_lfs_files(**upload_kwargs, num_threads=num_threads)  # type: ignore [arg-type]
@@ -7564,8 +7572,13 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
         type: InferenceEndpointType = InferenceEndpointType.PROTECTED,
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
@@ -7603,10 +7616,20 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
             type ([`InferenceEndpointType`], *optional*):
                 The type of the Inference Endpoint, which can be `"protected"` (default), `"public"` or `"private"`.
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model; it should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint. Defaults to `False`.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7657,17 +7680,18 @@ class HfApi:
         ...     type="protected",
         ...     instance_size="x1",
         ...     instance_type="nvidia-a10g",
+        ...     env={
+        ...         "MAX_BATCH_PREFILL_TOKENS": "2048",
+        ...         "MAX_INPUT_LENGTH": "1024",
+        ...         "MAX_TOTAL_TOKENS": "1512",
+        ...         "MODEL_ID": "/repository"
+        ...     },
         ...     custom_image={
         ...         "health_route": "/health",
-        ...         "env": {
-        ...             "MAX_BATCH_PREFILL_TOKENS": "2048",
-        ...             "MAX_INPUT_LENGTH": "1024",
-        ...             "MAX_TOTAL_TOKENS": "1512",
-        ...             "MODEL_ID": "/repository"
-        ...         },
         ...         "url": "ghcr.io/huggingface/text-generation-inference:1.1.0",
         ...     },
         ...     secrets={"MY_SECRET_KEY": "secret_value"},
+        ...     tags=["dev", "text-generation"],
         ... )
 
         ```
@@ -7701,8 +7725,21 @@ class HfApi:
             },
             "type": type,
         }
+        if env:
+            payload["model"]["env"] = env
         if secrets:
             payload["model"]["secrets"] = secrets
+        if domain is not None or path is not None:
+            payload["route"] = {}
+            if domain is not None:
+                payload["route"]["domain"] = domain
+            if path is not None:
+                payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
+
         response = get_session().post(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}",
             headers=self._build_hf_headers(token=token),
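Put together, a 0.31.0-style call using the new parameters might look like this (a hedged sketch; the endpoint name, repository, and route values are placeholders):

    from huggingface_hub import HfApi

    api = HfApi()
    endpoint = api.create_inference_endpoint(
        "my-endpoint-name",            # placeholder
        repository="gpt2",
        framework="pytorch",
        accelerator="cpu",
        instance_size="x2",
        instance_type="intel-icl",
        region="us-east-1",
        vendor="aws",
        task="text-generation",
        env={"LOG_LEVEL": "debug"},    # new in 0.31.0
        domain="models.example.com",   # new: custom route domain
        path="/models/gpt2",           # new: custom route path
        tags=["dev"],                  # new
    )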
@@ -7864,15 +7901,21 @@ class HfApi:
         revision: Optional[str] = None,
         task: Optional[str] = None,
         custom_image: Optional[Dict] = None,
+        env: Optional[Dict[str, str]] = None,
         secrets: Optional[Dict[str, str]] = None,
+        # Route update
+        domain: Optional[str] = None,
+        path: Optional[str] = None,
         # Other
+        cache_http_responses: Optional[bool] = None,
+        tags: Optional[List[str]] = None,
         namespace: Optional[str] = None,
         token: Union[bool, str, None] = None,
     ) -> InferenceEndpoint:
         """Update an Inference Endpoint.
 
-        This method allows the update of either the compute configuration, the deployed model, or both. All arguments are
-        optional but at least one must be provided.
+        This method allows the update of either the compute configuration, the deployed model, the route, or any combination.
+        All arguments are optional but at least one must be provided.
 
         For convenience, you can also update an Inference Endpoint using [`InferenceEndpoint.update`].
 
@@ -7904,8 +7947,21 @@ class HfApi:
             custom_image (`Dict`, *optional*):
                 A custom Docker image to use for the Inference Endpoint. This is useful if you want to deploy an
                 Inference Endpoint running on the `text-generation-inference` (TGI) framework (see examples).
+            env (`Dict[str, str]`, *optional*):
+                Non-secret environment variables to inject in the container environment.
             secrets (`Dict[str, str]`, *optional*):
                 Secret values to inject in the container environment.
+
+            domain (`str`, *optional*):
+                The custom domain for the Inference Endpoint deployment. If set, the Inference Endpoint will be available at this domain (e.g. `"my-new-domain.cool-website.woof"`).
+            path (`str`, *optional*):
+                The custom path to the deployed model; it should start with a `/` (e.g. `"/models/google-bert/bert-base-uncased"`).
+
+            cache_http_responses (`bool`, *optional*):
+                Whether to cache HTTP responses from the Inference Endpoint.
+            tags (`List[str]`, *optional*):
+                A list of tags to associate with the Inference Endpoint.
+
             namespace (`str`, *optional*):
                 The namespace where the Inference Endpoint will be updated. Defaults to the current user's namespace.
             token (Union[bool, str, None], optional):
@@ -7943,8 +7999,18 @@ class HfApi:
             payload["model"]["task"] = task
         if custom_image is not None:
             payload["model"]["image"] = {"custom": custom_image}
+        if env is not None:
+            payload["model"]["env"] = env
         if secrets is not None:
             payload["model"]["secrets"] = secrets
+        if domain is not None:
+            payload["route"]["domain"] = domain
+        if path is not None:
+            payload["route"]["path"] = path
+        if cache_http_responses is not None:
+            payload["cacheHttpResponses"] = cache_http_responses
+        if tags is not None:
+            payload["tags"] = tags
 
         response = get_session().put(
             f"{constants.INFERENCE_ENDPOINTS_ENDPOINT}/endpoint/{namespace}/{name}",
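And a matching update call exercising the new arguments (a hedged sketch with placeholder values, continuing the `api` object from the previous example):

    endpoint = api.update_inference_endpoint(
        "my-endpoint-name",             # placeholder
        env={"LOG_LEVEL": "debug"},
        domain="models.example.com",
        path="/models/gpt2",
        cache_http_responses=True,
        tags=["dev", "text-generation"],
    )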