huggingface-hub 0.21.4__py3-none-any.whl → 0.22.0rc0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their registry, and is provided for informational purposes only.
Note: this release has been flagged as potentially problematic.
- huggingface_hub/__init__.py +217 -1
- huggingface_hub/_commit_api.py +14 -15
- huggingface_hub/_inference_endpoints.py +12 -11
- huggingface_hub/_login.py +1 -0
- huggingface_hub/_multi_commits.py +1 -0
- huggingface_hub/_snapshot_download.py +9 -1
- huggingface_hub/_tensorboard_logger.py +1 -0
- huggingface_hub/_webhooks_payload.py +1 -0
- huggingface_hub/_webhooks_server.py +1 -0
- huggingface_hub/commands/_cli_utils.py +1 -0
- huggingface_hub/commands/delete_cache.py +1 -0
- huggingface_hub/commands/download.py +1 -0
- huggingface_hub/commands/env.py +1 -0
- huggingface_hub/commands/scan_cache.py +1 -0
- huggingface_hub/commands/upload.py +1 -0
- huggingface_hub/community.py +1 -0
- huggingface_hub/constants.py +3 -1
- huggingface_hub/errors.py +38 -0
- huggingface_hub/file_download.py +24 -24
- huggingface_hub/hf_api.py +47 -35
- huggingface_hub/hub_mixin.py +210 -54
- huggingface_hub/inference/_client.py +554 -239
- huggingface_hub/inference/_common.py +195 -41
- huggingface_hub/inference/_generated/_async_client.py +558 -239
- huggingface_hub/inference/_generated/types/__init__.py +115 -0
- huggingface_hub/inference/_generated/types/audio_classification.py +43 -0
- huggingface_hub/inference/_generated/types/audio_to_audio.py +31 -0
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +116 -0
- huggingface_hub/inference/_generated/types/base.py +149 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +106 -0
- huggingface_hub/inference/_generated/types/depth_estimation.py +29 -0
- huggingface_hub/inference/_generated/types/document_question_answering.py +85 -0
- huggingface_hub/inference/_generated/types/feature_extraction.py +19 -0
- huggingface_hub/inference/_generated/types/fill_mask.py +50 -0
- huggingface_hub/inference/_generated/types/image_classification.py +43 -0
- huggingface_hub/inference/_generated/types/image_segmentation.py +52 -0
- huggingface_hub/inference/_generated/types/image_to_image.py +55 -0
- huggingface_hub/inference/_generated/types/image_to_text.py +105 -0
- huggingface_hub/inference/_generated/types/object_detection.py +55 -0
- huggingface_hub/inference/_generated/types/question_answering.py +77 -0
- huggingface_hub/inference/_generated/types/sentence_similarity.py +28 -0
- huggingface_hub/inference/_generated/types/summarization.py +46 -0
- huggingface_hub/inference/_generated/types/table_question_answering.py +45 -0
- huggingface_hub/inference/_generated/types/text2text_generation.py +45 -0
- huggingface_hub/inference/_generated/types/text_classification.py +43 -0
- huggingface_hub/inference/_generated/types/text_generation.py +161 -0
- huggingface_hub/inference/_generated/types/text_to_audio.py +105 -0
- huggingface_hub/inference/_generated/types/text_to_image.py +57 -0
- huggingface_hub/inference/_generated/types/token_classification.py +53 -0
- huggingface_hub/inference/_generated/types/translation.py +46 -0
- huggingface_hub/inference/_generated/types/video_classification.py +47 -0
- huggingface_hub/inference/_generated/types/visual_question_answering.py +53 -0
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +56 -0
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +51 -0
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +55 -0
- huggingface_hub/inference/_templating.py +105 -0
- huggingface_hub/inference/_types.py +4 -152
- huggingface_hub/keras_mixin.py +39 -17
- huggingface_hub/lfs.py +20 -8
- huggingface_hub/repocard.py +11 -3
- huggingface_hub/repocard_data.py +12 -2
- huggingface_hub/serialization/__init__.py +1 -0
- huggingface_hub/serialization/_base.py +1 -0
- huggingface_hub/serialization/_numpy.py +1 -0
- huggingface_hub/serialization/_tensorflow.py +1 -0
- huggingface_hub/serialization/_torch.py +1 -0
- huggingface_hub/utils/__init__.py +4 -1
- huggingface_hub/utils/_cache_manager.py +7 -0
- huggingface_hub/utils/_chunk_utils.py +1 -0
- huggingface_hub/utils/_datetime.py +1 -0
- huggingface_hub/utils/_errors.py +10 -1
- huggingface_hub/utils/_experimental.py +1 -0
- huggingface_hub/utils/_fixes.py +19 -3
- huggingface_hub/utils/_git_credential.py +1 -0
- huggingface_hub/utils/_headers.py +10 -3
- huggingface_hub/utils/_hf_folder.py +1 -0
- huggingface_hub/utils/_http.py +1 -0
- huggingface_hub/utils/_pagination.py +1 -0
- huggingface_hub/utils/_paths.py +1 -0
- huggingface_hub/utils/_runtime.py +22 -0
- huggingface_hub/utils/_subprocess.py +1 -0
- huggingface_hub/utils/_token.py +1 -0
- huggingface_hub/utils/_typing.py +29 -1
- huggingface_hub/utils/_validators.py +1 -0
- huggingface_hub/utils/endpoint_helpers.py +1 -0
- huggingface_hub/utils/logging.py +1 -1
- huggingface_hub/utils/sha.py +1 -0
- huggingface_hub/utils/tqdm.py +1 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/METADATA +14 -15
- huggingface_hub-0.22.0rc0.dist-info/RECORD +113 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/WHEEL +1 -1
- huggingface_hub/inference/_text_generation.py +0 -551
- huggingface_hub-0.21.4.dist-info/RECORD +0 -81
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.21.4.dist-info → huggingface_hub-0.22.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_common.py

@@ -13,10 +13,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains utilities used by both the sync and async inference clients."""
+
 import base64
 import io
 import json
 import logging
+import time
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
@@ -31,6 +33,7 @@ from typing import (
     Iterable,
     List,
     Literal,
+    NoReturn,
     Optional,
     Set,
     Union,
@@ -39,6 +42,15 @@ from typing import (
 
 from requests import HTTPError
 
+from huggingface_hub.errors import (
+    GenerationError,
+    IncompleteGenerationError,
+    OverloadedError,
+    TextGenerationError,
+    UnknownError,
+    ValidationError,
+)
+
 from ..constants import ENDPOINT
 from ..utils import (
     build_hf_headers,
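The text-generation error classes are now imported from a new top-level `huggingface_hub.errors` module (added in this release; see `errors.py +38` in the file list), which gives downstream code a public location to catch them from. A minimal caller-side sketch, assuming only that the module exports the classes exactly as imported in the hunk above:

```python
# Hypothetical caller-side error handling; class names are taken from the
# import block above, the rest of the snippet is illustrative.
from huggingface_hub import InferenceClient
from huggingface_hub.errors import OverloadedError, ValidationError

client = InferenceClient()
try:
    text = client.text_generation("Explain server-sent events briefly.", max_new_tokens=40)
    print(text)
except ValidationError as e:
    # Bad request parameters (e.g. too many tokens requested for the model)
    print(f"Invalid request parameters: {e}")
except OverloadedError:
    # Server-side back-pressure from text-generation-inference
    print("Model is overloaded; worth retrying with backoff.")
```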
@@ -48,7 +60,12 @@ from ..utils import (
     is_numpy_available,
     is_pillow_available,
 )
-from .
+from ._generated.types import (
+    ChatCompletionStreamOutput,
+    ChatCompletionStreamOutputChoice,
+    ChatCompletionStreamOutputDelta,
+    TextGenerationStreamOutput,
+)
 
 
 if TYPE_CHECKING:
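The removed import (truncated to `from .` by the diff viewer) is replaced by the new generated stream types; the `_generated/types/*` files in the list above are all new in this release. Judging by the `parse_obj_as_instance` calls later in this diff, these types are built from decoded JSON dicts rather than instantiated directly from keyword arguments as `TextGenerationStreamResponse(**json_payload)` was. A rough sketch of that usage; the payload fields are assumed from how the code below accesses `output.token.text` and `output.details`, not from an official schema:

```python
import json

from huggingface_hub.inference._generated.types import TextGenerationStreamOutput

# Assumed minimal TGI stream event (field names inferred from this diff).
raw = b'data: {"index": 0, "token": {"id": 42, "text": "Hello", "logprob": -0.1, "special": false}, "generated_text": null, "details": null}'
json_payload = json.loads(raw.decode("utf-8").lstrip("data:"))

output = TextGenerationStreamOutput.parse_obj_as_instance(json_payload)
print(output.token.text)  # -> "Hello"
```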
@@ -98,10 +115,6 @@ class ModelStatus:
     framework: str
 
 
-class InferenceTimeoutError(HTTPError, TimeoutError):
-    """Error raised when a model is unavailable or the request times out."""
-
-
 ## IMPORT UTILS
 
 
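`InferenceTimeoutError` is removed from `_common.py` without a replacement in this hunk. Given the new `errors.py` module (+38 lines) and the error imports added earlier in this file, the class has presumably been relocated there rather than deleted, in which case the top-level `from huggingface_hub import InferenceTimeoutError` path keeps working. A quick check against an installed 0.22.0rc0 to confirm that assumption:

```python
# Assumption being verified: the class moved to huggingface_hub.errors and is
# still re-exported at the package top level for backward compatibility.
import huggingface_hub

err_cls = huggingface_hub.InferenceTimeoutError
print(err_cls.__module__)  # expected: huggingface_hub.errors
print(err_cls.__bases__)   # expected to still include HTTPError and TimeoutError
```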
@@ -163,13 +176,15 @@ def _first_or_none(items: List[Any]) -> Optional[Any]:
 
 
 @overload
-def _open_as_binary(content: ContentT) -> ContextManager[BinaryT]:  # means "if input is not None, output is not None"
-    ...
+def _open_as_binary(
+    content: ContentT,
+) -> ContextManager[BinaryT]: ...  # means "if input is not None, output is not None"
 
 
 @overload
-def _open_as_binary(content: Literal[None]) -> ContextManager[Literal[None]]:  # means "if input is None, output is None"
-    ...
+def _open_as_binary(
+    content: Literal[None],
+) -> ContextManager[Literal[None]]: ...  # means "if input is None, output is None"
 
 
 @contextmanager  # type: ignore
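The change to `_open_as_binary` is purely a reflow: one parameter per line so the explanatory comments sit next to the overload stubs. The idiom itself is the standard `typing.overload` trick for telling a type checker that `None` in means `None` out while a real value in means a real value out, backed by a single runtime implementation. A self-contained toy version of the same pattern (names invented for illustration):

```python
from contextlib import contextmanager
from typing import BinaryIO, ContextManager, Iterator, Literal, Optional, overload


@overload
def open_maybe(path: str) -> ContextManager[BinaryIO]: ...  # real path in, file out
@overload
def open_maybe(path: Literal[None]) -> ContextManager[None]: ...  # None in, None out
@contextmanager  # type: ignore
def open_maybe(path: Optional[str]) -> Iterator[Optional[BinaryIO]]:
    # Single implementation backing both overload signatures.
    if path is None:
        yield None
    else:
        with open(path, "rb") as f:
            yield f
```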
@@ -253,48 +268,125 @@ def _bytes_to_image(content: bytes) -> "Image":
 
 def _stream_text_generation_response(
     bytes_output_as_lines: Iterable[bytes], details: bool
-) -> Union[Iterable[str], Iterable[TextGenerationStreamResponse]]:
+) -> Union[Iterable[str], Iterable[TextGenerationStreamOutput]]:
+    """Used in `InferenceClient.text_generation`."""
     # Parse ServerSentEvents
     for byte_payload in bytes_output_as_lines:
-        # Skip line
-        if byte_payload == b"\n":
-            continue
-
-        payload = byte_payload.decode("utf-8")
-
-        # Event data
-        if payload.startswith("data:"):
-            # Decode payload
-            json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
-            # Either an error as being returned
-            if json_payload.get("error") is not None:
-                raise _parse_text_generation_error(json_payload["error"], json_payload.get("error_type"))
-            # Or parse token payload
-            output = TextGenerationStreamResponse(**json_payload)
-            yield output.token.text if not details else output
+        output = _format_text_generation_stream_output(byte_payload, details)
+        if output is not None:
+            yield output
 
 
 async def _async_stream_text_generation_response(
     bytes_output_as_lines: AsyncIterable[bytes], details: bool
-) -> Union[AsyncIterable[str], AsyncIterable[TextGenerationStreamResponse]]:
+) -> Union[AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]:
+    """Used in `AsyncInferenceClient.text_generation`."""
     # Parse ServerSentEvents
     async for byte_payload in bytes_output_as_lines:
-        # Skip line
-        if byte_payload == b"\n":
-            continue
+        output = _format_text_generation_stream_output(byte_payload, details)
+        if output is not None:
+            yield output
+
+
+def _format_text_generation_stream_output(
+    byte_payload: bytes, details: bool
+) -> Optional[Union[str, TextGenerationStreamOutput]]:
+    if not byte_payload.startswith(b"data:"):
+        return None  # empty line
+
+    # Decode payload
+    payload = byte_payload.decode("utf-8")
+    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+
+    # Either an error as being returned
+    if json_payload.get("error") is not None:
+        raise _parse_text_generation_error(json_payload["error"], json_payload.get("error_type"))
+
+    # Or parse token payload
+    output = TextGenerationStreamOutput.parse_obj_as_instance(json_payload)
+    return output.token.text if not details else output
+
+
+def _stream_chat_completion_response_from_text_generation(
+    text_generation_output: Iterable[TextGenerationStreamOutput],
+) -> Iterable[ChatCompletionStreamOutput]:
+    """Used in `InferenceClient.chat_completion`."""
+    created = int(time.time())
+    for item in text_generation_output:
+        yield _format_chat_completion_stream_output_from_text_generation(item, created)
+
+
+async def _async_stream_chat_completion_response_from_text_generation(
+    text_generation_output: AsyncIterable[TextGenerationStreamOutput],
+) -> AsyncIterable[ChatCompletionStreamOutput]:
+    """Used in `AsyncInferenceClient.chat_completion`."""
+    created = int(time.time())
+    async for item in text_generation_output:
+        yield _format_chat_completion_stream_output_from_text_generation(item, created)
+
+
+def _format_chat_completion_stream_output_from_text_generation(
+    item: TextGenerationStreamOutput, created: int
+) -> ChatCompletionStreamOutput:
+    if item.details is None:
+        # new token generated => return delta
+        return ChatCompletionStreamOutput(
+            choices=[
+                ChatCompletionStreamOutputChoice(
+                    delta=ChatCompletionStreamOutputDelta(
+                        role="assistant",
+                        content=item.token.text,
+                    ),
+                    finish_reason=None,
+                    index=0,
+                )
+            ],
+            created=created,
+        )
+    else:
+        # generation is completed => return finish reason
+        return ChatCompletionStreamOutput(
+            choices=[
+                ChatCompletionStreamOutputChoice(
+                    delta=ChatCompletionStreamOutputDelta(),
+                    finish_reason=item.details.finish_reason,
+                    index=0,
+                )
+            ],
+            created=created,
+        )
+
+
+def _stream_chat_completion_response_from_bytes(
+    bytes_lines: Iterable[bytes],
+) -> Iterable[ChatCompletionStreamOutput]:
+    """Used in `InferenceClient.chat_completion` if model is served with TGI."""
+    for item in bytes_lines:
+        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        if output is not None:
+            yield output
 
-        payload = byte_payload.decode("utf-8")
 
-        # Event data
-        if payload.startswith("data:"):
-            # Decode payload
-            json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
-            # Either an error as being returned
-            if json_payload.get("error") is not None:
-                raise _parse_text_generation_error(json_payload["error"], json_payload.get("error_type"))
-            # Or parse token payload
-            output = TextGenerationStreamResponse(**json_payload)
-            yield output.token.text if not details else output
+async def _async_stream_chat_completion_response_from_bytes(
+    bytes_lines: AsyncIterable[bytes],
+) -> AsyncIterable[ChatCompletionStreamOutput]:
+    """Used in `AsyncInferenceClient.chat_completion`."""
+    async for item in bytes_lines:
+        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        if output is not None:
+            yield output
+
+
+def _format_chat_completion_stream_output_from_text_generation_from_bytes(
+    byte_payload: bytes,
+) -> Optional[ChatCompletionStreamOutput]:
+    if not byte_payload.startswith(b"data:"):
+        return None  # empty line
+
+    # Decode payload
+    payload = byte_payload.decode("utf-8")
+    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+    return ChatCompletionStreamOutput.parse_obj_as_instance(json_payload)
 
 
 async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
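The bulk of this hunk factors the server-sent-events parsing out of the sync and async generators into a shared `_format_text_generation_stream_output`, then reuses the same pattern for the new chat-completion stream (either adapted token-by-token from `text_generation`, or decoded directly from bytes when the server exposes a native chat route). A standalone sketch of the SSE handling these helpers share; the payloads are made up, while the `data:` framing and the stripping mirror the code above:

```python
import json
from typing import Iterable, Optional


def parse_sse_line(byte_payload: bytes) -> Optional[dict]:
    """Same shape as _format_text_generation_stream_output, minus the typed output."""
    if not byte_payload.startswith(b"data:"):
        return None  # keep-alive / empty line between events
    payload = byte_payload.decode("utf-8")
    return json.loads(payload.lstrip("data:").rstrip("/n"))


# Simulated TGI stream: two token events separated by a blank keep-alive line.
lines: Iterable[bytes] = [
    b'data: {"token": {"text": "Hello"}, "details": null}',
    b"",
    b'data: {"token": {"text": " world"}, "details": {"finish_reason": "length"}}',
]
for line in lines:
    event = parse_sse_line(line)
    if event is not None:
        print(event["token"]["text"], "finished:", event["details"] is not None)
```

Worth noting while reading: `lstrip("data:")` and `rstrip("/n")` strip *character sets*, not prefixes, and `"/n"` looks like a long-standing typo for `"\n"`; both are carried over unchanged from the 0.21.4 implementation.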
@@ -314,6 +406,10 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # default API with a warning message. We remember for each model if it's a TGI server
 # or not using `_NON_TGI_SERVERS` global variable.
 #
+# In addition, TGI servers have a built-in API route for chat-completion, which is not
+# available on the default API. We use this route to provide a more consistent behavior
+# when available.
+#
 # For more details, see https://github.com/huggingface/text-generation-inference and
 # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task.
 
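This comment documents the dispatch strategy behind the new `chat_completion` API: try the TGI chat-completion route first, and fall back (tracked via `_NON_CHAT_COMPLETION_SERVER` below) when the server does not expose it. From the caller's side the dispatch is invisible; a sketch of the new client surface, assuming the 0.22 signatures:

```python
from huggingface_hub import InferenceClient

# Model name is illustrative; any conversational model served by TGI should work.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

# Streaming chat completion: yields ChatCompletionStreamOutput chunks whether the
# server has a native chat route (TGI) or the client falls back to text_generation.
for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "What is SSE?"}],
    max_tokens=60,
    stream=True,
):
    print(chunk.choices[0].delta.content or "", end="")
```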
@@ -326,3 +422,61 @@ def _set_as_non_tgi(model: Optional[str]) -> None:
 
 def _is_tgi_server(model: Optional[str]) -> bool:
     return model not in _NON_TGI_SERVERS
+
+
+_NON_CHAT_COMPLETION_SERVER: Set[str] = set()
+
+
+def _set_as_non_chat_completion_server(model: str) -> None:
+    print("Set as non chat completion", model)
+    _NON_CHAT_COMPLETION_SERVER.add(model)
+
+
+def _is_chat_completion_server(model: str) -> bool:
+    return model not in _NON_CHAT_COMPLETION_SERVER
+
+
+# TEXT GENERATION ERRORS
+# ----------------------
+# Text-generation errors are parsed separately to handle as much as possible the errors returned by the text generation
+# inference project (https://github.com/huggingface/text-generation-inference).
+# ----------------------
+
+
+def raise_text_generation_error(http_error: HTTPError) -> NoReturn:
+    """
+    Try to parse text-generation-inference error message and raise HTTPError in any case.
+
+    Args:
+        error (`HTTPError`):
+            The HTTPError that have been raised.
+    """
+    # Try to parse a Text Generation Inference error
+
+    try:
+        # Hacky way to retrieve payload in case of aiohttp error
+        payload = getattr(http_error, "response_error_payload", None) or http_error.response.json()
+        error = payload.get("error")
+        error_type = payload.get("error_type")
+    except Exception:  # no payload
+        raise http_error
+
+    # If error_type => more information than `hf_raise_for_status`
+    if error_type is not None:
+        exception = _parse_text_generation_error(error, error_type)
+        raise exception from http_error
+
+    # Otherwise, fallback to default error
+    raise http_error
+
+
+def _parse_text_generation_error(error: Optional[str], error_type: Optional[str]) -> TextGenerationError:
+    if error_type == "generation":
+        return GenerationError(error)  # type: ignore
+    if error_type == "incomplete_generation":
+        return IncompleteGenerationError(error)  # type: ignore
+    if error_type == "overloaded":
+        return OverloadedError(error)  # type: ignore
+    if error_type == "validation":
+        return ValidationError(error)  # type: ignore
+    return UnknownError(error)  # type: ignore