huggingface-hub 0.22.2__py3-none-any.whl → 0.23.0__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +51 -19
- huggingface_hub/_commit_api.py +9 -8
- huggingface_hub/_commit_scheduler.py +2 -2
- huggingface_hub/_inference_endpoints.py +10 -17
- huggingface_hub/_local_folder.py +229 -0
- huggingface_hub/_login.py +4 -3
- huggingface_hub/_multi_commits.py +1 -1
- huggingface_hub/_snapshot_download.py +16 -38
- huggingface_hub/_tensorboard_logger.py +16 -6
- huggingface_hub/_webhooks_payload.py +22 -1
- huggingface_hub/_webhooks_server.py +24 -20
- huggingface_hub/commands/download.py +11 -34
- huggingface_hub/commands/huggingface_cli.py +2 -0
- huggingface_hub/commands/tag.py +159 -0
- huggingface_hub/constants.py +3 -5
- huggingface_hub/errors.py +58 -0
- huggingface_hub/file_download.py +545 -376
- huggingface_hub/hf_api.py +756 -622
- huggingface_hub/hf_file_system.py +14 -5
- huggingface_hub/hub_mixin.py +127 -43
- huggingface_hub/inference/_client.py +402 -183
- huggingface_hub/inference/_common.py +19 -29
- huggingface_hub/inference/_generated/_async_client.py +402 -184
- huggingface_hub/inference/_generated/types/__init__.py +23 -6
- huggingface_hub/inference/_generated/types/chat_completion.py +197 -43
- huggingface_hub/inference/_generated/types/text_generation.py +57 -79
- huggingface_hub/inference/_templating.py +2 -4
- huggingface_hub/keras_mixin.py +0 -3
- huggingface_hub/lfs.py +9 -1
- huggingface_hub/repository.py +1 -0
- huggingface_hub/utils/__init__.py +12 -6
- huggingface_hub/utils/_fixes.py +1 -0
- huggingface_hub/utils/_headers.py +2 -4
- huggingface_hub/utils/_http.py +2 -4
- huggingface_hub/utils/_paths.py +13 -1
- huggingface_hub/utils/_runtime.py +10 -0
- huggingface_hub/utils/_safetensors.py +0 -13
- huggingface_hub/utils/_validators.py +2 -7
- huggingface_hub/utils/tqdm.py +124 -46
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/METADATA +5 -1
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/RECORD +45 -43
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.22.2.dist-info → huggingface_hub-0.23.0.dist-info}/top_level.txt +0 -0
The hunks shown below correspond to `huggingface_hub/inference/_common.py` (+19 -29 in the list above).

```diff
--- a/huggingface_hub/inference/_common.py
+++ b/huggingface_hub/inference/_common.py
@@ -18,7 +18,6 @@ import base64
 import io
 import json
 import logging
-import time
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
@@ -70,7 +69,7 @@ from ._generated.types import (
 
 if TYPE_CHECKING:
     from aiohttp import ClientResponse, ClientSession
-    from PIL import Image
+    from PIL.Image import Image
 
 # TYPES
 UrlT = str
```
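The second hunk swaps a type-checking-only import: `from PIL import Image` binds the `PIL.Image` module, while `from PIL.Image import Image` binds the image class itself, so string annotations such as `"Image"` resolve to the type that image objects actually have. A minimal sketch of the difference; the `to_grayscale` helper is hypothetical and only for illustration:

```python
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # `PIL.Image` is a module; `PIL.Image.Image` is the class that objects
    # returned by e.g. Image.open() belong to. Importing the class makes the
    # forward references below point at a real type for static checkers.
    from PIL.Image import Image


def to_grayscale(image: "Image") -> "Image":
    # Hypothetical helper, for illustration only; convert() exists on PIL images.
    return image.convert("L")
```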
```diff
@@ -307,30 +306,17 @@ def _format_text_generation_stream_output(
     return output.token.text if not details else output
 
 
-def _stream_chat_completion_response_from_text_generation(
-    text_generation_output: Iterable[TextGenerationStreamOutput],
-) -> Iterable[ChatCompletionStreamOutput]:
-    """Used in `InferenceClient.chat_completion`."""
-    created = int(time.time())
-    for item in text_generation_output:
-        yield _format_chat_completion_stream_output_from_text_generation(item, created)
-
-
-async def _async_stream_chat_completion_response_from_text_generation(
-    text_generation_output: AsyncIterable[TextGenerationStreamOutput],
-) -> AsyncIterable[ChatCompletionStreamOutput]:
-    """Used in `AsyncInferenceClient.chat_completion`."""
-    created = int(time.time())
-    async for item in text_generation_output:
-        yield _format_chat_completion_stream_output_from_text_generation(item, created)
-
-
 def _format_chat_completion_stream_output_from_text_generation(
     item: TextGenerationStreamOutput, created: int
 ) -> ChatCompletionStreamOutput:
     if item.details is None:
         # new token generated => return delta
         return ChatCompletionStreamOutput(
+            # explicitly set 'dummy' values to reduce expectations from users
+            id="dummy",
+            model="dummy",
+            object="dummy",
+            system_fingerprint="dummy",
             choices=[
                 ChatCompletionStreamOutputChoice(
                     delta=ChatCompletionStreamOutputDelta(
@@ -346,9 +332,14 @@ def _format_chat_completion_stream_output_from_text_generation(
     else:
         # generation is completed => return finish reason
         return ChatCompletionStreamOutput(
+            # explicitly set 'dummy' values to reduce expectations from users
+            id="dummy",
+            model="dummy",
+            object="dummy",
+            system_fingerprint="dummy",
             choices=[
                 ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(),
+                    delta=ChatCompletionStreamOutputDelta(role="assistant"),
                     finish_reason=item.details.finish_reason,
                     index=0,
                 )
```
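These two hunks change how streamed `chat_completion` chunks are built when the text-generation route is used: the OpenAI-style envelope fields (`id`, `model`, `object`, `system_fingerprint`) are filled with explicit `"dummy"` placeholders, and the final chunk's delta now carries `role="assistant"` instead of being empty. A hedged usage sketch of a caller reading those fields from the stream; the model name and prompt are placeholders, and it assumes network access (and a token for gated models):

```python
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")  # placeholder model name

for chunk in client.chat_completion(
    messages=[{"role": "user", "content": "Say hello"}],
    max_tokens=20,
    stream=True,
):
    choice = chunk.choices[0]
    if choice.finish_reason is None:
        # Intermediate chunk: the delta holds the new token; envelope fields
        # (chunk.id, chunk.model, ...) may be the "dummy" placeholders above.
        print(choice.delta.content or "", end="")
    else:
        # Final chunk: the delta only carries role="assistant", plus the finish reason.
        print(f"\nfinish_reason={choice.finish_reason}")
```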
```diff
@@ -403,8 +394,8 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # Both approaches have very similar APIs, but not exactly the same. What we do first in
 # the `text_generation` method is to assume the model is served via TGI. If we realize
 # it's not the case (i.e. we receive an HTTP 400 Bad Request), we fallback to the
-# default API with a warning message.
-#
+# default API with a warning message. When that's the case, We remember the unsupported
+# attributes for this model in the `_UNSUPPORTED_TEXT_GENERATION_KWARGS` global variable.
 #
 # In addition, TGI servers have a built-in API route for chat-completion, which is not
 # available on the default API. We use this route to provide a more consistent behavior
@@ -413,22 +404,21 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # For more details, see https://github.com/huggingface/text-generation-inference and
 # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task.
 
-
+_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}
 
 
-def
-
+def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
+    _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)
 
 
-def
-    return model
+def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
+    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])
 
 
 _NON_CHAT_COMPLETION_SERVER: Set[str] = set()
 
 
 def _set_as_non_chat_completion_server(model: str) -> None:
-    print("Set as non chat completion", model)
     _NON_CHAT_COMPLETION_SERVER.add(model)
 
 
```
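The last two hunks introduce a per-model record of text-generation kwargs that the non-TGI fallback API rejected, so later calls can drop them instead of failing again, as the updated module comment explains. The two `_set_unsupported_text_generation_kwargs` / `_get_unsupported_text_generation_kwargs` helpers below are copied from the diff; everything else (`_query`, `_BadRequest`, `generate`) is a hypothetical stand-in sketching the retry-and-remember flow, not the library's actual code:

```python
import warnings
from typing import Any, Dict, List, Optional

# Per-model record of kwargs the default (non-TGI) API rejected, as in the diff above.
_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}


def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
    _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)


def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
    return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])


class _BadRequest(Exception):
    """Stand-in for the HTTP 400 the real client receives for unsupported parameters."""

    def __init__(self, unsupported: List[str]) -> None:
        super().__init__(f"unsupported parameters: {unsupported}")
        self.unsupported = unsupported


def _query(model: str, prompt: str, **kwargs: Any) -> str:
    """Fake backend: pretends only `max_new_tokens` is supported for this model."""
    rejected = [k for k in kwargs if k != "max_new_tokens"]
    if rejected:
        raise _BadRequest(rejected)
    return f"{model} completed: {prompt}..."


def generate(model: str, prompt: str, **kwargs: Any) -> str:
    # Drop kwargs already known to be unsupported for this model, with a warning.
    known_bad = set(_get_unsupported_text_generation_kwargs(model))
    ignored = [k for k in kwargs if k in known_bad]
    for key in ignored:
        kwargs.pop(key)
    if ignored:
        warnings.warn(f"Ignoring unsupported text-generation kwargs for {model}: {ignored}")
    try:
        return _query(model, prompt, **kwargs)
    except _BadRequest as e:
        # First failure: remember the rejected kwargs, then retry without them.
        _set_unsupported_text_generation_kwargs(model, e.unsupported)
        for key in e.unsupported:
            kwargs.pop(key, None)
        return _query(model, prompt, **kwargs)


print(generate("some-model", "Hello", max_new_tokens=20, watermark=True))        # retries, records "watermark"
print(generate("some-model", "Hello again", max_new_tokens=20, watermark=True))  # warns and skips it up front
```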