PyPI - huggingface-hub - Versions diffs - 0.23.5__py3-none-any.whl → 0.24.1__py3-none-any.whl - Mend

huggingface-hub 0.23.5__py3-none-any.whl → 0.24.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic. Click here for more details.

Files changed (42) hide show

huggingface_hub/__init__.py +47 -15
huggingface_hub/_commit_api.py +38 -8
huggingface_hub/_inference_endpoints.py +11 -4
huggingface_hub/_local_folder.py +22 -13
huggingface_hub/_snapshot_download.py +12 -7
huggingface_hub/_webhooks_server.py +3 -1
huggingface_hub/commands/huggingface_cli.py +4 -3
huggingface_hub/commands/repo_files.py +128 -0
huggingface_hub/constants.py +12 -0
huggingface_hub/file_download.py +127 -91
huggingface_hub/hf_api.py +976 -341
huggingface_hub/hf_file_system.py +30 -3
huggingface_hub/inference/_client.py +408 -147
huggingface_hub/inference/_common.py +25 -63
huggingface_hub/inference/_generated/_async_client.py +425 -153
huggingface_hub/inference/_generated/types/__init__.py +4 -1
huggingface_hub/inference/_generated/types/chat_completion.py +41 -21
huggingface_hub/inference/_generated/types/feature_extraction.py +23 -5
huggingface_hub/inference/_generated/types/text_generation.py +29 -0
huggingface_hub/lfs.py +11 -6
huggingface_hub/repocard_data.py +3 -3
huggingface_hub/repository.py +6 -6
huggingface_hub/serialization/__init__.py +8 -3
huggingface_hub/serialization/_base.py +13 -16
huggingface_hub/serialization/_tensorflow.py +4 -3
huggingface_hub/serialization/_torch.py +399 -22
huggingface_hub/utils/__init__.py +0 -1
huggingface_hub/utils/_errors.py +1 -1
huggingface_hub/utils/_fixes.py +14 -3
huggingface_hub/utils/_paths.py +17 -6
huggingface_hub/utils/_subprocess.py +0 -1
huggingface_hub/utils/_telemetry.py +9 -1
huggingface_hub/utils/endpoint_helpers.py +2 -186
huggingface_hub/utils/sha.py +36 -1
huggingface_hub/utils/tqdm.py +0 -1
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/METADATA +12 -9
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/RECORD +41 -41
huggingface_hub/serialization/_numpy.py +0 -68
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/LICENSE +0 -0
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/WHEEL +0 -0
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/entry_points.txt +0 -0
{huggingface_hub-0.23.5.dist-info → huggingface_hub-0.24.1.dist-info}/top_level.txt +0 -0

huggingface_hub/inference/_common.py CHANGED Viewed

@@ -34,7 +34,6 @@ from typing import (
     Literal,
     NoReturn,
     Optional,
-    Set,
     Union,
     overload,
 )
@@ -61,8 +60,6 @@ from ..utils import (
 )
 from ._generated.types import (
     ChatCompletionStreamOutput,
-    ChatCompletionStreamOutputChoice,
-    ChatCompletionStreamOutputDelta,
     TextGenerationStreamOutput,
 )
@@ -271,7 +268,10 @@ def _stream_text_generation_response(
     """Used in `InferenceClient.text_generation`."""
     # Parse ServerSentEvents
     for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
         if output is not None:
             yield output
@@ -282,7 +282,10 @@ async def _async_stream_text_generation_response(
     """Used in `AsyncInferenceClient.text_generation`."""
     # Parse ServerSentEvents
     async for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
         if output is not None:
             yield output
@@ -293,6 +296,9 @@ def _format_text_generation_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line
+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
     # Decode payload
     payload = byte_payload.decode("utf-8")
     json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
@@ -306,74 +312,41 @@ def _format_text_generation_stream_output(
     return output.token.text if not details else output
-def _format_chat_completion_stream_output_from_text_generation(
-    item: TextGenerationStreamOutput, created: int
-) -> ChatCompletionStreamOutput:
-    if item.details is None:
-        # new token generated => return delta
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            object="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(
-                        role="assistant",
-                        content=item.token.text,
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-    else:
-        # generation is completed => return finish reason
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            object="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(role="assistant"),
-                    finish_reason=item.details.finish_reason,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-def _stream_chat_completion_response_from_bytes(
+def _stream_chat_completion_response(
     bytes_lines: Iterable[bytes],
 ) -> Iterable[ChatCompletionStreamOutput]:
     """Used in `InferenceClient.chat_completion` if model is served with TGI."""
     for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
         if output is not None:
             yield output
-async def _async_stream_chat_completion_response_from_bytes(
+async def _async_stream_chat_completion_response(
     bytes_lines: AsyncIterable[bytes],
 ) -> AsyncIterable[ChatCompletionStreamOutput]:
     """Used in `AsyncInferenceClient.chat_completion`."""
     async for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
         if output is not None:
             yield output
-def _format_chat_completion_stream_output_from_text_generation_from_bytes(
+def _format_chat_completion_stream_output(
     byte_payload: bytes,
 ) -> Optional[ChatCompletionStreamOutput]:
     if not byte_payload.startswith(b"data:"):
         return None  # empty line
+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
     # Decode payload
     payload = byte_payload.decode("utf-8")
     json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
@@ -415,17 +388,6 @@ def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
     return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])
-_NON_CHAT_COMPLETION_SERVER: Set[str] = set()
-def _set_as_non_chat_completion_server(model: str) -> None:
-    _NON_CHAT_COMPLETION_SERVER.add(model)
-def _is_chat_completion_server(model: str) -> bool:
-    return model not in _NON_CHAT_COMPLETION_SERVER
 # TEXT GENERATION ERRORS
 # ----------------------
 # Text-generation errors are parsed separately to handle as much as possible the errors returned by the text generation

huggingface-hub 0.23.5__py3-none-any.whl → 0.24.1__py3-none-any.whl

Potentially problematic release.

huggingface-hub 0.23.5__py3-none-any.whl → 0.24.1__py3-none-any.whl