huggingface-hub 0.24.0rc0__py3-none-any.whl → 0.24.1__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of huggingface-hub might be problematic.

huggingface_hub/__init__.py CHANGED
@@ -46,7 +46,7 @@ import sys
 from typing import TYPE_CHECKING
 
 
-__version__ = "0.24.0.rc0"
+__version__ = "0.24.1"
 
 # Alphabetical order of definitions is ensured in tests
 # WARNING: any comment added in this dictionary definition will be lost when
huggingface_hub/hf_api.py CHANGED
@@ -149,7 +149,6 @@ ExpandModelProperty_T = Literal[
     "downloads",
     "downloadsAllTime",
     "gated",
-    "gitalyUid",
     "inference",
     "lastModified",
     "library_name",
@@ -177,7 +176,6 @@ ExpandDatasetProperty_T = Literal[
     "downloads",
     "downloadsAllTime",
     "gated",
-    "gitalyUid",
     "lastModified",
     "likes",
     "paperswithcode_id",
@@ -192,7 +190,6 @@ ExpandSpaceProperty_T = Literal[
     "cardData",
     "datasets",
     "disabled",
-    "gitalyUid",
     "lastModified",
     "createdAt",
     "likes",
@@ -1633,7 +1630,7 @@ class HfApi:
             expand (`List[ExpandModelProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `full`, `cardData` or `fetch_config` are passed.
-                Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gitalyUid"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"` and `"widgetData"`.
+                Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"` and `"widgetData"`.
             full (`bool`, *optional*):
                 Whether to fetch all model data, including the `last_modified`,
                 the `sha`, the files and the `tags`. This is set to `True` by
@@ -1836,7 +1833,7 @@ class HfApi:
             expand (`List[ExpandDatasetProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `full` is passed.
-                Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gitalyUid"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"` and `"tags"`.
+                Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"` and `"tags"`.
             full (`bool`, *optional*):
                 Whether to fetch all dataset data, including the `last_modified`,
                 the `card_data` and the files. Can contain useful information such as the
@@ -2017,7 +2014,7 @@ class HfApi:
             expand (`List[ExpandSpaceProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `full` is passed.
-                Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"gitalyUid"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"models"`.
+                Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"models"`.
             full (`bool`, *optional*):
                 Whether to fetch all Spaces data, including the `last_modified`, `siblings`
                 and `card_data` fields.
@@ -2334,7 +2331,7 @@ class HfApi:
             expand (`List[ExpandModelProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `securityStatus` or `files_metadata` are passed.
-                Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gitalyUid"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"` and `"widgetData"`.
+                Possible values are `"author"`, `"cardData"`, `"config"`, `"createdAt"`, `"disabled"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"inference"`, `"lastModified"`, `"library_name"`, `"likes"`, `"mask_token"`, `"model-index"`, `"pipeline_tag"`, `"private"`, `"safetensors"`, `"sha"`, `"siblings"`, `"spaces"`, `"tags"`, `"transformersInfo"` and `"widgetData"`.
             token (Union[bool, str, None], optional):
                 A valid user access token (string). Defaults to the locally saved
                 token, which is the recommended method for authentication (see
@@ -2408,7 +2405,7 @@ class HfApi:
             expand (`List[ExpandDatasetProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `files_metadata` is passed.
-                Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"gitalyUid"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"` and `"tags"`.
+                Possible values are `"author"`, `"cardData"`, `"citation"`, `"createdAt"`, `"disabled"`, `"description"`, `"downloads"`, `"downloadsAllTime"`, `"gated"`, `"lastModified"`, `"likes"`, `"paperswithcode_id"`, `"private"`, `"siblings"`, `"sha"` and `"tags"`.
             token (Union[bool, str, None], optional):
                 A valid user access token (string). Defaults to the locally saved
                 token, which is the recommended method for authentication (see
@@ -2481,7 +2478,7 @@ class HfApi:
             expand (`List[ExpandSpaceProperty_T]`, *optional*):
                 List properties to return in the response. When used, only the properties in the list will be returned.
                 This parameter cannot be used if `full` is passed.
-                Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"gitalyUid"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"models"`.
+                Possible values are `"author"`, `"cardData"`, `"datasets"`, `"disabled"`, `"lastModified"`, `"createdAt"`, `"likes"`, `"private"`, `"runtime"`, `"sdk"`, `"siblings"`, `"sha"`, `"subdomain"`, `"tags"` and `"models"`.
             token (Union[bool, str, None], optional):
                 A valid user access token (string). Defaults to the locally saved
                 token, which is the recommended method for authentication (see
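
With `gitalyUid` gone from the `Expand*Property_T` literals, passing it to `expand` now fails type-checking and is rejected server-side. For illustration, a minimal sketch using some of the remaining properties (the chosen property names and `limit` value are arbitrary):

    from huggingface_hub import HfApi

    api = HfApi()
    # Request only a subset of properties; "gitalyUid" is no longer a valid entry.
    for model in api.list_models(expand=["downloads", "likes", "lastModified"], limit=5):
        print(model.id, model.downloads, model.likes)
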
huggingface_hub/inference/_client.py CHANGED
@@ -66,11 +66,9 @@ from huggingface_hub.inference._common import (
     _fetch_recommended_models,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _is_chat_completion_server,
     _open_as_binary,
-    _set_as_non_chat_completion_server,
     _set_unsupported_text_generation_kwargs,
-    _stream_chat_completion_response_from_bytes,
+    _stream_chat_completion_response,
     _stream_text_generation_response,
     raise_text_generation_error,
 )
@@ -82,8 +80,6 @@ from huggingface_hub.inference._generated.types import (
     ChatCompletionInputTool,
     ChatCompletionInputToolTypeClass,
     ChatCompletionOutput,
-    ChatCompletionOutputComplete,
-    ChatCompletionOutputMessage,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
@@ -189,7 +185,7 @@ class InferenceClient:
             )
 
         self.model: Optional[str] = model
-        self.token: Union[str, bool, None] = token or api_key
+        self.token: Union[str, bool, None] = token if token is not None else api_key
         self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token))  # 'authorization' + 'user-agent'
         if headers is not None:
             self.headers.update(headers)
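
The switch from `token or api_key` to `token if token is not None else api_key` is a truthiness fix: `token=False` is a deliberate opt-out of authentication, but `False or api_key` would silently override it. A small illustration (values are hypothetical):

    token, api_key = False, "hf_xxx"  # hypothetical values
    print(token or api_key)                         # "hf_xxx" -- the explicit False is lost
    print(token if token is not None else api_key)  # False   -- the opt-out is preserved
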
@@ -818,123 +814,52 @@ class InferenceClient:
         # since `chat_completion(..., model=xxx)` is also a payload parameter for the
         # server, we need to handle it differently
         model = self.base_url or self.model or model or self.get_recommended_model("text-generation")
+        is_url = model.startswith(("http://", "https://"))
+
+        # First, resolve the model chat completions URL
+        if model == self.base_url:
+            # base_url passed => add server route
+            model_url = model + "/v1/chat/completions"
+        elif is_url:
+            # model is a URL => use it directly
+            model_url = model
+        else:
+            # model is a model ID => resolve it + add server route
+            model_url = self._resolve_url(model) + "/v1/chat/completions"
+
+        # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
+        # If it's an ID on the Hub => use it. Otherwise, use a random string.
+        model_id = model if not is_url and model.count("/") == 1 else "tgi"
+
+        data = self.post(
+            model=model_url,
+            json=dict(
+                model=model_id,
+                messages=messages,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                logprobs=logprobs,
+                max_tokens=max_tokens,
+                n=n,
+                presence_penalty=presence_penalty,
+                response_format=response_format,
+                seed=seed,
+                stop=stop,
+                temperature=temperature,
+                tool_choice=tool_choice,
+                tool_prompt=tool_prompt,
+                tools=tools,
+                top_logprobs=top_logprobs,
+                top_p=top_p,
+                stream=stream,
+            ),
+            stream=stream,
+        )
 
-        if _is_chat_completion_server(model):
-            # First, let's consider the server has a `/v1/chat/completions` endpoint.
-            # If that's the case, we don't have to render the chat template client-side.
-            model_url = self._resolve_url(model)
-            if not model_url.endswith("/chat/completions"):
-                model_url += "/v1/chat/completions"
-
-            # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
-            if not model.startswith("http") and model.count("/") == 1:
-                # If it's a ID on the Hub => use it
-                model_id = model
-            else:
-                # Otherwise, we use a random string
-                model_id = "tgi"
-
-            try:
-                data = self.post(
-                    model=model_url,
-                    json=dict(
-                        model=model_id,
-                        messages=messages,
-                        frequency_penalty=frequency_penalty,
-                        logit_bias=logit_bias,
-                        logprobs=logprobs,
-                        max_tokens=max_tokens,
-                        n=n,
-                        presence_penalty=presence_penalty,
-                        response_format=response_format,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        tool_choice=tool_choice,
-                        tool_prompt=tool_prompt,
-                        tools=tools,
-                        top_logprobs=top_logprobs,
-                        top_p=top_p,
-                        stream=stream,
-                    ),
-                    stream=stream,
-                )
-            except HTTPError as e:
-                if e.response.status_code in (400, 404, 500):
-                    # Let's consider the server is not a chat completion server.
-                    # Then we call again `chat_completion` which will render the chat template client side.
-                    # (can be HTTP 500, HTTP 400, HTTP 404 depending on the server)
-                    _set_as_non_chat_completion_server(model)
-                    logger.warning(
-                        f"Server {model_url} does not seem to support chat completion. Falling back to text generation. Error: {e}"
-                    )
-                    return self.chat_completion(
-                        messages=messages,
-                        model=model,
-                        stream=stream,
-                        max_tokens=max_tokens,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        top_p=top_p,
-                    )
-                raise
-
-            if stream:
-                return _stream_chat_completion_response_from_bytes(data)  # type: ignore[arg-type]
-
-            return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
-
-        # At this point, we know the server is not a chat completion server.
-        # It means it's a transformers-backed server for which we can send a list of messages directly to the
-        # `text-generation` pipeline. We won't receive a detailed response but only the generated text.
         if stream:
-            raise ValueError(
-                "Streaming token is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. Please pass `stream=False` as input."
-            )
-        if tool_choice is not None or tool_prompt is not None or tools is not None:
-            warnings.warn(
-                "Tools are not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided tool parameters will be ignored."
-            )
-        if response_format is not None:
-            warnings.warn(
-                "Response format is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided response format will be ignored."
-            )
+            return _stream_chat_completion_response(data)  # type: ignore[arg-type]
 
-        # generate response
-        text_generation_output = self.text_generation(
-            prompt=messages,  # type: ignore # Not correct type but works implicitly
-            model=model,
-            stream=False,
-            details=False,
-            max_new_tokens=max_tokens,
-            seed=seed,
-            stop_sequences=stop,
-            temperature=temperature,
-            top_p=top_p,
-        )
-
-        # Format as a ChatCompletionOutput with dummy values for fields we can't provide
-        return ChatCompletionOutput(
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            usage=None,  # type: ignore # set to `None` as we don't want to provide false information
-            created=int(time.time()),
-            choices=[
-                ChatCompletionOutputComplete(
-                    finish_reason="unk",  # type: ignore # set to `unk` as we don't want to provide false information
-                    index=0,
-                    message=ChatCompletionOutputMessage(
-                        content=text_generation_output,
-                        role="assistant",
-                    ),
-                )
-            ],
-        )
+        return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
 
     def conversational(
         self,
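
`chat_completion` now always targets an OpenAI-compatible `/v1/chat/completions` route instead of probing the server and falling back to client-side chat templating. A minimal usage sketch (the endpoint URL is a placeholder):

    from huggingface_hub import InferenceClient

    # base_url => requests go to "<base_url>/v1/chat/completions"
    client = InferenceClient(base_url="http://localhost:8080")  # placeholder URL
    # Alternatively, pass a Hub model ID: InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
    output = client.chat_completion(
        messages=[{"role": "user", "content": "Hello"}],
        max_tokens=16,
    )
    print(output.choices[0].message.content)
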
@@ -2251,7 +2176,12 @@ class InferenceClient:
         if stream:
             return _stream_text_generation_response(bytes_output, details)  # type: ignore
 
-        data = _bytes_to_dict(bytes_output)[0]  # type: ignore[arg-type]
+        data = _bytes_to_dict(bytes_output)  # type: ignore[arg-type]
+
+        # Data can be a single element (dict) or a list of dicts, in which case we take the first element.
+        if isinstance(data, list):
+            data = data[0]
+
         return TextGenerationOutput.parse_obj_as_instance(data) if details else data["generated_text"]
 
     def text_to_image(
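
The old code indexed `[0]` unconditionally, which raised on backends that return a bare JSON object rather than a single-element list. The normalization accepts both shapes, illustrated here with hypothetical payloads:

    # Two response shapes (hypothetical values):
    from_tgi = [{"generated_text": "Hello world"}]   # list-wrapped
    from_other = {"generated_text": "Hello world"}   # bare dict

    for data in (from_tgi, from_other):
        if isinstance(data, list):
            data = data[0]
        print(data["generated_text"])
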
huggingface_hub/inference/_common.py CHANGED
@@ -34,7 +34,6 @@ from typing import (
     Literal,
     NoReturn,
     Optional,
-    Set,
     Union,
     overload,
 )
@@ -61,8 +60,6 @@ from ..utils import (
 )
 from ._generated.types import (
     ChatCompletionStreamOutput,
-    ChatCompletionStreamOutputChoice,
-    ChatCompletionStreamOutputDelta,
     TextGenerationStreamOutput,
 )
 
@@ -271,7 +268,10 @@ def _stream_text_generation_response(
     """Used in `InferenceClient.text_generation`."""
     # Parse ServerSentEvents
     for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
         if output is not None:
             yield output
 
@@ -282,7 +282,10 @@ async def _async_stream_text_generation_response(
     """Used in `AsyncInferenceClient.text_generation`."""
     # Parse ServerSentEvents
     async for byte_payload in bytes_output_as_lines:
-        output = _format_text_generation_stream_output(byte_payload, details)
+        try:
+            output = _format_text_generation_stream_output(byte_payload, details)
+        except StopIteration:
+            break
         if output is not None:
             yield output
 
@@ -293,6 +296,9 @@ def _format_text_generation_stream_output(
     if not byte_payload.startswith(b"data:"):
         return None  # empty line
 
+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+
     # Decode payload
     payload = byte_payload.decode("utf-8")
     json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
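
Recent TGI versions terminate a server-sent-events stream with the literal line `data: [DONE]`, which is not valid JSON; without this guard, `json.loads` would raise. Raising `StopIteration` lets the stream wrappers above break out of their loops cleanly. A self-contained sketch of the same pattern:

    import json
    from typing import Iterable, Iterator

    def parse_sse(lines: Iterable[bytes]) -> Iterator[dict]:
        # Yield decoded JSON payloads, stopping at the "[DONE]" sentinel.
        for line in lines:
            if not line.startswith(b"data:"):
                continue  # skip empty/keep-alive lines
            if line == b"data: [DONE]":
                break
            yield json.loads(line[len(b"data:"):])

    stream = [b'data: {"token": "Hi"}', b"data: [DONE]"]
    print(list(parse_sse(stream)))  # [{'token': 'Hi'}]
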
@@ -306,72 +312,41 @@ def _format_text_generation_stream_output(
     return output.token.text if not details else output
 
 
-def _format_chat_completion_stream_output_from_text_generation(
-    item: TextGenerationStreamOutput, created: int
-) -> ChatCompletionStreamOutput:
-    if item.details is None:
-        # new token generated => return delta
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(
-                        role="assistant",
-                        content=item.token.text,
-                    ),
-                    finish_reason=None,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-    else:
-        # generation is completed => return finish reason
-        return ChatCompletionStreamOutput(
-            # explicitly set 'dummy' values to reduce expectations from users
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            choices=[
-                ChatCompletionStreamOutputChoice(
-                    delta=ChatCompletionStreamOutputDelta(role="assistant"),
-                    finish_reason=item.details.finish_reason,
-                    index=0,
-                )
-            ],
-            created=created,
-        )
-
-
-def _stream_chat_completion_response_from_bytes(
+def _stream_chat_completion_response(
     bytes_lines: Iterable[bytes],
 ) -> Iterable[ChatCompletionStreamOutput]:
     """Used in `InferenceClient.chat_completion` if model is served with TGI."""
     for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
         if output is not None:
             yield output
 
 
-async def _async_stream_chat_completion_response_from_bytes(
+async def _async_stream_chat_completion_response(
     bytes_lines: AsyncIterable[bytes],
 ) -> AsyncIterable[ChatCompletionStreamOutput]:
     """Used in `AsyncInferenceClient.chat_completion`."""
     async for item in bytes_lines:
-        output = _format_chat_completion_stream_output_from_text_generation_from_bytes(item)
+        try:
+            output = _format_chat_completion_stream_output(item)
+        except StopIteration:
+            break
         if output is not None:
             yield output
 
 
-def _format_chat_completion_stream_output_from_text_generation_from_bytes(
+def _format_chat_completion_stream_output(
     byte_payload: bytes,
 ) -> Optional[ChatCompletionStreamOutput]:
     if not byte_payload.startswith(b"data:"):
         return None  # empty line
 
+    if byte_payload == b"data: [DONE]":
+        raise StopIteration("[DONE] signal received.")
+
     # Decode payload
     payload = byte_payload.decode("utf-8")
     json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
@@ -413,17 +388,6 @@ def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
     return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])
 
 
-_NON_CHAT_COMPLETION_SERVER: Set[str] = set()
-
-
-def _set_as_non_chat_completion_server(model: str) -> None:
-    _NON_CHAT_COMPLETION_SERVER.add(model)
-
-
-def _is_chat_completion_server(model: str) -> bool:
-    return model not in _NON_CHAT_COMPLETION_SERVER
-
-
 # TEXT GENERATION ERRORS
 # ----------------------
 # Text-generation errors are parsed separately to handle as much as possible the errors returned by the text generation
huggingface_hub/inference/_generated/_async_client.py CHANGED
@@ -44,7 +44,7 @@ from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
     ModelStatus,
-    _async_stream_chat_completion_response_from_bytes,
+    _async_stream_chat_completion_response,
     _async_stream_text_generation_response,
     _b64_encode,
     _b64_to_image,
@@ -54,9 +54,7 @@ from huggingface_hub.inference._common import (
     _fetch_recommended_models,
     _get_unsupported_text_generation_kwargs,
     _import_numpy,
-    _is_chat_completion_server,
     _open_as_binary,
-    _set_as_non_chat_completion_server,
     _set_unsupported_text_generation_kwargs,
     raise_text_generation_error,
 )
@@ -68,8 +66,6 @@ from huggingface_hub.inference._generated.types import (
     ChatCompletionInputTool,
     ChatCompletionInputToolTypeClass,
     ChatCompletionOutput,
-    ChatCompletionOutputComplete,
-    ChatCompletionOutputMessage,
     ChatCompletionStreamOutput,
     DocumentQuestionAnsweringOutputElement,
     FillMaskOutputElement,
@@ -174,7 +170,7 @@ class AsyncInferenceClient:
             )
 
         self.model: Optional[str] = model
-        self.token: Union[str, bool, None] = token or api_key
+        self.token: Union[str, bool, None] = token if token is not None else api_key
         self.headers = CaseInsensitiveDict(build_hf_headers(token=self.token))  # 'authorization' + 'user-agent'
         if headers is not None:
             self.headers.update(headers)
@@ -824,123 +820,52 @@ class AsyncInferenceClient:
         # since `chat_completion(..., model=xxx)` is also a payload parameter for the
         # server, we need to handle it differently
         model = self.base_url or self.model or model or self.get_recommended_model("text-generation")
+        is_url = model.startswith(("http://", "https://"))
+
+        # First, resolve the model chat completions URL
+        if model == self.base_url:
+            # base_url passed => add server route
+            model_url = model + "/v1/chat/completions"
+        elif is_url:
+            # model is a URL => use it directly
+            model_url = model
+        else:
+            # model is a model ID => resolve it + add server route
+            model_url = self._resolve_url(model) + "/v1/chat/completions"
+
+        # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
+        # If it's an ID on the Hub => use it. Otherwise, use a random string.
+        model_id = model if not is_url and model.count("/") == 1 else "tgi"
+
+        data = await self.post(
+            model=model_url,
+            json=dict(
+                model=model_id,
+                messages=messages,
+                frequency_penalty=frequency_penalty,
+                logit_bias=logit_bias,
+                logprobs=logprobs,
+                max_tokens=max_tokens,
+                n=n,
+                presence_penalty=presence_penalty,
+                response_format=response_format,
+                seed=seed,
+                stop=stop,
+                temperature=temperature,
+                tool_choice=tool_choice,
+                tool_prompt=tool_prompt,
+                tools=tools,
+                top_logprobs=top_logprobs,
+                top_p=top_p,
+                stream=stream,
+            ),
+            stream=stream,
+        )
 
-        if _is_chat_completion_server(model):
-            # First, let's consider the server has a `/v1/chat/completions` endpoint.
-            # If that's the case, we don't have to render the chat template client-side.
-            model_url = self._resolve_url(model)
-            if not model_url.endswith("/chat/completions"):
-                model_url += "/v1/chat/completions"
-
-            # `model` is sent in the payload. Not used by the server but can be useful for debugging/routing.
-            if not model.startswith("http") and model.count("/") == 1:
-                # If it's a ID on the Hub => use it
-                model_id = model
-            else:
-                # Otherwise, we use a random string
-                model_id = "tgi"
-
-            try:
-                data = await self.post(
-                    model=model_url,
-                    json=dict(
-                        model=model_id,
-                        messages=messages,
-                        frequency_penalty=frequency_penalty,
-                        logit_bias=logit_bias,
-                        logprobs=logprobs,
-                        max_tokens=max_tokens,
-                        n=n,
-                        presence_penalty=presence_penalty,
-                        response_format=response_format,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        tool_choice=tool_choice,
-                        tool_prompt=tool_prompt,
-                        tools=tools,
-                        top_logprobs=top_logprobs,
-                        top_p=top_p,
-                        stream=stream,
-                    ),
-                    stream=stream,
-                )
-            except _import_aiohttp().ClientResponseError as e:
-                if e.status in (400, 404, 500):
-                    # Let's consider the server is not a chat completion server.
-                    # Then we call again `chat_completion` which will render the chat template client side.
-                    # (can be HTTP 500, HTTP 400, HTTP 404 depending on the server)
-                    _set_as_non_chat_completion_server(model)
-                    logger.warning(
-                        f"Server {model_url} does not seem to support chat completion. Falling back to text generation. Error: {e}"
-                    )
-                    return await self.chat_completion(
-                        messages=messages,
-                        model=model,
-                        stream=stream,
-                        max_tokens=max_tokens,
-                        seed=seed,
-                        stop=stop,
-                        temperature=temperature,
-                        top_p=top_p,
-                    )
-                raise
-
-            if stream:
-                return _async_stream_chat_completion_response_from_bytes(data)  # type: ignore[arg-type]
-
-            return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
-
-        # At this point, we know the server is not a chat completion server.
-        # It means it's a transformers-backed server for which we can send a list of messages directly to the
-        # `text-generation` pipeline. We won't receive a detailed response but only the generated text.
         if stream:
-            raise ValueError(
-                "Streaming token is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. Please pass `stream=False` as input."
-            )
-        if tool_choice is not None or tool_prompt is not None or tools is not None:
-            warnings.warn(
-                "Tools are not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided tool parameters will be ignored."
-            )
-        if response_format is not None:
-            warnings.warn(
-                "Response format is not supported by the model. This is due to the model not been served by a "
-                "Text-Generation-Inference server. The provided response format will be ignored."
-            )
-
-        # generate response
-        text_generation_output = await self.text_generation(
-            prompt=messages,  # type: ignore # Not correct type but works implicitly
-            model=model,
-            stream=False,
-            details=False,
-            max_new_tokens=max_tokens,
-            seed=seed,
-            stop_sequences=stop,
-            temperature=temperature,
-            top_p=top_p,
-        )
+            return _async_stream_chat_completion_response(data)  # type: ignore[arg-type]
 
-        # Format as a ChatCompletionOutput with dummy values for fields we can't provide
-        return ChatCompletionOutput(
-            id="dummy",
-            model="dummy",
-            system_fingerprint="dummy",
-            usage=None,  # type: ignore # set to `None` as we don't want to provide false information
-            created=int(time.time()),
-            choices=[
-                ChatCompletionOutputComplete(
-                    finish_reason="unk",  # type: ignore # set to `unk` as we don't want to provide false information
-                    index=0,
-                    message=ChatCompletionOutputMessage(
-                        content=text_generation_output,
-                        role="assistant",
-                    ),
-                )
-            ],
-        )
+        return ChatCompletionOutput.parse_obj_as_instance(data)  # type: ignore[arg-type]
 
     async def conversational(
         self,
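
The async client mirrors the change. A minimal streaming sketch (the endpoint URL is a placeholder; `delta.content` may be None on the final chunk):

    import asyncio
    from huggingface_hub import AsyncInferenceClient

    async def main():
        client = AsyncInferenceClient(base_url="http://localhost:8080")  # placeholder URL
        stream = await client.chat_completion(
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=16,
            stream=True,
        )
        async for chunk in stream:
            print(chunk.choices[0].delta.content or "", end="")

    asyncio.run(main())
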
@@ -2282,7 +2207,12 @@ class AsyncInferenceClient:
         if stream:
             return _async_stream_text_generation_response(bytes_output, details)  # type: ignore
 
-        data = _bytes_to_dict(bytes_output)[0]  # type: ignore[arg-type]
+        data = _bytes_to_dict(bytes_output)  # type: ignore[arg-type]
+
+        # Data can be a single element (dict) or a list of dicts, in which case we take the first element.
+        if isinstance(data, list):
+            data = data[0]
+
         return TextGenerationOutput.parse_obj_as_instance(data) if details else data["generated_text"]
 
     async def text_to_image(
huggingface_hub-0.24.0rc0.dist-info/METADATA → huggingface_hub-0.24.1.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: huggingface-hub
-Version: 0.24.0rc0
+Version: 0.24.1
 Summary: Client library to download and publish models, datasets and other repos on the huggingface.co hub
 Home-page: https://github.com/huggingface/huggingface_hub
 Author: Hugging Face, Inc.
huggingface_hub-0.24.0rc0.dist-info/RECORD → huggingface_hub-0.24.1.dist-info/RECORD RENAMED
@@ -1,4 +1,4 @@
-huggingface_hub/__init__.py,sha256=Kd7XPNFlbXWrx5Pzhcvl4MqKFYd2ZGGf3_MF2tSvUsc,33901
+huggingface_hub/__init__.py,sha256=Uf3KJ-RqdzyayY4T0Yxr1X26y2w-Mrm9vLUKilotLI8,33897
 huggingface_hub/_commit_api.py,sha256=Yj1ft_WbsnqjSbiYHgdqGmLTF6BTA4E8kAGYW89t2sQ,31057
 huggingface_hub/_commit_scheduler.py,sha256=nlJS_vnLb8i92NLrRwJX8Mg9QZ7f3kfLbLlQuEd5YjU,13647
 huggingface_hub/_inference_endpoints.py,sha256=th6vlJ2vUg314x7uMLzQHfy4AuX5mFlJqNobVIz5yOY,15944
@@ -15,7 +15,7 @@ huggingface_hub/constants.py,sha256=BG3n2gl4JbxMw_JRvNTFyMcNnZIPzvT3KXSH-jm2J08,
 huggingface_hub/errors.py,sha256=IM0lNbExLzaYEs0HrrPvY4-kyj6DiP2Szu7Jy9slHOE,2083
 huggingface_hub/fastai_utils.py,sha256=5I7zAfgHJU_mZnxnf9wgWTHrCRu_EAV8VTangDVfE_o,16676
 huggingface_hub/file_download.py,sha256=Lf1RhCMb9HkXPUy90O_zUc-fonmFTwE2xadbZpVoKrM,84243
-huggingface_hub/hf_api.py,sha256=kFN02B2AFJEhK04PvMDZdWRKhiAz9zD3JZdDdPZJgjY,406833
+huggingface_hub/hf_api.py,sha256=YK4EcYD7vvGOjzAO_7pSrr2len7u4xa7yvwn6CojdIA,406692
 huggingface_hub/hf_file_system.py,sha256=HlYbWFhMrPWNqGUQfQrZR6H70QK0PgsxRvO4FantCNc,39160
 huggingface_hub/hub_mixin.py,sha256=bm5hZGeOHBSUBfiAXJv8cU05nAZr65TxnkUJLWLwAEg,37308
 huggingface_hub/inference_api.py,sha256=UXOKu_Ez2I3hDsjguqCcCrj03WFDndehpngYiIAucdg,8331
@@ -37,12 +37,12 @@ huggingface_hub/commands/tag.py,sha256=gCoR8G95lhHBzyVytTxT7MnqTmjKYtStDnHXcysOJ
 huggingface_hub/commands/upload.py,sha256=Mr69qO60otqCVw0sVSBPykUTkL9HO-pkCyulSD2mROM,13622
 huggingface_hub/commands/user.py,sha256=QApZJOCQEHADhjunM3hlQ72uqHsearCiCE4SdpzGdcc,6893
 huggingface_hub/inference/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-huggingface_hub/inference/_client.py,sha256=m1GX7Yd2VngZR9-RuFqudqEM3dPtKUIYCDphsHMR5Lw,132602
-huggingface_hub/inference/_common.py,sha256=3xbeCOjLgSPRJcbtxKnv1DNXr_TOMivOeQyvg-Ma1HU,16306
+huggingface_hub/inference/_client.py,sha256=6oJjWgDIGqKK52DU7VR2fQkqYqf2UGQbHWMEqVszaZU,129014
+huggingface_hub/inference/_common.py,sha256=EEF8T9jtfLvqhIwwDM0vt8S54yObExoBncJIiHvEew8,14882
 huggingface_hub/inference/_templating.py,sha256=LCy-U_25R-l5dhcEHsyRwiOrgvKQHXkdSmynWCfsPjI,3991
 huggingface_hub/inference/_types.py,sha256=C73l5-RO8P1UMBHF8OAO9CRUq7Xdv33pcADoJsGMPSU,1782
 huggingface_hub/inference/_generated/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-huggingface_hub/inference/_generated/_async_client.py,sha256=adlz58-FvC0-4X9VitsWkkHeD7vnZ_HAqqx33PkciYQ,136329
+huggingface_hub/inference/_generated/_async_client.py,sha256=X7dHAHJdDbAvho3_tsyOjH0spC_aJUCNeG_UclHSo_Q,132715
 huggingface_hub/inference/_generated/types/__init__.py,sha256=uEsA0z8Gcu34q0gNAZVcqHFqJT5BPrhnM9qS_LQgN0Q,5215
 huggingface_hub/inference/_generated/types/audio_classification.py,sha256=wk4kUTLQZoXWLpiUOpKRHRRE-JYqqJlzGVe62VACR-0,1347
 huggingface_hub/inference/_generated/types/audio_to_audio.py,sha256=n7GeCepzt254yoSLsdjrI1j4fzYgjWzxoaKE5gZJc48,881
@@ -107,9 +107,9 @@ huggingface_hub/utils/insecure_hashlib.py,sha256=OjxlvtSQHpbLp9PWSrXBDJ0wHjxCBU-
 huggingface_hub/utils/logging.py,sha256=Cp03s0uEl3kDM9XHQW9a8GAoExODQ-e7kEtgMt-_To8,4728
 huggingface_hub/utils/sha.py,sha256=OFnNGCba0sNcT2gUwaVCJnldxlltrHHe0DS_PCpV3C4,2134
 huggingface_hub/utils/tqdm.py,sha256=jQiVYwRG78HK4_54u0vTtz6Kt9IMGiHy3ixbIn3h2TU,9368
-huggingface_hub-0.24.0rc0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-huggingface_hub-0.24.0rc0.dist-info/METADATA,sha256=ELk2xmUxcdKGyHyj6vzBFxzinEdBPihANDN6klqCEng,13186
-huggingface_hub-0.24.0rc0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-huggingface_hub-0.24.0rc0.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
-huggingface_hub-0.24.0rc0.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
-huggingface_hub-0.24.0rc0.dist-info/RECORD,,
+huggingface_hub-0.24.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+huggingface_hub-0.24.1.dist-info/METADATA,sha256=PbJAesxB3sZZtDX1HgX0keQBQBdkp66KoK2XD_U0Ga8,13183
+huggingface_hub-0.24.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+huggingface_hub-0.24.1.dist-info/entry_points.txt,sha256=Y3Z2L02rBG7va_iE6RPXolIgwOdwUFONyRN3kXMxZ0g,131
+huggingface_hub-0.24.1.dist-info/top_level.txt,sha256=8KzlQJAY4miUvjAssOAJodqKOw3harNzuiwGQ9qLSSk,16
+huggingface_hub-0.24.1.dist-info/RECORD,,