huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff compares the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.

Files changed (125)
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +167 -10
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +13 -14
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/_cli_utils.py +2 -2
  15. huggingface_hub/cli/auth.py +5 -6
  16. huggingface_hub/cli/cache.py +14 -20
  17. huggingface_hub/cli/download.py +4 -4
  18. huggingface_hub/cli/jobs.py +560 -11
  19. huggingface_hub/cli/lfs.py +4 -4
  20. huggingface_hub/cli/repo.py +7 -7
  21. huggingface_hub/cli/repo_files.py +2 -2
  22. huggingface_hub/cli/upload.py +4 -4
  23. huggingface_hub/cli/upload_large_folder.py +3 -3
  24. huggingface_hub/commands/_cli_utils.py +2 -2
  25. huggingface_hub/commands/delete_cache.py +13 -13
  26. huggingface_hub/commands/download.py +4 -13
  27. huggingface_hub/commands/lfs.py +4 -4
  28. huggingface_hub/commands/repo_files.py +2 -2
  29. huggingface_hub/commands/scan_cache.py +1 -1
  30. huggingface_hub/commands/tag.py +1 -3
  31. huggingface_hub/commands/upload.py +4 -4
  32. huggingface_hub/commands/upload_large_folder.py +3 -3
  33. huggingface_hub/commands/user.py +5 -6
  34. huggingface_hub/community.py +5 -5
  35. huggingface_hub/constants.py +3 -41
  36. huggingface_hub/dataclasses.py +16 -19
  37. huggingface_hub/errors.py +42 -29
  38. huggingface_hub/fastai_utils.py +8 -9
  39. huggingface_hub/file_download.py +153 -252
  40. huggingface_hub/hf_api.py +815 -600
  41. huggingface_hub/hf_file_system.py +98 -62
  42. huggingface_hub/hub_mixin.py +37 -57
  43. huggingface_hub/inference/_client.py +177 -325
  44. huggingface_hub/inference/_common.py +110 -124
  45. huggingface_hub/inference/_generated/_async_client.py +226 -432
  46. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  47. huggingface_hub/inference/_generated/types/base.py +10 -7
  48. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  49. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  50. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  51. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  52. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  53. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  54. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  55. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  56. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  57. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  58. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  59. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/translation.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  64. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  65. huggingface_hub/inference/_mcp/agent.py +3 -3
  66. huggingface_hub/inference/_mcp/cli.py +1 -1
  67. huggingface_hub/inference/_mcp/constants.py +2 -3
  68. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  69. huggingface_hub/inference/_mcp/types.py +10 -7
  70. huggingface_hub/inference/_mcp/utils.py +11 -7
  71. huggingface_hub/inference/_providers/__init__.py +2 -2
  72. huggingface_hub/inference/_providers/_common.py +49 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +20 -31
  87. huggingface_hub/repocard.py +18 -18
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +4 -4
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +366 -208
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +25 -5
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  115. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  116. huggingface_hub/inference_api.py +0 -217
  117. huggingface_hub/keras_mixin.py +0 -500
  118. huggingface_hub/repository.py +0 -1477
  119. huggingface_hub/serialization/_tensorflow.py +0 -95
  120. huggingface_hub/utils/_hf_folder.py +0 -68
  121. huggingface_hub-0.34.4.dist-info/RECORD +0 -166
  122. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  123. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  124. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  125. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_common.py (+110 -124)

@@ -19,30 +19,15 @@ import io
 import json
 import logging
 import mimetypes
-from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    AsyncIterable,
-    BinaryIO,
-    ContextManager,
-    Dict,
-    Generator,
-    Iterable,
-    List,
-    Literal,
-    NoReturn,
-    Optional,
-    Union,
-    overload,
-)
+from typing import TYPE_CHECKING, Any, AsyncIterable, BinaryIO, Iterable, Literal, NoReturn, Optional, Union, overload

-from requests import HTTPError
+import httpx

 from huggingface_hub.errors import (
     GenerationError,
+    HfHubHTTPError,
     IncompleteGenerationError,
     OverloadedError,
     TextGenerationError,
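The import changes above summarize the 1.0 migration in miniature: `requests` and `aiohttp` give way to `httpx`, and the `typing` imports shrink because `Dict`/`List` are replaced by builtin generics throughout the file. A minimal sketch of the new HTTP backend in use (illustrative only, not code from the package; the endpoint is a public Hub API route):

```python
import httpx

# httpx offers one API for sync and async use; this is the sync flavor.
with httpx.Client(timeout=10.0) as client:
    response = client.get("https://huggingface.co/api/models", params={"limit": 1})
    response.raise_for_status()  # raises httpx.HTTPStatusError on 4xx/5xx
    print(response.json()[0]["id"])
```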
@@ -55,14 +40,12 @@ from ._generated.types import ChatCompletionStreamOutput, TextGenerationStreamOutput


 if TYPE_CHECKING:
-    from aiohttp import ClientResponse, ClientSession
     from PIL.Image import Image

 # TYPES
 UrlT = str
 PathT = Union[str, Path]
-BinaryT = Union[bytes, BinaryIO]
-ContentT = Union[BinaryT, PathT, UrlT, "Image"]
+ContentT = Union[bytes, BinaryIO, PathT, UrlT, "Image", bytearray, memoryview]

 # Use to set a Accept: image/png header
 TASKS_EXPECTING_IMAGES = {"text-to-image", "image-to-image"}
@@ -75,40 +58,34 @@ class RequestParameters:
     url: str
     task: str
     model: Optional[str]
-    json: Optional[Union[str, Dict, List]]
-    data: Optional[ContentT]
-    headers: Dict[str, Any]
+    json: Optional[Union[str, dict, list]]
+    data: Optional[bytes]
+    headers: dict[str, Any]


-# Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
-@dataclass
-class ModelStatus:
+class MimeBytes(bytes):
     """
-    This Dataclass represents the model status in the HF Inference API.
-
-    Args:
-        loaded (`bool`):
-            If the model is currently loaded into HF's Inference API. Models
-            are loaded on-demand, leading to the user's first request taking longer.
-            If a model is loaded, you can be assured that it is in a healthy state.
-        state (`str`):
-            The current state of the model. This can be 'Loaded', 'Loadable', 'TooBig'.
-            If a model's state is 'Loadable', it's not too big and has a supported
-            backend. Loadable models are automatically loaded when the user first
-            requests inference on the endpoint. This means it is transparent for the
-            user to load a model, except that the first call takes longer to complete.
-        compute_type (`Dict`):
-            Information about the compute resource the model is using or will use, such as 'gpu' type and number of
-            replicas.
-        framework (`str`):
-            The name of the framework that the model was built with, such as 'transformers'
-            or 'text-generation-inference'.
+    A bytes object with a mime type.
+    To be returned by `_prepare_payload_open_as_mime_bytes` in subclasses.
+
+    Example:
+    ```python
+    >>> b = MimeBytes(b"hello", "text/plain")
+    >>> isinstance(b, bytes)
+    True
+    >>> b.mime_type
+    'text/plain'
+    ```
     """

-    loaded: bool
-    state: str
-    compute_type: Dict
-    framework: str
+    mime_type: Optional[str]
+
+    def __new__(cls, data: bytes, mime_type: Optional[str] = None):
+        obj = super().__new__(cls, data)
+        obj.mime_type = mime_type
+        if isinstance(data, MimeBytes) and mime_type is None:
+            obj.mime_type = data.mime_type
+        return obj


 ## IMPORT UTILS
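`MimeBytes` takes over this part of the file from the removed `ModelStatus` dataclass: instead of describing model state, the module now threads a guessed MIME type alongside raw payload bytes. One subtlety the docstring does not show: re-wrapping an existing `MimeBytes` without an explicit `mime_type` preserves the original type. A standalone copy for illustration (the real class lives in `huggingface_hub/inference/_common.py`):

```python
from typing import Optional


class MimeBytes(bytes):
    """bytes subclass carrying an optional MIME type (illustrative copy)."""

    mime_type: Optional[str]

    def __new__(cls, data: bytes, mime_type: Optional[str] = None):
        obj = super().__new__(cls, data)
        obj.mime_type = mime_type
        if isinstance(data, MimeBytes) and mime_type is None:
            obj.mime_type = data.mime_type  # re-wrapping keeps the original MIME type
        return obj


png = MimeBytes(b"\x89PNG\r\n", "image/png")
assert isinstance(png, bytes)  # usable wherever plain bytes are expected
assert MimeBytes(png).mime_type == "image/png"  # survives re-wrapping
```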
@@ -148,31 +125,49 @@ def _import_pil_image():


 @overload
-def _open_as_binary(
-    content: ContentT,
-) -> ContextManager[BinaryT]: ...  # means "if input is not None, output is not None"
+def _open_as_mime_bytes(content: ContentT) -> MimeBytes: ...  # means "if input is not None, output is not None"


 @overload
-def _open_as_binary(
-    content: Literal[None],
-) -> ContextManager[Literal[None]]: ...  # means "if input is None, output is None"
+def _open_as_mime_bytes(content: Literal[None]) -> Literal[None]: ...  # means "if input is None, output is None"


-@contextmanager  # type: ignore
-def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
+def _open_as_mime_bytes(content: Optional[ContentT]) -> Optional[MimeBytes]:
     """Open `content` as a binary file, either from a URL, a local path, raw bytes, or a PIL Image.

     Do nothing if `content` is None.
-
-    TODO: handle base64 as input
     """
+    # If content is None, yield None
+    if content is None:
+        return None
+
+    # If content is bytes, return it
+    if isinstance(content, bytes):
+        return MimeBytes(content)
+
+    # If content is raw binary data (bytearray, memoryview)
+    if isinstance(content, (bytearray, memoryview)):
+        return MimeBytes(bytes(content))
+
+    # If content is a binary file-like object
+    if hasattr(content, "read"):  # duck-typing instead of isinstance(content, BinaryIO)
+        logger.debug("Reading content from BinaryIO")
+        data = content.read()
+        mime_type = mimetypes.guess_type(content.name)[0] if hasattr(content, "name") else None
+        if isinstance(data, str):
+            raise TypeError("Expected binary stream (bytes), but got text stream")
+        return MimeBytes(data, mime_type=mime_type)
+
     # If content is a string => must be either a URL or a path
     if isinstance(content, str):
         if content.startswith("https://") or content.startswith("http://"):
             logger.debug(f"Downloading content from {content}")
-            yield get_session().get(content).content  # TODO: retrieve as stream and pipe to post request ?
-            return
+            response = get_session().get(content)
+            mime_type = response.headers.get("Content-Type")
+            if mime_type is None:
+                mime_type = mimetypes.guess_type(content)[0]
+            return MimeBytes(response.content, mime_type=mime_type)
+
         content = Path(content)
         if not content.exists():
             raise FileNotFoundError(
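The new dispatch reads file-like objects eagerly and guesses their MIME type from a `.name` attribute when one exists. A self-contained sketch of that duck-typed branch (hypothetical helper name, stdlib only):

```python
import io
import mimetypes
from typing import Optional


def read_binary(obj) -> tuple[bytes, Optional[str]]:
    # Duck-typing: anything with .read() is treated as a file-like object.
    data = obj.read()
    if isinstance(data, str):
        raise TypeError("Expected binary stream (bytes), but got text stream")
    # Files opened with open(..., "rb") expose .name; io.BytesIO does not.
    mime = mimetypes.guess_type(obj.name)[0] if hasattr(obj, "name") else None
    return data, mime


print(read_binary(io.BytesIO(b"\x89PNG")))  # (b'\x89PNG', None) -- nothing to guess from
```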
@@ -183,9 +178,7 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
     # If content is a Path => open it
     if isinstance(content, Path):
         logger.debug(f"Opening content from {content}")
-        with content.open("rb") as f:
-            yield f
-        return
+        return MimeBytes(content.read_bytes(), mime_type=mimetypes.guess_type(content)[0])

     # If content is a PIL Image => convert to bytes
     if is_pillow_available():
@@ -194,38 +187,37 @@ def _open_as_binary(content: Optional[ContentT]) -> Generator[Optional[BinaryT], None, None]:
         if isinstance(content, Image.Image):
             logger.debug("Converting PIL Image to bytes")
             buffer = io.BytesIO()
-            content.save(buffer, format=content.format or "PNG")
-            yield buffer.getvalue()
-            return
+            format = content.format or "PNG"
+            content.save(buffer, format=format)
+            return MimeBytes(buffer.getvalue(), mime_type=f"image/{format.lower()}")

-    # Otherwise: already a file-like object or None
-    yield content  # type: ignore
+    # If nothing matched, raise error
+    raise TypeError(
+        f"Unsupported content type: {type(content)}. "
+        "Expected one of: bytes, bytearray, BinaryIO, memoryview, Path, str (URL or file path), or PIL.Image.Image."
+    )


 def _b64_encode(content: ContentT) -> str:
     """Encode a raw file (image, audio) into base64. Can be bytes, an opened file, a path or a URL."""
-    with _open_as_binary(content) as data:
-        data_as_bytes = data if isinstance(data, bytes) else data.read()
-        return base64.b64encode(data_as_bytes).decode()
+    raw_bytes = _open_as_mime_bytes(content)
+    return base64.b64encode(raw_bytes).decode()


 def _as_url(content: ContentT, default_mime_type: str) -> str:
-    if isinstance(content, str) and (content.startswith("https://") or content.startswith("http://")):
+    if isinstance(content, str) and content.startswith(("http://", "https://", "data:")):
         return content

-    # Handle MIME type detection for different content types
-    mime_type = None
-    if isinstance(content, (str, Path)):
-        mime_type = mimetypes.guess_type(content, strict=False)[0]
-    elif is_pillow_available():
-        from PIL import Image
+    # Convert content to bytes
+    raw_bytes = _open_as_mime_bytes(content)

-        if isinstance(content, Image.Image):
-            # Determine MIME type from PIL Image format, in sync with `_open_as_binary`
-            mime_type = f"image/{(content.format or 'PNG').lower()}"
+    # Get MIME type
+    mime_type = raw_bytes.mime_type or default_mime_type

-    mime_type = mime_type or default_mime_type
-    encoded_data = _b64_encode(content)
+    # Encode content to base64
+    encoded_data = base64.b64encode(raw_bytes).decode()
+
+    # Build data URL
     return f"data:{mime_type};base64,{encoded_data}"

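`_as_url` now delegates byte extraction and MIME detection to `_open_as_mime_bytes` and additionally short-circuits on `data:` URLs. A stdlib-only sketch of the data-URL construction it ends with (hypothetical helper; the path is a placeholder):

```python
import base64
import mimetypes
from pathlib import Path


def as_data_url(path: Path, default_mime_type: str = "application/octet-stream") -> str:
    # Guess the MIME type from the file extension, falling back to the default.
    mime_type = mimetypes.guess_type(path)[0] or default_mime_type
    encoded = base64.b64encode(path.read_bytes()).decode()
    return f"data:{mime_type};base64,{encoded}"


# as_data_url(Path("cat.png"))  -> "data:image/png;base64,iVBOR..."
```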
@@ -235,7 +227,7 @@ def _b64_to_image(encoded_image: str) -> "Image":
     return Image.open(io.BytesIO(base64.b64decode(encoded_image)))


-def _bytes_to_list(content: bytes) -> List:
+def _bytes_to_list(content: bytes) -> list:
     """Parse bytes from a Response object into a Python list.

     Expects the response body to be JSON-encoded data.

@@ -246,7 +238,7 @@ def _bytes_to_list(content: bytes) -> List:
     return json.loads(content.decode())


-def _bytes_to_dict(content: bytes) -> Dict:
+def _bytes_to_dict(content: bytes) -> dict:
     """Parse bytes from a Response object into a Python dictionary.

     Expects the response body to be JSON-encoded data.
@@ -266,24 +258,21 @@ def _bytes_to_image(content: bytes) -> "Image":
     return Image.open(io.BytesIO(content))


-def _as_dict(response: Union[bytes, Dict]) -> Dict:
+def _as_dict(response: Union[bytes, dict]) -> dict:
     return json.loads(response) if isinstance(response, bytes) else response


-## PAYLOAD UTILS
-
-
 ## STREAMING UTILS


 def _stream_text_generation_response(
-    bytes_output_as_lines: Iterable[bytes], details: bool
+    output_lines: Iterable[str], details: bool
 ) -> Union[Iterable[str], Iterable[TextGenerationStreamOutput]]:
     """Used in `InferenceClient.text_generation`."""
     # Parse ServerSentEvents
-    for byte_payload in bytes_output_as_lines:
+    for line in output_lines:
         try:
-            output = _format_text_generation_stream_output(byte_payload, details)
+            output = _format_text_generation_stream_output(line, details)
         except StopIteration:
             break
         if output is not None:

@@ -291,13 +280,13 @@ def _stream_text_generation_response(


 async def _async_stream_text_generation_response(
-    bytes_output_as_lines: AsyncIterable[bytes], details: bool
+    output_lines: AsyncIterable[str], details: bool
 ) -> Union[AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]:
     """Used in `AsyncInferenceClient.text_generation`."""
     # Parse ServerSentEvents
-    async for byte_payload in bytes_output_as_lines:
+    async for line in output_lines:
         try:
-            output = _format_text_generation_stream_output(byte_payload, details)
+            output = _format_text_generation_stream_output(line, details)
         except StopIteration:
             break
         if output is not None:

@@ -305,17 +294,17 @@ async def _async_stream_text_generation_response(


 def _format_text_generation_stream_output(
-    byte_payload: bytes, details: bool
+    line: str, details: bool
 ) -> Optional[Union[str, TextGenerationStreamOutput]]:
-    if not byte_payload.startswith(b"data:"):
+    if not line.startswith("data:"):
         return None  # empty line

-    if byte_payload.strip() == b"data: [DONE]":
+    if line.strip() == "data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
-    payload = byte_payload.decode("utf-8")
-    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+    payload = line.lstrip("data:").rstrip("/n")
+    json_payload = json.loads(payload)

     # Either an error as being returned
     if json_payload.get("error") is not None:
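With responses now arriving as decoded `str` lines, the server-sent-events parsing drops its `bytes` handling entirely. A minimal sketch of the per-line logic (hypothetical function; the payload shape follows the TGI stream format):

```python
import json
from typing import Optional


def parse_sse_line(line: str) -> Optional[dict]:
    if not line.startswith("data:"):
        return None  # keep-alive or empty line
    if line.strip() == "data: [DONE]":
        raise StopIteration("[DONE] signal received.")
    return json.loads(line.removeprefix("data:").strip())


print(parse_sse_line('data: {"token": {"text": "Hello"}}'))  # {'token': {'text': 'Hello'}}
```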
@@ -327,12 +316,12 @@


 def _stream_chat_completion_response(
-    bytes_lines: Iterable[bytes],
+    lines: Iterable[str],
 ) -> Iterable[ChatCompletionStreamOutput]:
     """Used in `InferenceClient.chat_completion` if model is served with TGI."""
-    for item in bytes_lines:
+    for line in lines:
         try:
-            output = _format_chat_completion_stream_output(item)
+            output = _format_chat_completion_stream_output(line)
         except StopIteration:
             break
         if output is not None:

@@ -340,12 +329,12 @@


 async def _async_stream_chat_completion_response(
-    bytes_lines: AsyncIterable[bytes],
+    lines: AsyncIterable[str],
 ) -> AsyncIterable[ChatCompletionStreamOutput]:
     """Used in `AsyncInferenceClient.chat_completion`."""
-    async for item in bytes_lines:
+    async for line in lines:
         try:
-            output = _format_chat_completion_stream_output(item)
+            output = _format_chat_completion_stream_output(line)
         except StopIteration:
             break
         if output is not None:

@@ -353,17 +342,16 @@


 def _format_chat_completion_stream_output(
-    byte_payload: bytes,
+    line: str,
 ) -> Optional[ChatCompletionStreamOutput]:
-    if not byte_payload.startswith(b"data:"):
+    if not line.startswith("data:"):
         return None  # empty line

-    if byte_payload.strip() == b"data: [DONE]":
+    if line.strip() == "data: [DONE]":
         raise StopIteration("[DONE] signal received.")

     # Decode payload
-    payload = byte_payload.decode("utf-8")
-    json_payload = json.loads(payload.lstrip("data:").rstrip("/n"))
+    json_payload = json.loads(line.lstrip("data:").strip())

     # Either an error as being returned
     if json_payload.get("error") is not None:
@@ -373,13 +361,9 @@
     return ChatCompletionStreamOutput.parse_obj_as_instance(json_payload)


-async def _async_yield_from(client: "ClientSession", response: "ClientResponse") -> AsyncIterable[bytes]:
-    try:
-        async for byte_payload in response.content:
-            yield byte_payload.strip()
-    finally:
-        # Always close the underlying HTTP session to avoid resource leaks
-        await client.close()
+async def _async_yield_from(client: httpx.AsyncClient, response: httpx.Response) -> AsyncIterable[str]:
+    async for line in response.aiter_lines():
+        yield line.strip()


 # "TGI servers" are servers running with the `text-generation-inference` backend.
@@ -400,14 +384,14 @@ async def _async_yield_from(client: "ClientSession", response: "ClientResponse")
 # For more details, see https://github.com/huggingface/text-generation-inference and
 # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task.

-_UNSUPPORTED_TEXT_GENERATION_KWARGS: Dict[Optional[str], List[str]] = {}
+_UNSUPPORTED_TEXT_GENERATION_KWARGS: dict[Optional[str], list[str]] = {}


-def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: List[str]) -> None:
+def _set_unsupported_text_generation_kwargs(model: Optional[str], unsupported_kwargs: list[str]) -> None:
     _UNSUPPORTED_TEXT_GENERATION_KWARGS.setdefault(model, []).extend(unsupported_kwargs)


-def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
+def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> list[str]:
     return _UNSUPPORTED_TEXT_GENERATION_KWARGS.get(model, [])

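The per-model ignore-list keeps its behavior; only the annotations move to builtin generics. Usage of these helpers looks like this (they are private, not public API; the model id is illustrative):

```python
from huggingface_hub.inference._common import (
    _get_unsupported_text_generation_kwargs,
    _set_unsupported_text_generation_kwargs,
)

# Remember that a given model rejected a kwarg, then query the ignore-list.
_set_unsupported_text_generation_kwargs("my-org/my-model", ["watermark"])
print(_get_unsupported_text_generation_kwargs("my-org/my-model"))  # ['watermark']
```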
@@ -418,7 +402,7 @@ def _get_unsupported_text_generation_kwargs(model: Optional[str]) -> List[str]:
 # ----------------------


-def raise_text_generation_error(http_error: HTTPError) -> NoReturn:
+def raise_text_generation_error(http_error: HfHubHTTPError) -> NoReturn:
     """
     Try to parse text-generation-inference error message and raise HTTPError in any case.

@@ -427,6 +411,8 @@ def raise_text_generation_error(http_error: HTTPError) -> NoReturn:
         The HTTPError that have been raised.
     """
     # Try to parse a Text Generation Inference error
+    if http_error.response is None:
+        raise http_error

     try:
         # Hacky way to retrieve payload in case of aiohttp error
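The new `response is None` guard exists because `HfHubHTTPError` can now represent connection-level failures that never produced an HTTP response. A hedged sketch of handling this from calling code (assumes huggingface_hub 1.0; the model id is illustrative):

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError

client = InferenceClient()
try:
    client.text_generation("Hello", model="my-org/my-model")
except HfHubHTTPError as err:
    # Mirrors the guard above: no response means a transport-level failure.
    if err.response is None:
        raise
    print("Server replied with status", err.response.status_code)
```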