huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (127) hide show
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +176 -20
  6. huggingface_hub/_local_folder.py +1 -1
  7. huggingface_hub/_login.py +13 -39
  8. huggingface_hub/_oauth.py +10 -14
  9. huggingface_hub/_snapshot_download.py +14 -28
  10. huggingface_hub/_space_api.py +4 -4
  11. huggingface_hub/_tensorboard_logger.py +13 -14
  12. huggingface_hub/_upload_large_folder.py +120 -13
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +2 -2
  15. huggingface_hub/cli/_cli_utils.py +2 -2
  16. huggingface_hub/cli/auth.py +8 -6
  17. huggingface_hub/cli/cache.py +18 -20
  18. huggingface_hub/cli/download.py +4 -4
  19. huggingface_hub/cli/hf.py +2 -5
  20. huggingface_hub/cli/jobs.py +599 -22
  21. huggingface_hub/cli/lfs.py +4 -4
  22. huggingface_hub/cli/repo.py +11 -7
  23. huggingface_hub/cli/repo_files.py +2 -2
  24. huggingface_hub/cli/upload.py +4 -4
  25. huggingface_hub/cli/upload_large_folder.py +3 -3
  26. huggingface_hub/commands/_cli_utils.py +2 -2
  27. huggingface_hub/commands/delete_cache.py +13 -13
  28. huggingface_hub/commands/download.py +4 -13
  29. huggingface_hub/commands/lfs.py +4 -4
  30. huggingface_hub/commands/repo_files.py +2 -2
  31. huggingface_hub/commands/scan_cache.py +1 -1
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -19
  39. huggingface_hub/errors.py +42 -29
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +162 -259
  42. huggingface_hub/hf_api.py +841 -616
  43. huggingface_hub/hf_file_system.py +98 -62
  44. huggingface_hub/hub_mixin.py +37 -57
  45. huggingface_hub/inference/_client.py +257 -325
  46. huggingface_hub/inference/_common.py +110 -124
  47. huggingface_hub/inference/_generated/_async_client.py +307 -432
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  67. huggingface_hub/inference/_mcp/agent.py +3 -3
  68. huggingface_hub/inference/_mcp/cli.py +1 -1
  69. huggingface_hub/inference/_mcp/constants.py +2 -3
  70. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  71. huggingface_hub/inference/_mcp/types.py +10 -7
  72. huggingface_hub/inference/_mcp/utils.py +11 -7
  73. huggingface_hub/inference/_providers/__init__.py +4 -2
  74. huggingface_hub/inference/_providers/_common.py +49 -25
  75. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  76. huggingface_hub/inference/_providers/cohere.py +3 -3
  77. huggingface_hub/inference/_providers/fal_ai.py +52 -21
  78. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  79. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  80. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +10 -10
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +4 -4
  85. huggingface_hub/inference/_providers/replicate.py +15 -15
  86. huggingface_hub/inference/_providers/sambanova.py +6 -6
  87. huggingface_hub/inference/_providers/together.py +7 -7
  88. huggingface_hub/lfs.py +20 -31
  89. huggingface_hub/repocard.py +18 -18
  90. huggingface_hub/repocard_data.py +56 -56
  91. huggingface_hub/serialization/__init__.py +0 -1
  92. huggingface_hub/serialization/_base.py +9 -9
  93. huggingface_hub/serialization/_dduf.py +7 -7
  94. huggingface_hub/serialization/_torch.py +28 -28
  95. huggingface_hub/utils/__init__.py +10 -4
  96. huggingface_hub/utils/_auth.py +5 -5
  97. huggingface_hub/utils/_cache_manager.py +31 -31
  98. huggingface_hub/utils/_deprecation.py +1 -1
  99. huggingface_hub/utils/_dotenv.py +25 -21
  100. huggingface_hub/utils/_fixes.py +0 -10
  101. huggingface_hub/utils/_git_credential.py +4 -4
  102. huggingface_hub/utils/_headers.py +7 -29
  103. huggingface_hub/utils/_http.py +366 -208
  104. huggingface_hub/utils/_pagination.py +4 -4
  105. huggingface_hub/utils/_paths.py +5 -5
  106. huggingface_hub/utils/_runtime.py +16 -13
  107. huggingface_hub/utils/_safetensors.py +21 -21
  108. huggingface_hub/utils/_subprocess.py +9 -9
  109. huggingface_hub/utils/_telemetry.py +3 -3
  110. huggingface_hub/utils/_typing.py +25 -5
  111. huggingface_hub/utils/_validators.py +53 -72
  112. huggingface_hub/utils/_xet.py +16 -16
  113. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  114. huggingface_hub/utils/insecure_hashlib.py +3 -9
  115. huggingface_hub/utils/tqdm.py +3 -3
  116. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  117. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
  124. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -21,16 +21,19 @@
21
21
  import asyncio
22
22
  import base64
23
23
  import logging
24
+ import os
24
25
  import re
25
26
  import warnings
26
- from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload
27
+ from contextlib import AsyncExitStack
28
+ from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
29
+
30
+ import httpx
27
31
 
28
32
  from huggingface_hub import constants
29
- from huggingface_hub.errors import InferenceTimeoutError
33
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
30
34
  from huggingface_hub.inference._common import (
31
35
  TASKS_EXPECTING_IMAGES,
32
36
  ContentT,
33
- ModelStatus,
34
37
  RequestParameters,
35
38
  _async_stream_chat_completion_response,
36
39
  _async_stream_text_generation_response,
@@ -41,7 +44,6 @@ from huggingface_hub.inference._common import (
41
44
  _bytes_to_list,
42
45
  _get_unsupported_text_generation_kwargs,
43
46
  _import_numpy,
44
- _open_as_binary,
45
47
  _set_unsupported_text_generation_kwargs,
46
48
  raise_text_generation_error,
47
49
  )
@@ -66,6 +68,7 @@ from huggingface_hub.inference._generated.types import (
66
68
  ImageSegmentationSubtask,
67
69
  ImageToImageTargetSize,
68
70
  ImageToTextOutput,
71
+ ImageToVideoTargetSize,
69
72
  ObjectDetectionOutputElement,
70
73
  Padding,
71
74
  QuestionAnsweringOutputElement,
@@ -87,16 +90,19 @@ from huggingface_hub.inference._generated.types import (
87
90
  ZeroShotImageClassificationOutputElement,
88
91
  )
89
92
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
90
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
93
+ from huggingface_hub.utils import (
94
+ build_hf_headers,
95
+ get_async_session,
96
+ hf_raise_for_status,
97
+ validate_hf_hub_args,
98
+ )
91
99
  from huggingface_hub.utils._auth import get_token
92
- from huggingface_hub.utils._deprecation import _deprecate_method
93
100
 
94
- from .._common import _async_yield_from, _import_aiohttp
101
+ from .._common import _async_yield_from
95
102
 
96
103
 
97
104
  if TYPE_CHECKING:
98
105
  import numpy as np
99
- from aiohttp import ClientResponse, ClientSession
100
106
  from PIL.Image import Image
101
107
 
102
108
  logger = logging.getLogger(__name__)
@@ -129,18 +135,14 @@ class AsyncInferenceClient:
129
135
  arguments are mutually exclusive and have the exact same behavior.
130
136
  timeout (`float`, `optional`):
131
137
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
132
- headers (`Dict[str, str]`, `optional`):
138
+ headers (`dict[str, str]`, `optional`):
133
139
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
134
140
  Values in this dictionary will override the default values.
135
141
  bill_to (`str`, `optional`):
136
142
  The billing account to use for the requests. By default the requests are billed on the user's account.
137
143
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
138
- cookies (`Dict[str, str]`, `optional`):
144
+ cookies (`dict[str, str]`, `optional`):
139
145
  Additional cookies to send to the server.
140
- trust_env ('bool', 'optional'):
141
- Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
142
- proxies (`Any`, `optional`):
143
- Proxies to use for the request.
144
146
  base_url (`str`, `optional`):
145
147
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
146
148
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -149,6 +151,7 @@ class AsyncInferenceClient:
149
151
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
150
152
  """
151
153
 
154
+ @validate_hf_hub_args
152
155
  def __init__(
153
156
  self,
154
157
  model: Optional[str] = None,
@@ -156,10 +159,8 @@ class AsyncInferenceClient:
156
159
  provider: Optional[PROVIDER_OR_POLICY_T] = None,
157
160
  token: Optional[str] = None,
158
161
  timeout: Optional[float] = None,
159
- headers: Optional[Dict[str, str]] = None,
160
- cookies: Optional[Dict[str, str]] = None,
161
- trust_env: bool = False,
162
- proxies: Optional[Any] = None,
162
+ headers: Optional[dict[str, str]] = None,
163
+ cookies: Optional[dict[str, str]] = None,
163
164
  bill_to: Optional[str] = None,
164
165
  # OpenAI compatibility
165
166
  base_url: Optional[str] = None,
@@ -221,15 +222,36 @@ class AsyncInferenceClient:
221
222
 
222
223
  self.cookies = cookies
223
224
  self.timeout = timeout
224
- self.trust_env = trust_env
225
- self.proxies = proxies
226
225
 
227
- # Keep track of the sessions to close them properly
228
- self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()
226
+ self.exit_stack = AsyncExitStack()
227
+ self._async_client: Optional[httpx.AsyncClient] = None
229
228
 
230
229
  def __repr__(self):
231
230
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
232
231
 
232
+ async def __aenter__(self):
233
+ return self
234
+
235
+ async def __aexit__(self, exc_type, exc_value, traceback):
236
+ await self.close()
237
+
238
+ async def close(self):
239
+ """Close the client.
240
+
241
+ This method is automatically called when using the client as a context manager.
242
+ """
243
+ await self.exit_stack.aclose()
244
+
245
+ async def _get_async_client(self):
246
+ """Get a unique async client for this AsyncInferenceClient instance.
247
+
248
+ Returns the same client instance on subsequent calls, ensuring proper
249
+ connection reuse and resource management through the exit stack.
250
+ """
251
+ if self._async_client is None:
252
+ self._async_client = await self.exit_stack.enter_async_context(get_async_session())
253
+ return self._async_client
254
+
233
255
  @overload
234
256
  async def _inner_post( # type: ignore[misc]
235
257
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
@@ -238,83 +260,59 @@ class AsyncInferenceClient:
238
260
  @overload
239
261
  async def _inner_post( # type: ignore[misc]
240
262
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
241
- ) -> AsyncIterable[bytes]: ...
263
+ ) -> AsyncIterable[str]: ...
242
264
 
243
265
  @overload
244
266
  async def _inner_post(
245
267
  self, request_parameters: RequestParameters, *, stream: bool = False
246
- ) -> Union[bytes, AsyncIterable[bytes]]: ...
268
+ ) -> Union[bytes, AsyncIterable[str]]: ...
247
269
 
248
270
  async def _inner_post(
249
271
  self, request_parameters: RequestParameters, *, stream: bool = False
250
- ) -> Union[bytes, AsyncIterable[bytes]]:
272
+ ) -> Union[bytes, AsyncIterable[str]]:
251
273
  """Make a request to the inference server."""
252
274
 
253
- aiohttp = _import_aiohttp()
254
-
255
275
  # TODO: this should be handled in provider helpers directly
256
276
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
257
277
  request_parameters.headers["Accept"] = "image/png"
258
278
 
259
- with _open_as_binary(request_parameters.data) as data_as_binary:
260
- # Do not use context manager as we don't want to close the connection immediately when returning
261
- # a stream
262
- session = self._get_client_session(headers=request_parameters.headers)
263
-
264
- try:
265
- response = await session.post(
266
- request_parameters.url, json=request_parameters.json, data=data_as_binary, proxy=self.proxies
279
+ try:
280
+ client = await self._get_async_client()
281
+ if stream:
282
+ response = await self.exit_stack.enter_async_context(
283
+ client.stream(
284
+ "POST",
285
+ request_parameters.url,
286
+ json=request_parameters.json,
287
+ data=request_parameters.data,
288
+ headers=request_parameters.headers,
289
+ cookies=self.cookies,
290
+ timeout=self.timeout,
291
+ )
267
292
  )
268
- response_error_payload = None
269
- if response.status != 200:
270
- try:
271
- response_error_payload = await response.json() # get payload before connection closed
272
- except Exception:
273
- pass
274
- response.raise_for_status()
275
- if stream:
276
- return _async_yield_from(session, response)
277
- else:
278
- content = await response.read()
279
- await session.close()
280
- return content
281
- except asyncio.TimeoutError as error:
282
- await session.close()
283
- # Convert any `TimeoutError` to a `InferenceTimeoutError`
284
- raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
285
- except aiohttp.ClientResponseError as error:
286
- error.response_error_payload = response_error_payload
287
- await session.close()
288
- raise error
289
- except Exception:
290
- await session.close()
291
- raise
292
-
293
- async def __aenter__(self):
294
- return self
295
-
296
- async def __aexit__(self, exc_type, exc_value, traceback):
297
- await self.close()
298
-
299
- def __del__(self):
300
- if len(self._sessions) > 0:
301
- warnings.warn(
302
- "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
303
- "This can happen if you've stopped streaming data from the server before the stream was complete. "
304
- "To close the client properly, you must call `await client.close()` "
305
- "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
306
- )
307
-
308
- async def close(self):
309
- """Close all open sessions.
310
-
311
- By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
312
- are streaming data from the server and you stop before the stream is complete, you must call this method to
313
- close the session properly.
314
-
315
- Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
316
- """
317
- await asyncio.gather(*[session.close() for session in self._sessions.keys()])
293
+ hf_raise_for_status(response)
294
+ return _async_yield_from(client, response)
295
+ else:
296
+ response = await client.post(
297
+ request_parameters.url,
298
+ json=request_parameters.json,
299
+ data=request_parameters.data,
300
+ headers=request_parameters.headers,
301
+ cookies=self.cookies,
302
+ timeout=self.timeout,
303
+ )
304
+ hf_raise_for_status(response)
305
+ return response.content
306
+ except asyncio.TimeoutError as error:
307
+ # Convert any `TimeoutError` to a `InferenceTimeoutError`
308
+ raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
309
+ except HfHubHTTPError as error:
310
+ if error.response.status_code == 422 and request_parameters.task != "unknown":
311
+ msg = str(error.args[0])
312
+ if len(error.response.text) > 0:
313
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
314
+ error.args = (msg,) + error.args[1:]
315
+ raise
318
316
 
319
317
  async def audio_classification(
320
318
  self,
@@ -323,7 +321,7 @@ class AsyncInferenceClient:
323
321
  model: Optional[str] = None,
324
322
  top_k: Optional[int] = None,
325
323
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
326
- ) -> List[AudioClassificationOutputElement]:
324
+ ) -> list[AudioClassificationOutputElement]:
327
325
  """
328
326
  Perform audio classification on the provided audio content.
329
327
 
@@ -341,12 +339,12 @@ class AsyncInferenceClient:
341
339
  The function to apply to the model outputs in order to retrieve the scores.
342
340
 
343
341
  Returns:
344
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
342
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
345
343
 
346
344
  Raises:
347
345
  [`InferenceTimeoutError`]:
348
346
  If the model is unavailable or the request times out.
349
- `aiohttp.ClientResponseError`:
347
+ [`HfHubHTTPError`]:
350
348
  If the request fails with an HTTP error status code other than HTTP 503.
351
349
 
352
350
  Example:
@@ -379,7 +377,7 @@ class AsyncInferenceClient:
379
377
  audio: ContentT,
380
378
  *,
381
379
  model: Optional[str] = None,
382
- ) -> List[AudioToAudioOutputElement]:
380
+ ) -> list[AudioToAudioOutputElement]:
383
381
  """
384
382
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
385
383
 
@@ -393,12 +391,12 @@ class AsyncInferenceClient:
393
391
  audio_to_audio will be used.
394
392
 
395
393
  Returns:
396
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
394
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
397
395
 
398
396
  Raises:
399
397
  `InferenceTimeoutError`:
400
398
  If the model is unavailable or the request times out.
401
- `aiohttp.ClientResponseError`:
399
+ [`HfHubHTTPError`]:
402
400
  If the request fails with an HTTP error status code other than HTTP 503.
403
401
 
404
402
  Example:
@@ -432,7 +430,7 @@ class AsyncInferenceClient:
432
430
  audio: ContentT,
433
431
  *,
434
432
  model: Optional[str] = None,
435
- extra_body: Optional[Dict] = None,
433
+ extra_body: Optional[dict] = None,
436
434
  ) -> AutomaticSpeechRecognitionOutput:
437
435
  """
438
436
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -443,7 +441,7 @@ class AsyncInferenceClient:
443
441
  model (`str`, *optional*):
444
442
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
445
443
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
446
- extra_body (`Dict`, *optional*):
444
+ extra_body (`dict`, *optional*):
447
445
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
448
446
  for supported parameters.
449
447
  Returns:
@@ -452,7 +450,7 @@ class AsyncInferenceClient:
452
450
  Raises:
453
451
  [`InferenceTimeoutError`]:
454
452
  If the model is unavailable or the request times out.
455
- `aiohttp.ClientResponseError`:
453
+ [`HfHubHTTPError`]:
456
454
  If the request fails with an HTTP error status code other than HTTP 503.
457
455
 
458
456
  Example:
@@ -479,105 +477,105 @@ class AsyncInferenceClient:
479
477
  @overload
480
478
  async def chat_completion( # type: ignore
481
479
  self,
482
- messages: List[Union[Dict, ChatCompletionInputMessage]],
480
+ messages: list[Union[dict, ChatCompletionInputMessage]],
483
481
  *,
484
482
  model: Optional[str] = None,
485
483
  stream: Literal[False] = False,
486
484
  frequency_penalty: Optional[float] = None,
487
- logit_bias: Optional[List[float]] = None,
485
+ logit_bias: Optional[list[float]] = None,
488
486
  logprobs: Optional[bool] = None,
489
487
  max_tokens: Optional[int] = None,
490
488
  n: Optional[int] = None,
491
489
  presence_penalty: Optional[float] = None,
492
490
  response_format: Optional[ChatCompletionInputGrammarType] = None,
493
491
  seed: Optional[int] = None,
494
- stop: Optional[List[str]] = None,
492
+ stop: Optional[list[str]] = None,
495
493
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
496
494
  temperature: Optional[float] = None,
497
495
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
498
496
  tool_prompt: Optional[str] = None,
499
- tools: Optional[List[ChatCompletionInputTool]] = None,
497
+ tools: Optional[list[ChatCompletionInputTool]] = None,
500
498
  top_logprobs: Optional[int] = None,
501
499
  top_p: Optional[float] = None,
502
- extra_body: Optional[Dict] = None,
500
+ extra_body: Optional[dict] = None,
503
501
  ) -> ChatCompletionOutput: ...
504
502
 
505
503
  @overload
506
504
  async def chat_completion( # type: ignore
507
505
  self,
508
- messages: List[Union[Dict, ChatCompletionInputMessage]],
506
+ messages: list[Union[dict, ChatCompletionInputMessage]],
509
507
  *,
510
508
  model: Optional[str] = None,
511
509
  stream: Literal[True] = True,
512
510
  frequency_penalty: Optional[float] = None,
513
- logit_bias: Optional[List[float]] = None,
511
+ logit_bias: Optional[list[float]] = None,
514
512
  logprobs: Optional[bool] = None,
515
513
  max_tokens: Optional[int] = None,
516
514
  n: Optional[int] = None,
517
515
  presence_penalty: Optional[float] = None,
518
516
  response_format: Optional[ChatCompletionInputGrammarType] = None,
519
517
  seed: Optional[int] = None,
520
- stop: Optional[List[str]] = None,
518
+ stop: Optional[list[str]] = None,
521
519
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
522
520
  temperature: Optional[float] = None,
523
521
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
524
522
  tool_prompt: Optional[str] = None,
525
- tools: Optional[List[ChatCompletionInputTool]] = None,
523
+ tools: Optional[list[ChatCompletionInputTool]] = None,
526
524
  top_logprobs: Optional[int] = None,
527
525
  top_p: Optional[float] = None,
528
- extra_body: Optional[Dict] = None,
526
+ extra_body: Optional[dict] = None,
529
527
  ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
530
528
 
531
529
  @overload
532
530
  async def chat_completion(
533
531
  self,
534
- messages: List[Union[Dict, ChatCompletionInputMessage]],
532
+ messages: list[Union[dict, ChatCompletionInputMessage]],
535
533
  *,
536
534
  model: Optional[str] = None,
537
535
  stream: bool = False,
538
536
  frequency_penalty: Optional[float] = None,
539
- logit_bias: Optional[List[float]] = None,
537
+ logit_bias: Optional[list[float]] = None,
540
538
  logprobs: Optional[bool] = None,
541
539
  max_tokens: Optional[int] = None,
542
540
  n: Optional[int] = None,
543
541
  presence_penalty: Optional[float] = None,
544
542
  response_format: Optional[ChatCompletionInputGrammarType] = None,
545
543
  seed: Optional[int] = None,
546
- stop: Optional[List[str]] = None,
544
+ stop: Optional[list[str]] = None,
547
545
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
548
546
  temperature: Optional[float] = None,
549
547
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
550
548
  tool_prompt: Optional[str] = None,
551
- tools: Optional[List[ChatCompletionInputTool]] = None,
549
+ tools: Optional[list[ChatCompletionInputTool]] = None,
552
550
  top_logprobs: Optional[int] = None,
553
551
  top_p: Optional[float] = None,
554
- extra_body: Optional[Dict] = None,
552
+ extra_body: Optional[dict] = None,
555
553
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
556
554
 
557
555
  async def chat_completion(
558
556
  self,
559
- messages: List[Union[Dict, ChatCompletionInputMessage]],
557
+ messages: list[Union[dict, ChatCompletionInputMessage]],
560
558
  *,
561
559
  model: Optional[str] = None,
562
560
  stream: bool = False,
563
561
  # Parameters from ChatCompletionInput (handled manually)
564
562
  frequency_penalty: Optional[float] = None,
565
- logit_bias: Optional[List[float]] = None,
563
+ logit_bias: Optional[list[float]] = None,
566
564
  logprobs: Optional[bool] = None,
567
565
  max_tokens: Optional[int] = None,
568
566
  n: Optional[int] = None,
569
567
  presence_penalty: Optional[float] = None,
570
568
  response_format: Optional[ChatCompletionInputGrammarType] = None,
571
569
  seed: Optional[int] = None,
572
- stop: Optional[List[str]] = None,
570
+ stop: Optional[list[str]] = None,
573
571
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
574
572
  temperature: Optional[float] = None,
575
573
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
576
574
  tool_prompt: Optional[str] = None,
577
- tools: Optional[List[ChatCompletionInputTool]] = None,
575
+ tools: Optional[list[ChatCompletionInputTool]] = None,
578
576
  top_logprobs: Optional[int] = None,
579
577
  top_p: Optional[float] = None,
580
- extra_body: Optional[Dict] = None,
578
+ extra_body: Optional[dict] = None,
581
579
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
582
580
  """
583
581
  A method for completing conversations using a specified language model.
@@ -607,7 +605,7 @@ class AsyncInferenceClient:
607
605
  frequency_penalty (`float`, *optional*):
608
606
  Penalizes new tokens based on their existing frequency
609
607
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
610
- logit_bias (`List[float]`, *optional*):
608
+ logit_bias (`list[float]`, *optional*):
611
609
  Adjusts the likelihood of specific tokens appearing in the generated output.
612
610
  logprobs (`bool`, *optional*):
613
611
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -623,7 +621,7 @@ class AsyncInferenceClient:
623
621
  Grammar constraints. Can be either a JSONSchema or a regex.
624
622
  seed (Optional[`int`], *optional*):
625
623
  Seed for reproducible control flow. Defaults to None.
626
- stop (`List[str]`, *optional*):
624
+ stop (`list[str]`, *optional*):
627
625
  Up to four strings which trigger the end of the response.
628
626
  Defaults to None.
629
627
  stream (`bool`, *optional*):
@@ -647,7 +645,7 @@ class AsyncInferenceClient:
647
645
  tools (List of [`ChatCompletionInputTool`], *optional*):
648
646
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
649
647
  provide a list of functions the model may generate JSON inputs for.
650
- extra_body (`Dict`, *optional*):
648
+ extra_body (`dict`, *optional*):
651
649
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
652
650
  for supported parameters.
653
651
  Returns:
@@ -659,7 +657,7 @@ class AsyncInferenceClient:
659
657
  Raises:
660
658
  [`InferenceTimeoutError`]:
661
659
  If the model is unavailable or the request times out.
662
- `aiohttp.ClientResponseError`:
660
+ [`HfHubHTTPError`]:
663
661
  If the request fails with an HTTP error status code other than HTTP 503.
664
662
 
665
663
  Example:
@@ -979,8 +977,8 @@ class AsyncInferenceClient:
979
977
  max_question_len: Optional[int] = None,
980
978
  max_seq_len: Optional[int] = None,
981
979
  top_k: Optional[int] = None,
982
- word_boxes: Optional[List[Union[List[float], str]]] = None,
983
- ) -> List[DocumentQuestionAnsweringOutputElement]:
980
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
981
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
984
982
  """
985
983
  Answer questions on document images.
986
984
 
@@ -1010,16 +1008,16 @@ class AsyncInferenceClient:
1010
1008
  top_k (`int`, *optional*):
1011
1009
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
1012
1010
  answers if there are not enough options available within the context.
1013
- word_boxes (`List[Union[List[float], str`, *optional*):
1011
+ word_boxes (`list[Union[list[float], str`, *optional*):
1014
1012
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
1015
1013
  step and use the provided bounding boxes instead.
1016
1014
  Returns:
1017
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
1015
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
1018
1016
 
1019
1017
  Raises:
1020
1018
  [`InferenceTimeoutError`]:
1021
1019
  If the model is unavailable or the request times out.
1022
- `aiohttp.ClientResponseError`:
1020
+ [`HfHubHTTPError`]:
1023
1021
  If the request fails with an HTTP error status code other than HTTP 503.
1024
1022
 
1025
1023
 
@@ -1034,7 +1032,7 @@ class AsyncInferenceClient:
1034
1032
  """
1035
1033
  model_id = model or self.model
1036
1034
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
1037
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1035
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1038
1036
  request_parameters = provider_helper.prepare_request(
1039
1037
  inputs=inputs,
1040
1038
  parameters={
@@ -1095,7 +1093,7 @@ class AsyncInferenceClient:
1095
1093
  Raises:
1096
1094
  [`InferenceTimeoutError`]:
1097
1095
  If the model is unavailable or the request times out.
1098
- `aiohttp.ClientResponseError`:
1096
+ [`HfHubHTTPError`]:
1099
1097
  If the request fails with an HTTP error status code other than HTTP 503.
1100
1098
 
1101
1099
  Example:
@@ -1133,9 +1131,9 @@ class AsyncInferenceClient:
1133
1131
  text: str,
1134
1132
  *,
1135
1133
  model: Optional[str] = None,
1136
- targets: Optional[List[str]] = None,
1134
+ targets: Optional[list[str]] = None,
1137
1135
  top_k: Optional[int] = None,
1138
- ) -> List[FillMaskOutputElement]:
1136
+ ) -> list[FillMaskOutputElement]:
1139
1137
  """
1140
1138
  Fill in a hole with a missing word (token to be precise).
1141
1139
 
@@ -1145,20 +1143,20 @@ class AsyncInferenceClient:
1145
1143
  model (`str`, *optional*):
1146
1144
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
1147
1145
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
1148
- targets (`List[str`, *optional*):
1146
+ targets (`list[str`, *optional*):
1149
1147
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
1150
1148
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
1151
1149
  resulting token will be used (with a warning, and that might be slower).
1152
1150
  top_k (`int`, *optional*):
1153
1151
  When passed, overrides the number of predictions to return.
1154
1152
  Returns:
1155
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1153
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1156
1154
  probability, token reference, and completed text.
1157
1155
 
1158
1156
  Raises:
1159
1157
  [`InferenceTimeoutError`]:
1160
1158
  If the model is unavailable or the request times out.
1161
- `aiohttp.ClientResponseError`:
1159
+ [`HfHubHTTPError`]:
1162
1160
  If the request fails with an HTTP error status code other than HTTP 503.
1163
1161
 
1164
1162
  Example:
@@ -1192,7 +1190,7 @@ class AsyncInferenceClient:
1192
1190
  model: Optional[str] = None,
1193
1191
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
1194
1192
  top_k: Optional[int] = None,
1195
- ) -> List[ImageClassificationOutputElement]:
1193
+ ) -> list[ImageClassificationOutputElement]:
1196
1194
  """
1197
1195
  Perform image classification on the given image using the specified model.
1198
1196
 
@@ -1207,12 +1205,12 @@ class AsyncInferenceClient:
1207
1205
  top_k (`int`, *optional*):
1208
1206
  When specified, limits the output to the top K most probable classes.
1209
1207
  Returns:
1210
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1208
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1211
1209
 
1212
1210
  Raises:
1213
1211
  [`InferenceTimeoutError`]:
1214
1212
  If the model is unavailable or the request times out.
1215
- `aiohttp.ClientResponseError`:
1213
+ [`HfHubHTTPError`]:
1216
1214
  If the request fails with an HTTP error status code other than HTTP 503.
1217
1215
 
1218
1216
  Example:
@@ -1245,7 +1243,7 @@ class AsyncInferenceClient:
1245
1243
  overlap_mask_area_threshold: Optional[float] = None,
1246
1244
  subtask: Optional["ImageSegmentationSubtask"] = None,
1247
1245
  threshold: Optional[float] = None,
1248
- ) -> List[ImageSegmentationOutputElement]:
1246
+ ) -> list[ImageSegmentationOutputElement]:
1249
1247
  """
1250
1248
  Perform image segmentation on the given image using the specified model.
1251
1249
 
@@ -1270,12 +1268,12 @@ class AsyncInferenceClient:
1270
1268
  threshold (`float`, *optional*):
1271
1269
  Probability threshold to filter out predicted masks.
1272
1270
  Returns:
1273
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1271
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1274
1272
 
1275
1273
  Raises:
1276
1274
  [`InferenceTimeoutError`]:
1277
1275
  If the model is unavailable or the request times out.
1278
- `aiohttp.ClientResponseError`:
1276
+ [`HfHubHTTPError`]:
1279
1277
  If the request fails with an HTTP error status code other than HTTP 503.
1280
1278
 
1281
1279
  Example:
@@ -1353,7 +1351,7 @@ class AsyncInferenceClient:
1353
1351
  Raises:
1354
1352
  [`InferenceTimeoutError`]:
1355
1353
  If the model is unavailable or the request times out.
1356
- `aiohttp.ClientResponseError`:
1354
+ [`HfHubHTTPError`]:
1357
1355
  If the request fails with an HTTP error status code other than HTTP 503.
1358
1356
 
1359
1357
  Example:
@@ -1385,6 +1383,86 @@ class AsyncInferenceClient:
1385
1383
  response = provider_helper.get_response(response, request_parameters)
1386
1384
  return _bytes_to_image(response)
1387
1385
 
1386
+ async def image_to_video(
1387
+ self,
1388
+ image: ContentT,
1389
+ *,
1390
+ model: Optional[str] = None,
1391
+ prompt: Optional[str] = None,
1392
+ negative_prompt: Optional[str] = None,
1393
+ num_frames: Optional[float] = None,
1394
+ num_inference_steps: Optional[int] = None,
1395
+ guidance_scale: Optional[float] = None,
1396
+ seed: Optional[int] = None,
1397
+ target_size: Optional[ImageToVideoTargetSize] = None,
1398
+ **kwargs,
1399
+ ) -> bytes:
1400
+ """
1401
+ Generate a video from an input image.
1402
+
1403
+ Args:
1404
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1405
+ The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1406
+ model (`str`, *optional*):
1407
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1408
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
1409
+ prompt (`str`, *optional*):
1410
+ The text prompt to guide the video generation.
1411
+ negative_prompt (`str`, *optional*):
1412
+ One prompt to guide what NOT to include in video generation.
1413
+ num_frames (`float`, *optional*):
1414
+ The num_frames parameter determines how many video frames are generated.
1415
+ num_inference_steps (`int`, *optional*):
1416
+ For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
1417
+ quality image at the expense of slower inference.
1418
+ guidance_scale (`float`, *optional*):
1419
+ For diffusion models. A higher guidance scale value encourages the model to generate videos closely
1420
+ linked to the text prompt at the expense of lower image quality.
1421
+ seed (`int`, *optional*):
1422
+ The seed to use for the video generation.
1423
+ target_size (`ImageToVideoTargetSize`, *optional*):
1424
+ The size in pixel of the output video frames.
1425
+ num_inference_steps (`int`, *optional*):
1426
+ The number of denoising steps. More denoising steps usually lead to a higher quality video at the
1427
+ expense of slower inference.
1428
+ seed (`int`, *optional*):
1429
+ Seed for the random number generator.
1430
+
1431
+ Returns:
1432
+ `bytes`: The generated video.
1433
+
1434
+ Examples:
1435
+ ```py
1436
+ # Must be run in an async context
1437
+ >>> from huggingface_hub import AsyncInferenceClient
1438
+ >>> client = AsyncInferenceClient()
1439
+ >>> video = await client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
1440
+ >>> with open("tiger.mp4", "wb") as f:
1441
+ ... f.write(video)
1442
+ ```
1443
+ """
1444
+ model_id = model or self.model
1445
+ provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
1446
+ request_parameters = provider_helper.prepare_request(
1447
+ inputs=image,
1448
+ parameters={
1449
+ "prompt": prompt,
1450
+ "negative_prompt": negative_prompt,
1451
+ "num_frames": num_frames,
1452
+ "num_inference_steps": num_inference_steps,
1453
+ "guidance_scale": guidance_scale,
1454
+ "seed": seed,
1455
+ "target_size": target_size,
1456
+ **kwargs,
1457
+ },
1458
+ headers=self.headers,
1459
+ model=model_id,
1460
+ api_key=self.token,
1461
+ )
1462
+ response = await self._inner_post(request_parameters)
1463
+ response = provider_helper.get_response(response, request_parameters)
1464
+ return response
1465
+
1388
1466
  async def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
1389
1467
  """
1390
1468
  Takes an input image and return text.
@@ -1405,7 +1483,7 @@ class AsyncInferenceClient:
1405
1483
  Raises:
1406
1484
  [`InferenceTimeoutError`]:
1407
1485
  If the model is unavailable or the request times out.
1408
- `aiohttp.ClientResponseError`:
1486
+ [`HfHubHTTPError`]:
1409
1487
  If the request fails with an HTTP error status code other than HTTP 503.
1410
1488
 
1411
1489
  Example:
@@ -1429,12 +1507,12 @@ class AsyncInferenceClient:
1429
1507
  api_key=self.token,
1430
1508
  )
1431
1509
  response = await self._inner_post(request_parameters)
1432
- output = ImageToTextOutput.parse_obj(response)
1433
- return output[0] if isinstance(output, list) else output
1510
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
1511
+ return output_list[0]
1434
1512
 
1435
1513
  async def object_detection(
1436
1514
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
1437
- ) -> List[ObjectDetectionOutputElement]:
1515
+ ) -> list[ObjectDetectionOutputElement]:
1438
1516
  """
1439
1517
  Perform object detection on the given image using the specified model.
1440
1518
 
@@ -1453,12 +1531,12 @@ class AsyncInferenceClient:
1453
1531
  threshold (`float`, *optional*):
1454
1532
  The probability necessary to make a prediction.
1455
1533
  Returns:
1456
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1534
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1457
1535
 
1458
1536
  Raises:
1459
1537
  [`InferenceTimeoutError`]:
1460
1538
  If the model is unavailable or the request times out.
1461
- `aiohttp.ClientResponseError`:
1539
+ [`HfHubHTTPError`]:
1462
1540
  If the request fails with an HTTP error status code other than HTTP 503.
1463
1541
  `ValueError`:
1464
1542
  If the request output is not a List.
@@ -1497,7 +1575,7 @@ class AsyncInferenceClient:
1497
1575
  max_question_len: Optional[int] = None,
1498
1576
  max_seq_len: Optional[int] = None,
1499
1577
  top_k: Optional[int] = None,
1500
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
1578
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
1501
1579
  """
1502
1580
  Retrieve the answer to a question from a given text.
1503
1581
 
@@ -1529,13 +1607,13 @@ class AsyncInferenceClient:
1529
1607
  topk answers if there are not enough options available within the context.
1530
1608
 
1531
1609
  Returns:
1532
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
1610
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
1533
1611
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
1534
1612
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
1535
1613
  Raises:
1536
1614
  [`InferenceTimeoutError`]:
1537
1615
  If the model is unavailable or the request times out.
1538
- `aiohttp.ClientResponseError`:
1616
+ [`HfHubHTTPError`]:
1539
1617
  If the request fails with an HTTP error status code other than HTTP 503.
1540
1618
 
1541
1619
  Example:
@@ -1570,15 +1648,15 @@ class AsyncInferenceClient:
1570
1648
  return output
1571
1649
 
1572
1650
  async def sentence_similarity(
1573
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
1574
- ) -> List[float]:
1651
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
1652
+ ) -> list[float]:
1575
1653
  """
1576
1654
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
1577
1655
 
1578
1656
  Args:
1579
1657
  sentence (`str`):
1580
1658
  The main sentence to compare to others.
1581
- other_sentences (`List[str]`):
1659
+ other_sentences (`list[str]`):
1582
1660
  The list of sentences to compare to.
1583
1661
  model (`str`, *optional*):
1584
1662
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1586,12 +1664,12 @@ class AsyncInferenceClient:
1586
1664
  Defaults to None.
1587
1665
 
1588
1666
  Returns:
1589
- `List[float]`: The embedding representing the input text.
1667
+ `list[float]`: The embedding representing the input text.
1590
1668
 
1591
1669
  Raises:
1592
1670
  [`InferenceTimeoutError`]:
1593
1671
  If the model is unavailable or the request times out.
1594
- `aiohttp.ClientResponseError`:
1672
+ [`HfHubHTTPError`]:
1595
1673
  If the request fails with an HTTP error status code other than HTTP 503.
1596
1674
 
1597
1675
  Example:
@@ -1629,7 +1707,7 @@ class AsyncInferenceClient:
1629
1707
  *,
1630
1708
  model: Optional[str] = None,
1631
1709
  clean_up_tokenization_spaces: Optional[bool] = None,
1632
- generate_parameters: Optional[Dict[str, Any]] = None,
1710
+ generate_parameters: Optional[dict[str, Any]] = None,
1633
1711
  truncation: Optional["SummarizationTruncationStrategy"] = None,
1634
1712
  ) -> SummarizationOutput:
1635
1713
  """
@@ -1643,7 +1721,7 @@ class AsyncInferenceClient:
1643
1721
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
1644
1722
  clean_up_tokenization_spaces (`bool`, *optional*):
1645
1723
  Whether to clean up the potential extra spaces in the text output.
1646
- generate_parameters (`Dict[str, Any]`, *optional*):
1724
+ generate_parameters (`dict[str, Any]`, *optional*):
1647
1725
  Additional parametrization of the text generation algorithm.
1648
1726
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
1649
1727
  The truncation strategy to use.
@@ -1653,7 +1731,7 @@ class AsyncInferenceClient:
1653
1731
  Raises:
1654
1732
  [`InferenceTimeoutError`]:
1655
1733
  If the model is unavailable or the request times out.
1656
- `aiohttp.ClientResponseError`:
1734
+ [`HfHubHTTPError`]:
1657
1735
  If the request fails with an HTTP error status code other than HTTP 503.
1658
1736
 
1659
1737
  Example:
@@ -1684,7 +1762,7 @@ class AsyncInferenceClient:
1684
1762
 
1685
1763
  async def table_question_answering(
1686
1764
  self,
1687
- table: Dict[str, Any],
1765
+ table: dict[str, Any],
1688
1766
  query: str,
1689
1767
  *,
1690
1768
  model: Optional[str] = None,
@@ -1719,7 +1797,7 @@ class AsyncInferenceClient:
1719
1797
  Raises:
1720
1798
  [`InferenceTimeoutError`]:
1721
1799
  If the model is unavailable or the request times out.
1722
- `aiohttp.ClientResponseError`:
1800
+ [`HfHubHTTPError`]:
1723
1801
  If the request fails with an HTTP error status code other than HTTP 503.
1724
1802
 
1725
1803
  Example:
@@ -1745,12 +1823,12 @@ class AsyncInferenceClient:
1745
1823
  response = await self._inner_post(request_parameters)
1746
1824
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
1747
1825
 
1748
- async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
1826
+ async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
1749
1827
  """
1750
1828
  Classifying a target category (a group) based on a set of attributes.
1751
1829
 
1752
1830
  Args:
1753
- table (`Dict[str, Any]`):
1831
+ table (`dict[str, Any]`):
1754
1832
  Set of attributes to classify.
1755
1833
  model (`str`, *optional*):
1756
1834
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1763,7 +1841,7 @@ class AsyncInferenceClient:
1763
1841
  Raises:
1764
1842
  [`InferenceTimeoutError`]:
1765
1843
  If the model is unavailable or the request times out.
1766
- `aiohttp.ClientResponseError`:
1844
+ [`HfHubHTTPError`]:
1767
1845
  If the request fails with an HTTP error status code other than HTTP 503.
1768
1846
 
1769
1847
  Example:
@@ -1801,12 +1879,12 @@ class AsyncInferenceClient:
1801
1879
  response = await self._inner_post(request_parameters)
1802
1880
  return _bytes_to_list(response)
1803
1881
 
1804
- async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
1882
+ async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
1805
1883
  """
1806
1884
  Predicting a numerical target value given a set of attributes/features in a table.
1807
1885
 
1808
1886
  Args:
1809
- table (`Dict[str, Any]`):
1887
+ table (`dict[str, Any]`):
1810
1888
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
1811
1889
  model (`str`, *optional*):
1812
1890
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1819,7 +1897,7 @@ class AsyncInferenceClient:
1819
1897
  Raises:
1820
1898
  [`InferenceTimeoutError`]:
1821
1899
  If the model is unavailable or the request times out.
1822
- `aiohttp.ClientResponseError`:
1900
+ [`HfHubHTTPError`]:
1823
1901
  If the request fails with an HTTP error status code other than HTTP 503.
1824
1902
 
1825
1903
  Example:
@@ -1859,7 +1937,7 @@ class AsyncInferenceClient:
1859
1937
  model: Optional[str] = None,
1860
1938
  top_k: Optional[int] = None,
1861
1939
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
1862
- ) -> List[TextClassificationOutputElement]:
1940
+ ) -> list[TextClassificationOutputElement]:
1863
1941
  """
1864
1942
  Perform text classification (e.g. sentiment-analysis) on the given text.
1865
1943
 
@@ -1876,12 +1954,12 @@ class AsyncInferenceClient:
1876
1954
  The function to apply to the model outputs in order to retrieve the scores.
1877
1955
 
1878
1956
  Returns:
1879
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1957
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1880
1958
 
1881
1959
  Raises:
1882
1960
  [`InferenceTimeoutError`]:
1883
1961
  If the model is unavailable or the request times out.
1884
- `aiohttp.ClientResponseError`:
1962
+ [`HfHubHTTPError`]:
1885
1963
  If the request fails with an HTTP error status code other than HTTP 503.
1886
1964
 
1887
1965
  Example:
@@ -1930,8 +2008,8 @@ class AsyncInferenceClient:
1930
2008
  repetition_penalty: Optional[float] = None,
1931
2009
  return_full_text: Optional[bool] = None,
1932
2010
  seed: Optional[int] = None,
1933
- stop: Optional[List[str]] = None,
1934
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2011
+ stop: Optional[list[str]] = None,
2012
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1935
2013
  temperature: Optional[float] = None,
1936
2014
  top_k: Optional[int] = None,
1937
2015
  top_n_tokens: Optional[int] = None,
@@ -1960,8 +2038,8 @@ class AsyncInferenceClient:
1960
2038
  repetition_penalty: Optional[float] = None,
1961
2039
  return_full_text: Optional[bool] = None,
1962
2040
  seed: Optional[int] = None,
1963
- stop: Optional[List[str]] = None,
1964
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2041
+ stop: Optional[list[str]] = None,
2042
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1965
2043
  temperature: Optional[float] = None,
1966
2044
  top_k: Optional[int] = None,
1967
2045
  top_n_tokens: Optional[int] = None,
@@ -1990,8 +2068,8 @@ class AsyncInferenceClient:
1990
2068
  repetition_penalty: Optional[float] = None,
1991
2069
  return_full_text: Optional[bool] = None, # Manual default value
1992
2070
  seed: Optional[int] = None,
1993
- stop: Optional[List[str]] = None,
1994
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2071
+ stop: Optional[list[str]] = None,
2072
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1995
2073
  temperature: Optional[float] = None,
1996
2074
  top_k: Optional[int] = None,
1997
2075
  top_n_tokens: Optional[int] = None,
@@ -2020,8 +2098,8 @@ class AsyncInferenceClient:
2020
2098
  repetition_penalty: Optional[float] = None,
2021
2099
  return_full_text: Optional[bool] = None,
2022
2100
  seed: Optional[int] = None,
2023
- stop: Optional[List[str]] = None,
2024
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2101
+ stop: Optional[list[str]] = None,
2102
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2025
2103
  temperature: Optional[float] = None,
2026
2104
  top_k: Optional[int] = None,
2027
2105
  top_n_tokens: Optional[int] = None,
@@ -2050,8 +2128,8 @@ class AsyncInferenceClient:
2050
2128
  repetition_penalty: Optional[float] = None,
2051
2129
  return_full_text: Optional[bool] = None,
2052
2130
  seed: Optional[int] = None,
2053
- stop: Optional[List[str]] = None,
2054
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2131
+ stop: Optional[list[str]] = None,
2132
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2055
2133
  temperature: Optional[float] = None,
2056
2134
  top_k: Optional[int] = None,
2057
2135
  top_n_tokens: Optional[int] = None,
@@ -2079,8 +2157,8 @@ class AsyncInferenceClient:
2079
2157
  repetition_penalty: Optional[float] = None,
2080
2158
  return_full_text: Optional[bool] = None,
2081
2159
  seed: Optional[int] = None,
2082
- stop: Optional[List[str]] = None,
2083
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2160
+ stop: Optional[list[str]] = None,
2161
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2084
2162
  temperature: Optional[float] = None,
2085
2163
  top_k: Optional[int] = None,
2086
2164
  top_n_tokens: Optional[int] = None,
@@ -2136,9 +2214,9 @@ class AsyncInferenceClient:
  Whether to prepend the prompt to the generated text
  seed (`int`, *optional*):
  Random sampling seed
- stop (`List[str]`, *optional*):
+ stop (`list[str]`, *optional*):
  Stop generating tokens if a member of `stop` is generated.
- stop_sequences (`List[str]`, *optional*):
+ stop_sequences (`list[str]`, *optional*):
  Deprecated argument. Use `stop` instead.
  temperature (`float`, *optional*):
  The value used to module the logits distribution.
@@ -2159,10 +2237,10 @@ class AsyncInferenceClient:
  Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)

  Returns:
- `Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]`:
+ `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
  Generated text returned from the server:
  - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
- - if `stream=True` and `details=False`, the generated text is returned token by token as a `Iterable[str]`
+ - if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
  - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
  - if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]

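On the async client, streaming output is an async iterable rather than a plain iterator, so tokens are consumed with `async for`. A minimal sketch of that call pattern (prompt and parameters are illustrative):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()  # assumes a valid token is configured in the environment
    # stream=True, details=False -> AsyncIterable[str]
    async for token in await client.text_generation(
        "The huggingface_hub library is ", max_new_tokens=12, stream=True
    ):
        print(token, end="")


asyncio.run(main())
```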
@@ -2171,7 +2249,7 @@ class AsyncInferenceClient:
  If input values are not valid. No HTTP call is made to the server.
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2361,9 +2439,9 @@ class AsyncInferenceClient:
  # Handle errors separately for more precise error messages
  try:
  bytes_output = await self._inner_post(request_parameters, stream=stream or False)
- except _import_aiohttp().ClientResponseError as e:
- match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
- if e.status == 400 and match:
+ except HfHubHTTPError as e:
+ match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
+ if isinstance(e, BadRequestError) and match:
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
  _set_unsupported_text_generation_kwargs(model, unused_params)
  return await self.text_generation( # type: ignore
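Since errors now surface as [`HfHubHTTPError`] subclasses (with `BadRequestError` covering HTTP 400) rather than `aiohttp.ClientResponseError`, caller-side handling can follow a sketch like this (prompt and parameters are illustrative):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient
from huggingface_hub.errors import BadRequestError, HfHubHTTPError


async def main() -> None:
    client = AsyncInferenceClient()
    try:
        text = await client.text_generation("Once upon a time", max_new_tokens=20)
        print(text)
    except BadRequestError as e:
        # Invalid payload (HTTP 400), e.g. a parameter the deployed model does not support
        print(f"Bad request: {e}")
    except HfHubHTTPError as e:
        # Any other non-503 HTTP error returned by the server
        print(f"Request failed: {e}")


asyncio.run(main())
```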
@@ -2416,7 +2494,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  scheduler: Optional[str] = None,
  seed: Optional[int] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> "Image":
  """
  Generate an image based on a given text using a specified model.
@@ -2454,7 +2532,7 @@ class AsyncInferenceClient:
  Override the scheduler with a compatible one.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.

@@ -2464,7 +2542,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
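A minimal sketch of calling `text_to_image` with the `extra_body` passthrough; the extra field name is hypothetical and depends entirely on the selected provider:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    image = await client.text_to_image(
        "An astronaut riding a horse on the moon",
        seed=42,
        # Hypothetical provider-specific field, forwarded verbatim via extra_body
        extra_body={"output_quality": "high"},
    )
    image.save("astronaut.png")  # the call returns a PIL.Image.Image


asyncio.run(main())
```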
@@ -2554,11 +2632,11 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  guidance_scale: Optional[float] = None,
- negative_prompt: Optional[List[str]] = None,
+ negative_prompt: Optional[list[str]] = None,
  num_frames: Optional[float] = None,
  num_inference_steps: Optional[int] = None,
  seed: Optional[int] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Generate a video based on a given text.
@@ -2577,7 +2655,7 @@ class AsyncInferenceClient:
  guidance_scale (`float`, *optional*):
  A higher guidance scale value encourages the model to generate videos closely linked to the text
  prompt, but values too high may cause saturation and other artifacts.
- negative_prompt (`List[str]`, *optional*):
+ negative_prompt (`list[str]`, *optional*):
  One or several prompt to guide what NOT to include in video generation.
  num_frames (`float`, *optional*):
  The num_frames parameter determines how many video frames are generated.
@@ -2586,7 +2664,7 @@ class AsyncInferenceClient:
  expense of slower inference.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.

@@ -2666,7 +2744,7 @@ class AsyncInferenceClient:
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.
@@ -2728,7 +2806,7 @@ class AsyncInferenceClient:
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -2737,7 +2815,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
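For reference, a minimal async `text_to_speech` call; the output file name is illustrative and the audio format depends on the serving model:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    audio: bytes = await client.text_to_speech("Hello world")
    with open("speech.flac", "wb") as f:  # container/codec depends on the model used
        f.write(audio)


asyncio.run(main())
```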
@@ -2861,9 +2939,9 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2877,18 +2955,18 @@ class AsyncInferenceClient:
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str`, *optional*):
+ ignore_labels (`list[str`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.

  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
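A minimal sketch of the async `token_classification` call; the model ID is illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    # Returns list[TokenClassificationOutputElement] with entity group, score, word, start and end
    entities = await client.token_classification(
        "My name is Sarah Jessica Parker but you can call me Jessica",
        model="dslim/bert-base-NER",
    )
    for entity in entities:
        print(entity.entity_group, entity.word, round(entity.score, 3))


asyncio.run(main())
```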
@@ -2940,7 +3018,7 @@ class AsyncInferenceClient:
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -2965,7 +3043,7 @@ class AsyncInferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.

  Returns:
@@ -2974,7 +3052,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
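A minimal sketch of the async `translation` call with paired `src_lang`/`tgt_lang`; the model ID and language codes are illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    # src_lang and tgt_lang must be provided together (or omitted together)
    result = await client.translation(
        "My name is Wolfgang and I live in Berlin",
        model="facebook/nllb-200-distilled-600M",
        src_lang="eng_Latn",
        tgt_lang="fra_Latn",
    )
    print(result.translation_text)


asyncio.run(main())
```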
@@ -3028,7 +3106,7 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.

@@ -3045,12 +3123,12 @@ class AsyncInferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
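A minimal sketch of the async `visual_question_answering` call; the image URL is illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    # The image can be raw bytes, a local path or a URL
    answers = await client.visual_question_answering(
        image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
        question="What animal is shown in the picture?",
        top_k=3,
    )
    for answer in answers:
        print(answer)  # each item carries the predicted answer and its score


asyncio.run(main())
```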
@@ -3084,21 +3162,21 @@ class AsyncInferenceClient:
  async def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3113,12 +3191,12 @@ class AsyncInferenceClient:


  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
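A minimal sketch of the async `zero_shot_classification` call with the builtin-typed `candidate_labels`; text, labels and model ID are illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_classification(
        "A new model was released on the Hugging Face Hub today.",
        candidate_labels=["technology", "sports", "politics"],
        multi_label=False,
        model="facebook/bart-large-mnli",
    )
    for item in results:
        print(item.label, round(item.score, 3))


asyncio.run(main())
```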
@@ -3192,22 +3270,22 @@ class AsyncInferenceClient:
  async def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3217,12 +3295,12 @@ class AsyncInferenceClient:
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
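A minimal sketch of the async `zero_shot_image_classification` call; the image URL and labels are illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_image_classification(
        "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
        candidate_labels=["tiger", "zebra", "house cat"],
    )
    for item in results:
        print(item.label, round(item.score, 3))


asyncio.run(main())
```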
@@ -3257,144 +3335,7 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
- ),
- )
- async def list_deployed_models(
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
- ) -> Dict[str, List[str]]:
- """
- List models deployed on the HF Serverless Inference API service.
-
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
- frameworks are checked, the more time it will take.
-
- <Tip warning={true}>
-
- This endpoint method does not return a live list of all models available for the HF Inference API service.
- It searches over a cached list of models that were recently available and the list may not be up to date.
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- <Tip>
-
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- Args:
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
- custom set of frameworks to check.
-
- Returns:
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
- Example:
- ```py
- # Must be run in an async context
- >>> from huggingface_hub import AsyncInferenceClient
- >>> client = AsyncInferenceClient()
-
- # Discover zero-shot-classification models currently deployed
- >>> models = await client.list_deployed_models()
- >>> models["zero-shot-classification"]
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
- # List from only 1 framework
- >>> await client.list_deployed_models("text-generation-inference")
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
- # Resolve which frameworks to check
- if frameworks is None:
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
- elif frameworks == "all":
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
- elif isinstance(frameworks, str):
- frameworks = [frameworks]
- frameworks = list(set(frameworks))
-
- # Fetch them iteratively
- models_by_task: Dict[str, List[str]] = {}
-
- def _unpack_response(framework: str, items: List[Dict]) -> None:
- for model in items:
- if framework == "sentence-transformers":
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
- # branded as such in the API response
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
- else:
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
- for framework in frameworks:
- response = get_session().get(
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
- )
- hf_raise_for_status(response)
- _unpack_response(framework, response.json())
-
- # Sort alphabetically for discoverability and return
- for task, models in models_by_task.items():
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
- return models_by_task
-
- def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
- aiohttp = _import_aiohttp()
- client_headers = self.headers.copy()
- if headers is not None:
- client_headers.update(headers)
-
- # Return a new aiohttp ClientSession with correct settings.
- session = aiohttp.ClientSession(
- headers=client_headers,
- cookies=self.cookies,
- timeout=aiohttp.ClientTimeout(self.timeout),
- trust_env=self.trust_env,
- )
-
- # Keep track of sessions to close them later
- self._sessions[session] = set()
-
- # Override the `._request` method to register responses to be closed
- session._wrapped_request = session._request
-
- async def _request(method, url, **kwargs):
- response = await session._wrapped_request(method, url, **kwargs)
- self._sessions[session].add(response)
- return response
-
- session._request = _request
-
- # Override the 'close' method to
- # 1. close ongoing responses
- # 2. deregister the session when closed
- session._close = session.close
-
- async def close_session():
- for response in self._sessions[session]:
- response.close()
- await session._close()
- self._sessions.pop(session, None)
-
- session.close = close_session
- return session
-
- async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.

@@ -3407,7 +3348,7 @@ class AsyncInferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py
@@ -3449,17 +3390,16 @@ class AsyncInferenceClient:
  else:
  url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- return await response.json()
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ hf_raise_for_status(response)
+ return response.json()

  async def health_check(self, model: Optional[str] = None) -> bool:
  """
  Check the health of the deployed endpoint.

  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
- For Inference API, please use [`InferenceClient.get_model_status`] instead.

  Args:
  model (`str`, *optional*):
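A minimal sketch of `get_endpoint_info` and `health_check` against a TGI/TEI-powered Inference Endpoint; the endpoint URL is a placeholder and the returned fields depend on the server:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # Placeholder Inference Endpoint URL; health_check requires a URL, not a model ID
    client = AsyncInferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")
    info = await client.get_endpoint_info()  # dict[str, Any] with endpoint metadata
    print(info)
    print(await client.health_check())  # True if the endpoint answers 200 on /health


asyncio.run(main())
```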
@@ -3484,77 +3424,12 @@ class AsyncInferenceClient:
  if model is None:
  raise ValueError("Model id not provided.")
  if not model.startswith(("http://", "https://")):
- raise ValueError(
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
- )
+ raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"

- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- return response.status == 200
-
- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
- ),
- )
- async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
- """
- Get the status of a model hosted on the HF Inference API.
-
- <Tip>
-
- This endpoint is mostly useful when you already know which model you want to use and want to check its
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
- </Tip>
-
- Args:
- model (`str`, *optional*):
- Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
- identifier cannot be a URL.
-
-
- Returns:
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
- about the state of the model: load, state, compute type and framework.
-
- Example:
- ```py
- # Must be run in an async context
- >>> from huggingface_hub import AsyncInferenceClient
- >>> client = AsyncInferenceClient()
- >>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
- model = model or self.model
- if model is None:
- raise ValueError("Model id not provided.")
- if model.startswith("https://"):
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- response_data = await response.json()
-
- if "error" in response_data:
- raise ValueError(response_data["error"])
-
- return ModelStatus(
- loaded=response_data["loaded"],
- state=response_data["state"],
- compute_type=response_data["compute_type"],
- framework=response_data["framework"],
- )
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ return response.status_code == 200

  @property
  def chat(self) -> "ProxyClientChat":
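The removed `list_deployed_models` and `get_model_status` helpers pointed, in their deprecation messages, to `HfApi.list_models(..., inference_provider='...')` and `HfApi.model_info` as replacements. A hedged sketch of that migration path (the `expand=["inference"]` argument is an assumption about the current `HfApi` surface):

```py
from huggingface_hub import HfApi

api = HfApi()

# Instead of list_deployed_models(): list warm models served by a given provider
for model in api.list_models(inference_provider="hf-inference", task="text-classification", limit=5):
    print(model.id)

# Instead of get_model_status(): model_info exposes the inference status of a model
# (expand=["inference"] is assumed here; it requests the warm/cold status field)
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)
```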