huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of huggingface-hub might be problematic.

Files changed (125)
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +167 -10
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +13 -14
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/_cli_utils.py +2 -2
  15. huggingface_hub/cli/auth.py +5 -6
  16. huggingface_hub/cli/cache.py +14 -20
  17. huggingface_hub/cli/download.py +4 -4
  18. huggingface_hub/cli/jobs.py +560 -11
  19. huggingface_hub/cli/lfs.py +4 -4
  20. huggingface_hub/cli/repo.py +7 -7
  21. huggingface_hub/cli/repo_files.py +2 -2
  22. huggingface_hub/cli/upload.py +4 -4
  23. huggingface_hub/cli/upload_large_folder.py +3 -3
  24. huggingface_hub/commands/_cli_utils.py +2 -2
  25. huggingface_hub/commands/delete_cache.py +13 -13
  26. huggingface_hub/commands/download.py +4 -13
  27. huggingface_hub/commands/lfs.py +4 -4
  28. huggingface_hub/commands/repo_files.py +2 -2
  29. huggingface_hub/commands/scan_cache.py +1 -1
  30. huggingface_hub/commands/tag.py +1 -3
  31. huggingface_hub/commands/upload.py +4 -4
  32. huggingface_hub/commands/upload_large_folder.py +3 -3
  33. huggingface_hub/commands/user.py +5 -6
  34. huggingface_hub/community.py +5 -5
  35. huggingface_hub/constants.py +3 -41
  36. huggingface_hub/dataclasses.py +16 -19
  37. huggingface_hub/errors.py +42 -29
  38. huggingface_hub/fastai_utils.py +8 -9
  39. huggingface_hub/file_download.py +153 -252
  40. huggingface_hub/hf_api.py +815 -600
  41. huggingface_hub/hf_file_system.py +98 -62
  42. huggingface_hub/hub_mixin.py +37 -57
  43. huggingface_hub/inference/_client.py +177 -325
  44. huggingface_hub/inference/_common.py +110 -124
  45. huggingface_hub/inference/_generated/_async_client.py +226 -432
  46. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  47. huggingface_hub/inference/_generated/types/base.py +10 -7
  48. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  49. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  50. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  51. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  52. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  53. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  54. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  55. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  56. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  57. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  58. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  59. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/translation.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  64. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  65. huggingface_hub/inference/_mcp/agent.py +3 -3
  66. huggingface_hub/inference/_mcp/cli.py +1 -1
  67. huggingface_hub/inference/_mcp/constants.py +2 -3
  68. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  69. huggingface_hub/inference/_mcp/types.py +10 -7
  70. huggingface_hub/inference/_mcp/utils.py +11 -7
  71. huggingface_hub/inference/_providers/__init__.py +2 -2
  72. huggingface_hub/inference/_providers/_common.py +49 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +20 -31
  87. huggingface_hub/repocard.py +18 -18
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +4 -4
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +366 -208
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +25 -5
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  115. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  116. huggingface_hub/inference_api.py +0 -217
  117. huggingface_hub/keras_mixin.py +0 -500
  118. huggingface_hub/repository.py +0 -1477
  119. huggingface_hub/serialization/_tensorflow.py +0 -95
  120. huggingface_hub/utils/_hf_folder.py +0 -68
  121. huggingface_hub-0.34.4.dist-info/RECORD +0 -166
  122. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  123. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  124. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  125. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
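
The largest functional change in the diff below (huggingface_hub/inference/_generated/_async_client.py) is that AsyncInferenceClient drops its aiohttp-based session handling, including the trust_env and proxies constructor arguments, in favour of a single httpx.AsyncClient managed through an AsyncExitStack, with `async with` support and an explicit `close()`. A minimal usage sketch of calling code against 1.0.0rc0, assuming the new lifecycle shown in the diff; the model ID and prompt are illustrative, not part of the diff:

    import asyncio

    from huggingface_hub import AsyncInferenceClient


    async def main() -> None:
        # The client now owns a single httpx.AsyncClient; leaving the context
        # manager (or calling `await client.close()`) releases the connection pool.
        async with AsyncInferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
            output = await client.chat_completion(
                messages=[{"role": "user", "content": "Say hello in one word."}],
                max_tokens=8,
            )
            print(output.choices[0].message.content)


    asyncio.run(main())
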
@@ -21,16 +21,19 @@
  import asyncio
  import base64
  import logging
+ import os
  import re
  import warnings
- from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload
+ from contextlib import AsyncExitStack
+ from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
+
+ import httpx

  from huggingface_hub import constants
- from huggingface_hub.errors import InferenceTimeoutError
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
  from huggingface_hub.inference._common import (
  TASKS_EXPECTING_IMAGES,
  ContentT,
- ModelStatus,
  RequestParameters,
  _async_stream_chat_completion_response,
  _async_stream_text_generation_response,
@@ -41,7 +44,6 @@ from huggingface_hub.inference._common import (
  _bytes_to_list,
  _get_unsupported_text_generation_kwargs,
  _import_numpy,
- _open_as_binary,
  _set_unsupported_text_generation_kwargs,
  raise_text_generation_error,
  )
@@ -88,16 +90,19 @@ from huggingface_hub.inference._generated.types import (
  ZeroShotImageClassificationOutputElement,
  )
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+ from huggingface_hub.utils import (
+ build_hf_headers,
+ get_async_session,
+ hf_raise_for_status,
+ validate_hf_hub_args,
+ )
  from huggingface_hub.utils._auth import get_token
- from huggingface_hub.utils._deprecation import _deprecate_method

- from .._common import _async_yield_from, _import_aiohttp
+ from .._common import _async_yield_from


  if TYPE_CHECKING:
  import numpy as np
- from aiohttp import ClientResponse, ClientSession
  from PIL.Image import Image

  logger = logging.getLogger(__name__)
@@ -130,18 +135,14 @@ class AsyncInferenceClient:
  arguments are mutually exclusive and have the exact same behavior.
  timeout (`float`, `optional`):
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
- headers (`Dict[str, str]`, `optional`):
+ headers (`dict[str, str]`, `optional`):
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
  Values in this dictionary will override the default values.
  bill_to (`str`, `optional`):
  The billing account to use for the requests. By default the requests are billed on the user's account.
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
- cookies (`Dict[str, str]`, `optional`):
+ cookies (`dict[str, str]`, `optional`):
  Additional cookies to send to the server.
- trust_env ('bool', 'optional'):
- Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
- proxies (`Any`, `optional`):
- Proxies to use for the request.
  base_url (`str`, `optional`):
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -150,6 +151,7 @@ class AsyncInferenceClient:
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
  """

+ @validate_hf_hub_args
  def __init__(
  self,
  model: Optional[str] = None,
@@ -157,10 +159,8 @@ class AsyncInferenceClient:
  provider: Optional[PROVIDER_OR_POLICY_T] = None,
  token: Optional[str] = None,
  timeout: Optional[float] = None,
- headers: Optional[Dict[str, str]] = None,
- cookies: Optional[Dict[str, str]] = None,
- trust_env: bool = False,
- proxies: Optional[Any] = None,
+ headers: Optional[dict[str, str]] = None,
+ cookies: Optional[dict[str, str]] = None,
  bill_to: Optional[str] = None,
  # OpenAI compatibility
  base_url: Optional[str] = None,
@@ -222,15 +222,36 @@ class AsyncInferenceClient:

  self.cookies = cookies
  self.timeout = timeout
- self.trust_env = trust_env
- self.proxies = proxies

- # Keep track of the sessions to close them properly
- self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()
+ self.exit_stack = AsyncExitStack()
+ self._async_client: Optional[httpx.AsyncClient] = None

  def __repr__(self):
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+ async def __aenter__(self):
+ return self
+
+ async def __aexit__(self, exc_type, exc_value, traceback):
+ await self.close()
+
+ async def close(self):
+ """Close the client.
+
+ This method is automatically called when using the client as a context manager.
+ """
+ await self.exit_stack.aclose()
+
+ async def _get_async_client(self):
+ """Get a unique async client for this AsyncInferenceClient instance.
+
+ Returns the same client instance on subsequent calls, ensuring proper
+ connection reuse and resource management through the exit stack.
+ """
+ if self._async_client is None:
+ self._async_client = await self.exit_stack.enter_async_context(get_async_session())
+ return self._async_client
+
  @overload
  async def _inner_post( # type: ignore[misc]
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
@@ -239,83 +260,59 @@ class AsyncInferenceClient:
  @overload
  async def _inner_post( # type: ignore[misc]
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
- ) -> AsyncIterable[bytes]: ...
+ ) -> AsyncIterable[str]: ...

  @overload
  async def _inner_post(
  self, request_parameters: RequestParameters, *, stream: bool = False
- ) -> Union[bytes, AsyncIterable[bytes]]: ...
+ ) -> Union[bytes, AsyncIterable[str]]: ...

  async def _inner_post(
  self, request_parameters: RequestParameters, *, stream: bool = False
- ) -> Union[bytes, AsyncIterable[bytes]]:
+ ) -> Union[bytes, AsyncIterable[str]]:
  """Make a request to the inference server."""

- aiohttp = _import_aiohttp()
-
  # TODO: this should be handled in provider helpers directly
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
  request_parameters.headers["Accept"] = "image/png"

- with _open_as_binary(request_parameters.data) as data_as_binary:
- # Do not use context manager as we don't want to close the connection immediately when returning
- # a stream
- session = self._get_client_session(headers=request_parameters.headers)
-
- try:
- response = await session.post(
- request_parameters.url, json=request_parameters.json, data=data_as_binary, proxy=self.proxies
+ try:
+ client = await self._get_async_client()
+ if stream:
+ response = await self.exit_stack.enter_async_context(
+ client.stream(
+ "POST",
+ request_parameters.url,
+ json=request_parameters.json,
+ data=request_parameters.data,
+ headers=request_parameters.headers,
+ cookies=self.cookies,
+ timeout=self.timeout,
+ )
  )
- response_error_payload = None
- if response.status != 200:
- try:
- response_error_payload = await response.json() # get payload before connection closed
- except Exception:
- pass
- response.raise_for_status()
- if stream:
- return _async_yield_from(session, response)
- else:
- content = await response.read()
- await session.close()
- return content
- except asyncio.TimeoutError as error:
- await session.close()
- # Convert any `TimeoutError` to a `InferenceTimeoutError`
- raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
- except aiohttp.ClientResponseError as error:
- error.response_error_payload = response_error_payload
- await session.close()
- raise error
- except Exception:
- await session.close()
- raise
-
- async def __aenter__(self):
- return self
-
- async def __aexit__(self, exc_type, exc_value, traceback):
- await self.close()
-
- def __del__(self):
- if len(self._sessions) > 0:
- warnings.warn(
- "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
- "This can happen if you've stopped streaming data from the server before the stream was complete. "
- "To close the client properly, you must call `await client.close()` "
- "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
- )
-
- async def close(self):
- """Close all open sessions.
-
- By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
- are streaming data from the server and you stop before the stream is complete, you must call this method to
- close the session properly.
-
- Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
- """
- await asyncio.gather(*[session.close() for session in self._sessions.keys()])
+ hf_raise_for_status(response)
+ return _async_yield_from(client, response)
+ else:
+ response = await client.post(
+ request_parameters.url,
+ json=request_parameters.json,
+ data=request_parameters.data,
+ headers=request_parameters.headers,
+ cookies=self.cookies,
+ timeout=self.timeout,
+ )
+ hf_raise_for_status(response)
+ return response.content
+ except asyncio.TimeoutError as error:
+ # Convert any `TimeoutError` to a `InferenceTimeoutError`
+ raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
+ except HfHubHTTPError as error:
+ if error.response.status_code == 422 and request_parameters.task != "unknown":
+ msg = str(error.args[0])
+ if len(error.response.text) > 0:
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
+ error.args = (msg,) + error.args[1:]
+ raise

  async def audio_classification(
  self,
@@ -324,7 +321,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  top_k: Optional[int] = None,
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
- ) -> List[AudioClassificationOutputElement]:
+ ) -> list[AudioClassificationOutputElement]:
  """
  Perform audio classification on the provided audio content.

@@ -342,12 +339,12 @@ class AsyncInferenceClient:
  The function to apply to the model outputs in order to retrieve the scores.

  Returns:
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -380,7 +377,7 @@ class AsyncInferenceClient:
  audio: ContentT,
  *,
  model: Optional[str] = None,
- ) -> List[AudioToAudioOutputElement]:
+ ) -> list[AudioToAudioOutputElement]:
  """
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -394,12 +391,12 @@ class AsyncInferenceClient:
  audio_to_audio will be used.

  Returns:
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -433,7 +430,7 @@ class AsyncInferenceClient:
  audio: ContentT,
  *,
  model: Optional[str] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> AutomaticSpeechRecognitionOutput:
  """
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -444,7 +441,7 @@ class AsyncInferenceClient:
  model (`str`, *optional*):
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
- extra_body (`Dict`, *optional*):
+ extra_body (`dict`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -453,7 +450,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -480,105 +477,105 @@ class AsyncInferenceClient:
  @overload
  async def chat_completion( # type: ignore
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: Literal[False] = False,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> ChatCompletionOutput: ...

  @overload
  async def chat_completion( # type: ignore
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: Literal[True] = True,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> AsyncIterable[ChatCompletionStreamOutput]: ...

  @overload
  async def chat_completion(
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: bool = False,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...

  async def chat_completion(
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: bool = False,
  # Parameters from ChatCompletionInput (handled manually)
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
  """
  A method for completing conversations using a specified language model.
@@ -608,7 +605,7 @@ class AsyncInferenceClient:
  frequency_penalty (`float`, *optional*):
  Penalizes new tokens based on their existing frequency
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
- logit_bias (`List[float]`, *optional*):
+ logit_bias (`list[float]`, *optional*):
  Adjusts the likelihood of specific tokens appearing in the generated output.
  logprobs (`bool`, *optional*):
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -624,7 +621,7 @@ class AsyncInferenceClient:
  Grammar constraints. Can be either a JSONSchema or a regex.
  seed (Optional[`int`], *optional*):
  Seed for reproducible control flow. Defaults to None.
- stop (`List[str]`, *optional*):
+ stop (`list[str]`, *optional*):
  Up to four strings which trigger the end of the response.
  Defaults to None.
  stream (`bool`, *optional*):
@@ -648,7 +645,7 @@ class AsyncInferenceClient:
  tools (List of [`ChatCompletionInputTool`], *optional*):
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
  provide a list of functions the model may generate JSON inputs for.
- extra_body (`Dict`, *optional*):
+ extra_body (`dict`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -660,7 +657,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -980,8 +977,8 @@ class AsyncInferenceClient:
  max_question_len: Optional[int] = None,
  max_seq_len: Optional[int] = None,
  top_k: Optional[int] = None,
- word_boxes: Optional[List[Union[List[float], str]]] = None,
- ) -> List[DocumentQuestionAnsweringOutputElement]:
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
  """
  Answer questions on document images.

@@ -1011,16 +1008,16 @@ class AsyncInferenceClient:
  top_k (`int`, *optional*):
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
  answers if there are not enough options available within the context.
- word_boxes (`List[Union[List[float], str`, *optional*):
+ word_boxes (`list[Union[list[float], str`, *optional*):
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
  step and use the provided bounding boxes instead.
  Returns:
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.


@@ -1035,7 +1032,7 @@ class AsyncInferenceClient:
  """
  model_id = model or self.model
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
  request_parameters = provider_helper.prepare_request(
  inputs=inputs,
  parameters={
@@ -1096,7 +1093,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1134,9 +1131,9 @@ class AsyncInferenceClient:
  text: str,
  *,
  model: Optional[str] = None,
- targets: Optional[List[str]] = None,
+ targets: Optional[list[str]] = None,
  top_k: Optional[int] = None,
- ) -> List[FillMaskOutputElement]:
+ ) -> list[FillMaskOutputElement]:
  """
  Fill in a hole with a missing word (token to be precise).

@@ -1146,20 +1143,20 @@ class AsyncInferenceClient:
  model (`str`, *optional*):
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
- targets (`List[str`, *optional*):
+ targets (`list[str`, *optional*):
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
  resulting token will be used (with a warning, and that might be slower).
  top_k (`int`, *optional*):
  When passed, overrides the number of predictions to return.
  Returns:
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
  probability, token reference, and completed text.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1193,7 +1190,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
  top_k: Optional[int] = None,
- ) -> List[ImageClassificationOutputElement]:
+ ) -> list[ImageClassificationOutputElement]:
  """
  Perform image classification on the given image using the specified model.

@@ -1208,12 +1205,12 @@ class AsyncInferenceClient:
  top_k (`int`, *optional*):
  When specified, limits the output to the top K most probable classes.
  Returns:
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1246,7 +1243,7 @@ class AsyncInferenceClient:
  overlap_mask_area_threshold: Optional[float] = None,
  subtask: Optional["ImageSegmentationSubtask"] = None,
  threshold: Optional[float] = None,
- ) -> List[ImageSegmentationOutputElement]:
+ ) -> list[ImageSegmentationOutputElement]:
  """
  Perform image segmentation on the given image using the specified model.

@@ -1271,12 +1268,12 @@ class AsyncInferenceClient:
  threshold (`float`, *optional*):
  Probability threshold to filter out predicted masks.
  Returns:
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1354,7 +1351,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1486,7 +1483,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1510,12 +1507,12 @@ class AsyncInferenceClient:
  api_key=self.token,
  )
  response = await self._inner_post(request_parameters)
- output = ImageToTextOutput.parse_obj(response)
- return output[0] if isinstance(output, list) else output
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+ return output_list[0]

  async def object_detection(
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
- ) -> List[ObjectDetectionOutputElement]:
+ ) -> list[ObjectDetectionOutputElement]:
  """
  Perform object detection on the given image using the specified model.

@@ -1534,12 +1531,12 @@ class AsyncInferenceClient:
  threshold (`float`, *optional*):
  The probability necessary to make a prediction.
  Returns:
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If the request output is not a List.
@@ -1578,7 +1575,7 @@ class AsyncInferenceClient:
  max_question_len: Optional[int] = None,
  max_seq_len: Optional[int] = None,
  top_k: Optional[int] = None,
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
  """
  Retrieve the answer to a question from a given text.

@@ -1610,13 +1607,13 @@ class AsyncInferenceClient:
  topk answers if there are not enough options available within the context.

  Returns:
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1651,15 +1648,15 @@ class AsyncInferenceClient:
  return output

  async def sentence_similarity(
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
- ) -> List[float]:
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+ ) -> list[float]:
  """
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

  Args:
  sentence (`str`):
  The main sentence to compare to others.
- other_sentences (`List[str]`):
+ other_sentences (`list[str]`):
  The list of sentences to compare to.
  model (`str`, *optional*):
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1667,12 +1664,12 @@ class AsyncInferenceClient:
  Defaults to None.

  Returns:
- `List[float]`: The embedding representing the input text.
+ `list[float]`: The embedding representing the input text.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1710,7 +1707,7 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  truncation: Optional["SummarizationTruncationStrategy"] = None,
  ) -> SummarizationOutput:
  """
@@ -1724,7 +1721,7 @@ class AsyncInferenceClient:
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
  clean_up_tokenization_spaces (`bool`, *optional*):
  Whether to clean up the potential extra spaces in the text output.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
@@ -1734,7 +1731,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1765,7 +1762,7 @@ class AsyncInferenceClient:

  async def table_question_answering(
  self,
- table: Dict[str, Any],
+ table: dict[str, Any],
  query: str,
  *,
  model: Optional[str] = None,
@@ -1800,7 +1797,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1826,12 +1823,12 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

- async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+ async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
  """
  Classifying a target category (a group) based on a set of attributes.

  Args:
- table (`Dict[str, Any]`):
+ table (`dict[str, Any]`):
  Set of attributes to classify.
  model (`str`, *optional*):
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1844,7 +1841,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1882,12 +1879,12 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return _bytes_to_list(response)

- async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+ async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
  """
  Predicting a numerical target value given a set of attributes/features in a table.

  Args:
- table (`Dict[str, Any]`):
+ table (`dict[str, Any]`):
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
  model (`str`, *optional*):
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1900,7 +1897,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1940,7 +1937,7 @@ class AsyncInferenceClient:
  model: Optional[str] = None,
  top_k: Optional[int] = None,
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
- ) -> List[TextClassificationOutputElement]:
+ ) -> list[TextClassificationOutputElement]:
  """
  Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1957,12 +1954,12 @@ class AsyncInferenceClient:
  The function to apply to the model outputs in order to retrieve the scores.

  Returns:
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2011,8 +2008,8 @@ class AsyncInferenceClient:
2011
2008
  repetition_penalty: Optional[float] = None,
2012
2009
  return_full_text: Optional[bool] = None,
2013
2010
  seed: Optional[int] = None,
2014
- stop: Optional[List[str]] = None,
2015
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2011
+ stop: Optional[list[str]] = None,
2012
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2016
2013
  temperature: Optional[float] = None,
2017
2014
  top_k: Optional[int] = None,
2018
2015
  top_n_tokens: Optional[int] = None,
@@ -2041,8 +2038,8 @@ class AsyncInferenceClient:
2041
2038
  repetition_penalty: Optional[float] = None,
2042
2039
  return_full_text: Optional[bool] = None,
2043
2040
  seed: Optional[int] = None,
2044
- stop: Optional[List[str]] = None,
2045
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2041
+ stop: Optional[list[str]] = None,
2042
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2046
2043
  temperature: Optional[float] = None,
2047
2044
  top_k: Optional[int] = None,
2048
2045
  top_n_tokens: Optional[int] = None,
@@ -2071,8 +2068,8 @@ class AsyncInferenceClient:
2071
2068
  repetition_penalty: Optional[float] = None,
2072
2069
  return_full_text: Optional[bool] = None, # Manual default value
2073
2070
  seed: Optional[int] = None,
2074
- stop: Optional[List[str]] = None,
2075
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2071
+ stop: Optional[list[str]] = None,
2072
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2076
2073
  temperature: Optional[float] = None,
2077
2074
  top_k: Optional[int] = None,
2078
2075
  top_n_tokens: Optional[int] = None,
@@ -2101,8 +2098,8 @@ class AsyncInferenceClient:
2101
2098
  repetition_penalty: Optional[float] = None,
2102
2099
  return_full_text: Optional[bool] = None,
2103
2100
  seed: Optional[int] = None,
2104
- stop: Optional[List[str]] = None,
2105
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2101
+ stop: Optional[list[str]] = None,
2102
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2106
2103
  temperature: Optional[float] = None,
2107
2104
  top_k: Optional[int] = None,
2108
2105
  top_n_tokens: Optional[int] = None,
@@ -2131,8 +2128,8 @@ class AsyncInferenceClient:
2131
2128
  repetition_penalty: Optional[float] = None,
2132
2129
  return_full_text: Optional[bool] = None,
2133
2130
  seed: Optional[int] = None,
2134
- stop: Optional[List[str]] = None,
2135
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2131
+ stop: Optional[list[str]] = None,
2132
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2136
2133
  temperature: Optional[float] = None,
2137
2134
  top_k: Optional[int] = None,
2138
2135
  top_n_tokens: Optional[int] = None,
@@ -2160,8 +2157,8 @@ class AsyncInferenceClient:
2160
2157
  repetition_penalty: Optional[float] = None,
2161
2158
  return_full_text: Optional[bool] = None,
2162
2159
  seed: Optional[int] = None,
2163
- stop: Optional[List[str]] = None,
2164
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2160
+ stop: Optional[list[str]] = None,
2161
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2165
2162
  temperature: Optional[float] = None,
2166
2163
  top_k: Optional[int] = None,
2167
2164
  top_n_tokens: Optional[int] = None,
@@ -2217,9 +2214,9 @@ class AsyncInferenceClient:
2217
2214
  Whether to prepend the prompt to the generated text
2218
2215
  seed (`int`, *optional*):
2219
2216
  Random sampling seed
2220
- stop (`List[str]`, *optional*):
2217
+ stop (`list[str]`, *optional*):
2221
2218
  Stop generating tokens if a member of `stop` is generated.
2222
- stop_sequences (`List[str]`, *optional*):
2219
+ stop_sequences (`list[str]`, *optional*):
2223
2220
  Deprecated argument. Use `stop` instead.
2224
2221
  temperature (`float`, *optional*):
2225
2222
  The value used to module the logits distribution.
@@ -2240,10 +2237,10 @@ class AsyncInferenceClient:
2240
2237
  Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
2241
2238
 
2242
2239
  Returns:
2243
- `Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]`:
2240
+ `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
2244
2241
  Generated text returned from the server:
2245
2242
  - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
2246
- - if `stream=True` and `details=False`, the generated text is returned token by token as a `Iterable[str]`
2243
+ - if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
2247
2244
  - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
2248
2245
  - if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
2249
2246
 
@@ -2252,7 +2249,7 @@ class AsyncInferenceClient:
2252
2249
  If input values are not valid. No HTTP call is made to the server.
2253
2250
  [`InferenceTimeoutError`]:
2254
2251
  If the model is unavailable or the request times out.
2255
- `aiohttp.ClientResponseError`:
2252
+ [`HfHubHTTPError`]:
2256
2253
  If the request fails with an HTTP error status code other than HTTP 503.
2257
2254
 
2258
2255
  Example:
@@ -2442,9 +2439,9 @@ class AsyncInferenceClient:
2442
2439
  # Handle errors separately for more precise error messages
2443
2440
  try:
2444
2441
  bytes_output = await self._inner_post(request_parameters, stream=stream or False)
2445
- except _import_aiohttp().ClientResponseError as e:
2446
- match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
2447
- if e.status == 400 and match:
2442
+ except HfHubHTTPError as e:
2443
+ match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
2444
+ if isinstance(e, BadRequestError) and match:
2448
2445
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
2449
2446
  _set_unsupported_text_generation_kwargs(model, unused_params)
2450
2447
  return await self.text_generation( # type: ignore
@@ -2497,7 +2494,7 @@ class AsyncInferenceClient:
2497
2494
  model: Optional[str] = None,
2498
2495
  scheduler: Optional[str] = None,
2499
2496
  seed: Optional[int] = None,
2500
- extra_body: Optional[Dict[str, Any]] = None,
2497
+ extra_body: Optional[dict[str, Any]] = None,
2501
2498
  ) -> "Image":
2502
2499
  """
2503
2500
  Generate an image based on a given text using a specified model.
@@ -2535,7 +2532,7 @@ class AsyncInferenceClient:
2535
2532
  Override the scheduler with a compatible one.
2536
2533
  seed (`int`, *optional*):
2537
2534
  Seed for the random number generator.
2538
- extra_body (`Dict[str, Any]`, *optional*):
2535
+ extra_body (`dict[str, Any]`, *optional*):
2539
2536
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2540
2537
  for supported parameters.
2541
2538
 
@@ -2545,7 +2542,7 @@ class AsyncInferenceClient:
2545
2542
  Raises:
2546
2543
  [`InferenceTimeoutError`]:
2547
2544
  If the model is unavailable or the request times out.
2548
- `aiohttp.ClientResponseError`:
2545
+ [`HfHubHTTPError`]:
2549
2546
  If the request fails with an HTTP error status code other than HTTP 503.
2550
2547
 
2551
2548
  Example:
@@ -2635,11 +2632,11 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  guidance_scale: Optional[float] = None,
- negative_prompt: Optional[List[str]] = None,
+ negative_prompt: Optional[list[str]] = None,
  num_frames: Optional[float] = None,
  num_inference_steps: Optional[int] = None,
  seed: Optional[int] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Generate a video based on a given text.
@@ -2658,7 +2655,7 @@ class AsyncInferenceClient:
  guidance_scale (`float`, *optional*):
  A higher guidance scale value encourages the model to generate videos closely linked to the text
  prompt, but values too high may cause saturation and other artifacts.
- negative_prompt (`List[str]`, *optional*):
+ negative_prompt (`list[str]`, *optional*):
  One or several prompt to guide what NOT to include in video generation.
  num_frames (`float`, *optional*):
  The num_frames parameter determines how many video frames are generated.
@@ -2667,7 +2664,7 @@ class AsyncInferenceClient:
  expense of slower inference.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
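Since the hunks above only touch the `text_to_video` signature and docstring, a minimal usage sketch may help; the prompt, frame count, and output path are placeholders, and the model is left to the configured provider:

```py
# Sketch only: text_to_video returns raw video bytes to be written to disk.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def make_video() -> None:
    client = AsyncInferenceClient()
    video = await client.text_to_video(
        "A red fox running through fresh snow",
        num_frames=49,  # frame count is model-dependent; 49 is a placeholder
        seed=0,
    )
    with open("fox.mp4", "wb") as f:
        f.write(video)
```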
 
@@ -2747,7 +2744,7 @@ class AsyncInferenceClient:
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.
@@ -2809,7 +2806,7 @@ class AsyncInferenceClient:
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -2818,7 +2815,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
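Again the hunk stops short of the docstring's example. A minimal sketch, assuming the configured model returns FLAC or WAV bytes (the output path is arbitrary):

```py
# Sketch only: write the synthesized audio bytes to a file.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def speak() -> None:
    client = AsyncInferenceClient()
    audio = await client.text_to_speech("Hello from the async client.")
    with open("hello.flac", "wb") as f:
        f.write(audio)
```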
@@ -2942,9 +2939,9 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2958,18 +2955,18 @@ class AsyncInferenceClient:
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str`, *optional*):
+ ignore_labels (`list[str`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.
 
  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
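For the `ignore_labels` parameter touched above, a hedged sketch; the `"O"` tag is the conventional non-entity label and is assumed here, not stated in this diff:

```py
# Sketch only: filter out non-entity tokens and keep (word, group, score) tuples.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def extract_entities(text: str):
    client = AsyncInferenceClient()
    elements = await client.token_classification(text, ignore_labels=["O"])
    return [(e.word, e.entity_group, e.score) for e in elements]
```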
@@ -3021,7 +3018,7 @@ class AsyncInferenceClient:
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -3046,7 +3043,7 @@ class AsyncInferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
 
  Returns:
@@ -3055,7 +3052,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
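The `ValueError` noted above fires when only one of `src_lang`/`tgt_lang` is given, so a sketch passing both; the model ID and NLLB language codes are placeholders:

```py
# Sketch only: translation with explicit source and target languages.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def translate(text: str) -> str:
    client = AsyncInferenceClient(model="facebook/nllb-200-distilled-600M")
    out = await client.translation(text, src_lang="eng_Latn", tgt_lang="fra_Latn")
    return out.translation_text
```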
@@ -3109,7 +3106,7 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.
 
@@ -3126,12 +3123,12 @@ class AsyncInferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
 
  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
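A hedged usage sketch of `visual_question_answering`; the image URL is a sample asset assumed for illustration, not part of this diff:

```py
# Sketch only: the image may be a path, URL, raw bytes, or PIL image.
# Must be run in an async context; the URL below is a placeholder sample asset.
from huggingface_hub import AsyncInferenceClient

async def ask_about_image():
    client = AsyncInferenceClient()
    answers = await client.visual_question_answering(
        image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
        question="What animal is shown?",
        top_k=2,
    )
    return [(a.answer, a.score) for a in answers]
```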
@@ -3165,21 +3162,21 @@ class AsyncInferenceClient:
  async def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.
 
  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3194,12 +3191,12 @@ class AsyncInferenceClient:
 
 
  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example with `multi_label=False`:
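A minimal sketch of `zero_shot_classification` with `multi_label=True`, where label scores are independent instead of summing to one; the candidate labels are placeholders:

```py
# Sketch only: independent per-label scores with multi_label=True.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def classify(text: str) -> dict:
    client = AsyncInferenceClient()
    results = await client.zero_shot_classification(
        text,
        candidate_labels=["billing", "technical issue", "feedback"],
        multi_label=True,
    )
    return {r.label: r.score for r in results}
```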
@@ -3273,22 +3270,22 @@ class AsyncInferenceClient:
  async def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.
 
  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3298,12 +3295,12 @@ class AsyncInferenceClient:
  replacing the placeholder with the candidate labels.
 
  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
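A matching sketch for `zero_shot_image_classification`; per the docstring above, at least two candidate labels are needed (labels and path are placeholders):

```py
# Sketch only: score an image against a small set of candidate labels.
# Must be run in an async context.
from huggingface_hub import AsyncInferenceClient

async def classify_image(path: str):
    client = AsyncInferenceClient()
    results = await client.zero_shot_image_classification(
        path,
        candidate_labels=["cat", "dog", "bird"],
    )
    return [(r.label, r.score) for r in results]
```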
@@ -3338,144 +3335,7 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
 
- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
- ),
- )
- async def list_deployed_models(
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
- ) -> Dict[str, List[str]]:
- """
- List models deployed on the HF Serverless Inference API service.
-
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
- frameworks are checked, the more time it will take.
-
- <Tip warning={true}>
-
- This endpoint method does not return a live list of all models available for the HF Inference API service.
- It searches over a cached list of models that were recently available and the list may not be up to date.
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- <Tip>
-
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- Args:
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
- custom set of frameworks to check.
-
- Returns:
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
- Example:
- ```py
- # Must be run in an async contextthon
- >>> from huggingface_hub import AsyncInferenceClient
- >>> client = AsyncInferenceClient()
-
- # Discover zero-shot-classification models currently deployed
- >>> models = await client.list_deployed_models()
- >>> models["zero-shot-classification"]
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
- # List from only 1 framework
- >>> await client.list_deployed_models("text-generation-inference")
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
- # Resolve which frameworks to check
- if frameworks is None:
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
- elif frameworks == "all":
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
- elif isinstance(frameworks, str):
- frameworks = [frameworks]
- frameworks = list(set(frameworks))
-
- # Fetch them iteratively
- models_by_task: Dict[str, List[str]] = {}
-
- def _unpack_response(framework: str, items: List[Dict]) -> None:
- for model in items:
- if framework == "sentence-transformers":
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
- # branded as such in the API response
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
- else:
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
- for framework in frameworks:
- response = get_session().get(
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
- )
- hf_raise_for_status(response)
- _unpack_response(framework, response.json())
-
- # Sort alphabetically for discoverability and return
- for task, models in models_by_task.items():
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
- return models_by_task
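The removed `list_deployed_models` helper pointed callers to `HfApi.list_models(..., inference_provider='...')` in its own deprecation message. A hedged migration sketch along those lines; the provider name, task, and limit are placeholders:

```py
# Sketch only: discover warm models per provider via HfApi instead of the
# removed AsyncInferenceClient.list_deployed_models helper.
from huggingface_hub import HfApi

api = HfApi()
for model in api.list_models(
    inference_provider="hf-inference",
    pipeline_tag="text-generation",
    limit=10,
):
    print(model.id)
```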
-
- def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
- aiohttp = _import_aiohttp()
- client_headers = self.headers.copy()
- if headers is not None:
- client_headers.update(headers)
-
- # Return a new aiohttp ClientSession with correct settings.
- session = aiohttp.ClientSession(
- headers=client_headers,
- cookies=self.cookies,
- timeout=aiohttp.ClientTimeout(self.timeout),
- trust_env=self.trust_env,
- )
-
- # Keep track of sessions to close them later
- self._sessions[session] = set()
-
- # Override the `._request` method to register responses to be closed
- session._wrapped_request = session._request
-
- async def _request(method, url, **kwargs):
- response = await session._wrapped_request(method, url, **kwargs)
- self._sessions[session].add(response)
- return response
-
- session._request = _request
-
- # Override the 'close' method to
- # 1. close ongoing responses
- # 2. deregister the session when closed
- session._close = session.close
-
- async def close_session():
- for response in self._sessions[session]:
- response.close()
- await session._close()
- self._sessions.pop(session, None)
-
- session.close = close_session
- return session
-
- async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.
 
@@ -3488,7 +3348,7 @@ class AsyncInferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
 
  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.
 
  Example:
  ```py
@@ -3530,17 +3390,16 @@ class AsyncInferenceClient:
  else:
  url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"
 
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- return await response.json()
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ hf_raise_for_status(response)
+ return response.json()
 
  async def health_check(self, model: Optional[str] = None) -> bool:
  """
  Check the health of the deployed endpoint.
 
  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
- For Inference API, please use [`InferenceClient.get_model_status`] instead.
 
  Args:
  model (`str`, *optional*):
@@ -3565,77 +3424,12 @@ class AsyncInferenceClient:
  if model is None:
  raise ValueError("Model id not provided.")
  if not model.startswith(("http://", "https://")):
- raise ValueError(
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
- )
+ raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"
 
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- return response.status == 200
-
- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
- ),
- )
- async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
- """
- Get the status of a model hosted on the HF Inference API.
-
- <Tip>
-
- This endpoint is mostly useful when you already know which model you want to use and want to check its
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
- </Tip>
-
- Args:
- model (`str`, *optional*):
- Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
- identifier cannot be a URL.
-
-
- Returns:
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
- about the state of the model: load, state, compute type and framework.
-
- Example:
- ```py
- # Must be run in an async context
- >>> from huggingface_hub import AsyncInferenceClient
- >>> client = AsyncInferenceClient()
- >>> await client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
- model = model or self.model
- if model is None:
- raise ValueError("Model id not provided.")
- if model.startswith("https://"):
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- response_data = await response.json()
-
- if "error" in response_data:
- raise ValueError(response_data["error"])
-
- return ModelStatus(
- loaded=response_data["loaded"],
- state=response_data["state"],
- compute_type=response_data["compute_type"],
- framework=response_data["framework"],
- )
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ return response.status_code == 200
 
  @property
  def chat(self) -> "ProxyClientChat":
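To close the loop on this hunk: `health_check` now goes through the shared async HTTP client and only accepts a dedicated Inference Endpoint URL, while the removed `get_model_status` pointed users to `HfApi.model_info` in its deprecation message. A hedged sketch of both paths; the endpoint URL is a placeholder, and `expand=["inference"]` is assumed to be the relevant `model_info` option:

```py
# Sketch only: health-check a dedicated endpoint and query model status via HfApi.
# Must be run in an async context; the endpoint URL below is a placeholder.
from huggingface_hub import AsyncInferenceClient, HfApi

async def check() -> None:
    client = AsyncInferenceClient()
    # health_check returns True on HTTP 200 from the endpoint's /health route.
    healthy = await client.health_check(model="https://my-endpoint.endpoints.huggingface.cloud")
    print("endpoint healthy:", healthy)

    # Replacement for the removed get_model_status, per its deprecation message.
    info = HfApi().model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
    print("inference status:", info.inference)
```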