huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (127)
  1. huggingface_hub/__init__.py +28 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +80 -3
  16. huggingface_hub/cli/auth.py +104 -150
  17. huggingface_hub/cli/cache.py +102 -126
  18. huggingface_hub/cli/download.py +93 -110
  19. huggingface_hub/cli/hf.py +37 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +158 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -212
  26. huggingface_hub/cli/upload_large_folder.py +90 -105
  27. huggingface_hub/commands/_cli_utils.py +2 -2
  28. huggingface_hub/commands/delete_cache.py +11 -11
  29. huggingface_hub/commands/download.py +4 -13
  30. huggingface_hub/commands/lfs.py +4 -4
  31. huggingface_hub/commands/repo_files.py +2 -2
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -22
  39. huggingface_hub/errors.py +43 -30
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +154 -253
  42. huggingface_hub/hf_api.py +329 -558
  43. huggingface_hub/hf_file_system.py +104 -62
  44. huggingface_hub/hub_mixin.py +32 -54
  45. huggingface_hub/inference/_client.py +178 -163
  46. huggingface_hub/inference/_common.py +38 -54
  47. huggingface_hub/inference/_generated/_async_client.py +219 -259
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/agent.py +3 -3
  67. huggingface_hub/inference/_mcp/constants.py +1 -2
  68. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  69. huggingface_hub/inference/_mcp/types.py +10 -10
  70. huggingface_hub/inference/_mcp/utils.py +4 -4
  71. huggingface_hub/inference/_providers/__init__.py +2 -13
  72. huggingface_hub/inference/_providers/_common.py +24 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +24 -33
  87. huggingface_hub/repocard.py +16 -17
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +3 -3
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +369 -209
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +3 -3
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
  115. huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
  116. huggingface_hub/inference/_providers/publicai.py +0 -6
  117. huggingface_hub/inference/_providers/scaleway.py +0 -28
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.1.dist-info/RECORD +0 -168
  124. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
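
The largest single change in this diff is the rewrite of `AsyncInferenceClient` in `huggingface_hub/inference/_generated/_async_client.py` (see the hunks below): the `aiohttp`-based session handling is replaced by a shared `httpx.AsyncClient` managed through an `AsyncExitStack`, the `trust_env`/`proxies` constructor arguments are dropped, and the client gains explicit `__aenter__`/`__aexit__`/`close()` support. A minimal usage sketch of the new surface, assuming only what the diff itself shows (the model ID is illustrative, not taken from the package):

    # Minimal sketch based on the __aenter__/__aexit__/close() methods added in this diff.
    import asyncio

    from huggingface_hub import AsyncInferenceClient


    async def main() -> None:
        # __aexit__ awaits close(), which drains the AsyncExitStack holding the shared
        # httpx.AsyncClient, so no manual session cleanup is needed.
        async with AsyncInferenceClient(timeout=30) as client:
            output = await client.chat_completion(
                messages=[{"role": "user", "content": "Hello!"}],
                model="meta-llama/Llama-3.1-8B-Instruct",  # illustrative model ID
                max_tokens=64,
            )
            print(output.choices[0].message.content)


    asyncio.run(main())
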
@@ -21,12 +21,16 @@
21
21
  import asyncio
22
22
  import base64
23
23
  import logging
24
+ import os
24
25
  import re
25
26
  import warnings
26
- from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload
27
+ from contextlib import AsyncExitStack
28
+ from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
29
+
30
+ import httpx
27
31
 
28
32
  from huggingface_hub import constants
29
- from huggingface_hub.errors import InferenceTimeoutError
33
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
30
34
  from huggingface_hub.inference._common import (
31
35
  TASKS_EXPECTING_IMAGES,
32
36
  ContentT,
@@ -86,15 +90,19 @@ from huggingface_hub.inference._generated.types import (
86
90
  ZeroShotImageClassificationOutputElement,
87
91
  )
88
92
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
89
- from huggingface_hub.utils import build_hf_headers
93
+ from huggingface_hub.utils import (
94
+ build_hf_headers,
95
+ get_async_session,
96
+ hf_raise_for_status,
97
+ validate_hf_hub_args,
98
+ )
90
99
  from huggingface_hub.utils._auth import get_token
91
100
 
92
- from .._common import _async_yield_from, _import_aiohttp
101
+ from .._common import _async_yield_from
93
102
 
94
103
 
95
104
  if TYPE_CHECKING:
96
105
  import numpy as np
97
- from aiohttp import ClientResponse, ClientSession
98
106
  from PIL.Image import Image
99
107
 
100
108
  logger = logging.getLogger(__name__)
@@ -118,7 +126,7 @@ class AsyncInferenceClient:
118
126
  Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
119
127
  arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
120
128
  provider (`str`, *optional*):
121
- Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `publicai`, `"replicate"`, `"sambanova"`, `"scaleway"` or `"together"`.
129
+ Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
122
130
  Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
123
131
  If model is a URL or `base_url` is passed, then `provider` is not used.
124
132
  token (`str`, *optional*):
@@ -127,18 +135,14 @@ class AsyncInferenceClient:
127
135
  arguments are mutually exclusive and have the exact same behavior.
128
136
  timeout (`float`, `optional`):
129
137
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
130
- headers (`Dict[str, str]`, `optional`):
138
+ headers (`dict[str, str]`, `optional`):
131
139
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
132
140
  Values in this dictionary will override the default values.
133
141
  bill_to (`str`, `optional`):
134
142
  The billing account to use for the requests. By default the requests are billed on the user's account.
135
143
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
136
- cookies (`Dict[str, str]`, `optional`):
144
+ cookies (`dict[str, str]`, `optional`):
137
145
  Additional cookies to send to the server.
138
- trust_env ('bool', 'optional'):
139
- Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
140
- proxies (`Any`, `optional`):
141
- Proxies to use for the request.
142
146
  base_url (`str`, `optional`):
143
147
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
144
148
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -147,6 +151,7 @@ class AsyncInferenceClient:
147
151
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
148
152
  """
149
153
 
154
+ @validate_hf_hub_args
150
155
  def __init__(
151
156
  self,
152
157
  model: Optional[str] = None,
@@ -154,10 +159,8 @@ class AsyncInferenceClient:
154
159
  provider: Optional[PROVIDER_OR_POLICY_T] = None,
155
160
  token: Optional[str] = None,
156
161
  timeout: Optional[float] = None,
157
- headers: Optional[Dict[str, str]] = None,
158
- cookies: Optional[Dict[str, str]] = None,
159
- trust_env: bool = False,
160
- proxies: Optional[Any] = None,
162
+ headers: Optional[dict[str, str]] = None,
163
+ cookies: Optional[dict[str, str]] = None,
161
164
  bill_to: Optional[str] = None,
162
165
  # OpenAI compatibility
163
166
  base_url: Optional[str] = None,
@@ -219,15 +222,36 @@ class AsyncInferenceClient:
219
222
 
220
223
  self.cookies = cookies
221
224
  self.timeout = timeout
222
- self.trust_env = trust_env
223
- self.proxies = proxies
224
225
 
225
- # Keep track of the sessions to close them properly
226
- self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()
226
+ self.exit_stack = AsyncExitStack()
227
+ self._async_client: Optional[httpx.AsyncClient] = None
227
228
 
228
229
  def __repr__(self):
229
230
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
230
231
 
232
+ async def __aenter__(self):
233
+ return self
234
+
235
+ async def __aexit__(self, exc_type, exc_value, traceback):
236
+ await self.close()
237
+
238
+ async def close(self):
239
+ """Close the client.
240
+
241
+ This method is automatically called when using the client as a context manager.
242
+ """
243
+ await self.exit_stack.aclose()
244
+
245
+ async def _get_async_client(self):
246
+ """Get a unique async client for this AsyncInferenceClient instance.
247
+
248
+ Returns the same client instance on subsequent calls, ensuring proper
249
+ connection reuse and resource management through the exit stack.
250
+ """
251
+ if self._async_client is None:
252
+ self._async_client = await self.exit_stack.enter_async_context(get_async_session())
253
+ return self._async_client
254
+
231
255
  @overload
232
256
  async def _inner_post( # type: ignore[misc]
233
257
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
@@ -236,83 +260,60 @@ class AsyncInferenceClient:
236
260
  @overload
237
261
  async def _inner_post( # type: ignore[misc]
238
262
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
239
- ) -> AsyncIterable[bytes]: ...
263
+ ) -> AsyncIterable[str]: ...
240
264
 
241
265
  @overload
242
266
  async def _inner_post(
243
267
  self, request_parameters: RequestParameters, *, stream: bool = False
244
- ) -> Union[bytes, AsyncIterable[bytes]]: ...
268
+ ) -> Union[bytes, AsyncIterable[str]]: ...
245
269
 
246
270
  async def _inner_post(
247
271
  self, request_parameters: RequestParameters, *, stream: bool = False
248
- ) -> Union[bytes, AsyncIterable[bytes]]:
272
+ ) -> Union[bytes, AsyncIterable[str]]:
249
273
  """Make a request to the inference server."""
250
274
 
251
- aiohttp = _import_aiohttp()
252
-
253
275
  # TODO: this should be handled in provider helpers directly
254
276
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
255
277
  request_parameters.headers["Accept"] = "image/png"
256
278
 
257
- # Do not use context manager as we don't want to close the connection immediately when returning
258
- # a stream
259
- session = self._get_client_session(headers=request_parameters.headers)
260
-
261
279
  try:
262
- response = await session.post(
263
- request_parameters.url, json=request_parameters.json, data=request_parameters.data, proxy=self.proxies
264
- )
265
- response_error_payload = None
266
- if response.status != 200:
267
- try:
268
- response_error_payload = await response.json() # get payload before connection closed
269
- except Exception:
270
- pass
271
- response.raise_for_status()
280
+ client = await self._get_async_client()
272
281
  if stream:
273
- return _async_yield_from(session, response)
282
+ response = await self.exit_stack.enter_async_context(
283
+ client.stream(
284
+ "POST",
285
+ request_parameters.url,
286
+ json=request_parameters.json,
287
+ data=request_parameters.data,
288
+ headers=request_parameters.headers,
289
+ cookies=self.cookies,
290
+ timeout=self.timeout,
291
+ )
292
+ )
293
+ hf_raise_for_status(response)
294
+ return _async_yield_from(client, response)
274
295
  else:
275
- content = await response.read()
276
- await session.close()
277
- return content
296
+ response = await client.post(
297
+ request_parameters.url,
298
+ json=request_parameters.json,
299
+ data=request_parameters.data,
300
+ headers=request_parameters.headers,
301
+ cookies=self.cookies,
302
+ timeout=self.timeout,
303
+ )
304
+ hf_raise_for_status(response)
305
+ return response.content
278
306
  except asyncio.TimeoutError as error:
279
- await session.close()
280
307
  # Convert any `TimeoutError` to a `InferenceTimeoutError`
281
308
  raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
282
- except aiohttp.ClientResponseError as error:
283
- error.response_error_payload = response_error_payload
284
- await session.close()
285
- raise error
286
- except Exception:
287
- await session.close()
309
+ except HfHubHTTPError as error:
310
+ if error.response.status_code == 422 and request_parameters.task != "unknown":
311
+ msg = str(error.args[0])
312
+ if len(error.response.text) > 0:
313
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
314
+ error.args = (msg,) + error.args[1:]
288
315
  raise
289
316
 
290
- async def __aenter__(self):
291
- return self
292
-
293
- async def __aexit__(self, exc_type, exc_value, traceback):
294
- await self.close()
295
-
296
- def __del__(self):
297
- if len(self._sessions) > 0:
298
- warnings.warn(
299
- "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
300
- "This can happen if you've stopped streaming data from the server before the stream was complete. "
301
- "To close the client properly, you must call `await client.close()` "
302
- "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
303
- )
304
-
305
- async def close(self):
306
- """Close all open sessions.
307
-
308
- By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
309
- are streaming data from the server and you stop before the stream is complete, you must call this method to
310
- close the session properly.
311
-
312
- Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
313
- """
314
- await asyncio.gather(*[session.close() for session in self._sessions.keys()])
315
-
316
317
  async def audio_classification(
317
318
  self,
318
319
  audio: ContentT,
@@ -320,7 +321,7 @@ class AsyncInferenceClient:
320
321
  model: Optional[str] = None,
321
322
  top_k: Optional[int] = None,
322
323
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
323
- ) -> List[AudioClassificationOutputElement]:
324
+ ) -> list[AudioClassificationOutputElement]:
324
325
  """
325
326
  Perform audio classification on the provided audio content.
326
327
 
@@ -338,12 +339,12 @@ class AsyncInferenceClient:
338
339
  The function to apply to the model outputs in order to retrieve the scores.
339
340
 
340
341
  Returns:
341
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
342
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
342
343
 
343
344
  Raises:
344
345
  [`InferenceTimeoutError`]:
345
346
  If the model is unavailable or the request times out.
346
- `aiohttp.ClientResponseError`:
347
+ [`HfHubHTTPError`]:
347
348
  If the request fails with an HTTP error status code other than HTTP 503.
348
349
 
349
350
  Example:
@@ -376,7 +377,7 @@ class AsyncInferenceClient:
376
377
  audio: ContentT,
377
378
  *,
378
379
  model: Optional[str] = None,
379
- ) -> List[AudioToAudioOutputElement]:
380
+ ) -> list[AudioToAudioOutputElement]:
380
381
  """
381
382
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
382
383
 
@@ -390,12 +391,12 @@ class AsyncInferenceClient:
390
391
  audio_to_audio will be used.
391
392
 
392
393
  Returns:
393
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
394
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
394
395
 
395
396
  Raises:
396
397
  `InferenceTimeoutError`:
397
398
  If the model is unavailable or the request times out.
398
- `aiohttp.ClientResponseError`:
399
+ [`HfHubHTTPError`]:
399
400
  If the request fails with an HTTP error status code other than HTTP 503.
400
401
 
401
402
  Example:
@@ -429,7 +430,7 @@ class AsyncInferenceClient:
429
430
  audio: ContentT,
430
431
  *,
431
432
  model: Optional[str] = None,
432
- extra_body: Optional[Dict] = None,
433
+ extra_body: Optional[dict] = None,
433
434
  ) -> AutomaticSpeechRecognitionOutput:
434
435
  """
435
436
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -440,7 +441,7 @@ class AsyncInferenceClient:
440
441
  model (`str`, *optional*):
441
442
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
442
443
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
443
- extra_body (`Dict`, *optional*):
444
+ extra_body (`dict`, *optional*):
444
445
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
445
446
  for supported parameters.
446
447
  Returns:
@@ -449,7 +450,7 @@ class AsyncInferenceClient:
449
450
  Raises:
450
451
  [`InferenceTimeoutError`]:
451
452
  If the model is unavailable or the request times out.
452
- `aiohttp.ClientResponseError`:
453
+ [`HfHubHTTPError`]:
453
454
  If the request fails with an HTTP error status code other than HTTP 503.
454
455
 
455
456
  Example:
@@ -476,105 +477,105 @@ class AsyncInferenceClient:
476
477
  @overload
477
478
  async def chat_completion( # type: ignore
478
479
  self,
479
- messages: List[Union[Dict, ChatCompletionInputMessage]],
480
+ messages: list[Union[dict, ChatCompletionInputMessage]],
480
481
  *,
481
482
  model: Optional[str] = None,
482
483
  stream: Literal[False] = False,
483
484
  frequency_penalty: Optional[float] = None,
484
- logit_bias: Optional[List[float]] = None,
485
+ logit_bias: Optional[list[float]] = None,
485
486
  logprobs: Optional[bool] = None,
486
487
  max_tokens: Optional[int] = None,
487
488
  n: Optional[int] = None,
488
489
  presence_penalty: Optional[float] = None,
489
490
  response_format: Optional[ChatCompletionInputGrammarType] = None,
490
491
  seed: Optional[int] = None,
491
- stop: Optional[List[str]] = None,
492
+ stop: Optional[list[str]] = None,
492
493
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
493
494
  temperature: Optional[float] = None,
494
495
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
495
496
  tool_prompt: Optional[str] = None,
496
- tools: Optional[List[ChatCompletionInputTool]] = None,
497
+ tools: Optional[list[ChatCompletionInputTool]] = None,
497
498
  top_logprobs: Optional[int] = None,
498
499
  top_p: Optional[float] = None,
499
- extra_body: Optional[Dict] = None,
500
+ extra_body: Optional[dict] = None,
500
501
  ) -> ChatCompletionOutput: ...
501
502
 
502
503
  @overload
503
504
  async def chat_completion( # type: ignore
504
505
  self,
505
- messages: List[Union[Dict, ChatCompletionInputMessage]],
506
+ messages: list[Union[dict, ChatCompletionInputMessage]],
506
507
  *,
507
508
  model: Optional[str] = None,
508
509
  stream: Literal[True] = True,
509
510
  frequency_penalty: Optional[float] = None,
510
- logit_bias: Optional[List[float]] = None,
511
+ logit_bias: Optional[list[float]] = None,
511
512
  logprobs: Optional[bool] = None,
512
513
  max_tokens: Optional[int] = None,
513
514
  n: Optional[int] = None,
514
515
  presence_penalty: Optional[float] = None,
515
516
  response_format: Optional[ChatCompletionInputGrammarType] = None,
516
517
  seed: Optional[int] = None,
517
- stop: Optional[List[str]] = None,
518
+ stop: Optional[list[str]] = None,
518
519
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
519
520
  temperature: Optional[float] = None,
520
521
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
521
522
  tool_prompt: Optional[str] = None,
522
- tools: Optional[List[ChatCompletionInputTool]] = None,
523
+ tools: Optional[list[ChatCompletionInputTool]] = None,
523
524
  top_logprobs: Optional[int] = None,
524
525
  top_p: Optional[float] = None,
525
- extra_body: Optional[Dict] = None,
526
+ extra_body: Optional[dict] = None,
526
527
  ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
527
528
 
528
529
  @overload
529
530
  async def chat_completion(
530
531
  self,
531
- messages: List[Union[Dict, ChatCompletionInputMessage]],
532
+ messages: list[Union[dict, ChatCompletionInputMessage]],
532
533
  *,
533
534
  model: Optional[str] = None,
534
535
  stream: bool = False,
535
536
  frequency_penalty: Optional[float] = None,
536
- logit_bias: Optional[List[float]] = None,
537
+ logit_bias: Optional[list[float]] = None,
537
538
  logprobs: Optional[bool] = None,
538
539
  max_tokens: Optional[int] = None,
539
540
  n: Optional[int] = None,
540
541
  presence_penalty: Optional[float] = None,
541
542
  response_format: Optional[ChatCompletionInputGrammarType] = None,
542
543
  seed: Optional[int] = None,
543
- stop: Optional[List[str]] = None,
544
+ stop: Optional[list[str]] = None,
544
545
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
545
546
  temperature: Optional[float] = None,
546
547
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
547
548
  tool_prompt: Optional[str] = None,
548
- tools: Optional[List[ChatCompletionInputTool]] = None,
549
+ tools: Optional[list[ChatCompletionInputTool]] = None,
549
550
  top_logprobs: Optional[int] = None,
550
551
  top_p: Optional[float] = None,
551
- extra_body: Optional[Dict] = None,
552
+ extra_body: Optional[dict] = None,
552
553
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
553
554
 
554
555
  async def chat_completion(
555
556
  self,
556
- messages: List[Union[Dict, ChatCompletionInputMessage]],
557
+ messages: list[Union[dict, ChatCompletionInputMessage]],
557
558
  *,
558
559
  model: Optional[str] = None,
559
560
  stream: bool = False,
560
561
  # Parameters from ChatCompletionInput (handled manually)
561
562
  frequency_penalty: Optional[float] = None,
562
- logit_bias: Optional[List[float]] = None,
563
+ logit_bias: Optional[list[float]] = None,
563
564
  logprobs: Optional[bool] = None,
564
565
  max_tokens: Optional[int] = None,
565
566
  n: Optional[int] = None,
566
567
  presence_penalty: Optional[float] = None,
567
568
  response_format: Optional[ChatCompletionInputGrammarType] = None,
568
569
  seed: Optional[int] = None,
569
- stop: Optional[List[str]] = None,
570
+ stop: Optional[list[str]] = None,
570
571
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
571
572
  temperature: Optional[float] = None,
572
573
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
573
574
  tool_prompt: Optional[str] = None,
574
- tools: Optional[List[ChatCompletionInputTool]] = None,
575
+ tools: Optional[list[ChatCompletionInputTool]] = None,
575
576
  top_logprobs: Optional[int] = None,
576
577
  top_p: Optional[float] = None,
577
- extra_body: Optional[Dict] = None,
578
+ extra_body: Optional[dict] = None,
578
579
  ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
579
580
  """
580
581
  A method for completing conversations using a specified language model.
@@ -604,7 +605,7 @@ class AsyncInferenceClient:
604
605
  frequency_penalty (`float`, *optional*):
605
606
  Penalizes new tokens based on their existing frequency
606
607
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
607
- logit_bias (`List[float]`, *optional*):
608
+ logit_bias (`list[float]`, *optional*):
608
609
  Adjusts the likelihood of specific tokens appearing in the generated output.
609
610
  logprobs (`bool`, *optional*):
610
611
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -620,7 +621,7 @@ class AsyncInferenceClient:
620
621
  Grammar constraints. Can be either a JSONSchema or a regex.
621
622
  seed (Optional[`int`], *optional*):
622
623
  Seed for reproducible control flow. Defaults to None.
623
- stop (`List[str]`, *optional*):
624
+ stop (`list[str]`, *optional*):
624
625
  Up to four strings which trigger the end of the response.
625
626
  Defaults to None.
626
627
  stream (`bool`, *optional*):
@@ -644,7 +645,7 @@ class AsyncInferenceClient:
644
645
  tools (List of [`ChatCompletionInputTool`], *optional*):
645
646
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
646
647
  provide a list of functions the model may generate JSON inputs for.
647
- extra_body (`Dict`, *optional*):
648
+ extra_body (`dict`, *optional*):
648
649
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
649
650
  for supported parameters.
650
651
  Returns:
@@ -656,7 +657,7 @@ class AsyncInferenceClient:
656
657
  Raises:
657
658
  [`InferenceTimeoutError`]:
658
659
  If the model is unavailable or the request times out.
659
- `aiohttp.ClientResponseError`:
660
+ [`HfHubHTTPError`]:
660
661
  If the request fails with an HTTP error status code other than HTTP 503.
661
662
 
662
663
  Example:
@@ -976,8 +977,8 @@ class AsyncInferenceClient:
976
977
  max_question_len: Optional[int] = None,
977
978
  max_seq_len: Optional[int] = None,
978
979
  top_k: Optional[int] = None,
979
- word_boxes: Optional[List[Union[List[float], str]]] = None,
980
- ) -> List[DocumentQuestionAnsweringOutputElement]:
980
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
981
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
981
982
  """
982
983
  Answer questions on document images.
983
984
 
@@ -1007,16 +1008,16 @@ class AsyncInferenceClient:
1007
1008
  top_k (`int`, *optional*):
1008
1009
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
1009
1010
  answers if there are not enough options available within the context.
1010
- word_boxes (`List[Union[List[float], str`, *optional*):
1011
+ word_boxes (`list[Union[list[float], str`, *optional*):
1011
1012
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
1012
1013
  step and use the provided bounding boxes instead.
1013
1014
  Returns:
1014
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
1015
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
1015
1016
 
1016
1017
  Raises:
1017
1018
  [`InferenceTimeoutError`]:
1018
1019
  If the model is unavailable or the request times out.
1019
- `aiohttp.ClientResponseError`:
1020
+ [`HfHubHTTPError`]:
1020
1021
  If the request fails with an HTTP error status code other than HTTP 503.
1021
1022
 
1022
1023
 
@@ -1031,7 +1032,7 @@ class AsyncInferenceClient:
1031
1032
  """
1032
1033
  model_id = model or self.model
1033
1034
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
1034
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1035
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1035
1036
  request_parameters = provider_helper.prepare_request(
1036
1037
  inputs=inputs,
1037
1038
  parameters={
@@ -1092,7 +1093,7 @@ class AsyncInferenceClient:
1092
1093
  Raises:
1093
1094
  [`InferenceTimeoutError`]:
1094
1095
  If the model is unavailable or the request times out.
1095
- `aiohttp.ClientResponseError`:
1096
+ [`HfHubHTTPError`]:
1096
1097
  If the request fails with an HTTP error status code other than HTTP 503.
1097
1098
 
1098
1099
  Example:
@@ -1130,9 +1131,9 @@ class AsyncInferenceClient:
1130
1131
  text: str,
1131
1132
  *,
1132
1133
  model: Optional[str] = None,
1133
- targets: Optional[List[str]] = None,
1134
+ targets: Optional[list[str]] = None,
1134
1135
  top_k: Optional[int] = None,
1135
- ) -> List[FillMaskOutputElement]:
1136
+ ) -> list[FillMaskOutputElement]:
1136
1137
  """
1137
1138
  Fill in a hole with a missing word (token to be precise).
1138
1139
 
@@ -1142,20 +1143,20 @@ class AsyncInferenceClient:
1142
1143
  model (`str`, *optional*):
1143
1144
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
1144
1145
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
1145
- targets (`List[str`, *optional*):
1146
+ targets (`list[str`, *optional*):
1146
1147
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
1147
1148
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
1148
1149
  resulting token will be used (with a warning, and that might be slower).
1149
1150
  top_k (`int`, *optional*):
1150
1151
  When passed, overrides the number of predictions to return.
1151
1152
  Returns:
1152
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1153
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1153
1154
  probability, token reference, and completed text.
1154
1155
 
1155
1156
  Raises:
1156
1157
  [`InferenceTimeoutError`]:
1157
1158
  If the model is unavailable or the request times out.
1158
- `aiohttp.ClientResponseError`:
1159
+ [`HfHubHTTPError`]:
1159
1160
  If the request fails with an HTTP error status code other than HTTP 503.
1160
1161
 
1161
1162
  Example:
@@ -1189,7 +1190,7 @@ class AsyncInferenceClient:
1189
1190
  model: Optional[str] = None,
1190
1191
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
1191
1192
  top_k: Optional[int] = None,
1192
- ) -> List[ImageClassificationOutputElement]:
1193
+ ) -> list[ImageClassificationOutputElement]:
1193
1194
  """
1194
1195
  Perform image classification on the given image using the specified model.
1195
1196
 
@@ -1204,12 +1205,12 @@ class AsyncInferenceClient:
1204
1205
  top_k (`int`, *optional*):
1205
1206
  When specified, limits the output to the top K most probable classes.
1206
1207
  Returns:
1207
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1208
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1208
1209
 
1209
1210
  Raises:
1210
1211
  [`InferenceTimeoutError`]:
1211
1212
  If the model is unavailable or the request times out.
1212
- `aiohttp.ClientResponseError`:
1213
+ [`HfHubHTTPError`]:
1213
1214
  If the request fails with an HTTP error status code other than HTTP 503.
1214
1215
 
1215
1216
  Example:
@@ -1242,7 +1243,7 @@ class AsyncInferenceClient:
1242
1243
  overlap_mask_area_threshold: Optional[float] = None,
1243
1244
  subtask: Optional["ImageSegmentationSubtask"] = None,
1244
1245
  threshold: Optional[float] = None,
1245
- ) -> List[ImageSegmentationOutputElement]:
1246
+ ) -> list[ImageSegmentationOutputElement]:
1246
1247
  """
1247
1248
  Perform image segmentation on the given image using the specified model.
1248
1249
 
@@ -1267,12 +1268,12 @@ class AsyncInferenceClient:
1267
1268
  threshold (`float`, *optional*):
1268
1269
  Probability threshold to filter out predicted masks.
1269
1270
  Returns:
1270
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1271
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1271
1272
 
1272
1273
  Raises:
1273
1274
  [`InferenceTimeoutError`]:
1274
1275
  If the model is unavailable or the request times out.
1275
- `aiohttp.ClientResponseError`:
1276
+ [`HfHubHTTPError`]:
1276
1277
  If the request fails with an HTTP error status code other than HTTP 503.
1277
1278
 
1278
1279
  Example:
@@ -1350,7 +1351,7 @@ class AsyncInferenceClient:
1350
1351
  Raises:
1351
1352
  [`InferenceTimeoutError`]:
1352
1353
  If the model is unavailable or the request times out.
1353
- `aiohttp.ClientResponseError`:
1354
+ [`HfHubHTTPError`]:
1354
1355
  If the request fails with an HTTP error status code other than HTTP 503.
1355
1356
 
1356
1357
  Example:
@@ -1482,7 +1483,7 @@ class AsyncInferenceClient:
1482
1483
  Raises:
1483
1484
  [`InferenceTimeoutError`]:
1484
1485
  If the model is unavailable or the request times out.
1485
- `aiohttp.ClientResponseError`:
1486
+ [`HfHubHTTPError`]:
1486
1487
  If the request fails with an HTTP error status code other than HTTP 503.
1487
1488
 
1488
1489
  Example:
@@ -1506,12 +1507,12 @@ class AsyncInferenceClient:
1506
1507
  api_key=self.token,
1507
1508
  )
1508
1509
  response = await self._inner_post(request_parameters)
1509
- output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
1510
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
1510
1511
  return output_list[0]
1511
1512
 
1512
1513
  async def object_detection(
1513
1514
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
1514
- ) -> List[ObjectDetectionOutputElement]:
1515
+ ) -> list[ObjectDetectionOutputElement]:
1515
1516
  """
1516
1517
  Perform object detection on the given image using the specified model.
1517
1518
 
@@ -1530,12 +1531,12 @@ class AsyncInferenceClient:
1530
1531
  threshold (`float`, *optional*):
1531
1532
  The probability necessary to make a prediction.
1532
1533
  Returns:
1533
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1534
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1534
1535
 
1535
1536
  Raises:
1536
1537
  [`InferenceTimeoutError`]:
1537
1538
  If the model is unavailable or the request times out.
1538
- `aiohttp.ClientResponseError`:
1539
+ [`HfHubHTTPError`]:
1539
1540
  If the request fails with an HTTP error status code other than HTTP 503.
1540
1541
  `ValueError`:
1541
1542
  If the request output is not a List.
@@ -1574,7 +1575,7 @@ class AsyncInferenceClient:
1574
1575
  max_question_len: Optional[int] = None,
1575
1576
  max_seq_len: Optional[int] = None,
1576
1577
  top_k: Optional[int] = None,
1577
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
1578
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
1578
1579
  """
1579
1580
  Retrieve the answer to a question from a given text.
1580
1581
 
@@ -1606,13 +1607,13 @@ class AsyncInferenceClient:
1606
1607
  topk answers if there are not enough options available within the context.
1607
1608
 
1608
1609
  Returns:
1609
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
1610
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
1610
1611
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
1611
1612
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
1612
1613
  Raises:
1613
1614
  [`InferenceTimeoutError`]:
1614
1615
  If the model is unavailable or the request times out.
1615
- `aiohttp.ClientResponseError`:
1616
+ [`HfHubHTTPError`]:
1616
1617
  If the request fails with an HTTP error status code other than HTTP 503.
1617
1618
 
1618
1619
  Example:
@@ -1647,15 +1648,15 @@ class AsyncInferenceClient:
1647
1648
  return output
1648
1649
 
1649
1650
  async def sentence_similarity(
1650
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
1651
- ) -> List[float]:
1651
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
1652
+ ) -> list[float]:
1652
1653
  """
1653
1654
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
1654
1655
 
1655
1656
  Args:
1656
1657
  sentence (`str`):
1657
1658
  The main sentence to compare to others.
1658
- other_sentences (`List[str]`):
1659
+ other_sentences (`list[str]`):
1659
1660
  The list of sentences to compare to.
1660
1661
  model (`str`, *optional*):
1661
1662
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1663,12 +1664,12 @@ class AsyncInferenceClient:
1663
1664
  Defaults to None.
1664
1665
 
1665
1666
  Returns:
1666
- `List[float]`: The embedding representing the input text.
1667
+ `list[float]`: The embedding representing the input text.
1667
1668
 
1668
1669
  Raises:
1669
1670
  [`InferenceTimeoutError`]:
1670
1671
  If the model is unavailable or the request times out.
1671
- `aiohttp.ClientResponseError`:
1672
+ [`HfHubHTTPError`]:
1672
1673
  If the request fails with an HTTP error status code other than HTTP 503.
1673
1674
 
1674
1675
  Example:
@@ -1706,7 +1707,7 @@ class AsyncInferenceClient:
1706
1707
  *,
1707
1708
  model: Optional[str] = None,
1708
1709
  clean_up_tokenization_spaces: Optional[bool] = None,
1709
- generate_parameters: Optional[Dict[str, Any]] = None,
1710
+ generate_parameters: Optional[dict[str, Any]] = None,
1710
1711
  truncation: Optional["SummarizationTruncationStrategy"] = None,
1711
1712
  ) -> SummarizationOutput:
1712
1713
  """
@@ -1720,7 +1721,7 @@ class AsyncInferenceClient:
1720
1721
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
1721
1722
  clean_up_tokenization_spaces (`bool`, *optional*):
1722
1723
  Whether to clean up the potential extra spaces in the text output.
1723
- generate_parameters (`Dict[str, Any]`, *optional*):
1724
+ generate_parameters (`dict[str, Any]`, *optional*):
1724
1725
  Additional parametrization of the text generation algorithm.
1725
1726
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
1726
1727
  The truncation strategy to use.
@@ -1730,7 +1731,7 @@ class AsyncInferenceClient:
1730
1731
  Raises:
1731
1732
  [`InferenceTimeoutError`]:
1732
1733
  If the model is unavailable or the request times out.
1733
- `aiohttp.ClientResponseError`:
1734
+ [`HfHubHTTPError`]:
1734
1735
  If the request fails with an HTTP error status code other than HTTP 503.
1735
1736
 
1736
1737
  Example:
@@ -1761,7 +1762,7 @@ class AsyncInferenceClient:
1761
1762
 
1762
1763
  async def table_question_answering(
1763
1764
  self,
1764
- table: Dict[str, Any],
1765
+ table: dict[str, Any],
1765
1766
  query: str,
1766
1767
  *,
1767
1768
  model: Optional[str] = None,
@@ -1796,7 +1797,7 @@ class AsyncInferenceClient:
1796
1797
  Raises:
1797
1798
  [`InferenceTimeoutError`]:
1798
1799
  If the model is unavailable or the request times out.
1799
- `aiohttp.ClientResponseError`:
1800
+ [`HfHubHTTPError`]:
1800
1801
  If the request fails with an HTTP error status code other than HTTP 503.
1801
1802
 
1802
1803
  Example:
@@ -1822,12 +1823,12 @@ class AsyncInferenceClient:
1822
1823
  response = await self._inner_post(request_parameters)
1823
1824
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
1824
1825
 
1825
- async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
1826
+ async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
1826
1827
  """
1827
1828
  Classifying a target category (a group) based on a set of attributes.
1828
1829
 
1829
1830
  Args:
1830
- table (`Dict[str, Any]`):
1831
+ table (`dict[str, Any]`):
1831
1832
  Set of attributes to classify.
1832
1833
  model (`str`, *optional*):
1833
1834
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1840,7 +1841,7 @@ class AsyncInferenceClient:
1840
1841
  Raises:
1841
1842
  [`InferenceTimeoutError`]:
1842
1843
  If the model is unavailable or the request times out.
1843
- `aiohttp.ClientResponseError`:
1844
+ [`HfHubHTTPError`]:
1844
1845
  If the request fails with an HTTP error status code other than HTTP 503.
1845
1846
 
1846
1847
  Example:
@@ -1878,12 +1879,12 @@ class AsyncInferenceClient:
1878
1879
  response = await self._inner_post(request_parameters)
1879
1880
  return _bytes_to_list(response)
1880
1881
 
1881
- async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
1882
+ async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
1882
1883
  """
1883
1884
  Predicting a numerical target value given a set of attributes/features in a table.
1884
1885
 
1885
1886
  Args:
1886
- table (`Dict[str, Any]`):
1887
+ table (`dict[str, Any]`):
1887
1888
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
1888
1889
  model (`str`, *optional*):
1889
1890
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1896,7 +1897,7 @@ class AsyncInferenceClient:
1896
1897
  Raises:
1897
1898
  [`InferenceTimeoutError`]:
1898
1899
  If the model is unavailable or the request times out.
1899
- `aiohttp.ClientResponseError`:
1900
+ [`HfHubHTTPError`]:
1900
1901
  If the request fails with an HTTP error status code other than HTTP 503.
1901
1902
 
1902
1903
  Example:
@@ -1936,7 +1937,7 @@ class AsyncInferenceClient:
1936
1937
  model: Optional[str] = None,
1937
1938
  top_k: Optional[int] = None,
1938
1939
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
1939
- ) -> List[TextClassificationOutputElement]:
1940
+ ) -> list[TextClassificationOutputElement]:
1940
1941
  """
1941
1942
  Perform text classification (e.g. sentiment-analysis) on the given text.
1942
1943
 
@@ -1953,12 +1954,12 @@ class AsyncInferenceClient:
1953
1954
  The function to apply to the model outputs in order to retrieve the scores.
1954
1955
 
1955
1956
  Returns:
1956
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1957
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1957
1958
 
1958
1959
  Raises:
1959
1960
  [`InferenceTimeoutError`]:
1960
1961
  If the model is unavailable or the request times out.
1961
- `aiohttp.ClientResponseError`:
1962
+ [`HfHubHTTPError`]:
1962
1963
  If the request fails with an HTTP error status code other than HTTP 503.
1963
1964
 
1964
1965
  Example:
@@ -2007,8 +2008,8 @@ class AsyncInferenceClient:
2007
2008
  repetition_penalty: Optional[float] = None,
2008
2009
  return_full_text: Optional[bool] = None,
2009
2010
  seed: Optional[int] = None,
2010
- stop: Optional[List[str]] = None,
2011
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2011
+ stop: Optional[list[str]] = None,
2012
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2012
2013
  temperature: Optional[float] = None,
2013
2014
  top_k: Optional[int] = None,
2014
2015
  top_n_tokens: Optional[int] = None,
@@ -2037,8 +2038,8 @@ class AsyncInferenceClient:
2037
2038
  repetition_penalty: Optional[float] = None,
2038
2039
  return_full_text: Optional[bool] = None,
2039
2040
  seed: Optional[int] = None,
2040
- stop: Optional[List[str]] = None,
2041
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2041
+ stop: Optional[list[str]] = None,
2042
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2042
2043
  temperature: Optional[float] = None,
2043
2044
  top_k: Optional[int] = None,
2044
2045
  top_n_tokens: Optional[int] = None,
@@ -2067,8 +2068,8 @@ class AsyncInferenceClient:
2067
2068
  repetition_penalty: Optional[float] = None,
2068
2069
  return_full_text: Optional[bool] = None, # Manual default value
2069
2070
  seed: Optional[int] = None,
2070
- stop: Optional[List[str]] = None,
2071
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2071
+ stop: Optional[list[str]] = None,
2072
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2072
2073
  temperature: Optional[float] = None,
2073
2074
  top_k: Optional[int] = None,
2074
2075
  top_n_tokens: Optional[int] = None,
@@ -2097,8 +2098,8 @@ class AsyncInferenceClient:
2097
2098
  repetition_penalty: Optional[float] = None,
2098
2099
  return_full_text: Optional[bool] = None,
2099
2100
  seed: Optional[int] = None,
2100
- stop: Optional[List[str]] = None,
2101
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2101
+ stop: Optional[list[str]] = None,
2102
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2102
2103
  temperature: Optional[float] = None,
2103
2104
  top_k: Optional[int] = None,
2104
2105
  top_n_tokens: Optional[int] = None,
@@ -2127,8 +2128,8 @@ class AsyncInferenceClient:
2127
2128
  repetition_penalty: Optional[float] = None,
2128
2129
  return_full_text: Optional[bool] = None,
2129
2130
  seed: Optional[int] = None,
2130
- stop: Optional[List[str]] = None,
2131
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2131
+ stop: Optional[list[str]] = None,
2132
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2132
2133
  temperature: Optional[float] = None,
2133
2134
  top_k: Optional[int] = None,
2134
2135
  top_n_tokens: Optional[int] = None,
@@ -2156,8 +2157,8 @@ class AsyncInferenceClient:
2156
2157
  repetition_penalty: Optional[float] = None,
2157
2158
  return_full_text: Optional[bool] = None,
2158
2159
  seed: Optional[int] = None,
2159
- stop: Optional[List[str]] = None,
2160
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2160
+ stop: Optional[list[str]] = None,
2161
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2161
2162
  temperature: Optional[float] = None,
2162
2163
  top_k: Optional[int] = None,
2163
2164
  top_n_tokens: Optional[int] = None,
@@ -2213,9 +2214,9 @@ class AsyncInferenceClient:
2213
2214
  Whether to prepend the prompt to the generated text
2214
2215
  seed (`int`, *optional*):
2215
2216
  Random sampling seed
2216
- stop (`List[str]`, *optional*):
2217
+ stop (`list[str]`, *optional*):
2217
2218
  Stop generating tokens if a member of `stop` is generated.
2218
- stop_sequences (`List[str]`, *optional*):
2219
+ stop_sequences (`list[str]`, *optional*):
2219
2220
  Deprecated argument. Use `stop` instead.
2220
2221
  temperature (`float`, *optional*):
2221
2222
  The value used to module the logits distribution.
@@ -2236,10 +2237,10 @@ class AsyncInferenceClient:
2236
2237
  Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
2237
2238
 
2238
2239
  Returns:
2239
- `Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]`:
2240
+ `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
2240
2241
  Generated text returned from the server:
2241
2242
  - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
2242
- - if `stream=True` and `details=False`, the generated text is returned token by token as a `Iterable[str]`
2243
+ - if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
2243
2244
  - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
2244
2245
  - if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
2245
2246
 
@@ -2248,7 +2249,7 @@ class AsyncInferenceClient:
2248
2249
  If input values are not valid. No HTTP call is made to the server.
2249
2250
  [`InferenceTimeoutError`]:
2250
2251
  If the model is unavailable or the request times out.
2251
- `aiohttp.ClientResponseError`:
2252
+ [`HfHubHTTPError`]:
2252
2253
  If the request fails with an HTTP error status code other than HTTP 503.
2253
2254
 
2254
2255
  Example:
@@ -2438,9 +2439,9 @@ class AsyncInferenceClient:
2438
2439
  # Handle errors separately for more precise error messages
2439
2440
  try:
2440
2441
  bytes_output = await self._inner_post(request_parameters, stream=stream or False)
2441
- except _import_aiohttp().ClientResponseError as e:
2442
- match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
2443
- if e.status == 400 and match:
2442
+ except HfHubHTTPError as e:
2443
+ match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
2444
+ if isinstance(e, BadRequestError) and match:
2444
2445
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
2445
2446
  _set_unsupported_text_generation_kwargs(model, unused_params)
2446
2447
  return await self.text_generation( # type: ignore
@@ -2493,7 +2494,7 @@ class AsyncInferenceClient:
2493
2494
  model: Optional[str] = None,
2494
2495
  scheduler: Optional[str] = None,
2495
2496
  seed: Optional[int] = None,
2496
- extra_body: Optional[Dict[str, Any]] = None,
2497
+ extra_body: Optional[dict[str, Any]] = None,
2497
2498
  ) -> "Image":
2498
2499
  """
2499
2500
  Generate an image based on a given text using a specified model.
@@ -2531,7 +2532,7 @@ class AsyncInferenceClient:
2531
2532
  Override the scheduler with a compatible one.
2532
2533
  seed (`int`, *optional*):
2533
2534
  Seed for the random number generator.
2534
- extra_body (`Dict[str, Any]`, *optional*):
2535
+ extra_body (`dict[str, Any]`, *optional*):
2535
2536
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2536
2537
  for supported parameters.
2537
2538
 
@@ -2541,7 +2542,7 @@ class AsyncInferenceClient:
2541
2542
  Raises:
2542
2543
  [`InferenceTimeoutError`]:
2543
2544
  If the model is unavailable or the request times out.
2544
- `aiohttp.ClientResponseError`:
2545
+ [`HfHubHTTPError`]:
2545
2546
  If the request fails with an HTTP error status code other than HTTP 503.
2546
2547
 
2547
2548
  Example:
@@ -2631,11 +2632,11 @@ class AsyncInferenceClient:
2631
2632
  *,
2632
2633
  model: Optional[str] = None,
2633
2634
  guidance_scale: Optional[float] = None,
2634
- negative_prompt: Optional[List[str]] = None,
2635
+ negative_prompt: Optional[list[str]] = None,
2635
2636
  num_frames: Optional[float] = None,
2636
2637
  num_inference_steps: Optional[int] = None,
2637
2638
  seed: Optional[int] = None,
2638
- extra_body: Optional[Dict[str, Any]] = None,
2639
+ extra_body: Optional[dict[str, Any]] = None,
2639
2640
  ) -> bytes:
2640
2641
  """
2641
2642
  Generate a video based on a given text.
@@ -2654,7 +2655,7 @@ class AsyncInferenceClient:
  guidance_scale (`float`, *optional*):
  A higher guidance scale value encourages the model to generate videos closely linked to the text
  prompt, but values too high may cause saturation and other artifacts.
- negative_prompt (`List[str]`, *optional*):
+ negative_prompt (`list[str]`, *optional*):
  One or several prompts to guide what NOT to include in video generation.
  num_frames (`float`, *optional*):
  The num_frames parameter determines how many video frames are generated.
@@ -2663,7 +2664,7 @@ class AsyncInferenceClient:
  expense of slower inference.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
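Same story for `text_to_video`: `negative_prompt` becomes `list[str]` and `extra_body` becomes `dict[str, Any]`, with no behavioural change. A hedged call sketch (the prompt and parameter values are illustrative):

```py
from huggingface_hub import AsyncInferenceClient


async def generate_clip() -> None:
    client = AsyncInferenceClient()
    video = await client.text_to_video(
        "A red fox running through fresh snow",
        negative_prompt=["blurry", "low quality"],  # now a plain list[str]
        num_frames=16,
        seed=0,
    )
    # The method returns the raw video bytes.
    with open("fox.mp4", "wb") as f:
        f.write(video)
```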
 
@@ -2743,7 +2744,7 @@ class AsyncInferenceClient:
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.
@@ -2805,7 +2806,7 @@ class AsyncInferenceClient:
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -2814,7 +2815,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
@@ -2938,9 +2939,9 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2954,18 +2955,18 @@ class AsyncInferenceClient:
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str`, *optional*):
+ ignore_labels (`list[str]`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.
 
  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
@@ -3017,7 +3018,7 @@ class AsyncInferenceClient:
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -3042,7 +3043,7 @@ class AsyncInferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
 
  Returns:
@@ -3051,7 +3052,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments is provided.
@@ -3105,7 +3106,7 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.
 
@@ -3122,12 +3123,12 @@ class AsyncInferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
 
  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
@@ -3161,21 +3162,21 @@ class AsyncInferenceClient:
  async def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.
 
  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3190,12 +3191,12 @@ class AsyncInferenceClient:
 
 
  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example with `multi_label=False`:
@@ -3269,22 +3270,22 @@ class AsyncInferenceClient:
  async def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.
 
  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3294,12 +3295,12 @@ class AsyncInferenceClient:
  replacing the placeholder with the candidate labels.
 
  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
 
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
 
  Example:
@@ -3334,48 +3335,7 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
 
- def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
- aiohttp = _import_aiohttp()
- client_headers = self.headers.copy()
- if headers is not None:
- client_headers.update(headers)
-
- # Return a new aiohttp ClientSession with correct settings.
- session = aiohttp.ClientSession(
- headers=client_headers,
- cookies=self.cookies,
- timeout=aiohttp.ClientTimeout(self.timeout),
- trust_env=self.trust_env,
- )
-
- # Keep track of sessions to close them later
- self._sessions[session] = set()
-
- # Override the `._request` method to register responses to be closed
- session._wrapped_request = session._request
-
- async def _request(method, url, **kwargs):
- response = await session._wrapped_request(method, url, **kwargs)
- self._sessions[session].add(response)
- return response
-
- session._request = _request
-
- # Override the 'close' method to
- # 1. close ongoing responses
- # 2. deregister the session when closed
- session._close = session.close
-
- async def close_session():
- for response in self._sessions[session]:
- response.close()
- await session._close()
- self._sessions.pop(session, None)
-
- session.close = close_session
- return session
-
- async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.
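This hunk removes the aiohttp-specific plumbing: `_get_client_session`, the per-session response registry, and the patched `close` hook are gone now that the async client is built on `httpx`, which handles connection pooling and response lifetimes itself. The replacement pattern is visible in the `get_endpoint_info` body two hunks below; a rough standalone sketch of that pattern, with a hypothetical `fetch_json` helper standing in for the library's internal `_get_async_client` machinery:

```py
from typing import Any, Optional

import httpx

from huggingface_hub.utils import build_hf_headers, hf_raise_for_status


async def fetch_json(url: str, token: Optional[str] = None) -> dict[str, Any]:
    """Hypothetical helper mirroring the new pattern: one httpx AsyncClient,
    Hub auth headers, and hf_raise_for_status() mapping HTTP failures to
    HfHubHTTPError subclasses."""
    async with httpx.AsyncClient() as client:
        response = await client.get(url, headers=build_hf_headers(token=token))
        hf_raise_for_status(response)
        return response.json()
```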
 
@@ -3388,7 +3348,7 @@ class AsyncInferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
 
  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.
 
  Example:
  ```py
@@ -3430,10 +3390,10 @@ class AsyncInferenceClient:
  else:
  url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"
 
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- return await response.json()
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ hf_raise_for_status(response)
+ return response.json()
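The body of `get_endpoint_info` now goes through the shared async client and `hf_raise_for_status` instead of a throwaway aiohttp session, and still returns a plain `dict[str, Any]`, so caller code is unaffected. A short usage sketch (the model ID is a placeholder):

```py
from huggingface_hub import AsyncInferenceClient


async def show_endpoint_info() -> None:
    client = AsyncInferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")
    info = await client.get_endpoint_info()  # plain dict[str, Any]
    print(info)
```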
 
  async def health_check(self, model: Optional[str] = None) -> bool:
  """
@@ -3467,9 +3427,9 @@ class AsyncInferenceClient:
  raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"
 
- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- return response.status == 200
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ return response.status_code == 200
 
  @property
  def chat(self) -> "ProxyClientChat":
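`health_check` follows the same migration: the shared httpx-based client exposes `response.status_code` where aiohttp used `response.status`, but the method still simply returns whether the endpoint answered 200. A hedged usage sketch (the endpoint URL is a placeholder; per the check above, the method only accepts Inference Endpoint URLs):

```py
from huggingface_hub import AsyncInferenceClient


async def check_endpoint() -> None:
    client = AsyncInferenceClient()
    healthy = await client.health_check(model="https://my-endpoint.endpoints.huggingface.cloud")
    print("healthy" if healthy else "unhealthy")
```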