huggingface-hub 0.36.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of huggingface-hub might be problematic.

Files changed (132)
  1. huggingface_hub/__init__.py +33 -45
  2. huggingface_hub/_commit_api.py +39 -43
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +17 -43
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +135 -50
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +18 -32
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +143 -39
  16. huggingface_hub/cli/auth.py +105 -171
  17. huggingface_hub/cli/cache.py +594 -361
  18. huggingface_hub/cli/download.py +120 -112
  19. huggingface_hub/cli/hf.py +38 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +282 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -220
  26. huggingface_hub/cli/upload_large_folder.py +91 -106
  27. huggingface_hub/community.py +5 -5
  28. huggingface_hub/constants.py +17 -52
  29. huggingface_hub/dataclasses.py +135 -21
  30. huggingface_hub/errors.py +47 -30
  31. huggingface_hub/fastai_utils.py +8 -9
  32. huggingface_hub/file_download.py +351 -303
  33. huggingface_hub/hf_api.py +398 -570
  34. huggingface_hub/hf_file_system.py +101 -66
  35. huggingface_hub/hub_mixin.py +32 -54
  36. huggingface_hub/inference/_client.py +177 -162
  37. huggingface_hub/inference/_common.py +38 -54
  38. huggingface_hub/inference/_generated/_async_client.py +218 -258
  39. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  40. huggingface_hub/inference/_generated/types/base.py +10 -7
  41. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  42. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  43. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  44. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  45. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  46. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  47. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  48. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  49. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  50. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  51. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  52. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  53. huggingface_hub/inference/_generated/types/translation.py +2 -2
  54. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  55. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  56. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  57. huggingface_hub/inference/_mcp/agent.py +3 -3
  58. huggingface_hub/inference/_mcp/constants.py +1 -2
  59. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  60. huggingface_hub/inference/_mcp/types.py +10 -10
  61. huggingface_hub/inference/_mcp/utils.py +4 -4
  62. huggingface_hub/inference/_providers/__init__.py +12 -4
  63. huggingface_hub/inference/_providers/_common.py +62 -24
  64. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  65. huggingface_hub/inference/_providers/cohere.py +3 -3
  66. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  67. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  68. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  69. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  70. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  71. huggingface_hub/inference/_providers/nebius.py +10 -10
  72. huggingface_hub/inference/_providers/novita.py +5 -5
  73. huggingface_hub/inference/_providers/nscale.py +4 -4
  74. huggingface_hub/inference/_providers/replicate.py +15 -15
  75. huggingface_hub/inference/_providers/sambanova.py +6 -6
  76. huggingface_hub/inference/_providers/together.py +7 -7
  77. huggingface_hub/lfs.py +21 -94
  78. huggingface_hub/repocard.py +15 -16
  79. huggingface_hub/repocard_data.py +57 -57
  80. huggingface_hub/serialization/__init__.py +0 -1
  81. huggingface_hub/serialization/_base.py +9 -9
  82. huggingface_hub/serialization/_dduf.py +7 -7
  83. huggingface_hub/serialization/_torch.py +28 -28
  84. huggingface_hub/utils/__init__.py +11 -6
  85. huggingface_hub/utils/_auth.py +5 -5
  86. huggingface_hub/utils/_cache_manager.py +49 -74
  87. huggingface_hub/utils/_deprecation.py +1 -1
  88. huggingface_hub/utils/_dotenv.py +3 -3
  89. huggingface_hub/utils/_fixes.py +0 -10
  90. huggingface_hub/utils/_git_credential.py +3 -3
  91. huggingface_hub/utils/_headers.py +7 -29
  92. huggingface_hub/utils/_http.py +371 -208
  93. huggingface_hub/utils/_pagination.py +4 -4
  94. huggingface_hub/utils/_parsing.py +98 -0
  95. huggingface_hub/utils/_paths.py +5 -5
  96. huggingface_hub/utils/_runtime.py +59 -23
  97. huggingface_hub/utils/_safetensors.py +21 -21
  98. huggingface_hub/utils/_subprocess.py +9 -9
  99. huggingface_hub/utils/_telemetry.py +3 -3
  100. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
  101. huggingface_hub/utils/_typing.py +3 -3
  102. huggingface_hub/utils/_validators.py +53 -72
  103. huggingface_hub/utils/_xet.py +16 -16
  104. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  105. huggingface_hub/utils/insecure_hashlib.py +3 -9
  106. huggingface_hub/utils/tqdm.py +3 -3
  107. {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
  108. huggingface_hub-1.0.0.dist-info/RECORD +152 -0
  109. {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
  110. huggingface_hub/commands/__init__.py +0 -27
  111. huggingface_hub/commands/delete_cache.py +0 -476
  112. huggingface_hub/commands/download.py +0 -204
  113. huggingface_hub/commands/env.py +0 -39
  114. huggingface_hub/commands/huggingface_cli.py +0 -65
  115. huggingface_hub/commands/lfs.py +0 -200
  116. huggingface_hub/commands/repo.py +0 -151
  117. huggingface_hub/commands/repo_files.py +0 -132
  118. huggingface_hub/commands/scan_cache.py +0 -183
  119. huggingface_hub/commands/tag.py +0 -161
  120. huggingface_hub/commands/upload.py +0 -318
  121. huggingface_hub/commands/upload_large_folder.py +0 -131
  122. huggingface_hub/commands/user.py +0 -208
  123. huggingface_hub/commands/version.py +0 -40
  124. huggingface_hub/inference_api.py +0 -217
  125. huggingface_hub/keras_mixin.py +0 -497
  126. huggingface_hub/repository.py +0 -1471
  127. huggingface_hub/serialization/_tensorflow.py +0 -92
  128. huggingface_hub/utils/_hf_folder.py +0 -68
  129. huggingface_hub-0.36.0.dist-info/RECORD +0 -170
  130. {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
  131. {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
  132. {huggingface_hub-0.36.0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py
@@ -21,12 +21,16 @@
 import asyncio
 import base64
 import logging
+import os
 import re
 import warnings
-from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload
+from contextlib import AsyncExitStack
+from typing import TYPE_CHECKING, Any, AsyncIterable, Literal, Optional, Union, overload
+
+import httpx
 
 from huggingface_hub import constants
-from huggingface_hub.errors import InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
@@ -86,15 +90,19 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import build_hf_headers
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_async_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
 
-from .._common import _async_yield_from, _import_aiohttp
+from .._common import _async_yield_from
 
 
 if TYPE_CHECKING:
     import numpy as np
-    from aiohttp import ClientResponse, ClientSession
     from PIL.Image import Image
 
 logger = logging.getLogger(__name__)
@@ -127,18 +135,14 @@ class AsyncInferenceClient:
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`Dict[str, str]`, `optional`):
+        headers (`dict[str, str]`, `optional`):
             Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
            The billing account to use for the requests. By default the requests are billed on the user's account.
            Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`Dict[str, str]`, `optional`):
+        cookies (`dict[str, str]`, `optional`):
            Additional cookies to send to the server.
-        trust_env ('bool', 'optional'):
-            Trust environment settings for proxy configuration if the parameter is `True` (`False` by default).
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
             Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -147,6 +151,7 @@ class AsyncInferenceClient:
            follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """
 
+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
@@ -154,10 +159,8 @@
         provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[Dict[str, str]] = None,
-        cookies: Optional[Dict[str, str]] = None,
-        trust_env: bool = False,
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
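The removed `trust_env` and `proxies` arguments have no direct replacement in the new signature: the backend is now httpx, which with its default `trust_env=True` picks up `HTTP_PROXY`/`HTTPS_PROXY` from the environment. A minimal migration sketch, not part of the diff — the model ID and proxy URL are placeholders:

import asyncio
import os

from huggingface_hub import AsyncInferenceClient

# 0.36.0: AsyncInferenceClient(trust_env=True, proxies="http://proxy.example:3128")
# 1.0.0: export the proxy instead and let httpx read it from the environment.
os.environ["HTTPS_PROXY"] = "http://proxy.example:3128"  # placeholder proxy URL


async def main() -> None:
    async with AsyncInferenceClient(model="gpt2") as client:  # placeholder model
        print(await client.text_generation("Paris is"))


asyncio.run(main())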
@@ -219,15 +222,36 @@
 
         self.cookies = cookies
         self.timeout = timeout
-        self.trust_env = trust_env
-        self.proxies = proxies
 
-        # Keep track of the sessions to close them properly
-        self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()
+        self.exit_stack = AsyncExitStack()
+        self._async_client: Optional[httpx.AsyncClient] = None
 
     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
 
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_value, traceback):
+        await self.close()
+
+    async def close(self):
+        """Close the client.
+
+        This method is automatically called when using the client as a context manager.
+        """
+        await self.exit_stack.aclose()
+
+    async def _get_async_client(self):
+        """Get a unique async client for this AsyncInferenceClient instance.
+
+        Returns the same client instance on subsequent calls, ensuring proper
+        connection reuse and resource management through the exit stack.
+        """
+        if self._async_client is None:
+            self._async_client = await self.exit_stack.enter_async_context(get_async_session())
+        return self._async_client
+
     @overload
     async def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
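The per-request session bookkeeping is gone: the client now owns a single `httpx.AsyncClient` behind an `AsyncExitStack`. A short usage sketch based on the methods added above; the inference calls themselves are elided:

import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # Preferred: the async context manager calls close() on exit.
    async with AsyncInferenceClient() as client:
        ...  # make inference calls here

    # Equivalent manual lifecycle management.
    client = AsyncInferenceClient()
    try:
        ...  # make inference calls here
    finally:
        await client.close()  # releases the shared httpx.AsyncClient via the exit stack


asyncio.run(main())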
@@ -236,83 +260,60 @@ class AsyncInferenceClient:
     @overload
     async def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> AsyncIterable[bytes]: ...
+    ) -> AsyncIterable[str]: ...
 
     @overload
     async def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[bytes]]: ...
+    ) -> Union[bytes, AsyncIterable[str]]: ...
 
     async def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, AsyncIterable[bytes]]:
+    ) -> Union[bytes, AsyncIterable[str]]:
         """Make a request to the inference server."""
 
-        aiohttp = _import_aiohttp()
-
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"
 
-        # Do not use context manager as we don't want to close the connection immediately when returning
-        # a stream
-        session = self._get_client_session(headers=request_parameters.headers)
-
         try:
-            response = await session.post(
-                request_parameters.url, json=request_parameters.json, data=request_parameters.data, proxy=self.proxies
-            )
-            response_error_payload = None
-            if response.status != 200:
-                try:
-                    response_error_payload = await response.json()  # get payload before connection closed
-                except Exception:
-                    pass
-            response.raise_for_status()
+            client = await self._get_async_client()
             if stream:
-                return _async_yield_from(session, response)
+                response = await self.exit_stack.enter_async_context(
+                    client.stream(
+                        "POST",
+                        request_parameters.url,
+                        json=request_parameters.json,
+                        data=request_parameters.data,
+                        headers=request_parameters.headers,
+                        cookies=self.cookies,
+                        timeout=self.timeout,
+                    )
+                )
+                hf_raise_for_status(response)
+                return _async_yield_from(client, response)
             else:
-                content = await response.read()
-                await session.close()
-                return content
+                response = await client.post(
+                    request_parameters.url,
+                    json=request_parameters.json,
+                    data=request_parameters.data,
+                    headers=request_parameters.headers,
+                    cookies=self.cookies,
+                    timeout=self.timeout,
+                )
+                hf_raise_for_status(response)
+                return response.content
         except asyncio.TimeoutError as error:
-            await session.close()
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
            raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-        except aiohttp.ClientResponseError as error:
-            error.response_error_payload = response_error_payload
-            await session.close()
-            raise error
-        except Exception:
-            await session.close()
+        except HfHubHTTPError as error:
+            if error.response.status_code == 422 and request_parameters.task != "unknown":
+                msg = str(error.args[0])
+                if len(error.response.text) > 0:
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
+                error.args = (msg,) + error.args[1:]
             raise
 
-    async def __aenter__(self):
-        return self
-
-    async def __aexit__(self, exc_type, exc_value, traceback):
-        await self.close()
-
-    def __del__(self):
-        if len(self._sessions) > 0:
-            warnings.warn(
-                "Deleting 'AsyncInferenceClient' client but some sessions are still open. "
-                "This can happen if you've stopped streaming data from the server before the stream was complete. "
-                "To close the client properly, you must call `await client.close()` "
-                "or use an async context (e.g. `async with AsyncInferenceClient(): ...`."
-            )
-
-    async def close(self):
-        """Close all open sessions.
-
-        By default, 'aiohttp.ClientSession' objects are closed automatically when a call is completed. However, if you
-        are streaming data from the server and you stop before the stream is complete, you must call this method to
-        close the session properly.
-
-        Another possibility is to use an async context (e.g. `async with AsyncInferenceClient(): ...`).
-        """
-        await asyncio.gather(*[session.close() for session in self._sessions.keys()])
-
     async def audio_classification(
         self,
@@ -320,7 +321,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) -> List[AudioClassificationOutputElement]:
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.
 
@@ -338,12 +339,12 @@ class AsyncInferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -376,7 +377,7 @@ class AsyncInferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) -> List[AudioToAudioOutputElement]:
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
 
@@ -390,12 +391,12 @@ class AsyncInferenceClient:
                 audio_to_audio will be used.
 
         Returns:
-            `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
 
         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -429,7 +430,7 @@ class AsyncInferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -440,7 +441,7 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -449,7 +450,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -476,105 +477,105 @@ class AsyncInferenceClient:
     @overload
     async def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...
 
     @overload
     async def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> AsyncIterable[ChatCompletionStreamOutput]: ...
 
     @overload
     async def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...
 
     async def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
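The four `chat_completion` signatures above only swap typing generics (`List`/`Dict` to built-in `list`/`dict`); call sites are unchanged. A minimal sketch of both return shapes — the message content and token budget are illustrative:

import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    async with AsyncInferenceClient() as client:
        messages = [{"role": "user", "content": "Say hi"}]

        # stream=False -> a single ChatCompletionOutput
        output = await client.chat_completion(messages, max_tokens=16)
        print(output.choices[0].message.content)

        # stream=True -> AsyncIterable[ChatCompletionStreamOutput]
        stream = await client.chat_completion(messages, max_tokens=16, stream=True)
        async for chunk in stream:
            print(chunk.choices[0].delta.content or "", end="")


asyncio.run(main())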
@@ -600,7 +601,7 @@ class AsyncInferenceClient:
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`List[float]`, *optional*):
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -616,7 +617,7 @@ class AsyncInferenceClient:
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -640,7 +641,7 @@ class AsyncInferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -652,7 +653,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -972,8 +973,8 @@ class AsyncInferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[List[Union[List[float], str]]] = None,
-    ) -> List[DocumentQuestionAnsweringOutputElement]:
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.
 
@@ -1003,16 +1004,16 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`List[Union[List[float], str`, *optional*):
+            word_boxes (`list[Union[list[float], str`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
 
@@ -1027,7 +1028,7 @@ class AsyncInferenceClient:
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1088,7 +1089,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1126,9 +1127,9 @@ class AsyncInferenceClient:
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[List[str]] = None,
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) -> List[FillMaskOutputElement]:
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).
 
@@ -1138,20 +1139,20 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`List[str`, *optional*):
+            targets (`list[str`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
             probability, token reference, and completed text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1185,7 +1186,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) -> List[ImageClassificationOutputElement]:
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.
 
@@ -1200,12 +1201,12 @@ class AsyncInferenceClient:
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1238,7 +1239,7 @@ class AsyncInferenceClient:
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) -> List[ImageSegmentationOutputElement]:
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.
 
@@ -1260,12 +1261,12 @@ class AsyncInferenceClient:
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1341,7 +1342,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1473,7 +1474,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1497,12 +1498,12 @@ class AsyncInferenceClient:
             api_key=self.token,
         )
         response = await self._inner_post(request_parameters)
-        output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
         return output_list[0]
 
     async def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) -> List[ObjectDetectionOutputElement]:
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.
 
@@ -1518,12 +1519,12 @@ class AsyncInferenceClient:
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1562,7 +1563,7 @@ class AsyncInferenceClient:
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.
 
@@ -1594,13 +1595,13 @@ class AsyncInferenceClient:
                 topk answers if there are not enough options available within the context.
 
         Returns:
-            Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1635,15 +1636,15 @@ class AsyncInferenceClient:
         return output
 
     async def sentence_similarity(
-        self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
-    ) -> List[float]:
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
 
         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`List[str]`):
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1651,12 +1652,12 @@ class AsyncInferenceClient:
                 Defaults to None.
 
         Returns:
-            `List[float]`: The similarity scores between the main sentence and the given comparison sentences.
+            `list[float]`: The embedding representing the input text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1694,7 +1695,7 @@ class AsyncInferenceClient:
         *,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
         truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
@@ -1708,7 +1709,7 @@ class AsyncInferenceClient:
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
             truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
@@ -1718,7 +1719,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1749,7 +1750,7 @@ class AsyncInferenceClient:
 
     async def table_question_answering(
         self,
-        table: Dict[str, Any],
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1784,7 +1785,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1810,12 +1811,12 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
 
-    async def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+    async def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1828,7 +1829,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1866,12 +1867,12 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return _bytes_to_list(response)
 
-    async def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+    async def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1884,7 +1885,7 @@ class AsyncInferenceClient:
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1924,7 +1925,7 @@ class AsyncInferenceClient:
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) -> List[TextClassificationOutputElement]:
+    ) -> list[TextClassificationOutputElement]:
         """
         Perform text classification (e.g. sentiment-analysis) on the given text.
 
@@ -1941,12 +1942,12 @@ class AsyncInferenceClient:
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1995,8 +1996,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2025,8 +2026,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2055,8 +2056,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2085,8 +2086,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2115,8 +2116,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2144,8 +2145,8 @@ class AsyncInferenceClient:
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2198,9 +2199,9 @@ class AsyncInferenceClient:
                 Whether to prepend the prompt to the generated text
             seed (`int`, *optional*):
                 Random sampling seed
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Stop generating tokens if a member of `stop` is generated.
-            stop_sequences (`List[str]`, *optional*):
+            stop_sequences (`list[str]`, *optional*):
                 Deprecated argument. Use `stop` instead.
             temperature (`float`, *optional*):
                 The value used to module the logits distribution.
@@ -2221,10 +2222,10 @@ class AsyncInferenceClient:
                 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
 
         Returns:
-            `Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]`:
+            `Union[str, TextGenerationOutput, AsyncIterable[str], AsyncIterable[TextGenerationStreamOutput]]`:
                 Generated text returned from the server:
                 - if `stream=False` and `details=False`, the generated text is returned as a `str` (default)
-                - if `stream=True` and `details=False`, the generated text is returned token by token as a `Iterable[str]`
+                - if `stream=True` and `details=False`, the generated text is returned token by token as a `AsyncIterable[str]`
                 - if `stream=False` and `details=True`, the generated text is returned with more details as a [`~huggingface_hub.TextGenerationOutput`]
                 - if `details=True` and `stream=True`, the generated text is returned token by token as a iterable of [`~huggingface_hub.TextGenerationStreamOutput`]
 
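The streaming return type is now advertised as `AsyncIterable[str]`, matching the `_inner_post` overload change earlier in this diff. A small consumption sketch — model ID and prompt are placeholders:

import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    async with AsyncInferenceClient(model="gpt2") as client:  # placeholder model
        # stream=True and details=False -> AsyncIterable[str]
        stream = await client.text_generation("Once upon a time", stream=True)
        async for token in stream:
            print(token, end="")


asyncio.run(main())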
@@ -2233,7 +2234,7 @@ class AsyncInferenceClient:
                 If input values are not valid. No HTTP call is made to the server.
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `aiohttp.ClientResponseError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2423,9 +2424,9 @@ class AsyncInferenceClient:
         # Handle errors separately for more precise error messages
         try:
             bytes_output = await self._inner_post(request_parameters, stream=stream or False)
-        except _import_aiohttp().ClientResponseError as e:
-            match = MODEL_KWARGS_NOT_USED_REGEX.search(e.response_error_payload["error"])
-            if e.status == 400 and match:
+        except HfHubHTTPError as e:
+            match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
+            if isinstance(e, BadRequestError) and match:
                 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
                 _set_unsupported_text_generation_kwargs(model, unused_params)
                 return await self.text_generation(  # type: ignore
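This rewrite relies on `BadRequestError` (HTTP 400) being a subclass of `HfHubHTTPError`, so the single `except HfHubHTTPError` clause also receives 400 responses and the `isinstance` check narrows them:

from huggingface_hub.errors import BadRequestError, HfHubHTTPError

# The subclass relationship the new except/isinstance pair depends on.
assert issubclass(BadRequestError, HfHubHTTPError)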
@@ -2478,7 +2479,7 @@ class AsyncInferenceClient:
2478
2479
  model: Optional[str] = None,
2479
2480
  scheduler: Optional[str] = None,
2480
2481
  seed: Optional[int] = None,
2481
- extra_body: Optional[Dict[str, Any]] = None,
2482
+ extra_body: Optional[dict[str, Any]] = None,
2482
2483
  ) -> "Image":
2483
2484
  """
2484
2485
  Generate an image based on a given text using a specified model.
@@ -2512,7 +2513,7 @@ class AsyncInferenceClient:
2512
2513
  Override the scheduler with a compatible one.
2513
2514
  seed (`int`, *optional*):
2514
2515
  Seed for the random number generator.
2515
- extra_body (`Dict[str, Any]`, *optional*):
2516
+ extra_body (`dict[str, Any]`, *optional*):
2516
2517
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2517
2518
  for supported parameters.
2518
2519
 
@@ -2522,7 +2523,7 @@ class AsyncInferenceClient:
2522
2523
  Raises:
2523
2524
  [`InferenceTimeoutError`]:
2524
2525
  If the model is unavailable or the request times out.
2525
- `aiohttp.ClientResponseError`:
2526
+ [`HfHubHTTPError`]:
2526
2527
  If the request fails with an HTTP error status code other than HTTP 503.
2527
2528
 
2528
2529
  Example:
@@ -2612,11 +2613,11 @@ class AsyncInferenceClient:
2612
2613
  *,
2613
2614
  model: Optional[str] = None,
2614
2615
  guidance_scale: Optional[float] = None,
2615
- negative_prompt: Optional[List[str]] = None,
2616
+ negative_prompt: Optional[list[str]] = None,
2616
2617
  num_frames: Optional[float] = None,
2617
2618
  num_inference_steps: Optional[int] = None,
2618
2619
  seed: Optional[int] = None,
2619
- extra_body: Optional[Dict[str, Any]] = None,
2620
+ extra_body: Optional[dict[str, Any]] = None,
2620
2621
  ) -> bytes:
2621
2622
  """
2622
2623
  Generate a video based on a given text.
@@ -2634,7 +2635,7 @@ class AsyncInferenceClient:
2634
2635
  guidance_scale (`float`, *optional*):
2635
2636
  A higher guidance scale value encourages the model to generate videos closely linked to the text
2636
2637
  prompt, but values too high may cause saturation and other artifacts.
2637
- negative_prompt (`List[str]`, *optional*):
2638
+ negative_prompt (`list[str]`, *optional*):
2638
2639
  One or several prompt to guide what NOT to include in video generation.
2639
2640
  num_frames (`float`, *optional*):
2640
2641
  The num_frames parameter determines how many video frames are generated.
@@ -2643,7 +2644,7 @@ class AsyncInferenceClient:
2643
2644
  expense of slower inference.
2644
2645
  seed (`int`, *optional*):
2645
2646
  Seed for the random number generator.
2646
- extra_body (`Dict[str, Any]`, *optional*):
2647
+ extra_body (`dict[str, Any]`, *optional*):
2647
2648
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2648
2649
  for supported parameters.
2649
2650
 
@@ -2723,7 +2724,7 @@ class AsyncInferenceClient:
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.
@@ -2784,7 +2785,7 @@ class AsyncInferenceClient:
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -2793,7 +2794,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
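A hedged usage sketch for `text_to_speech`; the returned bytes' audio format depends on the model, so the filename is illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    audio: bytes = await client.text_to_speech("Hello world")
    with open("speech.flac", "wb") as f:  # extension is illustrative
        f.write(audio)


asyncio.run(main())
```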
@@ -2917,9 +2918,9 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2933,18 +2934,18 @@ class AsyncInferenceClient:
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str]`, *optional*):
+ ignore_labels (`list[str]`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.

  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
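A hedged usage sketch for `token_classification` under the new `list[...]` annotations; the input sentence is illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    elements = await client.token_classification(
        "My name is Sarah Jessica Parker but you can call me Jessica"
    )
    for el in elements:  # list[TokenClassificationOutputElement]
        print(el.entity_group, el.word, el.score)


asyncio.run(main())
```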
@@ -2996,7 +2997,7 @@ class AsyncInferenceClient:
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -3021,7 +3022,7 @@ class AsyncInferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.

  Returns:
@@ -3030,7 +3031,7 @@ class AsyncInferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
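A hedged usage sketch for `translation`; the model ID is illustrative:

```py
# TranslationOutput exposes the result as `translation_text`.
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    result = await client.translation(
        "My name is Wolfgang and I live in Berlin",
        model="Helsinki-NLP/opus-mt-en-fr",  # illustrative model choice
    )
    print(result.translation_text)


asyncio.run(main())
```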
@@ -3084,7 +3085,7 @@ class AsyncInferenceClient:
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.

@@ -3101,12 +3102,12 @@ class AsyncInferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
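A hedged usage sketch for `visual_question_answering`; the image URL is illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    answers = await client.visual_question_answering(
        image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
        question="What is the animal doing?",
    )
    for a in answers:  # list[VisualQuestionAnsweringOutputElement]
        print(a)  # each element pairs an answer with its probability


asyncio.run(main())
```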
@@ -3140,21 +3141,21 @@ class AsyncInferenceClient:
  async def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3169,12 +3170,12 @@ class AsyncInferenceClient:

  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
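A hedged usage sketch for `zero_shot_classification`; the text and labels are illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_classification(
        "I really enjoyed this movie!",
        candidate_labels=["positive", "negative", "neutral"],
    )
    print(results)  # list[ZeroShotClassificationOutputElement]


asyncio.run(main())
```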
@@ -3248,22 +3249,22 @@ class AsyncInferenceClient:
  async def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3273,12 +3274,12 @@ class AsyncInferenceClient:
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `aiohttp.ClientResponseError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
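A hedged usage sketch for `zero_shot_image_classification`; the image URL and labels are illustrative:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient()
    results = await client.zero_shot_image_classification(
        "https://upload.wikimedia.org/wikipedia/commons/3/3f/JPEG_example_flower.jpg",
        candidate_labels=["flower", "dog", "car"],
    )
    print(results)  # list[ZeroShotImageClassificationOutputElement]


asyncio.run(main())
```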
@@ -3313,48 +3314,7 @@ class AsyncInferenceClient:
  response = await self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- def _get_client_session(self, headers: Optional[Dict] = None) -> "ClientSession":
- aiohttp = _import_aiohttp()
- client_headers = self.headers.copy()
- if headers is not None:
- client_headers.update(headers)
-
- # Return a new aiohttp ClientSession with correct settings.
- session = aiohttp.ClientSession(
- headers=client_headers,
- cookies=self.cookies,
- timeout=aiohttp.ClientTimeout(self.timeout),
- trust_env=self.trust_env,
- )
-
- # Keep track of sessions to close them later
- self._sessions[session] = set()
-
- # Override the `._request` method to register responses to be closed
- session._wrapped_request = session._request
-
- async def _request(method, url, **kwargs):
- response = await session._wrapped_request(method, url, **kwargs)
- self._sessions[session].add(response)
- return response
-
- session._request = _request
-
- # Override the 'close' method to
- # 1. close ongoing responses
- # 2. deregister the session when closed
- session._close = session.close
-
- async def close_session():
- for response in self._sessions[session]:
- response.close()
- await session._close()
- self._sessions.pop(session, None)
-
- session.close = close_session
- return session
-
- async def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ async def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.

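The removed `_get_client_session` helper wrapped each `aiohttp.ClientSession` to track open responses and close them manually. A sketch of the replacement pattern under the httpx backend, with hypothetical names (not the library's exact private code):

```py
# One lazily created, reusable httpx.AsyncClient replaces the per-call
# aiohttp session bookkeeping; httpx pools connections and releases
# responses itself, so no manual response tracking is needed.
from typing import Optional

import httpx


class _SharedClientSketch:
    def __init__(self, headers: dict[str, str], timeout: Optional[float] = None):
        self.headers = headers
        self.timeout = timeout
        self._async_client: Optional[httpx.AsyncClient] = None

    async def _get_async_client(self) -> httpx.AsyncClient:
        # Create the shared client on first use, or recreate it after close.
        if self._async_client is None or self._async_client.is_closed:
            self._async_client = httpx.AsyncClient(headers=self.headers, timeout=self.timeout)
        return self._async_client

    async def aclose(self) -> None:
        if self._async_client is not None and not self._async_client.is_closed:
            await self._async_client.aclose()
```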
@@ -3367,7 +3327,7 @@ class AsyncInferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py
@@ -3409,10 +3369,10 @@ class AsyncInferenceClient:
  else:
  url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- response.raise_for_status()
- return await response.json()
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ hf_raise_for_status(response)
+ return response.json()

  async def health_check(self, model: Optional[str] = None) -> bool:
  """
@@ -3446,9 +3406,9 @@ class AsyncInferenceClient:
  raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"

- async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
- response = await client.get(url, proxy=self.proxies)
- return response.status == 200
+ client = await self._get_async_client()
+ response = await client.get(url, headers=build_hf_headers(token=self.token))
+ return response.status_code == 200

  @property
  def chat(self) -> "ProxyClientChat":
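A hedged usage sketch for `health_check` after the httpx migration (`response.status_code` replaces aiohttp's `response.status`); the endpoint URL is a placeholder:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # health_check requires an Inference Endpoint URL as the model.
    client = AsyncInferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")
    is_up = await client.health_check()  # True iff the endpoint answers HTTP 200
    print(is_up)


asyncio.run(main())
```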