huggingface-hub 0.36.0rc0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to their public registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (132)
  1. huggingface_hub/__init__.py +33 -45
  2. huggingface_hub/_commit_api.py +39 -43
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +17 -43
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +135 -50
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +18 -32
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +143 -39
  16. huggingface_hub/cli/auth.py +105 -171
  17. huggingface_hub/cli/cache.py +594 -361
  18. huggingface_hub/cli/download.py +120 -112
  19. huggingface_hub/cli/hf.py +38 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +282 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -220
  26. huggingface_hub/cli/upload_large_folder.py +91 -106
  27. huggingface_hub/community.py +5 -5
  28. huggingface_hub/constants.py +17 -52
  29. huggingface_hub/dataclasses.py +135 -21
  30. huggingface_hub/errors.py +47 -30
  31. huggingface_hub/fastai_utils.py +8 -9
  32. huggingface_hub/file_download.py +351 -303
  33. huggingface_hub/hf_api.py +398 -570
  34. huggingface_hub/hf_file_system.py +101 -66
  35. huggingface_hub/hub_mixin.py +32 -54
  36. huggingface_hub/inference/_client.py +177 -162
  37. huggingface_hub/inference/_common.py +38 -54
  38. huggingface_hub/inference/_generated/_async_client.py +218 -258
  39. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  40. huggingface_hub/inference/_generated/types/base.py +10 -7
  41. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  42. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  43. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  44. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  45. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  46. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  47. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  48. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  49. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  50. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  51. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  52. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  53. huggingface_hub/inference/_generated/types/translation.py +2 -2
  54. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  55. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  56. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  57. huggingface_hub/inference/_mcp/agent.py +3 -3
  58. huggingface_hub/inference/_mcp/constants.py +1 -2
  59. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  60. huggingface_hub/inference/_mcp/types.py +10 -10
  61. huggingface_hub/inference/_mcp/utils.py +4 -4
  62. huggingface_hub/inference/_providers/__init__.py +12 -4
  63. huggingface_hub/inference/_providers/_common.py +62 -24
  64. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  65. huggingface_hub/inference/_providers/cohere.py +3 -3
  66. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  67. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  68. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  69. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  70. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  71. huggingface_hub/inference/_providers/nebius.py +10 -10
  72. huggingface_hub/inference/_providers/novita.py +5 -5
  73. huggingface_hub/inference/_providers/nscale.py +4 -4
  74. huggingface_hub/inference/_providers/replicate.py +15 -15
  75. huggingface_hub/inference/_providers/sambanova.py +6 -6
  76. huggingface_hub/inference/_providers/together.py +7 -7
  77. huggingface_hub/lfs.py +21 -94
  78. huggingface_hub/repocard.py +15 -16
  79. huggingface_hub/repocard_data.py +57 -57
  80. huggingface_hub/serialization/__init__.py +0 -1
  81. huggingface_hub/serialization/_base.py +9 -9
  82. huggingface_hub/serialization/_dduf.py +7 -7
  83. huggingface_hub/serialization/_torch.py +28 -28
  84. huggingface_hub/utils/__init__.py +11 -6
  85. huggingface_hub/utils/_auth.py +5 -5
  86. huggingface_hub/utils/_cache_manager.py +49 -74
  87. huggingface_hub/utils/_deprecation.py +1 -1
  88. huggingface_hub/utils/_dotenv.py +3 -3
  89. huggingface_hub/utils/_fixes.py +0 -10
  90. huggingface_hub/utils/_git_credential.py +3 -3
  91. huggingface_hub/utils/_headers.py +7 -29
  92. huggingface_hub/utils/_http.py +371 -208
  93. huggingface_hub/utils/_pagination.py +4 -4
  94. huggingface_hub/utils/_parsing.py +98 -0
  95. huggingface_hub/utils/_paths.py +5 -5
  96. huggingface_hub/utils/_runtime.py +59 -23
  97. huggingface_hub/utils/_safetensors.py +21 -21
  98. huggingface_hub/utils/_subprocess.py +9 -9
  99. huggingface_hub/utils/_telemetry.py +3 -3
  100. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -9
  101. huggingface_hub/utils/_typing.py +3 -3
  102. huggingface_hub/utils/_validators.py +53 -72
  103. huggingface_hub/utils/_xet.py +16 -16
  104. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  105. huggingface_hub/utils/insecure_hashlib.py +3 -9
  106. huggingface_hub/utils/tqdm.py +3 -3
  107. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/METADATA +16 -35
  108. huggingface_hub-1.0.0.dist-info/RECORD +152 -0
  109. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/entry_points.txt +0 -1
  110. huggingface_hub/commands/__init__.py +0 -27
  111. huggingface_hub/commands/delete_cache.py +0 -476
  112. huggingface_hub/commands/download.py +0 -204
  113. huggingface_hub/commands/env.py +0 -39
  114. huggingface_hub/commands/huggingface_cli.py +0 -65
  115. huggingface_hub/commands/lfs.py +0 -200
  116. huggingface_hub/commands/repo.py +0 -151
  117. huggingface_hub/commands/repo_files.py +0 -132
  118. huggingface_hub/commands/scan_cache.py +0 -183
  119. huggingface_hub/commands/tag.py +0 -161
  120. huggingface_hub/commands/upload.py +0 -318
  121. huggingface_hub/commands/upload_large_folder.py +0 -131
  122. huggingface_hub/commands/user.py +0 -208
  123. huggingface_hub/commands/version.py +0 -40
  124. huggingface_hub/inference_api.py +0 -217
  125. huggingface_hub/keras_mixin.py +0 -497
  126. huggingface_hub/repository.py +0 -1471
  127. huggingface_hub/serialization/_tensorflow.py +0 -92
  128. huggingface_hub/utils/_hf_folder.py +0 -68
  129. huggingface_hub-0.36.0rc0.dist-info/RECORD +0 -170
  130. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/LICENSE +0 -0
  131. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/WHEEL +0 -0
  132. {huggingface_hub-0.36.0rc0.dist-info → huggingface_hub-1.0.0.dist-info}/top_level.txt +0 -0
@@ -34,14 +34,14 @@
  # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
  import base64
  import logging
+ import os
  import re
  import warnings
- from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
-
- from requests import HTTPError
+ from contextlib import ExitStack
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

  from huggingface_hub import constants
- from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
  from huggingface_hub.inference._common import (
  TASKS_EXPECTING_IMAGES,
  ContentT,
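
The import hunk above captures two themes of the 1.0 migration: `requests` is no longer imported (HTTP failures now surface as `HfHubHTTPError`) and the deprecated `typing.Dict`/`typing.List` aliases give way to builtin generics. A minimal illustrative sketch (not from the package source) of what the error change means for caller code; the model ID is illustrative:

```py
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError

client = InferenceClient(model="gpt2")
try:
    client.text_generation("Hello")
except HfHubHTTPError as error:
    # Pre-1.0 code would have caught `requests.HTTPError` here.
    print(error.response.status_code)
```
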
@@ -101,7 +101,12 @@ from huggingface_hub.inference._generated.types import (
  ZeroShotImageClassificationOutputElement,
  )
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+ from huggingface_hub.utils import (
+ build_hf_headers,
+ get_session,
+ hf_raise_for_status,
+ validate_hf_hub_args,
+ )
  from huggingface_hub.utils._auth import get_token


@@ -139,16 +144,14 @@ class InferenceClient:
  arguments are mutually exclusive and have the exact same behavior.
  timeout (`float`, `optional`):
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
- headers (`Dict[str, str]`, `optional`):
+ headers (`dict[str, str]`, `optional`):
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
  Values in this dictionary will override the default values.
  bill_to (`str`, `optional`):
  The billing account to use for the requests. By default the requests are billed on the user's account.
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
- cookies (`Dict[str, str]`, `optional`):
+ cookies (`dict[str, str]`, `optional`):
  Additional cookies to send to the server.
- proxies (`Any`, `optional`):
- Proxies to use for the request.
  base_url (`str`, `optional`):
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -157,6 +160,7 @@ class InferenceClient:
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
  """

+ @validate_hf_hub_args
  def __init__(
  self,
  model: Optional[str] = None,
@@ -164,9 +168,8 @@ class InferenceClient:
  provider: Optional[PROVIDER_OR_POLICY_T] = None,
  token: Optional[str] = None,
  timeout: Optional[float] = None,
- headers: Optional[Dict[str, str]] = None,
- cookies: Optional[Dict[str, str]] = None,
- proxies: Optional[Any] = None,
+ headers: Optional[dict[str, str]] = None,
+ cookies: Optional[dict[str, str]] = None,
  bill_to: Optional[str] = None,
  # OpenAI compatibility
  base_url: Optional[str] = None,
@@ -228,11 +231,21 @@ class InferenceClient:

  self.cookies = cookies
  self.timeout = timeout
- self.proxies = proxies
+
+ self.exit_stack = ExitStack()

  def __repr__(self):
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.exit_stack.close()
+
+ def close(self):
+ self.exit_stack.close()
+
  @overload
  def _inner_post( # type: ignore[misc]
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
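
The hunk above makes `InferenceClient` a context manager: an `ExitStack` now owns any open streamed responses, and `__exit__`/`close()` release them. A usage sketch based on the methods added above (the model ID is illustrative):

```py
from huggingface_hub import InferenceClient

# Preferred: resources held by the internal ExitStack are released on exit.
with InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
    client.text_generation("Hello")

# Equivalent manual form using the new close() method.
client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")
try:
    client.text_generation("Hello")
finally:
    client.close()
```
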
@@ -241,44 +254,46 @@
  @overload
  def _inner_post( # type: ignore[misc]
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
- ) -> Iterable[bytes]: ...
+ ) -> Iterable[str]: ...

  @overload
  def _inner_post(
  self, request_parameters: RequestParameters, *, stream: bool = False
- ) -> Union[bytes, Iterable[bytes]]: ...
+ ) -> Union[bytes, Iterable[str]]: ...

  def _inner_post(
  self, request_parameters: RequestParameters, *, stream: bool = False
- ) -> Union[bytes, Iterable[bytes]]:
+ ) -> Union[bytes, Iterable[str]]:
  """Make a request to the inference server."""
  # TODO: this should be handled in provider helpers directly
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
  request_parameters.headers["Accept"] = "image/png"

  try:
- response = get_session().post(
- request_parameters.url,
- json=request_parameters.json,
- data=request_parameters.data,
- headers=request_parameters.headers,
- cookies=self.cookies,
- timeout=self.timeout,
- stream=stream,
- proxies=self.proxies,
+ response = self.exit_stack.enter_context(
+ get_session().stream(
+ "POST",
+ request_parameters.url,
+ json=request_parameters.json,
+ content=request_parameters.data,
+ headers=request_parameters.headers,
+ cookies=self.cookies,
+ timeout=self.timeout,
+ )
  )
+ hf_raise_for_status(response)
+ if stream:
+ return response.iter_lines()
+ else:
+ return response.read()
  except TimeoutError as error:
  # Convert any `TimeoutError` to a `InferenceTimeoutError`
  raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
-
- try:
- hf_raise_for_status(response)
- return response.iter_lines() if stream else response.content
- except HTTPError as error:
+ except HfHubHTTPError as error:
  if error.response.status_code == 422 and request_parameters.task != "unknown":
  msg = str(error.args[0])
  if len(error.response.text) > 0:
- msg += f"\n{error.response.text}\n"
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
  error.args = (msg,) + error.args[1:]
  raise

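`_inner_post` now opens every request with `get_session().stream("POST", ...)` and registers the response on the client's `ExitStack`, so a streamed iterator stays valid after the method returns. The changed keyword (`content=` instead of `data=`) and the `Iterable[str]` return type of `iter_lines()` point to an httpx-style session replacing `requests`. A standalone sketch of the same pattern, assuming plain `httpx`; the URL and payload are illustrative:

```py
import contextlib

import httpx

exit_stack = contextlib.ExitStack()

# Open a streaming POST whose lifetime is tied to the stack, not this scope.
client = exit_stack.enter_context(httpx.Client())
response = exit_stack.enter_context(
    client.stream("POST", "https://example.com/infer", json={"inputs": "hi"})
)
response.raise_for_status()
for line in response.iter_lines():  # httpx yields str lines, not bytes
    print(line)

exit_stack.close()  # closes the response and the client
```
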
@@ -289,7 +304,7 @@
  model: Optional[str] = None,
  top_k: Optional[int] = None,
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
- ) -> List[AudioClassificationOutputElement]:
+ ) -> list[AudioClassificationOutputElement]:
  """
  Perform audio classification on the provided audio content.

@@ -307,12 +322,12 @@
  The function to apply to the model outputs in order to retrieve the scores.

  Returns:
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -344,7 +359,7 @@
  audio: ContentT,
  *,
  model: Optional[str] = None,
- ) -> List[AudioToAudioOutputElement]:
+ ) -> list[AudioToAudioOutputElement]:
  """
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -358,12 +373,12 @@
  audio_to_audio will be used.

  Returns:
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -396,7 +411,7 @@
  audio: ContentT,
  *,
  model: Optional[str] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> AutomaticSpeechRecognitionOutput:
  """
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -407,7 +422,7 @@
  model (`str`, *optional*):
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
- extra_body (`Dict`, *optional*):
+ extra_body (`dict`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -416,7 +431,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
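
`extra_body`, now typed `Optional[dict]`, forwards provider-specific fields that are not part of the common signature. A hedged sketch for the ASR method documented above; the provider, model ID, and extra field are illustrative:

```py
from huggingface_hub import InferenceClient

with InferenceClient(provider="fal-ai") as client:
    output = client.automatic_speech_recognition(
        "sample.flac",  # local path, URL, or raw bytes
        model="openai/whisper-large-v3",
        extra_body={"language": "en"},  # provider-specific, illustrative
    )
    print(output.text)
```
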
@@ -442,105 +457,105 @@
  @overload
  def chat_completion( # type: ignore
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: Literal[False] = False,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> ChatCompletionOutput: ...

  @overload
  def chat_completion( # type: ignore
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: Literal[True] = True,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> Iterable[ChatCompletionStreamOutput]: ...

  @overload
  def chat_completion(
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: bool = False,
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

  def chat_completion(
  self,
- messages: List[Union[Dict, ChatCompletionInputMessage]],
+ messages: list[Union[dict, ChatCompletionInputMessage]],
  *,
  model: Optional[str] = None,
  stream: bool = False,
  # Parameters from ChatCompletionInput (handled manually)
  frequency_penalty: Optional[float] = None,
- logit_bias: Optional[List[float]] = None,
+ logit_bias: Optional[list[float]] = None,
  logprobs: Optional[bool] = None,
  max_tokens: Optional[int] = None,
  n: Optional[int] = None,
  presence_penalty: Optional[float] = None,
  response_format: Optional[ChatCompletionInputGrammarType] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
+ stop: Optional[list[str]] = None,
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
  temperature: Optional[float] = None,
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
  tool_prompt: Optional[str] = None,
- tools: Optional[List[ChatCompletionInputTool]] = None,
+ tools: Optional[list[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
- extra_body: Optional[Dict] = None,
+ extra_body: Optional[dict] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
  """
  A method for completing conversations using a specified language model.
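
The four signatures above only swap the typing aliases for builtin generics; the overload behavior is unchanged: `stream=False` resolves to a `ChatCompletionOutput`, `stream=True` to `Iterable[ChatCompletionStreamOutput]`. A usage sketch (the model ID is illustrative):

```py
from huggingface_hub import InferenceClient

with InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct") as client:
    # Non-streaming overload -> ChatCompletionOutput
    output = client.chat_completion(
        messages=[{"role": "user", "content": "Hi"}], max_tokens=16
    )
    print(output.choices[0].message.content)

    # Streaming overload -> Iterable[ChatCompletionStreamOutput]
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": "Hi"}], max_tokens=16, stream=True
    ):
        print(chunk.choices[0].delta.content, end="")
```
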
@@ -566,7 +581,7 @@
  frequency_penalty (`float`, *optional*):
  Penalizes new tokens based on their existing frequency
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
- logit_bias (`List[float]`, *optional*):
+ logit_bias (`list[float]`, *optional*):
  Adjusts the likelihood of specific tokens appearing in the generated output.
  logprobs (`bool`, *optional*):
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -582,7 +597,7 @@
  Grammar constraints. Can be either a JSONSchema or a regex.
  seed (Optional[`int`], *optional*):
  Seed for reproducible control flow. Defaults to None.
- stop (`List[str]`, *optional*):
+ stop (`list[str]`, *optional*):
  Up to four strings which trigger the end of the response.
  Defaults to None.
  stream (`bool`, *optional*):
@@ -606,7 +621,7 @@
  tools (List of [`ChatCompletionInputTool`], *optional*):
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
  provide a list of functions the model may generate JSON inputs for.
- extra_body (`Dict`, *optional*):
+ extra_body (`dict`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -618,7 +633,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -932,8 +947,8 @@
  max_question_len: Optional[int] = None,
  max_seq_len: Optional[int] = None,
  top_k: Optional[int] = None,
- word_boxes: Optional[List[Union[List[float], str]]] = None,
- ) -> List[DocumentQuestionAnsweringOutputElement]:
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
  """
  Answer questions on document images.

@@ -963,16 +978,16 @@
  top_k (`int`, *optional*):
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
  answers if there are not enough options available within the context.
- word_boxes (`List[Union[List[float], str`, *optional*):
+ word_boxes (`list[Union[list[float], str`, *optional*):
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
  step and use the provided bounding boxes instead.
  Returns:
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.


@@ -986,7 +1001,7 @@
  """
  model_id = model or self.model
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
  request_parameters = provider_helper.prepare_request(
  inputs=inputs,
  parameters={
@@ -1047,7 +1062,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1084,9 +1099,9 @@
  text: str,
  *,
  model: Optional[str] = None,
- targets: Optional[List[str]] = None,
+ targets: Optional[list[str]] = None,
  top_k: Optional[int] = None,
- ) -> List[FillMaskOutputElement]:
+ ) -> list[FillMaskOutputElement]:
  """
  Fill in a hole with a missing word (token to be precise).

@@ -1096,20 +1111,20 @@
  model (`str`, *optional*):
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
- targets (`List[str`, *optional*):
+ targets (`list[str`, *optional*):
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
  resulting token will be used (with a warning, and that might be slower).
  top_k (`int`, *optional*):
  When passed, overrides the number of predictions to return.
  Returns:
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
  probability, token reference, and completed text.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1142,7 +1157,7 @@
  model: Optional[str] = None,
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
  top_k: Optional[int] = None,
- ) -> List[ImageClassificationOutputElement]:
+ ) -> list[ImageClassificationOutputElement]:
  """
  Perform image classification on the given image using the specified model.

@@ -1157,12 +1172,12 @@
  top_k (`int`, *optional*):
  When specified, limits the output to the top K most probable classes.
  Returns:
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1194,7 +1209,7 @@
  overlap_mask_area_threshold: Optional[float] = None,
  subtask: Optional["ImageSegmentationSubtask"] = None,
  threshold: Optional[float] = None,
- ) -> List[ImageSegmentationOutputElement]:
+ ) -> list[ImageSegmentationOutputElement]:
  """
  Perform image segmentation on the given image using the specified model.

@@ -1216,12 +1231,12 @@
  threshold (`float`, *optional*):
  Probability threshold to filter out predicted masks.
  Returns:
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1296,7 +1311,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1426,7 +1441,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1449,12 +1464,12 @@
  api_key=self.token,
  )
  response = self._inner_post(request_parameters)
- output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
  return output_list[0]

  def object_detection(
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
- ) -> List[ObjectDetectionOutputElement]:
+ ) -> list[ObjectDetectionOutputElement]:
  """
  Perform object detection on the given image using the specified model.

@@ -1470,12 +1485,12 @@
  threshold (`float`, *optional*):
  The probability necessary to make a prediction.
  Returns:
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If the request output is not a List.
@@ -1513,7 +1528,7 @@
  max_question_len: Optional[int] = None,
  max_seq_len: Optional[int] = None,
  top_k: Optional[int] = None,
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
  """
  Retrieve the answer to a question from a given text.

@@ -1545,13 +1560,13 @@
  topk answers if there are not enough options available within the context.

  Returns:
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1585,15 +1600,15 @@
  return output

  def sentence_similarity(
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
- ) -> List[float]:
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+ ) -> list[float]:
  """
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

  Args:
  sentence (`str`):
  The main sentence to compare to others.
- other_sentences (`List[str]`):
+ other_sentences (`list[str]`):
  The list of sentences to compare to.
  model (`str`, *optional*):
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1601,12 +1616,12 @@
  Defaults to None.

  Returns:
- `List[float]`: The similarity scores between the main sentence and the given comparison sentences.
+ `list[float]`: The embedding representing the input text.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1643,7 +1658,7 @@
  *,
  model: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  truncation: Optional["SummarizationTruncationStrategy"] = None,
  ) -> SummarizationOutput:
  """
@@ -1657,7 +1672,7 @@
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
  clean_up_tokenization_spaces (`bool`, *optional*):
  Whether to clean up the potential extra spaces in the text output.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
@@ -1667,7 +1682,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1697,7 +1712,7 @@

  def table_question_answering(
  self,
- table: Dict[str, Any],
+ table: dict[str, Any],
  query: str,
  *,
  model: Optional[str] = None,
@@ -1732,7 +1747,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1757,12 +1772,12 @@
  response = self._inner_post(request_parameters)
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

- def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+ def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
  """
  Classifying a target category (a group) based on a set of attributes.

  Args:
- table (`Dict[str, Any]`):
+ table (`dict[str, Any]`):
  Set of attributes to classify.
  model (`str`, *optional*):
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1775,7 +1790,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1812,12 +1827,12 @@
  response = self._inner_post(request_parameters)
  return _bytes_to_list(response)

- def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+ def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
  """
  Predicting a numerical target value given a set of attributes/features in a table.

  Args:
- table (`Dict[str, Any]`):
+ table (`dict[str, Any]`):
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
  model (`str`, *optional*):
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1830,7 +1845,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1869,7 +1884,7 @@
  model: Optional[str] = None,
  top_k: Optional[int] = None,
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
- ) -> List[TextClassificationOutputElement]:
+ ) -> list[TextClassificationOutputElement]:
  """
  Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1886,12 +1901,12 @@
  The function to apply to the model outputs in order to retrieve the scores.

  Returns:
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -1939,8 +1954,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -1969,8 +1984,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -1999,8 +2014,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None, # Manual default value
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -2029,8 +2044,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -2059,8 +2074,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -2088,8 +2103,8 @@
  repetition_penalty: Optional[float] = None,
  return_full_text: Optional[bool] = None,
  seed: Optional[int] = None,
- stop: Optional[List[str]] = None,
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+ stop: Optional[list[str]] = None,
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
  temperature: Optional[float] = None,
  top_k: Optional[int] = None,
  top_n_tokens: Optional[int] = None,
@@ -2142,9 +2157,9 @@
  Whether to prepend the prompt to the generated text
  seed (`int`, *optional*):
  Random sampling seed
- stop (`List[str]`, *optional*):
+ stop (`list[str]`, *optional*):
  Stop generating tokens if a member of `stop` is generated.
- stop_sequences (`List[str]`, *optional*):
+ stop_sequences (`list[str]`, *optional*):
  Deprecated argument. Use `stop` instead.
  temperature (`float`, *optional*):
  The value used to module the logits distribution.
@@ -2177,7 +2192,7 @@
  If input values are not valid. No HTTP call is made to the server.
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2366,7 +2381,7 @@
  # Handle errors separately for more precise error messages
  try:
  bytes_output = self._inner_post(request_parameters, stream=stream or False)
- except HTTPError as e:
+ except HfHubHTTPError as e:
  match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
  if isinstance(e, BadRequestError) and match:
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
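
The hunk above is the retry path of `text_generation`: because `BadRequestError` subclasses `HfHubHTTPError`, the single `except HfHubHTTPError` clause still sees HTTP 400 responses and can strip the parameters the server reports as unused before retrying. A minimal sketch of the relationship this relies on; `is_retryable` is a hypothetical helper mirroring the branch above:

```py
from huggingface_hub.errors import BadRequestError, HfHubHTTPError

# BadRequestError (HTTP 400) specializes HfHubHTTPError, so one handler
# covers both and can branch on the concrete type.
assert issubclass(BadRequestError, HfHubHTTPError)


def is_retryable(error: HfHubHTTPError) -> bool:
    # Hypothetical: approximates what MODEL_KWARGS_NOT_USED_REGEX checks above.
    return isinstance(error, BadRequestError) and "not used by the model" in str(error)
```
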
@@ -2421,7 +2436,7 @@
  model: Optional[str] = None,
  scheduler: Optional[str] = None,
  seed: Optional[int] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> "Image":
  """
  Generate an image based on a given text using a specified model.
@@ -2455,7 +2470,7 @@
  Override the scheduler with a compatible one.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.

@@ -2465,7 +2480,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2554,11 +2569,11 @@
  *,
  model: Optional[str] = None,
  guidance_scale: Optional[float] = None,
- negative_prompt: Optional[List[str]] = None,
+ negative_prompt: Optional[list[str]] = None,
  num_frames: Optional[float] = None,
  num_inference_steps: Optional[int] = None,
  seed: Optional[int] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Generate a video based on a given text.
@@ -2576,7 +2591,7 @@
  guidance_scale (`float`, *optional*):
  A higher guidance scale value encourages the model to generate videos closely linked to the text
  prompt, but values too high may cause saturation and other artifacts.
- negative_prompt (`List[str]`, *optional*):
+ negative_prompt (`list[str]`, *optional*):
  One or several prompt to guide what NOT to include in video generation.
  num_frames (`float`, *optional*):
  The num_frames parameter determines how many video frames are generated.
@@ -2585,7 +2600,7 @@
  expense of slower inference.
  seed (`int`, *optional*):
  Seed for the random number generator.
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.

@@ -2665,7 +2680,7 @@
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
- extra_body: Optional[Dict[str, Any]] = None,
+ extra_body: Optional[dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.
@@ -2726,7 +2741,7 @@
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
- extra_body (`Dict[str, Any]`, *optional*):
+ extra_body (`dict[str, Any]`, *optional*):
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
  for supported parameters.
  Returns:
@@ -2735,7 +2750,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2858,9 +2873,9 @@
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2874,18 +2889,18 @@
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str`, *optional*):
+ ignore_labels (`list[str`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.

  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -2936,7 +2951,7 @@
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -2961,7 +2976,7 @@
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.

  Returns:
@@ -2970,7 +2985,7 @@
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
@@ -3023,7 +3038,7 @@
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.

@@ -3040,12 +3055,12 @@
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -3078,21 +3093,21 @@
  def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3107,12 +3122,12 @@


  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
@@ -3184,22 +3199,22 @@
  def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3209,12 +3224,12 @@
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -3248,7 +3263,7 @@
  response = self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.

@@ -3261,7 +3276,7 @@
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py