huggingface-hub 0.35.1__py3-none-any.whl → 1.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of huggingface-hub has been flagged as potentially problematic.

Files changed (127)
  1. huggingface_hub/__init__.py +28 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +20 -20
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +5 -5
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/__init__.py +0 -14
  15. huggingface_hub/cli/_cli_utils.py +80 -3
  16. huggingface_hub/cli/auth.py +104 -150
  17. huggingface_hub/cli/cache.py +102 -126
  18. huggingface_hub/cli/download.py +93 -110
  19. huggingface_hub/cli/hf.py +37 -41
  20. huggingface_hub/cli/jobs.py +689 -1017
  21. huggingface_hub/cli/lfs.py +120 -143
  22. huggingface_hub/cli/repo.py +158 -216
  23. huggingface_hub/cli/repo_files.py +50 -84
  24. huggingface_hub/cli/system.py +6 -25
  25. huggingface_hub/cli/upload.py +198 -212
  26. huggingface_hub/cli/upload_large_folder.py +90 -105
  27. huggingface_hub/commands/_cli_utils.py +2 -2
  28. huggingface_hub/commands/delete_cache.py +11 -11
  29. huggingface_hub/commands/download.py +4 -13
  30. huggingface_hub/commands/lfs.py +4 -4
  31. huggingface_hub/commands/repo_files.py +2 -2
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -22
  39. huggingface_hub/errors.py +43 -30
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +154 -253
  42. huggingface_hub/hf_api.py +329 -558
  43. huggingface_hub/hf_file_system.py +104 -62
  44. huggingface_hub/hub_mixin.py +32 -54
  45. huggingface_hub/inference/_client.py +178 -163
  46. huggingface_hub/inference/_common.py +38 -54
  47. huggingface_hub/inference/_generated/_async_client.py +219 -259
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +16 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/agent.py +3 -3
  67. huggingface_hub/inference/_mcp/constants.py +1 -2
  68. huggingface_hub/inference/_mcp/mcp_client.py +33 -22
  69. huggingface_hub/inference/_mcp/types.py +10 -10
  70. huggingface_hub/inference/_mcp/utils.py +4 -4
  71. huggingface_hub/inference/_providers/__init__.py +2 -13
  72. huggingface_hub/inference/_providers/_common.py +24 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +13 -13
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +24 -33
  87. huggingface_hub/repocard.py +16 -17
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +3 -3
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +369 -209
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +3 -3
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +1 -1
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/METADATA +17 -26
  115. huggingface_hub-1.0.0rc1.dist-info/RECORD +161 -0
  116. huggingface_hub/inference/_providers/publicai.py +0 -6
  117. huggingface_hub/inference/_providers/scaleway.py +0 -28
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.1.dist-info/RECORD +0 -168
  124. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.1.dist-info → huggingface_hub-1.0.0rc1.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py
@@ -34,14 +34,14 @@
 # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
 import base64
 import logging
+import os
 import re
 import warnings
-from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
-
-from requests import HTTPError
+from contextlib import ExitStack
+from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
 
 from huggingface_hub import constants
-from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
     ContentT,
@@ -101,7 +101,12 @@ from huggingface_hub.inference._generated.types import (
     ZeroShotImageClassificationOutputElement,
 )
 from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
-from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+from huggingface_hub.utils import (
+    build_hf_headers,
+    get_session,
+    hf_raise_for_status,
+    validate_hf_hub_args,
+)
 from huggingface_hub.utils._auth import get_token
 
 
@@ -130,7 +135,7 @@ class InferenceClient:
             Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
             arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
         provider (`str`, *optional*):
-            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"` or `"together"`.
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"replicate"`, `"sambanova"` or `"together"`.
             Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
             If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str`, *optional*):
@@ -139,16 +144,14 @@
             arguments are mutually exclusive and have the exact same behavior.
         timeout (`float`, `optional`):
             The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-        headers (`Dict[str, str]`, `optional`):
+        headers (`dict[str, str]`, `optional`):
            Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
             Values in this dictionary will override the default values.
         bill_to (`str`, `optional`):
             The billing account to use for the requests. By default the requests are billed on the user's account.
             Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-        cookies (`Dict[str, str]`, `optional`):
+        cookies (`dict[str, str]`, `optional`):
             Additional cookies to send to the server.
-        proxies (`Any`, `optional`):
-            Proxies to use for the request.
         base_url (`str`, `optional`):
             Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -157,6 +160,7 @@ class InferenceClient:
             follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
     """
 
+    @validate_hf_hub_args
     def __init__(
         self,
         model: Optional[str] = None,
@@ -164,9 +168,8 @@
         provider: Optional[PROVIDER_OR_POLICY_T] = None,
         token: Optional[str] = None,
         timeout: Optional[float] = None,
-        headers: Optional[Dict[str, str]] = None,
-        cookies: Optional[Dict[str, str]] = None,
-        proxies: Optional[Any] = None,
+        headers: Optional[dict[str, str]] = None,
+        cookies: Optional[dict[str, str]] = None,
         bill_to: Optional[str] = None,
         # OpenAI compatibility
         base_url: Optional[str] = None,
@@ -228,11 +231,21 @@
 
         self.cookies = cookies
         self.timeout = timeout
-        self.proxies = proxies
+
+        self.exit_stack = ExitStack()
 
     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
 
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.exit_stack.close()
+
+    def close(self):
+        self.exit_stack.close()
+
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
@@ -241,44 +254,46 @@
     @overload
     def _inner_post(  # type: ignore[misc]
         self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-    ) -> Iterable[bytes]: ...
+    ) -> Iterable[str]: ...
 
     @overload
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[bytes]]: ...
+    ) -> Union[bytes, Iterable[str]]: ...
 
     def _inner_post(
         self, request_parameters: RequestParameters, *, stream: bool = False
-    ) -> Union[bytes, Iterable[bytes]]:
+    ) -> Union[bytes, Iterable[str]]:
         """Make a request to the inference server."""
         # TODO: this should be handled in provider helpers directly
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"
 
         try:
-            response = get_session().post(
-                request_parameters.url,
-                json=request_parameters.json,
-                data=request_parameters.data,
-                headers=request_parameters.headers,
-                cookies=self.cookies,
-                timeout=self.timeout,
-                stream=stream,
-                proxies=self.proxies,
+            response = self.exit_stack.enter_context(
+                get_session().stream(
+                    "POST",
+                    request_parameters.url,
+                    json=request_parameters.json,
+                    content=request_parameters.data,
+                    headers=request_parameters.headers,
+                    cookies=self.cookies,
+                    timeout=self.timeout,
+                )
             )
+            hf_raise_for_status(response)
+            if stream:
+                return response.iter_lines()
+            else:
+                return response.read()
         except TimeoutError as error:
             # Convert any `TimeoutError` to a `InferenceTimeoutError`
             raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error  # type: ignore
-
-        try:
-            hf_raise_for_status(response)
-            return response.iter_lines() if stream else response.content
-        except HTTPError as error:
+        except HfHubHTTPError as error:
             if error.response.status_code == 422 and request_parameters.task != "unknown":
                 msg = str(error.args[0])
                 if len(error.response.text) > 0:
-                    msg += f"\n{error.response.text}\n"
+                    msg += f"{os.linesep}{error.response.text}{os.linesep}"
                 error.args = (msg,) + error.args[1:]
             raise
 
@@ -289,7 +304,7 @@
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-    ) -> List[AudioClassificationOutputElement]:
+    ) -> list[AudioClassificationOutputElement]:
         """
         Perform audio classification on the provided audio content.
 
@@ -307,12 +322,12 @@
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -344,7 +359,7 @@
         audio: ContentT,
         *,
         model: Optional[str] = None,
-    ) -> List[AudioToAudioOutputElement]:
+    ) -> list[AudioToAudioOutputElement]:
         """
         Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
 
@@ -358,12 +373,12 @@
                 audio_to_audio will be used.
 
         Returns:
-            `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+            `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
 
         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -396,7 +411,7 @@
         audio: ContentT,
         *,
         model: Optional[str] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -407,7 +422,7 @@
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -416,7 +431,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -442,105 +457,105 @@
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[False] = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> ChatCompletionOutput: ...
 
     @overload
     def chat_completion(  # type: ignore
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: Literal[True] = True,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...
 
     @overload
     def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
 
     def chat_completion(
         self,
-        messages: List[Union[Dict, ChatCompletionInputMessage]],
+        messages: list[Union[dict, ChatCompletionInputMessage]],
         *,
         model: Optional[str] = None,
         stream: bool = False,
         # Parameters from ChatCompletionInput (handled manually)
         frequency_penalty: Optional[float] = None,
-        logit_bias: Optional[List[float]] = None,
+        logit_bias: Optional[list[float]] = None,
         logprobs: Optional[bool] = None,
         max_tokens: Optional[int] = None,
         n: Optional[int] = None,
         presence_penalty: Optional[float] = None,
         response_format: Optional[ChatCompletionInputGrammarType] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
+        stop: Optional[list[str]] = None,
         stream_options: Optional[ChatCompletionInputStreamOptions] = None,
         temperature: Optional[float] = None,
         tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
         tool_prompt: Optional[str] = None,
-        tools: Optional[List[ChatCompletionInputTool]] = None,
+        tools: Optional[list[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
-        extra_body: Optional[Dict] = None,
+        extra_body: Optional[dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
@@ -570,7 +585,7 @@
             frequency_penalty (`float`, *optional*):
                 Penalizes new tokens based on their existing frequency
                 in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-            logit_bias (`List[float]`, *optional*):
+            logit_bias (`list[float]`, *optional*):
                 Adjusts the likelihood of specific tokens appearing in the generated output.
             logprobs (`bool`, *optional*):
                 Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -586,7 +601,7 @@
                 Grammar constraints. Can be either a JSONSchema or a regex.
             seed (Optional[`int`], *optional*):
                 Seed for reproducible control flow. Defaults to None.
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Up to four strings which trigger the end of the response.
                 Defaults to None.
             stream (`bool`, *optional*):
@@ -610,7 +625,7 @@
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-            extra_body (`Dict`, *optional*):
+            extra_body (`dict`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -622,7 +637,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -936,8 +951,8 @@
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-        word_boxes: Optional[List[Union[List[float], str]]] = None,
-    ) -> List[DocumentQuestionAnsweringOutputElement]:
+        word_boxes: Optional[list[Union[list[float], str]]] = None,
+    ) -> list[DocumentQuestionAnsweringOutputElement]:
         """
         Answer questions on document images.
 
@@ -967,16 +982,16 @@
             top_k (`int`, *optional*):
                 The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                 answers if there are not enough options available within the context.
-            word_boxes (`List[Union[List[float], str]]`, *optional*):
+            word_boxes (`list[Union[list[float], str]]`, *optional*):
                 A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                 step and use the provided bounding boxes instead.
         Returns:
-            `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+            `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
 
@@ -990,7 +1005,7 @@
         """
         model_id = model or self.model
         provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-        inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+        inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
         request_parameters = provider_helper.prepare_request(
             inputs=inputs,
             parameters={
@@ -1051,7 +1066,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1088,9 +1103,9 @@
         text: str,
         *,
         model: Optional[str] = None,
-        targets: Optional[List[str]] = None,
+        targets: Optional[list[str]] = None,
         top_k: Optional[int] = None,
-    ) -> List[FillMaskOutputElement]:
+    ) -> list[FillMaskOutputElement]:
         """
         Fill in a hole with a missing word (token to be precise).
 
@@ -1100,20 +1115,20 @@
             model (`str`, *optional*):
                 The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                 a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-            targets (`List[str]`, *optional*):
+            targets (`list[str]`, *optional*):
                 When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                 vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                 resulting token will be used (with a warning, and that might be slower).
             top_k (`int`, *optional*):
                 When passed, overrides the number of predictions to return.
         Returns:
-            `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+            `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
                 probability, token reference, and completed text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1146,7 +1161,7 @@
         model: Optional[str] = None,
         function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
         top_k: Optional[int] = None,
-    ) -> List[ImageClassificationOutputElement]:
+    ) -> list[ImageClassificationOutputElement]:
         """
         Perform image classification on the given image using the specified model.
 
@@ -1161,12 +1176,12 @@
             top_k (`int`, *optional*):
                 When specified, limits the output to the top K most probable classes.
         Returns:
-            `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1198,7 +1213,7 @@
         overlap_mask_area_threshold: Optional[float] = None,
         subtask: Optional["ImageSegmentationSubtask"] = None,
         threshold: Optional[float] = None,
-    ) -> List[ImageSegmentationOutputElement]:
+    ) -> list[ImageSegmentationOutputElement]:
         """
         Perform image segmentation on the given image using the specified model.
 
@@ -1223,12 +1238,12 @@
             threshold (`float`, *optional*):
                 Probability threshold to filter out predicted masks.
         Returns:
-            `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+            `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1305,7 +1320,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1435,7 +1450,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1458,12 +1473,12 @@
             api_key=self.token,
         )
         response = self._inner_post(request_parameters)
-        output_list: List[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+        output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
         return output_list[0]
 
     def object_detection(
         self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-    ) -> List[ObjectDetectionOutputElement]:
+    ) -> list[ObjectDetectionOutputElement]:
         """
         Perform object detection on the given image using the specified model.
 
@@ -1482,12 +1497,12 @@
             threshold (`float`, *optional*):
                 The probability necessary to make a prediction.
         Returns:
-            `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+            `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If the request output is not a List.
@@ -1525,7 +1540,7 @@
         max_question_len: Optional[int] = None,
         max_seq_len: Optional[int] = None,
         top_k: Optional[int] = None,
-    ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+    ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
         """
         Retrieve the answer to a question from a given text.
 
@@ -1557,13 +1572,13 @@
                 topk answers if there are not enough options available within the context.
 
         Returns:
-            Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+            Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                 When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                 When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1597,15 +1612,15 @@
         return output
 
     def sentence_similarity(
-        self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
-    ) -> List[float]:
+        self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+    ) -> list[float]:
         """
         Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
 
         Args:
             sentence (`str`):
                 The main sentence to compare to others.
-            other_sentences (`List[str]`):
+            other_sentences (`list[str]`):
                 The list of sentences to compare to.
             model (`str`, *optional*):
                 The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1613,12 +1628,12 @@
                 Defaults to None.
 
         Returns:
-            `List[float]`: The embedding representing the input text.
+            `list[float]`: The embedding representing the input text.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1655,7 +1670,7 @@
         *,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
         truncation: Optional["SummarizationTruncationStrategy"] = None,
     ) -> SummarizationOutput:
         """
@@ -1669,7 +1684,7 @@
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
             clean_up_tokenization_spaces (`bool`, *optional*):
                 Whether to clean up the potential extra spaces in the text output.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
             truncation (`"SummarizationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
@@ -1679,7 +1694,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1709,7 +1724,7 @@
 
     def table_question_answering(
         self,
-        table: Dict[str, Any],
+        table: dict[str, Any],
         query: str,
         *,
         model: Optional[str] = None,
@@ -1744,7 +1759,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1769,12 +1784,12 @@
         response = self._inner_post(request_parameters)
         return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
 
-    def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+    def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
         """
         Classifying a target category (a group) based on a set of attributes.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes to classify.
             model (`str`, *optional*):
                 The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1787,7 +1802,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1824,12 +1839,12 @@
         response = self._inner_post(request_parameters)
         return _bytes_to_list(response)
 
-    def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+    def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
         """
         Predicting a numerical target value given a set of attributes/features in a table.
 
         Args:
-            table (`Dict[str, Any]`):
+            table (`dict[str, Any]`):
                 Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
             model (`str`, *optional*):
                 The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1842,7 +1857,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1881,7 +1896,7 @@
         model: Optional[str] = None,
         top_k: Optional[int] = None,
         function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-    ) -> List[TextClassificationOutputElement]:
+    ) -> list[TextClassificationOutputElement]:
         """
         Perform text classification (e.g. sentiment-analysis) on the given text.
 
@@ -1898,12 +1913,12 @@
                 The function to apply to the model outputs in order to retrieve the scores.
 
         Returns:
-            `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+            `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -1951,8 +1966,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -1981,8 +1996,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2011,8 +2026,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,  # Manual default value
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2041,8 +2056,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2071,8 +2086,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2100,8 +2115,8 @@
         repetition_penalty: Optional[float] = None,
         return_full_text: Optional[bool] = None,
         seed: Optional[int] = None,
-        stop: Optional[List[str]] = None,
-        stop_sequences: Optional[List[str]] = None,  # Deprecated, use `stop` instead
+        stop: Optional[list[str]] = None,
+        stop_sequences: Optional[list[str]] = None,  # Deprecated, use `stop` instead
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_n_tokens: Optional[int] = None,
@@ -2157,9 +2172,9 @@
                 Whether to prepend the prompt to the generated text
             seed (`int`, *optional*):
                 Random sampling seed
-            stop (`List[str]`, *optional*):
+            stop (`list[str]`, *optional*):
                 Stop generating tokens if a member of `stop` is generated.
-            stop_sequences (`List[str]`, *optional*):
+            stop_sequences (`list[str]`, *optional*):
                 Deprecated argument. Use `stop` instead.
             temperature (`float`, *optional*):
                 The value used to modulate the logits distribution.
@@ -2192,7 +2207,7 @@
                 If input values are not valid. No HTTP call is made to the server.
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2381,7 +2396,7 @@
         # Handle errors separately for more precise error messages
         try:
             bytes_output = self._inner_post(request_parameters, stream=stream or False)
-        except HTTPError as e:
+        except HfHubHTTPError as e:
            match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
             if isinstance(e, BadRequestError) and match:
                 unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
@@ -2436,7 +2451,7 @@
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2474,7 +2489,7 @@
                 Override the scheduler with a compatible one.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`Dict[str, Any]`, *optional*):
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2484,7 +2499,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2573,11 +2588,11 @@
         *,
         model: Optional[str] = None,
         guidance_scale: Optional[float] = None,
-        negative_prompt: Optional[List[str]] = None,
+        negative_prompt: Optional[list[str]] = None,
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
-        extra_body: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.
@@ -2596,7 +2611,7 @@
             guidance_scale (`float`, *optional*):
                 A higher guidance scale value encourages the model to generate videos closely linked to the text
                 prompt, but values too high may cause saturation and other artifacts.
-            negative_prompt (`List[str]`, *optional*):
+            negative_prompt (`list[str]`, *optional*):
                 One or several prompts to guide what NOT to include in video generation.
             num_frames (`float`, *optional*):
                 The num_frames parameter determines how many video frames are generated.
@@ -2605,7 +2620,7 @@
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
-            extra_body (`Dict[str, Any]`, *optional*):
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
 
@@ -2685,7 +2700,7 @@
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
-        extra_body: Optional[Dict[str, Any]] = None,
+        extra_body: Optional[dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.
@@ -2747,7 +2762,7 @@
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-            extra_body (`Dict[str, Any]`, *optional*):
+            extra_body (`dict[str, Any]`, *optional*):
                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                 for supported parameters.
         Returns:
@@ -2756,7 +2771,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2879,9 +2894,9 @@
         *,
         model: Optional[str] = None,
         aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-        ignore_labels: Optional[List[str]] = None,
+        ignore_labels: Optional[list[str]] = None,
         stride: Optional[int] = None,
-    ) -> List[TokenClassificationOutputElement]:
+    ) -> list[TokenClassificationOutputElement]:
         """
         Perform token classification on the given text.
         Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2895,18 +2910,18 @@
                 Defaults to None.
             aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
                 The strategy used to fuse tokens based on model predictions
-            ignore_labels (`List[str]`, *optional*):
+            ignore_labels (`list[str]`, *optional*):
                 A list of labels to ignore
             stride (`int`, *optional*):
                 The number of overlapping tokens between chunks when splitting the input text.
 
         Returns:
-            `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+            `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -2957,7 +2972,7 @@
         tgt_lang: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         truncation: Optional["TranslationTruncationStrategy"] = None,
-        generate_parameters: Optional[Dict[str, Any]] = None,
+        generate_parameters: Optional[dict[str, Any]] = None,
     ) -> TranslationOutput:
         """
         Convert text from one language to another.
@@ -2982,7 +2997,7 @@
                 Whether to clean up the potential extra spaces in the text output.
             truncation (`"TranslationTruncationStrategy"`, *optional*):
                 The truncation strategy to use.
-            generate_parameters (`Dict[str, Any]`, *optional*):
+            generate_parameters (`dict[str, Any]`, *optional*):
                 Additional parametrization of the text generation algorithm.
 
         Returns:
@@ -2991,7 +3006,7 @@
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
             `ValueError`:
                 If only one of the `src_lang` and `tgt_lang` arguments are provided.
@@ -3044,7 +3059,7 @@
         *,
         model: Optional[str] = None,
         top_k: Optional[int] = None,
-    ) -> List[VisualQuestionAnsweringOutputElement]:
+    ) -> list[VisualQuestionAnsweringOutputElement]:
         """
         Answering open-ended questions based on an image.
 
@@ -3061,12 +3076,12 @@
                 The number of answers to return (will be chosen by order of likelihood). Note that we return less than
                 topk answers if there are not enough options available within the context.
         Returns:
-            `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+            `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
 
         Raises:
             `InferenceTimeoutError`:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -3099,21 +3114,21 @@
     def zero_shot_classification(
         self,
         text: str,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         multi_label: Optional[bool] = False,
         hypothesis_template: Optional[str] = None,
         model: Optional[str] = None,
-    ) -> List[ZeroShotClassificationOutputElement]:
+    ) -> list[ZeroShotClassificationOutputElement]:
         """
         Provide as input a text and a set of candidate labels to classify the input text.
 
         Args:
             text (`str`):
                 The input text to classify.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The set of possible class labels to classify the text into.
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
             multi_label (`bool`, *optional*):
                 Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3128,12 +3143,12 @@
 
 
         Returns:
-            `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example with `multi_label=False`:
@@ -3205,22 +3220,22 @@
     def zero_shot_image_classification(
         self,
         image: ContentT,
-        candidate_labels: List[str],
+        candidate_labels: list[str],
         *,
         model: Optional[str] = None,
         hypothesis_template: Optional[str] = None,
         # deprecated argument
-        labels: List[str] = None,  # type: ignore
-    ) -> List[ZeroShotImageClassificationOutputElement]:
+        labels: list[str] = None,  # type: ignore
+    ) -> list[ZeroShotImageClassificationOutputElement]:
         """
         Provide input image and text labels to predict text labels for the image.
 
         Args:
             image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
                 The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
-            candidate_labels (`List[str]`):
+            candidate_labels (`list[str]`):
                 The candidate labels for this image
-            labels (`List[str]`, *optional*):
+            labels (`list[str]`, *optional*):
                 (deprecated) List of string possible labels. There must be at least 2 labels.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3230,12 +3245,12 @@
                 replacing the placeholder with the candidate labels.
 
         Returns:
-            `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+            `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
 
         Raises:
             [`InferenceTimeoutError`]:
                 If the model is unavailable or the request times out.
-            `HTTPError`:
+            [`HfHubHTTPError`]:
                 If the request fails with an HTTP error status code other than HTTP 503.
 
         Example:
@@ -3269,7 +3284,7 @@
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
 
-    def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+    def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
         """
         Get information about the deployed endpoint.
 
@@ -3282,7 +3297,7 @@
             Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
 
         Returns:
-            `Dict[str, Any]`: Information about the endpoint.
+            `dict[str, Any]`: Information about the endpoint.
 
         Example:
         ```py
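Across every task method above, the documented failure mode changes from `requests.HTTPError` to [`HfHubHTTPError`], matching the removal of the `requests` import at the top of the file. A minimal sketch of what calling code should catch after upgrading (the input text is illustrative):

```py
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

client = InferenceClient()
try:
    result = client.text_classification("I like you.")
except InferenceTimeoutError:
    # Model unavailable or the request timed out.
    raise
except HfHubHTTPError as error:
    # Replaces requests.HTTPError in 1.0; the HTTP response stays attached.
    print(error.response.status_code)
```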