huggingface-hub 0.35.0rc0__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of huggingface-hub might be problematic. Click here for more details.

Files changed (127) hide show
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +176 -20
  6. huggingface_hub/_local_folder.py +1 -1
  7. huggingface_hub/_login.py +13 -39
  8. huggingface_hub/_oauth.py +10 -14
  9. huggingface_hub/_snapshot_download.py +14 -28
  10. huggingface_hub/_space_api.py +4 -4
  11. huggingface_hub/_tensorboard_logger.py +13 -14
  12. huggingface_hub/_upload_large_folder.py +120 -13
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +2 -2
  15. huggingface_hub/cli/_cli_utils.py +2 -2
  16. huggingface_hub/cli/auth.py +8 -6
  17. huggingface_hub/cli/cache.py +18 -20
  18. huggingface_hub/cli/download.py +4 -4
  19. huggingface_hub/cli/hf.py +2 -5
  20. huggingface_hub/cli/jobs.py +599 -22
  21. huggingface_hub/cli/lfs.py +4 -4
  22. huggingface_hub/cli/repo.py +11 -7
  23. huggingface_hub/cli/repo_files.py +2 -2
  24. huggingface_hub/cli/upload.py +4 -4
  25. huggingface_hub/cli/upload_large_folder.py +3 -3
  26. huggingface_hub/commands/_cli_utils.py +2 -2
  27. huggingface_hub/commands/delete_cache.py +13 -13
  28. huggingface_hub/commands/download.py +4 -13
  29. huggingface_hub/commands/lfs.py +4 -4
  30. huggingface_hub/commands/repo_files.py +2 -2
  31. huggingface_hub/commands/scan_cache.py +1 -1
  32. huggingface_hub/commands/tag.py +1 -3
  33. huggingface_hub/commands/upload.py +4 -4
  34. huggingface_hub/commands/upload_large_folder.py +3 -3
  35. huggingface_hub/commands/user.py +4 -5
  36. huggingface_hub/community.py +5 -5
  37. huggingface_hub/constants.py +3 -41
  38. huggingface_hub/dataclasses.py +16 -19
  39. huggingface_hub/errors.py +42 -29
  40. huggingface_hub/fastai_utils.py +8 -9
  41. huggingface_hub/file_download.py +162 -259
  42. huggingface_hub/hf_api.py +841 -616
  43. huggingface_hub/hf_file_system.py +98 -62
  44. huggingface_hub/hub_mixin.py +37 -57
  45. huggingface_hub/inference/_client.py +257 -325
  46. huggingface_hub/inference/_common.py +110 -124
  47. huggingface_hub/inference/_generated/_async_client.py +307 -432
  48. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  49. huggingface_hub/inference/_generated/types/base.py +10 -7
  50. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  51. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  52. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  53. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  54. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  55. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  56. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  57. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  58. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  59. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  60. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  61. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/translation.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  64. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  65. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  66. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  67. huggingface_hub/inference/_mcp/agent.py +3 -3
  68. huggingface_hub/inference/_mcp/cli.py +1 -1
  69. huggingface_hub/inference/_mcp/constants.py +2 -3
  70. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  71. huggingface_hub/inference/_mcp/types.py +10 -7
  72. huggingface_hub/inference/_mcp/utils.py +11 -7
  73. huggingface_hub/inference/_providers/__init__.py +4 -2
  74. huggingface_hub/inference/_providers/_common.py +49 -25
  75. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  76. huggingface_hub/inference/_providers/cohere.py +3 -3
  77. huggingface_hub/inference/_providers/fal_ai.py +52 -21
  78. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  79. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  80. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +10 -10
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +4 -4
  85. huggingface_hub/inference/_providers/replicate.py +15 -15
  86. huggingface_hub/inference/_providers/sambanova.py +6 -6
  87. huggingface_hub/inference/_providers/together.py +7 -7
  88. huggingface_hub/lfs.py +20 -31
  89. huggingface_hub/repocard.py +18 -18
  90. huggingface_hub/repocard_data.py +56 -56
  91. huggingface_hub/serialization/__init__.py +0 -1
  92. huggingface_hub/serialization/_base.py +9 -9
  93. huggingface_hub/serialization/_dduf.py +7 -7
  94. huggingface_hub/serialization/_torch.py +28 -28
  95. huggingface_hub/utils/__init__.py +10 -4
  96. huggingface_hub/utils/_auth.py +5 -5
  97. huggingface_hub/utils/_cache_manager.py +31 -31
  98. huggingface_hub/utils/_deprecation.py +1 -1
  99. huggingface_hub/utils/_dotenv.py +25 -21
  100. huggingface_hub/utils/_fixes.py +0 -10
  101. huggingface_hub/utils/_git_credential.py +4 -4
  102. huggingface_hub/utils/_headers.py +7 -29
  103. huggingface_hub/utils/_http.py +366 -208
  104. huggingface_hub/utils/_pagination.py +4 -4
  105. huggingface_hub/utils/_paths.py +5 -5
  106. huggingface_hub/utils/_runtime.py +16 -13
  107. huggingface_hub/utils/_safetensors.py +21 -21
  108. huggingface_hub/utils/_subprocess.py +9 -9
  109. huggingface_hub/utils/_telemetry.py +3 -3
  110. huggingface_hub/utils/_typing.py +25 -5
  111. huggingface_hub/utils/_validators.py +53 -72
  112. huggingface_hub/utils/_xet.py +16 -16
  113. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  114. huggingface_hub/utils/insecure_hashlib.py +3 -9
  115. huggingface_hub/utils/tqdm.py +3 -3
  116. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  117. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  118. huggingface_hub/inference_api.py +0 -217
  119. huggingface_hub/keras_mixin.py +0 -500
  120. huggingface_hub/repository.py +0 -1477
  121. huggingface_hub/serialization/_tensorflow.py +0 -95
  122. huggingface_hub/utils/_hf_folder.py +0 -68
  123. huggingface_hub-0.35.0rc0.dist-info/RECORD +0 -166
  124. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  125. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  126. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  127. {huggingface_hub-0.35.0rc0.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
@@ -34,18 +34,17 @@
34
34
  # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
35
35
  import base64
36
36
  import logging
37
+ import os
37
38
  import re
38
39
  import warnings
39
- from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
40
-
41
- from requests import HTTPError
40
+ from contextlib import ExitStack
41
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
42
42
 
43
43
  from huggingface_hub import constants
44
- from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
44
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
45
45
  from huggingface_hub.inference._common import (
46
46
  TASKS_EXPECTING_IMAGES,
47
47
  ContentT,
48
- ModelStatus,
49
48
  RequestParameters,
50
49
  _b64_encode,
51
50
  _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
54
53
  _bytes_to_list,
55
54
  _get_unsupported_text_generation_kwargs,
56
55
  _import_numpy,
57
- _open_as_binary,
58
56
  _set_unsupported_text_generation_kwargs,
59
57
  _stream_chat_completion_response,
60
58
  _stream_text_generation_response,
@@ -81,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
81
79
  ImageSegmentationSubtask,
82
80
  ImageToImageTargetSize,
83
81
  ImageToTextOutput,
82
+ ImageToVideoTargetSize,
84
83
  ObjectDetectionOutputElement,
85
84
  Padding,
86
85
  QuestionAnsweringOutputElement,
@@ -102,9 +101,13 @@ from huggingface_hub.inference._generated.types import (
102
101
  ZeroShotImageClassificationOutputElement,
103
102
  )
104
103
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
105
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
104
+ from huggingface_hub.utils import (
105
+ build_hf_headers,
106
+ get_session,
107
+ hf_raise_for_status,
108
+ validate_hf_hub_args,
109
+ )
106
110
  from huggingface_hub.utils._auth import get_token
107
- from huggingface_hub.utils._deprecation import _deprecate_method
108
111
 
109
112
 
110
113
  if TYPE_CHECKING:
@@ -141,16 +144,14 @@ class InferenceClient:
141
144
  arguments are mutually exclusive and have the exact same behavior.
142
145
  timeout (`float`, `optional`):
143
146
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
144
- headers (`Dict[str, str]`, `optional`):
147
+ headers (`dict[str, str]`, `optional`):
145
148
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
146
149
  Values in this dictionary will override the default values.
147
150
  bill_to (`str`, `optional`):
148
151
  The billing account to use for the requests. By default the requests are billed on the user's account.
149
152
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
150
- cookies (`Dict[str, str]`, `optional`):
153
+ cookies (`dict[str, str]`, `optional`):
151
154
  Additional cookies to send to the server.
152
- proxies (`Any`, `optional`):
153
- Proxies to use for the request.
154
155
  base_url (`str`, `optional`):
155
156
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
156
157
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -159,6 +160,7 @@ class InferenceClient:
159
160
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
160
161
  """
161
162
 
163
+ @validate_hf_hub_args
162
164
  def __init__(
163
165
  self,
164
166
  model: Optional[str] = None,
@@ -166,9 +168,8 @@ class InferenceClient:
166
168
  provider: Optional[PROVIDER_OR_POLICY_T] = None,
167
169
  token: Optional[str] = None,
168
170
  timeout: Optional[float] = None,
169
- headers: Optional[Dict[str, str]] = None,
170
- cookies: Optional[Dict[str, str]] = None,
171
- proxies: Optional[Any] = None,
171
+ headers: Optional[dict[str, str]] = None,
172
+ cookies: Optional[dict[str, str]] = None,
172
173
  bill_to: Optional[str] = None,
173
174
  # OpenAI compatibility
174
175
  base_url: Optional[str] = None,
@@ -230,11 +231,21 @@ class InferenceClient:
230
231
 
231
232
  self.cookies = cookies
232
233
  self.timeout = timeout
233
- self.proxies = proxies
234
+
235
+ self.exit_stack = ExitStack()
234
236
 
235
237
  def __repr__(self):
236
238
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
237
239
 
240
+ def __enter__(self):
241
+ return self
242
+
243
+ def __exit__(self, exc_type, exc_value, traceback):
244
+ self.exit_stack.close()
245
+
246
+ def close(self):
247
+ self.exit_stack.close()
248
+
238
249
  @overload
239
250
  def _inner_post( # type: ignore[misc]
240
251
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
@@ -243,45 +254,46 @@ class InferenceClient:
243
254
  @overload
244
255
  def _inner_post( # type: ignore[misc]
245
256
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
246
- ) -> Iterable[bytes]: ...
257
+ ) -> Iterable[str]: ...
247
258
 
248
259
  @overload
249
260
  def _inner_post(
250
261
  self, request_parameters: RequestParameters, *, stream: bool = False
251
- ) -> Union[bytes, Iterable[bytes]]: ...
262
+ ) -> Union[bytes, Iterable[str]]: ...
252
263
 
253
264
  def _inner_post(
254
265
  self, request_parameters: RequestParameters, *, stream: bool = False
255
- ) -> Union[bytes, Iterable[bytes]]:
266
+ ) -> Union[bytes, Iterable[str]]:
256
267
  """Make a request to the inference server."""
257
268
  # TODO: this should be handled in provider helpers directly
258
269
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
259
270
  request_parameters.headers["Accept"] = "image/png"
260
271
 
261
- with _open_as_binary(request_parameters.data) as data_as_binary:
262
- try:
263
- response = get_session().post(
272
+ try:
273
+ response = self.exit_stack.enter_context(
274
+ get_session().stream(
275
+ "POST",
264
276
  request_parameters.url,
265
277
  json=request_parameters.json,
266
- data=data_as_binary,
278
+ content=request_parameters.data,
267
279
  headers=request_parameters.headers,
268
280
  cookies=self.cookies,
269
281
  timeout=self.timeout,
270
- stream=stream,
271
- proxies=self.proxies,
272
282
  )
273
- except TimeoutError as error:
274
- # Convert any `TimeoutError` to a `InferenceTimeoutError`
275
- raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
276
-
277
- try:
283
+ )
278
284
  hf_raise_for_status(response)
279
- return response.iter_lines() if stream else response.content
280
- except HTTPError as error:
285
+ if stream:
286
+ return response.iter_lines()
287
+ else:
288
+ return response.read()
289
+ except TimeoutError as error:
290
+ # Convert any `TimeoutError` to a `InferenceTimeoutError`
291
+ raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
292
+ except HfHubHTTPError as error:
281
293
  if error.response.status_code == 422 and request_parameters.task != "unknown":
282
294
  msg = str(error.args[0])
283
295
  if len(error.response.text) > 0:
284
- msg += f"\n{error.response.text}\n"
296
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
285
297
  error.args = (msg,) + error.args[1:]
286
298
  raise
287
299
 
@@ -292,7 +304,7 @@ class InferenceClient:
292
304
  model: Optional[str] = None,
293
305
  top_k: Optional[int] = None,
294
306
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
295
- ) -> List[AudioClassificationOutputElement]:
307
+ ) -> list[AudioClassificationOutputElement]:
296
308
  """
297
309
  Perform audio classification on the provided audio content.
298
310
 
@@ -310,12 +322,12 @@ class InferenceClient:
310
322
  The function to apply to the model outputs in order to retrieve the scores.
311
323
 
312
324
  Returns:
313
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
325
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
314
326
 
315
327
  Raises:
316
328
  [`InferenceTimeoutError`]:
317
329
  If the model is unavailable or the request times out.
318
- `HTTPError`:
330
+ [`HfHubHTTPError`]:
319
331
  If the request fails with an HTTP error status code other than HTTP 503.
320
332
 
321
333
  Example:
@@ -347,7 +359,7 @@ class InferenceClient:
347
359
  audio: ContentT,
348
360
  *,
349
361
  model: Optional[str] = None,
350
- ) -> List[AudioToAudioOutputElement]:
362
+ ) -> list[AudioToAudioOutputElement]:
351
363
  """
352
364
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
353
365
 
@@ -361,12 +373,12 @@ class InferenceClient:
361
373
  audio_to_audio will be used.
362
374
 
363
375
  Returns:
364
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
376
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
365
377
 
366
378
  Raises:
367
379
  `InferenceTimeoutError`:
368
380
  If the model is unavailable or the request times out.
369
- `HTTPError`:
381
+ [`HfHubHTTPError`]:
370
382
  If the request fails with an HTTP error status code other than HTTP 503.
371
383
 
372
384
  Example:
@@ -399,7 +411,7 @@ class InferenceClient:
399
411
  audio: ContentT,
400
412
  *,
401
413
  model: Optional[str] = None,
402
- extra_body: Optional[Dict] = None,
414
+ extra_body: Optional[dict] = None,
403
415
  ) -> AutomaticSpeechRecognitionOutput:
404
416
  """
405
417
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -410,7 +422,7 @@ class InferenceClient:
410
422
  model (`str`, *optional*):
411
423
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
412
424
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
413
- extra_body (`Dict`, *optional*):
425
+ extra_body (`dict`, *optional*):
414
426
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
415
427
  for supported parameters.
416
428
  Returns:
@@ -419,7 +431,7 @@ class InferenceClient:
419
431
  Raises:
420
432
  [`InferenceTimeoutError`]:
421
433
  If the model is unavailable or the request times out.
422
- `HTTPError`:
434
+ [`HfHubHTTPError`]:
423
435
  If the request fails with an HTTP error status code other than HTTP 503.
424
436
 
425
437
  Example:
@@ -445,105 +457,105 @@ class InferenceClient:
445
457
  @overload
446
458
  def chat_completion( # type: ignore
447
459
  self,
448
- messages: List[Union[Dict, ChatCompletionInputMessage]],
460
+ messages: list[Union[dict, ChatCompletionInputMessage]],
449
461
  *,
450
462
  model: Optional[str] = None,
451
463
  stream: Literal[False] = False,
452
464
  frequency_penalty: Optional[float] = None,
453
- logit_bias: Optional[List[float]] = None,
465
+ logit_bias: Optional[list[float]] = None,
454
466
  logprobs: Optional[bool] = None,
455
467
  max_tokens: Optional[int] = None,
456
468
  n: Optional[int] = None,
457
469
  presence_penalty: Optional[float] = None,
458
470
  response_format: Optional[ChatCompletionInputGrammarType] = None,
459
471
  seed: Optional[int] = None,
460
- stop: Optional[List[str]] = None,
472
+ stop: Optional[list[str]] = None,
461
473
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
462
474
  temperature: Optional[float] = None,
463
475
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
464
476
  tool_prompt: Optional[str] = None,
465
- tools: Optional[List[ChatCompletionInputTool]] = None,
477
+ tools: Optional[list[ChatCompletionInputTool]] = None,
466
478
  top_logprobs: Optional[int] = None,
467
479
  top_p: Optional[float] = None,
468
- extra_body: Optional[Dict] = None,
480
+ extra_body: Optional[dict] = None,
469
481
  ) -> ChatCompletionOutput: ...
470
482
 
471
483
  @overload
472
484
  def chat_completion( # type: ignore
473
485
  self,
474
- messages: List[Union[Dict, ChatCompletionInputMessage]],
486
+ messages: list[Union[dict, ChatCompletionInputMessage]],
475
487
  *,
476
488
  model: Optional[str] = None,
477
489
  stream: Literal[True] = True,
478
490
  frequency_penalty: Optional[float] = None,
479
- logit_bias: Optional[List[float]] = None,
491
+ logit_bias: Optional[list[float]] = None,
480
492
  logprobs: Optional[bool] = None,
481
493
  max_tokens: Optional[int] = None,
482
494
  n: Optional[int] = None,
483
495
  presence_penalty: Optional[float] = None,
484
496
  response_format: Optional[ChatCompletionInputGrammarType] = None,
485
497
  seed: Optional[int] = None,
486
- stop: Optional[List[str]] = None,
498
+ stop: Optional[list[str]] = None,
487
499
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
488
500
  temperature: Optional[float] = None,
489
501
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
490
502
  tool_prompt: Optional[str] = None,
491
- tools: Optional[List[ChatCompletionInputTool]] = None,
503
+ tools: Optional[list[ChatCompletionInputTool]] = None,
492
504
  top_logprobs: Optional[int] = None,
493
505
  top_p: Optional[float] = None,
494
- extra_body: Optional[Dict] = None,
506
+ extra_body: Optional[dict] = None,
495
507
  ) -> Iterable[ChatCompletionStreamOutput]: ...
496
508
 
497
509
  @overload
498
510
  def chat_completion(
499
511
  self,
500
- messages: List[Union[Dict, ChatCompletionInputMessage]],
512
+ messages: list[Union[dict, ChatCompletionInputMessage]],
501
513
  *,
502
514
  model: Optional[str] = None,
503
515
  stream: bool = False,
504
516
  frequency_penalty: Optional[float] = None,
505
- logit_bias: Optional[List[float]] = None,
517
+ logit_bias: Optional[list[float]] = None,
506
518
  logprobs: Optional[bool] = None,
507
519
  max_tokens: Optional[int] = None,
508
520
  n: Optional[int] = None,
509
521
  presence_penalty: Optional[float] = None,
510
522
  response_format: Optional[ChatCompletionInputGrammarType] = None,
511
523
  seed: Optional[int] = None,
512
- stop: Optional[List[str]] = None,
524
+ stop: Optional[list[str]] = None,
513
525
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
514
526
  temperature: Optional[float] = None,
515
527
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
516
528
  tool_prompt: Optional[str] = None,
517
- tools: Optional[List[ChatCompletionInputTool]] = None,
529
+ tools: Optional[list[ChatCompletionInputTool]] = None,
518
530
  top_logprobs: Optional[int] = None,
519
531
  top_p: Optional[float] = None,
520
- extra_body: Optional[Dict] = None,
532
+ extra_body: Optional[dict] = None,
521
533
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
522
534
 
523
535
  def chat_completion(
524
536
  self,
525
- messages: List[Union[Dict, ChatCompletionInputMessage]],
537
+ messages: list[Union[dict, ChatCompletionInputMessage]],
526
538
  *,
527
539
  model: Optional[str] = None,
528
540
  stream: bool = False,
529
541
  # Parameters from ChatCompletionInput (handled manually)
530
542
  frequency_penalty: Optional[float] = None,
531
- logit_bias: Optional[List[float]] = None,
543
+ logit_bias: Optional[list[float]] = None,
532
544
  logprobs: Optional[bool] = None,
533
545
  max_tokens: Optional[int] = None,
534
546
  n: Optional[int] = None,
535
547
  presence_penalty: Optional[float] = None,
536
548
  response_format: Optional[ChatCompletionInputGrammarType] = None,
537
549
  seed: Optional[int] = None,
538
- stop: Optional[List[str]] = None,
550
+ stop: Optional[list[str]] = None,
539
551
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
540
552
  temperature: Optional[float] = None,
541
553
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
542
554
  tool_prompt: Optional[str] = None,
543
- tools: Optional[List[ChatCompletionInputTool]] = None,
555
+ tools: Optional[list[ChatCompletionInputTool]] = None,
544
556
  top_logprobs: Optional[int] = None,
545
557
  top_p: Optional[float] = None,
546
- extra_body: Optional[Dict] = None,
558
+ extra_body: Optional[dict] = None,
547
559
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
548
560
  """
549
561
  A method for completing conversations using a specified language model.
@@ -573,7 +585,7 @@ class InferenceClient:
573
585
  frequency_penalty (`float`, *optional*):
574
586
  Penalizes new tokens based on their existing frequency
575
587
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
576
- logit_bias (`List[float]`, *optional*):
588
+ logit_bias (`list[float]`, *optional*):
577
589
  Adjusts the likelihood of specific tokens appearing in the generated output.
578
590
  logprobs (`bool`, *optional*):
579
591
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -589,7 +601,7 @@ class InferenceClient:
589
601
  Grammar constraints. Can be either a JSONSchema or a regex.
590
602
  seed (Optional[`int`], *optional*):
591
603
  Seed for reproducible control flow. Defaults to None.
592
- stop (`List[str]`, *optional*):
604
+ stop (`list[str]`, *optional*):
593
605
  Up to four strings which trigger the end of the response.
594
606
  Defaults to None.
595
607
  stream (`bool`, *optional*):
@@ -613,7 +625,7 @@ class InferenceClient:
613
625
  tools (List of [`ChatCompletionInputTool`], *optional*):
614
626
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
615
627
  provide a list of functions the model may generate JSON inputs for.
616
- extra_body (`Dict`, *optional*):
628
+ extra_body (`dict`, *optional*):
617
629
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
618
630
  for supported parameters.
619
631
  Returns:
@@ -625,7 +637,7 @@ class InferenceClient:
625
637
  Raises:
626
638
  [`InferenceTimeoutError`]:
627
639
  If the model is unavailable or the request times out.
628
- `HTTPError`:
640
+ [`HfHubHTTPError`]:
629
641
  If the request fails with an HTTP error status code other than HTTP 503.
630
642
 
631
643
  Example:
@@ -939,8 +951,8 @@ class InferenceClient:
939
951
  max_question_len: Optional[int] = None,
940
952
  max_seq_len: Optional[int] = None,
941
953
  top_k: Optional[int] = None,
942
- word_boxes: Optional[List[Union[List[float], str]]] = None,
943
- ) -> List[DocumentQuestionAnsweringOutputElement]:
954
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
955
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
944
956
  """
945
957
  Answer questions on document images.
946
958
 
@@ -970,16 +982,16 @@ class InferenceClient:
970
982
  top_k (`int`, *optional*):
971
983
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
972
984
  answers if there are not enough options available within the context.
973
- word_boxes (`List[Union[List[float], str`, *optional*):
985
+ word_boxes (`list[Union[list[float], str`, *optional*):
974
986
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
975
987
  step and use the provided bounding boxes instead.
976
988
  Returns:
977
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
989
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
978
990
 
979
991
  Raises:
980
992
  [`InferenceTimeoutError`]:
981
993
  If the model is unavailable or the request times out.
982
- `HTTPError`:
994
+ [`HfHubHTTPError`]:
983
995
  If the request fails with an HTTP error status code other than HTTP 503.
984
996
 
985
997
 
@@ -993,7 +1005,7 @@ class InferenceClient:
993
1005
  """
994
1006
  model_id = model or self.model
995
1007
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
996
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1008
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
997
1009
  request_parameters = provider_helper.prepare_request(
998
1010
  inputs=inputs,
999
1011
  parameters={
@@ -1054,7 +1066,7 @@ class InferenceClient:
1054
1066
  Raises:
1055
1067
  [`InferenceTimeoutError`]:
1056
1068
  If the model is unavailable or the request times out.
1057
- `HTTPError`:
1069
+ [`HfHubHTTPError`]:
1058
1070
  If the request fails with an HTTP error status code other than HTTP 503.
1059
1071
 
1060
1072
  Example:
@@ -1091,9 +1103,9 @@ class InferenceClient:
1091
1103
  text: str,
1092
1104
  *,
1093
1105
  model: Optional[str] = None,
1094
- targets: Optional[List[str]] = None,
1106
+ targets: Optional[list[str]] = None,
1095
1107
  top_k: Optional[int] = None,
1096
- ) -> List[FillMaskOutputElement]:
1108
+ ) -> list[FillMaskOutputElement]:
1097
1109
  """
1098
1110
  Fill in a hole with a missing word (token to be precise).
1099
1111
 
@@ -1103,20 +1115,20 @@ class InferenceClient:
1103
1115
  model (`str`, *optional*):
1104
1116
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
1105
1117
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
1106
- targets (`List[str`, *optional*):
1118
+ targets (`list[str`, *optional*):
1107
1119
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
1108
1120
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
1109
1121
  resulting token will be used (with a warning, and that might be slower).
1110
1122
  top_k (`int`, *optional*):
1111
1123
  When passed, overrides the number of predictions to return.
1112
1124
  Returns:
1113
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1125
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1114
1126
  probability, token reference, and completed text.
1115
1127
 
1116
1128
  Raises:
1117
1129
  [`InferenceTimeoutError`]:
1118
1130
  If the model is unavailable or the request times out.
1119
- `HTTPError`:
1131
+ [`HfHubHTTPError`]:
1120
1132
  If the request fails with an HTTP error status code other than HTTP 503.
1121
1133
 
1122
1134
  Example:
@@ -1149,7 +1161,7 @@ class InferenceClient:
1149
1161
  model: Optional[str] = None,
1150
1162
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
1151
1163
  top_k: Optional[int] = None,
1152
- ) -> List[ImageClassificationOutputElement]:
1164
+ ) -> list[ImageClassificationOutputElement]:
1153
1165
  """
1154
1166
  Perform image classification on the given image using the specified model.
1155
1167
 
@@ -1164,12 +1176,12 @@ class InferenceClient:
1164
1176
  top_k (`int`, *optional*):
1165
1177
  When specified, limits the output to the top K most probable classes.
1166
1178
  Returns:
1167
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1179
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1168
1180
 
1169
1181
  Raises:
1170
1182
  [`InferenceTimeoutError`]:
1171
1183
  If the model is unavailable or the request times out.
1172
- `HTTPError`:
1184
+ [`HfHubHTTPError`]:
1173
1185
  If the request fails with an HTTP error status code other than HTTP 503.
1174
1186
 
1175
1187
  Example:
@@ -1201,7 +1213,7 @@ class InferenceClient:
1201
1213
  overlap_mask_area_threshold: Optional[float] = None,
1202
1214
  subtask: Optional["ImageSegmentationSubtask"] = None,
1203
1215
  threshold: Optional[float] = None,
1204
- ) -> List[ImageSegmentationOutputElement]:
1216
+ ) -> list[ImageSegmentationOutputElement]:
1205
1217
  """
1206
1218
  Perform image segmentation on the given image using the specified model.
1207
1219
 
@@ -1226,12 +1238,12 @@ class InferenceClient:
1226
1238
  threshold (`float`, *optional*):
1227
1239
  Probability threshold to filter out predicted masks.
1228
1240
  Returns:
1229
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1241
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1230
1242
 
1231
1243
  Raises:
1232
1244
  [`InferenceTimeoutError`]:
1233
1245
  If the model is unavailable or the request times out.
1234
- `HTTPError`:
1246
+ [`HfHubHTTPError`]:
1235
1247
  If the request fails with an HTTP error status code other than HTTP 503.
1236
1248
 
1237
1249
  Example:
@@ -1308,7 +1320,7 @@ class InferenceClient:
1308
1320
  Raises:
1309
1321
  [`InferenceTimeoutError`]:
1310
1322
  If the model is unavailable or the request times out.
1311
- `HTTPError`:
1323
+ [`HfHubHTTPError`]:
1312
1324
  If the request fails with an HTTP error status code other than HTTP 503.
1313
1325
 
1314
1326
  Example:
@@ -1339,6 +1351,85 @@ class InferenceClient:
1339
1351
  response = provider_helper.get_response(response, request_parameters)
1340
1352
  return _bytes_to_image(response)
1341
1353
 
1354
+ def image_to_video(
1355
+ self,
1356
+ image: ContentT,
1357
+ *,
1358
+ model: Optional[str] = None,
1359
+ prompt: Optional[str] = None,
1360
+ negative_prompt: Optional[str] = None,
1361
+ num_frames: Optional[float] = None,
1362
+ num_inference_steps: Optional[int] = None,
1363
+ guidance_scale: Optional[float] = None,
1364
+ seed: Optional[int] = None,
1365
+ target_size: Optional[ImageToVideoTargetSize] = None,
1366
+ **kwargs,
1367
+ ) -> bytes:
1368
+ """
1369
+ Generate a video from an input image.
1370
+
1371
+ Args:
1372
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1373
+ The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1374
+ model (`str`, *optional*):
1375
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1376
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
1377
+ prompt (`str`, *optional*):
1378
+ The text prompt to guide the video generation.
1379
+ negative_prompt (`str`, *optional*):
1380
+ One prompt to guide what NOT to include in video generation.
1381
+ num_frames (`float`, *optional*):
1382
+ The num_frames parameter determines how many video frames are generated.
1383
+ num_inference_steps (`int`, *optional*):
1384
+ For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
1385
+ quality image at the expense of slower inference.
1386
+ guidance_scale (`float`, *optional*):
1387
+ For diffusion models. A higher guidance scale value encourages the model to generate videos closely
1388
+ linked to the text prompt at the expense of lower image quality.
1389
+ seed (`int`, *optional*):
1390
+ The seed to use for the video generation.
1391
+ target_size (`ImageToVideoTargetSize`, *optional*):
1392
+ The size in pixel of the output video frames.
1393
+ num_inference_steps (`int`, *optional*):
1394
+ The number of denoising steps. More denoising steps usually lead to a higher quality video at the
1395
+ expense of slower inference.
1396
+ seed (`int`, *optional*):
1397
+ Seed for the random number generator.
1398
+
1399
+ Returns:
1400
+ `bytes`: The generated video.
1401
+
1402
+ Examples:
1403
+ ```py
1404
+ >>> from huggingface_hub import InferenceClient
1405
+ >>> client = InferenceClient()
1406
+ >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
1407
+ >>> with open("tiger.mp4", "wb") as f:
1408
+ ... f.write(video)
1409
+ ```
1410
+ """
1411
+ model_id = model or self.model
1412
+ provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
1413
+ request_parameters = provider_helper.prepare_request(
1414
+ inputs=image,
1415
+ parameters={
1416
+ "prompt": prompt,
1417
+ "negative_prompt": negative_prompt,
1418
+ "num_frames": num_frames,
1419
+ "num_inference_steps": num_inference_steps,
1420
+ "guidance_scale": guidance_scale,
1421
+ "seed": seed,
1422
+ "target_size": target_size,
1423
+ **kwargs,
1424
+ },
1425
+ headers=self.headers,
1426
+ model=model_id,
1427
+ api_key=self.token,
1428
+ )
1429
+ response = self._inner_post(request_parameters)
1430
+ response = provider_helper.get_response(response, request_parameters)
1431
+ return response
1432
+
1342
1433
  def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
1343
1434
  """
1344
1435
  Takes an input image and return text.
@@ -1359,7 +1450,7 @@ class InferenceClient:
1359
1450
  Raises:
1360
1451
  [`InferenceTimeoutError`]:
1361
1452
  If the model is unavailable or the request times out.
1362
- `HTTPError`:
1453
+ [`HfHubHTTPError`]:
1363
1454
  If the request fails with an HTTP error status code other than HTTP 503.
1364
1455
 
1365
1456
  Example:
@@ -1382,12 +1473,12 @@ class InferenceClient:
1382
1473
  api_key=self.token,
1383
1474
  )
1384
1475
  response = self._inner_post(request_parameters)
1385
- output = ImageToTextOutput.parse_obj(response)
1386
- return output[0] if isinstance(output, list) else output
1476
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
1477
+ return output_list[0]
1387
1478
 
1388
1479
  def object_detection(
1389
1480
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
1390
- ) -> List[ObjectDetectionOutputElement]:
1481
+ ) -> list[ObjectDetectionOutputElement]:
1391
1482
  """
1392
1483
  Perform object detection on the given image using the specified model.
1393
1484
 
@@ -1406,12 +1497,12 @@ class InferenceClient:
1406
1497
  threshold (`float`, *optional*):
1407
1498
  The probability necessary to make a prediction.
1408
1499
  Returns:
1409
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1500
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1410
1501
 
1411
1502
  Raises:
1412
1503
  [`InferenceTimeoutError`]:
1413
1504
  If the model is unavailable or the request times out.
1414
- `HTTPError`:
1505
+ [`HfHubHTTPError`]:
1415
1506
  If the request fails with an HTTP error status code other than HTTP 503.
1416
1507
  `ValueError`:
1417
1508
  If the request output is not a List.
@@ -1449,7 +1540,7 @@ class InferenceClient:
1449
1540
  max_question_len: Optional[int] = None,
1450
1541
  max_seq_len: Optional[int] = None,
1451
1542
  top_k: Optional[int] = None,
1452
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
1543
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
1453
1544
  """
1454
1545
  Retrieve the answer to a question from a given text.
1455
1546
 
@@ -1481,13 +1572,13 @@ class InferenceClient:
1481
1572
  topk answers if there are not enough options available within the context.
1482
1573
 
1483
1574
  Returns:
1484
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
1575
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
1485
1576
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
1486
1577
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
1487
1578
  Raises:
1488
1579
  [`InferenceTimeoutError`]:
1489
1580
  If the model is unavailable or the request times out.
1490
- `HTTPError`:
1581
+ [`HfHubHTTPError`]:
1491
1582
  If the request fails with an HTTP error status code other than HTTP 503.
1492
1583
 
1493
1584
  Example:
@@ -1521,15 +1612,15 @@ class InferenceClient:
1521
1612
  return output
1522
1613
 
1523
1614
  def sentence_similarity(
1524
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
1525
- ) -> List[float]:
1615
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
1616
+ ) -> list[float]:
1526
1617
  """
1527
1618
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
1528
1619
 
1529
1620
  Args:
1530
1621
  sentence (`str`):
1531
1622
  The main sentence to compare to others.
1532
- other_sentences (`List[str]`):
1623
+ other_sentences (`list[str]`):
1533
1624
  The list of sentences to compare to.
1534
1625
  model (`str`, *optional*):
1535
1626
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1537,12 +1628,12 @@ class InferenceClient:
1537
1628
  Defaults to None.
1538
1629
 
1539
1630
  Returns:
1540
- `List[float]`: The embedding representing the input text.
1631
+ `list[float]`: The embedding representing the input text.
1541
1632
 
1542
1633
  Raises:
1543
1634
  [`InferenceTimeoutError`]:
1544
1635
  If the model is unavailable or the request times out.
1545
- `HTTPError`:
1636
+ [`HfHubHTTPError`]:
1546
1637
  If the request fails with an HTTP error status code other than HTTP 503.
1547
1638
 
1548
1639
  Example:
@@ -1579,7 +1670,7 @@ class InferenceClient:
1579
1670
  *,
1580
1671
  model: Optional[str] = None,
1581
1672
  clean_up_tokenization_spaces: Optional[bool] = None,
1582
- generate_parameters: Optional[Dict[str, Any]] = None,
1673
+ generate_parameters: Optional[dict[str, Any]] = None,
1583
1674
  truncation: Optional["SummarizationTruncationStrategy"] = None,
1584
1675
  ) -> SummarizationOutput:
1585
1676
  """
@@ -1593,7 +1684,7 @@ class InferenceClient:
1593
1684
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
1594
1685
  clean_up_tokenization_spaces (`bool`, *optional*):
1595
1686
  Whether to clean up the potential extra spaces in the text output.
1596
- generate_parameters (`Dict[str, Any]`, *optional*):
1687
+ generate_parameters (`dict[str, Any]`, *optional*):
1597
1688
  Additional parametrization of the text generation algorithm.
1598
1689
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
1599
1690
  The truncation strategy to use.
@@ -1603,7 +1694,7 @@ class InferenceClient:
1603
1694
  Raises:
1604
1695
  [`InferenceTimeoutError`]:
1605
1696
  If the model is unavailable or the request times out.
1606
- `HTTPError`:
1697
+ [`HfHubHTTPError`]:
1607
1698
  If the request fails with an HTTP error status code other than HTTP 503.
1608
1699
 
1609
1700
  Example:
@@ -1633,7 +1724,7 @@ class InferenceClient:
1633
1724
 
1634
1725
  def table_question_answering(
1635
1726
  self,
1636
- table: Dict[str, Any],
1727
+ table: dict[str, Any],
1637
1728
  query: str,
1638
1729
  *,
1639
1730
  model: Optional[str] = None,
@@ -1668,7 +1759,7 @@ class InferenceClient:
1668
1759
  Raises:
1669
1760
  [`InferenceTimeoutError`]:
1670
1761
  If the model is unavailable or the request times out.
1671
- `HTTPError`:
1762
+ [`HfHubHTTPError`]:
1672
1763
  If the request fails with an HTTP error status code other than HTTP 503.
1673
1764
 
1674
1765
  Example:
@@ -1693,12 +1784,12 @@ class InferenceClient:
1693
1784
  response = self._inner_post(request_parameters)
1694
1785
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
1695
1786
 
1696
- def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
1787
+ def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
1697
1788
  """
1698
1789
  Classifying a target category (a group) based on a set of attributes.
1699
1790
 
1700
1791
  Args:
1701
- table (`Dict[str, Any]`):
1792
+ table (`dict[str, Any]`):
1702
1793
  Set of attributes to classify.
1703
1794
  model (`str`, *optional*):
1704
1795
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1711,7 +1802,7 @@ class InferenceClient:
1711
1802
  Raises:
1712
1803
  [`InferenceTimeoutError`]:
1713
1804
  If the model is unavailable or the request times out.
1714
- `HTTPError`:
1805
+ [`HfHubHTTPError`]:
1715
1806
  If the request fails with an HTTP error status code other than HTTP 503.
1716
1807
 
1717
1808
  Example:
@@ -1748,12 +1839,12 @@ class InferenceClient:
1748
1839
  response = self._inner_post(request_parameters)
1749
1840
  return _bytes_to_list(response)
1750
1841
 
1751
- def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
1842
+ def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
1752
1843
  """
1753
1844
  Predicting a numerical target value given a set of attributes/features in a table.
1754
1845
 
1755
1846
  Args:
1756
- table (`Dict[str, Any]`):
1847
+ table (`dict[str, Any]`):
1757
1848
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
1758
1849
  model (`str`, *optional*):
1759
1850
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1766,7 +1857,7 @@ class InferenceClient:
1766
1857
  Raises:
1767
1858
  [`InferenceTimeoutError`]:
1768
1859
  If the model is unavailable or the request times out.
1769
- `HTTPError`:
1860
+ [`HfHubHTTPError`]:
1770
1861
  If the request fails with an HTTP error status code other than HTTP 503.
1771
1862
 
1772
1863
  Example:
@@ -1805,7 +1896,7 @@ class InferenceClient:
1805
1896
  model: Optional[str] = None,
1806
1897
  top_k: Optional[int] = None,
1807
1898
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
1808
- ) -> List[TextClassificationOutputElement]:
1899
+ ) -> list[TextClassificationOutputElement]:
1809
1900
  """
1810
1901
  Perform text classification (e.g. sentiment-analysis) on the given text.
1811
1902
 
@@ -1822,12 +1913,12 @@ class InferenceClient:
1822
1913
  The function to apply to the model outputs in order to retrieve the scores.
1823
1914
 
1824
1915
  Returns:
1825
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1916
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1826
1917
 
1827
1918
  Raises:
1828
1919
  [`InferenceTimeoutError`]:
1829
1920
  If the model is unavailable or the request times out.
1830
- `HTTPError`:
1921
+ [`HfHubHTTPError`]:
1831
1922
  If the request fails with an HTTP error status code other than HTTP 503.
1832
1923
 
1833
1924
  Example:
@@ -1875,8 +1966,8 @@ class InferenceClient:
1875
1966
  repetition_penalty: Optional[float] = None,
1876
1967
  return_full_text: Optional[bool] = None,
1877
1968
  seed: Optional[int] = None,
1878
- stop: Optional[List[str]] = None,
1879
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
1969
+ stop: Optional[list[str]] = None,
1970
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1880
1971
  temperature: Optional[float] = None,
1881
1972
  top_k: Optional[int] = None,
1882
1973
  top_n_tokens: Optional[int] = None,
@@ -1905,8 +1996,8 @@ class InferenceClient:
1905
1996
  repetition_penalty: Optional[float] = None,
1906
1997
  return_full_text: Optional[bool] = None,
1907
1998
  seed: Optional[int] = None,
1908
- stop: Optional[List[str]] = None,
1909
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
1999
+ stop: Optional[list[str]] = None,
2000
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1910
2001
  temperature: Optional[float] = None,
1911
2002
  top_k: Optional[int] = None,
1912
2003
  top_n_tokens: Optional[int] = None,
@@ -1935,8 +2026,8 @@ class InferenceClient:
1935
2026
  repetition_penalty: Optional[float] = None,
1936
2027
  return_full_text: Optional[bool] = None, # Manual default value
1937
2028
  seed: Optional[int] = None,
1938
- stop: Optional[List[str]] = None,
1939
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2029
+ stop: Optional[list[str]] = None,
2030
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1940
2031
  temperature: Optional[float] = None,
1941
2032
  top_k: Optional[int] = None,
1942
2033
  top_n_tokens: Optional[int] = None,
@@ -1965,8 +2056,8 @@ class InferenceClient:
1965
2056
  repetition_penalty: Optional[float] = None,
1966
2057
  return_full_text: Optional[bool] = None,
1967
2058
  seed: Optional[int] = None,
1968
- stop: Optional[List[str]] = None,
1969
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2059
+ stop: Optional[list[str]] = None,
2060
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1970
2061
  temperature: Optional[float] = None,
1971
2062
  top_k: Optional[int] = None,
1972
2063
  top_n_tokens: Optional[int] = None,
@@ -1995,8 +2086,8 @@ class InferenceClient:
1995
2086
  repetition_penalty: Optional[float] = None,
1996
2087
  return_full_text: Optional[bool] = None,
1997
2088
  seed: Optional[int] = None,
1998
- stop: Optional[List[str]] = None,
1999
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2089
+ stop: Optional[list[str]] = None,
2090
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2000
2091
  temperature: Optional[float] = None,
2001
2092
  top_k: Optional[int] = None,
2002
2093
  top_n_tokens: Optional[int] = None,
@@ -2024,8 +2115,8 @@ class InferenceClient:
2024
2115
  repetition_penalty: Optional[float] = None,
2025
2116
  return_full_text: Optional[bool] = None,
2026
2117
  seed: Optional[int] = None,
2027
- stop: Optional[List[str]] = None,
2028
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2118
+ stop: Optional[list[str]] = None,
2119
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2029
2120
  temperature: Optional[float] = None,
2030
2121
  top_k: Optional[int] = None,
2031
2122
  top_n_tokens: Optional[int] = None,
@@ -2081,9 +2172,9 @@ class InferenceClient:
2081
2172
  Whether to prepend the prompt to the generated text
2082
2173
  seed (`int`, *optional*):
2083
2174
  Random sampling seed
2084
- stop (`List[str]`, *optional*):
2175
+ stop (`list[str]`, *optional*):
2085
2176
  Stop generating tokens if a member of `stop` is generated.
2086
- stop_sequences (`List[str]`, *optional*):
2177
+ stop_sequences (`list[str]`, *optional*):
2087
2178
  Deprecated argument. Use `stop` instead.
2088
2179
  temperature (`float`, *optional*):
2089
2180
  The value used to module the logits distribution.
@@ -2116,7 +2207,7 @@ class InferenceClient:
2116
2207
  If input values are not valid. No HTTP call is made to the server.
2117
2208
  [`InferenceTimeoutError`]:
2118
2209
  If the model is unavailable or the request times out.
2119
- `HTTPError`:
2210
+ [`HfHubHTTPError`]:
2120
2211
  If the request fails with an HTTP error status code other than HTTP 503.
2121
2212
 
2122
2213
  Example:
@@ -2305,7 +2396,7 @@ class InferenceClient:
2305
2396
  # Handle errors separately for more precise error messages
2306
2397
  try:
2307
2398
  bytes_output = self._inner_post(request_parameters, stream=stream or False)
2308
- except HTTPError as e:
2399
+ except HfHubHTTPError as e:
2309
2400
  match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
2310
2401
  if isinstance(e, BadRequestError) and match:
2311
2402
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
@@ -2360,7 +2451,7 @@ class InferenceClient:
2360
2451
  model: Optional[str] = None,
2361
2452
  scheduler: Optional[str] = None,
2362
2453
  seed: Optional[int] = None,
2363
- extra_body: Optional[Dict[str, Any]] = None,
2454
+ extra_body: Optional[dict[str, Any]] = None,
2364
2455
  ) -> "Image":
2365
2456
  """
2366
2457
  Generate an image based on a given text using a specified model.
@@ -2398,7 +2489,7 @@ class InferenceClient:
2398
2489
  Override the scheduler with a compatible one.
2399
2490
  seed (`int`, *optional*):
2400
2491
  Seed for the random number generator.
2401
- extra_body (`Dict[str, Any]`, *optional*):
2492
+ extra_body (`dict[str, Any]`, *optional*):
2402
2493
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2403
2494
  for supported parameters.
2404
2495
 
@@ -2408,7 +2499,7 @@ class InferenceClient:
2408
2499
  Raises:
2409
2500
  [`InferenceTimeoutError`]:
2410
2501
  If the model is unavailable or the request times out.
2411
- `HTTPError`:
2502
+ [`HfHubHTTPError`]:
2412
2503
  If the request fails with an HTTP error status code other than HTTP 503.
2413
2504
 
2414
2505
  Example:
@@ -2497,11 +2588,11 @@ class InferenceClient:
2497
2588
  *,
2498
2589
  model: Optional[str] = None,
2499
2590
  guidance_scale: Optional[float] = None,
2500
- negative_prompt: Optional[List[str]] = None,
2591
+ negative_prompt: Optional[list[str]] = None,
2501
2592
  num_frames: Optional[float] = None,
2502
2593
  num_inference_steps: Optional[int] = None,
2503
2594
  seed: Optional[int] = None,
2504
- extra_body: Optional[Dict[str, Any]] = None,
2595
+ extra_body: Optional[dict[str, Any]] = None,
2505
2596
  ) -> bytes:
2506
2597
  """
2507
2598
  Generate a video based on a given text.
@@ -2520,7 +2611,7 @@ class InferenceClient:
2520
2611
  guidance_scale (`float`, *optional*):
2521
2612
  A higher guidance scale value encourages the model to generate videos closely linked to the text
2522
2613
  prompt, but values too high may cause saturation and other artifacts.
2523
- negative_prompt (`List[str]`, *optional*):
2614
+ negative_prompt (`list[str]`, *optional*):
2524
2615
  One or several prompt to guide what NOT to include in video generation.
2525
2616
  num_frames (`float`, *optional*):
2526
2617
  The num_frames parameter determines how many video frames are generated.
@@ -2529,7 +2620,7 @@ class InferenceClient:
2529
2620
  expense of slower inference.
2530
2621
  seed (`int`, *optional*):
2531
2622
  Seed for the random number generator.
2532
- extra_body (`Dict[str, Any]`, *optional*):
2623
+ extra_body (`dict[str, Any]`, *optional*):
2533
2624
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2534
2625
  for supported parameters.
2535
2626
 
@@ -2609,7 +2700,7 @@ class InferenceClient:
2609
2700
  top_p: Optional[float] = None,
2610
2701
  typical_p: Optional[float] = None,
2611
2702
  use_cache: Optional[bool] = None,
2612
- extra_body: Optional[Dict[str, Any]] = None,
2703
+ extra_body: Optional[dict[str, Any]] = None,
2613
2704
  ) -> bytes:
2614
2705
  """
2615
2706
  Synthesize an audio of a voice pronouncing a given text.
@@ -2671,7 +2762,7 @@ class InferenceClient:
2671
2762
  paper](https://hf.co/papers/2202.00666) for more details.
2672
2763
  use_cache (`bool`, *optional*):
2673
2764
  Whether the model should use the past last key/values attentions to speed up decoding
2674
- extra_body (`Dict[str, Any]`, *optional*):
2765
+ extra_body (`dict[str, Any]`, *optional*):
2675
2766
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2676
2767
  for supported parameters.
2677
2768
  Returns:
@@ -2680,7 +2771,7 @@ class InferenceClient:
2680
2771
  Raises:
2681
2772
  [`InferenceTimeoutError`]:
2682
2773
  If the model is unavailable or the request times out.
2683
- `HTTPError`:
2774
+ [`HfHubHTTPError`]:
2684
2775
  If the request fails with an HTTP error status code other than HTTP 503.
2685
2776
 
2686
2777
  Example:
@@ -2803,9 +2894,9 @@ class InferenceClient:
  *,
  model: Optional[str] = None,
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
- ignore_labels: Optional[List[str]] = None,
+ ignore_labels: Optional[list[str]] = None,
  stride: Optional[int] = None,
- ) -> List[TokenClassificationOutputElement]:
+ ) -> list[TokenClassificationOutputElement]:
  """
  Perform token classification on the given text.
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2819,18 +2910,18 @@ class InferenceClient:
  Defaults to None.
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
  The strategy used to fuse tokens based on model predictions
- ignore_labels (`List[str`, *optional*):
+ ignore_labels (`list[str`, *optional*):
  A list of labels to ignore
  stride (`int`, *optional*):
  The number of overlapping tokens between chunks when splitting the input text.

  Returns:
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
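A short sketch of `token_classification` with the updated `list`-based annotations; the model ID is an example and the printed fields follow the Returns description above:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

entities = client.token_classification(
    "My name is Sarah Jessica Parker but you can call me Jessica",
    model="dslim/bert-base-NER",  # example NER model
    ignore_labels=["O"],          # plain list[str] of labels to skip
)
for entity in entities:
    # Each element carries entity group, confidence score, word, start and end index
    print(entity.entity_group, entity.word, entity.score)
```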
@@ -2881,7 +2972,7 @@ class InferenceClient:
  tgt_lang: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  truncation: Optional["TranslationTruncationStrategy"] = None,
- generate_parameters: Optional[Dict[str, Any]] = None,
+ generate_parameters: Optional[dict[str, Any]] = None,
  ) -> TranslationOutput:
  """
  Convert text from one language to another.
@@ -2906,7 +2997,7 @@ class InferenceClient:
  Whether to clean up the potential extra spaces in the text output.
  truncation (`"TranslationTruncationStrategy"`, *optional*):
  The truncation strategy to use.
- generate_parameters (`Dict[str, Any]`, *optional*):
+ generate_parameters (`dict[str, Any]`, *optional*):
  Additional parametrization of the text generation algorithm.

  Returns:
@@ -2915,7 +3006,7 @@ class InferenceClient:
  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.
  `ValueError`:
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
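Because a `ValueError` is raised when only one of `src_lang`/`tgt_lang` is given, a minimal sketch passes both; the model and language codes are examples:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

result = client.translation(
    "My name is Wolfgang and I live in Berlin.",
    model="facebook/nllb-200-distilled-600M",  # example multilingual model
    src_lang="eng_Latn",                       # must be provided together with tgt_lang
    tgt_lang="fra_Latn",
)
print(result.translation_text)
```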
@@ -2968,7 +3059,7 @@ class InferenceClient:
  *,
  model: Optional[str] = None,
  top_k: Optional[int] = None,
- ) -> List[VisualQuestionAnsweringOutputElement]:
+ ) -> list[VisualQuestionAnsweringOutputElement]:
  """
  Answering open-ended questions based on an image.

@@ -2985,12 +3076,12 @@ class InferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
  topk answers if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -3023,21 +3114,21 @@ class InferenceClient:
  def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3052,12 +3143,12 @@ class InferenceClient:


  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
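A minimal sketch of `zero_shot_classification` with `candidate_labels` passed as a plain `list[str]`; the model ID is an example:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

results = client.zero_shot_classification(
    "Hi, I recently bought a device from your company but it is not working as advertised.",
    candidate_labels=["refund", "legal", "faq"],  # plain list[str]
    multi_label=False,
    model="facebook/bart-large-mnli",  # example zero-shot model
)
for item in results:
    print(item.label, item.score)
```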
@@ -3129,22 +3220,22 @@ class InferenceClient:
  def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3154,12 +3245,12 @@ class InferenceClient:
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
@@ -3193,102 +3284,7 @@ class InferenceClient:
  response = self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
- ),
- )
- def list_deployed_models(
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
- ) -> Dict[str, List[str]]:
- """
- List models deployed on the HF Serverless Inference API service.
-
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
- frameworks are checked, the more time it will take.
-
- <Tip warning={true}>
-
- This endpoint method does not return a live list of all models available for the HF Inference API service.
- It searches over a cached list of models that were recently available and the list may not be up to date.
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- <Tip>
-
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- Args:
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
- custom set of frameworks to check.
-
- Returns:
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
- Example:
- ```python
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
-
- # Discover zero-shot-classification models currently deployed
- >>> models = client.list_deployed_models()
- >>> models["zero-shot-classification"]
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
- # List from only 1 framework
- >>> client.list_deployed_models("text-generation-inference")
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
- # Resolve which frameworks to check
- if frameworks is None:
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
- elif frameworks == "all":
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
- elif isinstance(frameworks, str):
- frameworks = [frameworks]
- frameworks = list(set(frameworks))
-
- # Fetch them iteratively
- models_by_task: Dict[str, List[str]] = {}
-
- def _unpack_response(framework: str, items: List[Dict]) -> None:
- for model in items:
- if framework == "sentence-transformers":
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
- # branded as such in the API response
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
- else:
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
- for framework in frameworks:
- response = get_session().get(
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
- )
- hf_raise_for_status(response)
- _unpack_response(framework, response.json())
-
- # Sort alphabetically for discoverability and return
- for task, models in models_by_task.items():
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
- return models_by_task
-
- def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.
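The `list_deployed_models` helper removed in the hunk above pointed, in its deprecation message, to `HfApi.list_models(..., inference_provider='...')` for discovering warm models. A hedged sketch of that replacement; the provider and task values are examples:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement suggested by the deprecation message of the removed list_deployed_models
for model in api.list_models(
    inference_provider="hf-inference",  # example provider name
    task="zero-shot-classification",    # example task filter
    limit=5,
):
    print(model.id)
```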

@@ -3301,7 +3297,7 @@ class InferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py
@@ -3351,7 +3347,6 @@ class InferenceClient:
  Check the health of the deployed endpoint.

  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
- For Inference API, please use [`InferenceClient.get_model_status`] instead.

  Args:
  model (`str`, *optional*):
@@ -3375,75 +3370,12 @@ class InferenceClient:
  if model is None:
  raise ValueError("Model id not provided.")
  if not model.startswith(("http://", "https://")):
- raise ValueError(
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
- )
+ raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"

  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  return response.status_code == 200

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
- ),
- )
- def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
- """
- Get the status of a model hosted on the HF Inference API.
-
- <Tip>
-
- This endpoint is mostly useful when you already know which model you want to use and want to check its
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
- </Tip>
-
- Args:
- model (`str`, *optional*):
- Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
- identifier cannot be a URL.
-
-
- Returns:
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
- about the state of the model: load, state, compute type and framework.
-
- Example:
- ```py
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
- >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
- model = model or self.model
- if model is None:
- raise ValueError("Model id not provided.")
- if model.startswith("https://"):
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
- response = get_session().get(url, headers=build_hf_headers(token=self.token))
- hf_raise_for_status(response)
- response_data = response.json()
-
- if "error" in response_data:
- raise ValueError(response_data["error"])
-
- return ModelStatus(
- loaded=response_data["loaded"],
- state=response_data["state"],
- compute_type=response_data["compute_type"],
- framework=response_data["framework"],
- )
-
  @property
  def chat(self) -> "ProxyClientChat":
  return ProxyClientChat(self)
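Likewise, the deprecation message of the removed `get_model_status` recommended `HfApi.model_info` for checking model status, and `health_check` now only accepts a dedicated Inference Endpoint URL. A hedged sketch of both replacements; the model ID and endpoint URL are placeholders:

```python
from huggingface_hub import HfApi, InferenceClient

api = HfApi()

# Replacement suggested by the deprecation message of the removed get_model_status
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm" when the model is deployed on an inference provider

# health_check now requires an Inference Endpoint URL rather than a model ID
client = InferenceClient("https://my-endpoint.us-east-1.aws.endpoints.huggingface.cloud")  # placeholder URL
print(client.health_check())
```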