huggingface-hub 0.34.4__py3-none-any.whl → 1.0.0rc0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.

This release of huggingface-hub has been flagged as potentially problematic.

Files changed (125)
  1. huggingface_hub/__init__.py +46 -45
  2. huggingface_hub/_commit_api.py +28 -28
  3. huggingface_hub/_commit_scheduler.py +11 -8
  4. huggingface_hub/_inference_endpoints.py +8 -8
  5. huggingface_hub/_jobs_api.py +167 -10
  6. huggingface_hub/_login.py +13 -39
  7. huggingface_hub/_oauth.py +8 -8
  8. huggingface_hub/_snapshot_download.py +14 -28
  9. huggingface_hub/_space_api.py +4 -4
  10. huggingface_hub/_tensorboard_logger.py +13 -14
  11. huggingface_hub/_upload_large_folder.py +15 -15
  12. huggingface_hub/_webhooks_payload.py +3 -3
  13. huggingface_hub/_webhooks_server.py +2 -2
  14. huggingface_hub/cli/_cli_utils.py +2 -2
  15. huggingface_hub/cli/auth.py +5 -6
  16. huggingface_hub/cli/cache.py +14 -20
  17. huggingface_hub/cli/download.py +4 -4
  18. huggingface_hub/cli/jobs.py +560 -11
  19. huggingface_hub/cli/lfs.py +4 -4
  20. huggingface_hub/cli/repo.py +7 -7
  21. huggingface_hub/cli/repo_files.py +2 -2
  22. huggingface_hub/cli/upload.py +4 -4
  23. huggingface_hub/cli/upload_large_folder.py +3 -3
  24. huggingface_hub/commands/_cli_utils.py +2 -2
  25. huggingface_hub/commands/delete_cache.py +13 -13
  26. huggingface_hub/commands/download.py +4 -13
  27. huggingface_hub/commands/lfs.py +4 -4
  28. huggingface_hub/commands/repo_files.py +2 -2
  29. huggingface_hub/commands/scan_cache.py +1 -1
  30. huggingface_hub/commands/tag.py +1 -3
  31. huggingface_hub/commands/upload.py +4 -4
  32. huggingface_hub/commands/upload_large_folder.py +3 -3
  33. huggingface_hub/commands/user.py +5 -6
  34. huggingface_hub/community.py +5 -5
  35. huggingface_hub/constants.py +3 -41
  36. huggingface_hub/dataclasses.py +16 -19
  37. huggingface_hub/errors.py +42 -29
  38. huggingface_hub/fastai_utils.py +8 -9
  39. huggingface_hub/file_download.py +153 -252
  40. huggingface_hub/hf_api.py +815 -600
  41. huggingface_hub/hf_file_system.py +98 -62
  42. huggingface_hub/hub_mixin.py +37 -57
  43. huggingface_hub/inference/_client.py +177 -325
  44. huggingface_hub/inference/_common.py +110 -124
  45. huggingface_hub/inference/_generated/_async_client.py +226 -432
  46. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  47. huggingface_hub/inference/_generated/types/base.py +10 -7
  48. huggingface_hub/inference/_generated/types/chat_completion.py +18 -16
  49. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  50. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  51. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  52. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  53. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  54. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  55. huggingface_hub/inference/_generated/types/table_question_answering.py +4 -4
  56. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  57. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  58. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  59. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/translation.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  62. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  63. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  64. huggingface_hub/inference/_mcp/_cli_hacks.py +3 -3
  65. huggingface_hub/inference/_mcp/agent.py +3 -3
  66. huggingface_hub/inference/_mcp/cli.py +1 -1
  67. huggingface_hub/inference/_mcp/constants.py +2 -3
  68. huggingface_hub/inference/_mcp/mcp_client.py +58 -30
  69. huggingface_hub/inference/_mcp/types.py +10 -7
  70. huggingface_hub/inference/_mcp/utils.py +11 -7
  71. huggingface_hub/inference/_providers/__init__.py +2 -2
  72. huggingface_hub/inference/_providers/_common.py +49 -25
  73. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  74. huggingface_hub/inference/_providers/cohere.py +3 -3
  75. huggingface_hub/inference/_providers/fal_ai.py +25 -25
  76. huggingface_hub/inference/_providers/featherless_ai.py +4 -4
  77. huggingface_hub/inference/_providers/fireworks_ai.py +3 -3
  78. huggingface_hub/inference/_providers/hf_inference.py +28 -20
  79. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  80. huggingface_hub/inference/_providers/nebius.py +10 -10
  81. huggingface_hub/inference/_providers/novita.py +5 -5
  82. huggingface_hub/inference/_providers/nscale.py +4 -4
  83. huggingface_hub/inference/_providers/replicate.py +15 -15
  84. huggingface_hub/inference/_providers/sambanova.py +6 -6
  85. huggingface_hub/inference/_providers/together.py +7 -7
  86. huggingface_hub/lfs.py +20 -31
  87. huggingface_hub/repocard.py +18 -18
  88. huggingface_hub/repocard_data.py +56 -56
  89. huggingface_hub/serialization/__init__.py +0 -1
  90. huggingface_hub/serialization/_base.py +9 -9
  91. huggingface_hub/serialization/_dduf.py +7 -7
  92. huggingface_hub/serialization/_torch.py +28 -28
  93. huggingface_hub/utils/__init__.py +10 -4
  94. huggingface_hub/utils/_auth.py +5 -5
  95. huggingface_hub/utils/_cache_manager.py +31 -31
  96. huggingface_hub/utils/_deprecation.py +1 -1
  97. huggingface_hub/utils/_dotenv.py +3 -3
  98. huggingface_hub/utils/_fixes.py +0 -10
  99. huggingface_hub/utils/_git_credential.py +4 -4
  100. huggingface_hub/utils/_headers.py +7 -29
  101. huggingface_hub/utils/_http.py +366 -208
  102. huggingface_hub/utils/_pagination.py +4 -4
  103. huggingface_hub/utils/_paths.py +5 -5
  104. huggingface_hub/utils/_runtime.py +15 -13
  105. huggingface_hub/utils/_safetensors.py +21 -21
  106. huggingface_hub/utils/_subprocess.py +9 -9
  107. huggingface_hub/utils/_telemetry.py +3 -3
  108. huggingface_hub/utils/_typing.py +25 -5
  109. huggingface_hub/utils/_validators.py +53 -72
  110. huggingface_hub/utils/_xet.py +16 -16
  111. huggingface_hub/utils/_xet_progress_reporting.py +32 -11
  112. huggingface_hub/utils/insecure_hashlib.py +3 -9
  113. huggingface_hub/utils/tqdm.py +3 -3
  114. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/METADATA +18 -29
  115. huggingface_hub-1.0.0rc0.dist-info/RECORD +161 -0
  116. huggingface_hub/inference_api.py +0 -217
  117. huggingface_hub/keras_mixin.py +0 -500
  118. huggingface_hub/repository.py +0 -1477
  119. huggingface_hub/serialization/_tensorflow.py +0 -95
  120. huggingface_hub/utils/_hf_folder.py +0 -68
  121. huggingface_hub-0.34.4.dist-info/RECORD +0 -166
  122. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/LICENSE +0 -0
  123. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/WHEEL +0 -0
  124. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/entry_points.txt +0 -0
  125. {huggingface_hub-0.34.4.dist-info → huggingface_hub-1.0.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py

@@ -34,18 +34,17 @@
  # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
  import base64
  import logging
+ import os
  import re
  import warnings
- from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
-
- from requests import HTTPError
+ from contextlib import ExitStack
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload

  from huggingface_hub import constants
- from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
  from huggingface_hub.inference._common import (
      TASKS_EXPECTING_IMAGES,
      ContentT,
-     ModelStatus,
      RequestParameters,
      _b64_encode,
      _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
      _bytes_to_list,
      _get_unsupported_text_generation_kwargs,
      _import_numpy,
-     _open_as_binary,
      _set_unsupported_text_generation_kwargs,
      _stream_chat_completion_response,
      _stream_text_generation_response,
@@ -103,9 +101,13 @@ from huggingface_hub.inference._generated.types import (
      ZeroShotImageClassificationOutputElement,
  )
  from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
+ from huggingface_hub.utils import (
+     build_hf_headers,
+     get_session,
+     hf_raise_for_status,
+     validate_hf_hub_args,
+ )
  from huggingface_hub.utils._auth import get_token
- from huggingface_hub.utils._deprecation import _deprecate_method


  if TYPE_CHECKING:
@@ -142,16 +144,14 @@ class InferenceClient:
          arguments are mutually exclusive and have the exact same behavior.
      timeout (`float`, `optional`):
          The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
-     headers (`Dict[str, str]`, `optional`):
+     headers (`dict[str, str]`, `optional`):
          Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
          Values in this dictionary will override the default values.
      bill_to (`str`, `optional`):
          The billing account to use for the requests. By default the requests are billed on the user's account.
          Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
-     cookies (`Dict[str, str]`, `optional`):
+     cookies (`dict[str, str]`, `optional`):
          Additional cookies to send to the server.
-     proxies (`Any`, `optional`):
-         Proxies to use for the request.
      base_url (`str`, `optional`):
          Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
          follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -160,6 +160,7 @@ class InferenceClient:
          follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
      """

+     @validate_hf_hub_args
      def __init__(
          self,
          model: Optional[str] = None,
@@ -167,9 +168,8 @@
          provider: Optional[PROVIDER_OR_POLICY_T] = None,
          token: Optional[str] = None,
          timeout: Optional[float] = None,
-         headers: Optional[Dict[str, str]] = None,
-         cookies: Optional[Dict[str, str]] = None,
-         proxies: Optional[Any] = None,
+         headers: Optional[dict[str, str]] = None,
+         cookies: Optional[dict[str, str]] = None,
          bill_to: Optional[str] = None,
          # OpenAI compatibility
          base_url: Optional[str] = None,
@@ -231,11 +231,21 @@

          self.cookies = cookies
          self.timeout = timeout
-         self.proxies = proxies
+
+         self.exit_stack = ExitStack()

      def __repr__(self):
          return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

+     def __enter__(self):
+         return self
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.exit_stack.close()
+
+     def close(self):
+         self.exit_stack.close()
+
      @overload
      def _inner_post( # type: ignore[misc]
          self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
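Note: the hunk above is the headline behavioral change in this file. `InferenceClient` now owns a `contextlib.ExitStack`, works as a context manager, and exposes `close()` to release any streamed HTTP responses it still holds. A minimal usage sketch (the task call is illustrative, not taken from this diff):

```python
from huggingface_hub import InferenceClient

# New in 1.0: the client is a context manager; leaving the block closes the
# internal ExitStack along with any streamed responses registered on it.
with InferenceClient() as client:
    scores = client.text_classification("I love this movie!")
    print(scores)

# Long-lived clients can call close() explicitly instead:
client = InferenceClient()
try:
    scores = client.text_classification("I love this movie!")
finally:
    client.close()
```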
@@ -244,45 +254,46 @@
      @overload
      def _inner_post( # type: ignore[misc]
          self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
-     ) -> Iterable[bytes]: ...
+     ) -> Iterable[str]: ...

      @overload
      def _inner_post(
          self, request_parameters: RequestParameters, *, stream: bool = False
-     ) -> Union[bytes, Iterable[bytes]]: ...
+     ) -> Union[bytes, Iterable[str]]: ...

      def _inner_post(
          self, request_parameters: RequestParameters, *, stream: bool = False
-     ) -> Union[bytes, Iterable[bytes]]:
+     ) -> Union[bytes, Iterable[str]]:
          """Make a request to the inference server."""
          # TODO: this should be handled in provider helpers directly
          if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
              request_parameters.headers["Accept"] = "image/png"

-         with _open_as_binary(request_parameters.data) as data_as_binary:
-             try:
-                 response = get_session().post(
+         try:
+             response = self.exit_stack.enter_context(
+                 get_session().stream(
+                     "POST",
                      request_parameters.url,
                      json=request_parameters.json,
-                     data=data_as_binary,
+                     content=request_parameters.data,
                      headers=request_parameters.headers,
                      cookies=self.cookies,
                      timeout=self.timeout,
-                     stream=stream,
-                     proxies=self.proxies,
                  )
-             except TimeoutError as error:
-                 # Convert any `TimeoutError` to a `InferenceTimeoutError`
-                 raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
-
-             try:
+             )
              hf_raise_for_status(response)
-             return response.iter_lines() if stream else response.content
-         except HTTPError as error:
+             if stream:
+                 return response.iter_lines()
+             else:
+                 return response.read()
+         except TimeoutError as error:
+             # Convert any `TimeoutError` to a `InferenceTimeoutError`
+             raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
+         except HfHubHTTPError as error:
              if error.response.status_code == 422 and request_parameters.task != "unknown":
                  msg = str(error.args[0])
                  if len(error.response.text) > 0:
-                     msg += f"\n{error.response.text}\n"
+                     msg += f"{os.linesep}{error.response.text}{os.linesep}"
                  error.args = (msg,) + error.args[1:]
              raise
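With `requests` gone from the import list, callers that previously caught `requests.HTTPError` should now catch [`HfHubHTTPError`] (or a subclass such as `BadRequestError`). A migration sketch, with a hypothetical local audio file:

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

client = InferenceClient(timeout=30)
try:
    labels = client.audio_classification("sample.flac")  # hypothetical file path
except InferenceTimeoutError:
    print("model unavailable or request timed out")
except HfHubHTTPError as err:
    # 1.0 raises hub-specific errors; the HTTP response is still attached.
    print(f"request failed: HTTP {err.response.status_code}")
```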

@@ -293,7 +304,7 @@
          model: Optional[str] = None,
          top_k: Optional[int] = None,
          function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
-     ) -> List[AudioClassificationOutputElement]:
+     ) -> list[AudioClassificationOutputElement]:
          """
          Perform audio classification on the provided audio content.

@@ -311,12 +322,12 @@
                  The function to apply to the model outputs in order to retrieve the scores.

          Returns:
-             `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
+             `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -348,7 +359,7 @@
          audio: ContentT,
          *,
          model: Optional[str] = None,
-     ) -> List[AudioToAudioOutputElement]:
+     ) -> list[AudioToAudioOutputElement]:
          """
          Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).

@@ -362,12 +373,12 @@
              audio_to_audio will be used.

          Returns:
-             `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
+             `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.

          Raises:
              `InferenceTimeoutError`:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -400,7 +411,7 @@
          audio: ContentT,
          *,
          model: Optional[str] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> AutomaticSpeechRecognitionOutput:
          """
          Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -411,7 +422,7 @@
              model (`str`, *optional*):
                  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-             extra_body (`Dict`, *optional*):
+             extra_body (`dict`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -420,7 +431,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
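The `List`/`Dict` → `list`/`dict` substitutions seen above repeat through the rest of the file: 1.0 annotates with PEP 585 builtin generics, which are subscriptable at runtime on Python 3.9 and newer. The pattern in isolation, using a hypothetical helper rather than code from this diff:

```python
# 0.34 style required typing imports:
#   from typing import Dict, List
#   def top_labels(scores: Dict[str, float], k: int) -> List[str]: ...

# 1.0 style uses the builtin generics directly:
def top_labels(scores: dict[str, float], k: int) -> list[str]:
    """Return the k highest-scoring labels (illustrative only)."""
    return sorted(scores, key=scores.__getitem__, reverse=True)[:k]

print(top_labels({"cat": 0.9, "dog": 0.7, "bird": 0.2}, k=2))  # ['cat', 'dog']
```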
@@ -446,105 +457,105 @@
      @overload
      def chat_completion( # type: ignore
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: Literal[False] = False,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> ChatCompletionOutput: ...

      @overload
      def chat_completion( # type: ignore
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: Literal[True] = True,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Iterable[ChatCompletionStreamOutput]: ...

      @overload
      def chat_completion(
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: bool = False,
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

      def chat_completion(
          self,
-         messages: List[Union[Dict, ChatCompletionInputMessage]],
+         messages: list[Union[dict, ChatCompletionInputMessage]],
          *,
          model: Optional[str] = None,
          stream: bool = False,
          # Parameters from ChatCompletionInput (handled manually)
          frequency_penalty: Optional[float] = None,
-         logit_bias: Optional[List[float]] = None,
+         logit_bias: Optional[list[float]] = None,
          logprobs: Optional[bool] = None,
          max_tokens: Optional[int] = None,
          n: Optional[int] = None,
          presence_penalty: Optional[float] = None,
          response_format: Optional[ChatCompletionInputGrammarType] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
+         stop: Optional[list[str]] = None,
          stream_options: Optional[ChatCompletionInputStreamOptions] = None,
          temperature: Optional[float] = None,
          tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
          tool_prompt: Optional[str] = None,
-         tools: Optional[List[ChatCompletionInputTool]] = None,
+         tools: Optional[list[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
-         extra_body: Optional[Dict] = None,
+         extra_body: Optional[dict] = None,
      ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
          """
          A method for completing conversations using a specified language model.
@@ -574,7 +585,7 @@
              frequency_penalty (`float`, *optional*):
                  Penalizes new tokens based on their existing frequency
                  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
-             logit_bias (`List[float]`, *optional*):
+             logit_bias (`list[float]`, *optional*):
                  Adjusts the likelihood of specific tokens appearing in the generated output.
              logprobs (`bool`, *optional*):
                  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -590,7 +601,7 @@
                  Grammar constraints. Can be either a JSONSchema or a regex.
              seed (Optional[`int`], *optional*):
                  Seed for reproducible control flow. Defaults to None.
-             stop (`List[str]`, *optional*):
+             stop (`list[str]`, *optional*):
                  Up to four strings which trigger the end of the response.
                  Defaults to None.
              stream (`bool`, *optional*):
@@ -614,7 +625,7 @@
              tools (List of [`ChatCompletionInputTool`], *optional*):
                  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                  provide a list of functions the model may generate JSON inputs for.
-             extra_body (`Dict`, *optional*):
+             extra_body (`dict`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -626,7 +637,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
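Tying the overloads back to `_inner_post`: with `stream=True` the raw SSE body is now consumed as an iterator of `str` lines, but the public API still yields parsed chunks. A sketch, assuming the default recommended chat model:

```python
from huggingface_hub import InferenceClient

with InferenceClient() as client:
    # stream=True returns Iterable[ChatCompletionStreamOutput]; each chunk is
    # decoded from one SSE line (Iterable[str] internally as of 1.0).
    for chunk in client.chat_completion(
        messages=[{"role": "user", "content": "Write one haiku about diffs."}],
        max_tokens=64,
        stream=True,
    ):
        print(chunk.choices[0].delta.content or "", end="")
```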
@@ -940,8 +951,8 @@
          max_question_len: Optional[int] = None,
          max_seq_len: Optional[int] = None,
          top_k: Optional[int] = None,
-         word_boxes: Optional[List[Union[List[float], str]]] = None,
-     ) -> List[DocumentQuestionAnsweringOutputElement]:
+         word_boxes: Optional[list[Union[list[float], str]]] = None,
+     ) -> list[DocumentQuestionAnsweringOutputElement]:
          """
          Answer questions on document images.

@@ -971,16 +982,16 @@
              top_k (`int`, *optional*):
                  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
                  answers if there are not enough options available within the context.
-             word_boxes (`List[Union[List[float], str`, *optional*):
+             word_boxes (`list[Union[list[float], str`, *optional*):
                  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
                  step and use the provided bounding boxes instead.
          Returns:
-             `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
+             `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.


@@ -994,7 +1005,7 @@
          """
          model_id = model or self.model
          provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
-         inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
+         inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
          request_parameters = provider_helper.prepare_request(
              inputs=inputs,
              parameters={
@@ -1055,7 +1066,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1092,9 +1103,9 @@
          text: str,
          *,
          model: Optional[str] = None,
-         targets: Optional[List[str]] = None,
+         targets: Optional[list[str]] = None,
          top_k: Optional[int] = None,
-     ) -> List[FillMaskOutputElement]:
+     ) -> list[FillMaskOutputElement]:
          """
          Fill in a hole with a missing word (token to be precise).

@@ -1104,20 +1115,20 @@
              model (`str`, *optional*):
                  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
                  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
-             targets (`List[str`, *optional*):
+             targets (`list[str`, *optional*):
                  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
                  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
                  resulting token will be used (with a warning, and that might be slower).
              top_k (`int`, *optional*):
                  When passed, overrides the number of predictions to return.
          Returns:
-             `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
+             `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
                  probability, token reference, and completed text.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1150,7 +1161,7 @@
          model: Optional[str] = None,
          function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
          top_k: Optional[int] = None,
-     ) -> List[ImageClassificationOutputElement]:
+     ) -> list[ImageClassificationOutputElement]:
          """
          Perform image classification on the given image using the specified model.

@@ -1165,12 +1176,12 @@
              top_k (`int`, *optional*):
                  When specified, limits the output to the top K most probable classes.
          Returns:
-             `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
+             `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1202,7 +1213,7 @@
          overlap_mask_area_threshold: Optional[float] = None,
          subtask: Optional["ImageSegmentationSubtask"] = None,
          threshold: Optional[float] = None,
-     ) -> List[ImageSegmentationOutputElement]:
+     ) -> list[ImageSegmentationOutputElement]:
          """
          Perform image segmentation on the given image using the specified model.

@@ -1227,12 +1238,12 @@
              threshold (`float`, *optional*):
                  Probability threshold to filter out predicted masks.
          Returns:
-             `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
+             `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1309,7 +1320,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1439,7 +1450,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1462,12 +1473,12 @@
              api_key=self.token,
          )
          response = self._inner_post(request_parameters)
-         output = ImageToTextOutput.parse_obj(response)
-         return output[0] if isinstance(output, list) else output
+         output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
+         return output_list[0]

      def object_detection(
          self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
-     ) -> List[ObjectDetectionOutputElement]:
+     ) -> list[ObjectDetectionOutputElement]:
          """
          Perform object detection on the given image using the specified model.
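The `image_to_text` hunk above also tightens response parsing: instead of branching on whether `parse_obj()` returned a single object or a list, the payload is always parsed as a list and its first element is returned. Caller code is unchanged (the image path below is hypothetical):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
caption = client.image_to_text("cat.png")  # hypothetical local image
print(caption.generated_text)  # reliably a single ImageToTextOutput in 1.0
client.close()
```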

@@ -1486,12 +1497,12 @@
              threshold (`float`, *optional*):
                  The probability necessary to make a prediction.
          Returns:
-             `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
+             `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.
              `ValueError`:
                  If the request output is not a List.
@@ -1529,7 +1540,7 @@
          max_question_len: Optional[int] = None,
          max_seq_len: Optional[int] = None,
          top_k: Optional[int] = None,
-     ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
+     ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
          """
          Retrieve the answer to a question from a given text.

@@ -1561,13 +1572,13 @@
                  topk answers if there are not enough options available within the context.

          Returns:
-             Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
+             Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
                  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
                  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1601,15 +1612,15 @@
          return output

      def sentence_similarity(
-         self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
-     ) -> List[float]:
+         self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
+     ) -> list[float]:
          """
          Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.

          Args:
              sentence (`str`):
                  The main sentence to compare to others.
-             other_sentences (`List[str]`):
+             other_sentences (`list[str]`):
                  The list of sentences to compare to.
              model (`str`, *optional*):
                  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1617,12 +1628,12 @@
                  Defaults to None.

          Returns:
-             `List[float]`: The embedding representing the input text.
+             `list[float]`: The embedding representing the input text.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1659,7 +1670,7 @@
          *,
          model: Optional[str] = None,
          clean_up_tokenization_spaces: Optional[bool] = None,
-         generate_parameters: Optional[Dict[str, Any]] = None,
+         generate_parameters: Optional[dict[str, Any]] = None,
          truncation: Optional["SummarizationTruncationStrategy"] = None,
      ) -> SummarizationOutput:
          """
@@ -1673,7 +1684,7 @@
                  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
              clean_up_tokenization_spaces (`bool`, *optional*):
                  Whether to clean up the potential extra spaces in the text output.
-             generate_parameters (`Dict[str, Any]`, *optional*):
+             generate_parameters (`dict[str, Any]`, *optional*):
                  Additional parametrization of the text generation algorithm.
              truncation (`"SummarizationTruncationStrategy"`, *optional*):
                  The truncation strategy to use.
@@ -1683,7 +1694,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1713,7 +1724,7 @@

      def table_question_answering(
          self,
-         table: Dict[str, Any],
+         table: dict[str, Any],
          query: str,
          *,
          model: Optional[str] = None,
@@ -1748,7 +1759,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1773,12 +1784,12 @@
          response = self._inner_post(request_parameters)
          return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)

-     def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
+     def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
          """
          Classifying a target category (a group) based on a set of attributes.

          Args:
-             table (`Dict[str, Any]`):
+             table (`dict[str, Any]`):
                  Set of attributes to classify.
              model (`str`, *optional*):
                  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1791,7 +1802,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1828,12 +1839,12 @@
          response = self._inner_post(request_parameters)
          return _bytes_to_list(response)

-     def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
+     def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
          """
          Predicting a numerical target value given a set of attributes/features in a table.

          Args:
-             table (`Dict[str, Any]`):
+             table (`dict[str, Any]`):
                  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
              model (`str`, *optional*):
                  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1846,7 +1857,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1885,7 +1896,7 @@
          model: Optional[str] = None,
          top_k: Optional[int] = None,
          function_to_apply: Optional["TextClassificationOutputTransform"] = None,
-     ) -> List[TextClassificationOutputElement]:
+     ) -> list[TextClassificationOutputElement]:
          """
          Perform text classification (e.g. sentiment-analysis) on the given text.

@@ -1902,12 +1913,12 @@
                  The function to apply to the model outputs in order to retrieve the scores.

          Returns:
-             `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
+             `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.

          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -1955,8 +1966,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -1985,8 +1996,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2015,8 +2026,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None, # Manual default value
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2045,8 +2056,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2075,8 +2086,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2104,8 +2115,8 @@
          repetition_penalty: Optional[float] = None,
          return_full_text: Optional[bool] = None,
          seed: Optional[int] = None,
-         stop: Optional[List[str]] = None,
-         stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
+         stop: Optional[list[str]] = None,
+         stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
          temperature: Optional[float] = None,
          top_k: Optional[int] = None,
          top_n_tokens: Optional[int] = None,
@@ -2161,9 +2172,9 @@
                  Whether to prepend the prompt to the generated text
              seed (`int`, *optional*):
                  Random sampling seed
-             stop (`List[str]`, *optional*):
+             stop (`list[str]`, *optional*):
                  Stop generating tokens if a member of `stop` is generated.
-             stop_sequences (`List[str]`, *optional*):
+             stop_sequences (`list[str]`, *optional*):
                  Deprecated argument. Use `stop` instead.
              temperature (`float`, *optional*):
                  The value used to module the logits distribution.
@@ -2196,7 +2207,7 @@
                  If input values are not valid. No HTTP call is made to the server.
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2385,7 +2396,7 @@
          # Handle errors separately for more precise error messages
          try:
              bytes_output = self._inner_post(request_parameters, stream=stream or False)
-         except HTTPError as e:
+         except HfHubHTTPError as e:
              match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
              if isinstance(e, BadRequestError) and match:
                  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
@@ -2440,7 +2451,7 @@
          model: Optional[str] = None,
          scheduler: Optional[str] = None,
          seed: Optional[int] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> "Image":
          """
          Generate an image based on a given text using a specified model.
@@ -2478,7 +2489,7 @@
                  Override the scheduler with a compatible one.
              seed (`int`, *optional*):
                  Seed for the random number generator.
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.

@@ -2488,7 +2499,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2577,11 +2588,11 @@
          *,
          model: Optional[str] = None,
          guidance_scale: Optional[float] = None,
-         negative_prompt: Optional[List[str]] = None,
+         negative_prompt: Optional[list[str]] = None,
          num_frames: Optional[float] = None,
          num_inference_steps: Optional[int] = None,
          seed: Optional[int] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> bytes:
          """
          Generate a video based on a given text.
@@ -2600,7 +2611,7 @@
              guidance_scale (`float`, *optional*):
                  A higher guidance scale value encourages the model to generate videos closely linked to the text
                  prompt, but values too high may cause saturation and other artifacts.
-             negative_prompt (`List[str]`, *optional*):
+             negative_prompt (`list[str]`, *optional*):
                  One or several prompt to guide what NOT to include in video generation.
              num_frames (`float`, *optional*):
                  The num_frames parameter determines how many video frames are generated.
@@ -2609,7 +2620,7 @@
                  expense of slower inference.
              seed (`int`, *optional*):
                  Seed for the random number generator.
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.

@@ -2689,7 +2700,7 @@
          top_p: Optional[float] = None,
          typical_p: Optional[float] = None,
          use_cache: Optional[bool] = None,
-         extra_body: Optional[Dict[str, Any]] = None,
+         extra_body: Optional[dict[str, Any]] = None,
      ) -> bytes:
          """
          Synthesize an audio of a voice pronouncing a given text.
@@ -2751,7 +2762,7 @@
                  paper](https://hf.co/papers/2202.00666) for more details.
              use_cache (`bool`, *optional*):
                  Whether the model should use the past last key/values attentions to speed up decoding
-             extra_body (`Dict[str, Any]`, *optional*):
+             extra_body (`dict[str, Any]`, *optional*):
                  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
                  for supported parameters.
          Returns:
@@ -2760,7 +2771,7 @@
          Raises:
              [`InferenceTimeoutError`]:
                  If the model is unavailable or the request times out.
-             `HTTPError`:
+             [`HfHubHTTPError`]:
                  If the request fails with an HTTP error status code other than HTTP 503.

          Example:
@@ -2883,9 +2894,9 @@
          *,
          model: Optional[str] = None,
          aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
-         ignore_labels: Optional[List[str]] = None,
+         ignore_labels: Optional[list[str]] = None,
          stride: Optional[int] = None,
-     ) -> List[TokenClassificationOutputElement]:
+     ) -> list[TokenClassificationOutputElement]:
          """
          Perform token classification on the given text.
          Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2899,18 +2910,18 @@ class InferenceClient:
2899
2910
  Defaults to None.
2900
2911
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
2901
2912
  The strategy used to fuse tokens based on model predictions
2902
- ignore_labels (`List[str`, *optional*):
2913
+ ignore_labels (`list[str`, *optional*):
2903
2914
  A list of labels to ignore
2904
2915
  stride (`int`, *optional*):
2905
2916
  The number of overlapping tokens between chunks when splitting the input text.
2906
2917
 
2907
2918
  Returns:
2908
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2919
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2909
2920
 
2910
2921
  Raises:
2911
2922
  [`InferenceTimeoutError`]:
2912
2923
  If the model is unavailable or the request times out.
2913
- `HTTPError`:
2924
+ [`HfHubHTTPError`]:
2914
2925
  If the request fails with an HTTP error status code other than HTTP 503.
2915
2926
 
2916
2927
  Example:
@@ -2961,7 +2972,7 @@ class InferenceClient:
2961
2972
  tgt_lang: Optional[str] = None,
2962
2973
  clean_up_tokenization_spaces: Optional[bool] = None,
2963
2974
  truncation: Optional["TranslationTruncationStrategy"] = None,
2964
- generate_parameters: Optional[Dict[str, Any]] = None,
2975
+ generate_parameters: Optional[dict[str, Any]] = None,
2965
2976
  ) -> TranslationOutput:
2966
2977
  """
2967
2978
  Convert text from one language to another.
@@ -2986,7 +2997,7 @@ class InferenceClient:
2986
2997
  Whether to clean up the potential extra spaces in the text output.
2987
2998
  truncation (`"TranslationTruncationStrategy"`, *optional*):
2988
2999
  The truncation strategy to use.
2989
- generate_parameters (`Dict[str, Any]`, *optional*):
3000
+ generate_parameters (`dict[str, Any]`, *optional*):
2990
3001
  Additional parametrization of the text generation algorithm.
2991
3002
 
2992
3003
  Returns:
@@ -2995,7 +3006,7 @@ class InferenceClient:
2995
3006
  Raises:
2996
3007
  [`InferenceTimeoutError`]:
2997
3008
  If the model is unavailable or the request times out.
2998
- `HTTPError`:
3009
+ [`HfHubHTTPError`]:
2999
3010
  If the request fails with an HTTP error status code other than HTTP 503.
3000
3011
  `ValueError`:
3001
3012
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
@@ -3048,7 +3059,7 @@ class InferenceClient:
3048
3059
  *,
3049
3060
  model: Optional[str] = None,
3050
3061
  top_k: Optional[int] = None,
3051
- ) -> List[VisualQuestionAnsweringOutputElement]:
3062
+ ) -> list[VisualQuestionAnsweringOutputElement]:
3052
3063
  """
3053
3064
  Answering open-ended questions based on an image.
3054
3065
 
@@ -3065,12 +3076,12 @@ class InferenceClient:
  The number of answers to return (will be chosen by order of likelihood). Note that fewer than
  `top_k` answers are returned if there are not enough options available within the context.
  Returns:
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.

  Raises:
  `InferenceTimeoutError`:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
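For completeness, a sketch of `visual_question_answering` under the 1.0 return type; the model ID and image URL are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Model ID and image URL are illustrative; any VQA model and image work.
answers = client.visual_question_answering(
    image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
    question="What animal is this?",
    model="dandelin/vilt-b32-finetuned-vqa",
)
for answer in answers:
    # Each element carries the predicted answer and its probability.
    print(answer.answer, answer.score)
```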
@@ -3103,21 +3114,21 @@ class InferenceClient:
  def zero_shot_classification(
  self,
  text: str,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  multi_label: Optional[bool] = False,
  hypothesis_template: Optional[str] = None,
  model: Optional[str] = None,
- ) -> List[ZeroShotClassificationOutputElement]:
+ ) -> list[ZeroShotClassificationOutputElement]:
  """
  Provide as input a text and a set of candidate labels to classify the input text.

  Args:
  text (`str`):
  The input text to classify.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The set of possible class labels to classify the text into.
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
  multi_label (`bool`, *optional*):
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3132,12 +3143,12 @@ class InferenceClient:


  Returns:
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example with `multi_label=False`:
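A short sketch of `zero_shot_classification` with the new `list` annotations; the NLI checkpoint is an illustrative assumption:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# Model ID is an illustrative choice of NLI-based zero-shot classifier.
results = client.zero_shot_classification(
    "I really enjoyed this film, the acting was superb.",
    candidate_labels=["positive", "negative", "neutral"],
    multi_label=False,
    model="facebook/bart-large-mnli",
)
# With multi_label=False the scores are normalized to sum to 1 across labels.
for item in results:
    print(item.label, round(item.score, 3))
```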
@@ -3209,22 +3220,22 @@ class InferenceClient:
  def zero_shot_image_classification(
  self,
  image: ContentT,
- candidate_labels: List[str],
+ candidate_labels: list[str],
  *,
  model: Optional[str] = None,
  hypothesis_template: Optional[str] = None,
  # deprecated argument
- labels: List[str] = None, # type: ignore
- ) -> List[ZeroShotImageClassificationOutputElement]:
+ labels: list[str] = None, # type: ignore
+ ) -> list[ZeroShotImageClassificationOutputElement]:
  """
  Provide input image and text labels to predict text labels for the image.

  Args:
  image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
  The input image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
- candidate_labels (`List[str]`):
+ candidate_labels (`list[str]`):
  The candidate labels for this image
- labels (`List[str]`, *optional*):
+ labels (`list[str]`, *optional*):
  (deprecated) List of string possible labels. There must be at least 2 labels.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3234,12 +3245,12 @@ class InferenceClient:
  replacing the placeholder with the candidate labels.

  Returns:
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.

  Raises:
  [`InferenceTimeoutError`]:
  If the model is unavailable or the request times out.
- `HTTPError`:
+ [`HfHubHTTPError`]:
  If the request fails with an HTTP error status code other than HTTP 503.

  Example:
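And the image variant; the CLIP checkpoint and image URL are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()

# CLIP checkpoint and image URL are illustrative choices.
results = client.zero_shot_image_classification(
    "https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",
    candidate_labels=["tiger", "lion", "house cat"],
    model="openai/clip-vit-base-patch32",
)
for item in results:
    print(item.label, item.score)
```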
@@ -3273,102 +3284,7 @@ class InferenceClient:
  response = self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
- ),
- )
- def list_deployed_models(
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
- ) -> Dict[str, List[str]]:
- """
- List models deployed on the HF Serverless Inference API service.
-
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
- frameworks are checked, the more time it will take.
-
- <Tip warning={true}>
-
- This endpoint method does not return a live list of all models available for the HF Inference API service.
- It searches over a cached list of models that were recently available and the list may not be up to date.
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- <Tip>
-
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
-
- </Tip>
-
- Args:
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
- custom set of frameworks to check.
-
- Returns:
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
-
- Example:
- ```python
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
-
- # Discover zero-shot-classification models currently deployed
- >>> models = client.list_deployed_models()
- >>> models["zero-shot-classification"]
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
-
- # List from only 1 framework
- >>> client.list_deployed_models("text-generation-inference")
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
-
- # Resolve which frameworks to check
- if frameworks is None:
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
- elif frameworks == "all":
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
- elif isinstance(frameworks, str):
- frameworks = [frameworks]
- frameworks = list(set(frameworks))
-
- # Fetch them iteratively
- models_by_task: Dict[str, List[str]] = {}
-
- def _unpack_response(framework: str, items: List[Dict]) -> None:
- for model in items:
- if framework == "sentence-transformers":
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
- # branded as such in the API response
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
- else:
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
-
- for framework in frameworks:
- response = get_session().get(
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
- )
- hf_raise_for_status(response)
- _unpack_response(framework, response.json())
-
- # Sort alphabetically for discoverability and return
- for task, models in models_by_task.items():
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
- return models_by_task
-
- def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
+ def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
  """
  Get information about the deployed endpoint.

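`list_deployed_models` is deleted outright in 1.0 (it had been deprecated since 0.35.0). Per the deprecation message above, discovery moves to `HfApi.list_models` with the `inference_provider` filter. A hedged migration sketch; the provider and task values are illustrative:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement for `client.list_deployed_models()`: list models that are
# warm on a given provider, optionally narrowed by task.
models = api.list_models(
    inference_provider="hf-inference",  # a provider name; "all" should also work
    task="zero-shot-classification",
    limit=10,
)
for model in models:
    print(model.id)
```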
@@ -3381,7 +3297,7 @@ class InferenceClient:
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.

  Returns:
- `Dict[str, Any]`: Information about the endpoint.
+ `dict[str, Any]`: Information about the endpoint.

  Example:
  ```py
@@ -3431,7 +3347,6 @@ class InferenceClient:
  Check the health of the deployed endpoint.

  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
- For Inference API, please use [`InferenceClient.get_model_status`] instead.

  Args:
  model (`str`, *optional*):
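With the serverless fallback gone, `health_check` now only accepts Inference Endpoint URLs, as the tightened `ValueError` in the next hunk shows. A minimal sketch under that assumption; the endpoint URL is a placeholder:

```python
from huggingface_hub import InferenceClient

# The URL must point at a TGI- or TEI-powered Inference Endpoint;
# this one is a placeholder, not a real deployment.
client = InferenceClient(model="https://my-endpoint.us-east-1.aws.endpoints.huggingface.cloud")

if client.health_check():
    print("Endpoint is up and ready.")
else:
    print("Endpoint is not healthy (or still scaling up).")
```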
@@ -3455,75 +3370,12 @@ class InferenceClient:
  if model is None:
  raise ValueError("Model id not provided.")
  if not model.startswith(("http://", "https://")):
- raise ValueError(
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
- )
+ raise ValueError("Model must be an Inference Endpoint URL.")
  url = model.rstrip("/") + "/health"

  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  return response.status_code == 200

- @_deprecate_method(
- version="0.35.0",
- message=(
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
- ),
- )
- def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
- """
- Get the status of a model hosted on the HF Inference API.
-
- <Tip>
-
- This endpoint is mostly useful when you already know which model you want to use and want to check its
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
-
- </Tip>
-
- Args:
- model (`str`, *optional*):
- Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
- identifier cannot be a URL.
-
-
- Returns:
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
- about the state of the model: load, state, compute type and framework.
-
- Example:
- ```py
- >>> from huggingface_hub import InferenceClient
- >>> client = InferenceClient()
- >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
- ```
- """
- if self.provider != "hf-inference":
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
-
- model = model or self.model
- if model is None:
- raise ValueError("Model id not provided.")
- if model.startswith("https://"):
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
-
- response = get_session().get(url, headers=build_hf_headers(token=self.token))
- hf_raise_for_status(response)
- response_data = response.json()
-
- if "error" in response_data:
- raise ValueError(response_data["error"])
-
- return ModelStatus(
- loaded=response_data["loaded"],
- state=response_data["state"],
- compute_type=response_data["compute_type"],
- framework=response_data["framework"],
- )
-
  @property
  def chat(self) -> "ProxyClientChat":
  return ProxyClientChat(self)
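`get_model_status` (and with it the `ModelStatus` return type) is removed on the same grounds. Per the deprecation message above, the replacement is `HfApi.model_info`; the sketch below assumes the `expand=["inference"]` key reports warm/cold status, which matches current `HfApi` docs but is not spelled out in this diff:

```python
from huggingface_hub import HfApi

api = HfApi()

# Replacement for `client.get_model_status(...)`: fetch model metadata with
# the inference status expanded. The "inference" expand key is an assumption
# based on current HfApi docs; it reports whether the model is warm.
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm"
```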