huggingface-hub 0.31.0rc0__py3-none-any.whl → 1.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. huggingface_hub/__init__.py +145 -46
  2. huggingface_hub/_commit_api.py +168 -119
  3. huggingface_hub/_commit_scheduler.py +15 -15
  4. huggingface_hub/_inference_endpoints.py +15 -12
  5. huggingface_hub/_jobs_api.py +301 -0
  6. huggingface_hub/_local_folder.py +18 -3
  7. huggingface_hub/_login.py +31 -63
  8. huggingface_hub/_oauth.py +460 -0
  9. huggingface_hub/_snapshot_download.py +239 -80
  10. huggingface_hub/_space_api.py +5 -5
  11. huggingface_hub/_tensorboard_logger.py +15 -19
  12. huggingface_hub/_upload_large_folder.py +172 -76
  13. huggingface_hub/_webhooks_payload.py +3 -3
  14. huggingface_hub/_webhooks_server.py +13 -25
  15. huggingface_hub/{commands → cli}/__init__.py +1 -15
  16. huggingface_hub/cli/_cli_utils.py +173 -0
  17. huggingface_hub/cli/auth.py +147 -0
  18. huggingface_hub/cli/cache.py +841 -0
  19. huggingface_hub/cli/download.py +189 -0
  20. huggingface_hub/cli/hf.py +60 -0
  21. huggingface_hub/cli/inference_endpoints.py +377 -0
  22. huggingface_hub/cli/jobs.py +772 -0
  23. huggingface_hub/cli/lfs.py +175 -0
  24. huggingface_hub/cli/repo.py +315 -0
  25. huggingface_hub/cli/repo_files.py +94 -0
  26. huggingface_hub/{commands/env.py → cli/system.py} +10 -13
  27. huggingface_hub/cli/upload.py +294 -0
  28. huggingface_hub/cli/upload_large_folder.py +117 -0
  29. huggingface_hub/community.py +20 -12
  30. huggingface_hub/constants.py +38 -53
  31. huggingface_hub/dataclasses.py +609 -0
  32. huggingface_hub/errors.py +80 -30
  33. huggingface_hub/fastai_utils.py +30 -41
  34. huggingface_hub/file_download.py +435 -351
  35. huggingface_hub/hf_api.py +2050 -1124
  36. huggingface_hub/hf_file_system.py +269 -152
  37. huggingface_hub/hub_mixin.py +43 -63
  38. huggingface_hub/inference/_client.py +347 -434
  39. huggingface_hub/inference/_common.py +133 -121
  40. huggingface_hub/inference/_generated/_async_client.py +397 -541
  41. huggingface_hub/inference/_generated/types/__init__.py +5 -1
  42. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +3 -3
  43. huggingface_hub/inference/_generated/types/base.py +10 -7
  44. huggingface_hub/inference/_generated/types/chat_completion.py +59 -23
  45. huggingface_hub/inference/_generated/types/depth_estimation.py +2 -2
  46. huggingface_hub/inference/_generated/types/document_question_answering.py +2 -2
  47. huggingface_hub/inference/_generated/types/feature_extraction.py +2 -2
  48. huggingface_hub/inference/_generated/types/fill_mask.py +2 -2
  49. huggingface_hub/inference/_generated/types/image_to_image.py +6 -2
  50. huggingface_hub/inference/_generated/types/image_to_video.py +60 -0
  51. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -3
  52. huggingface_hub/inference/_generated/types/summarization.py +2 -2
  53. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -5
  54. huggingface_hub/inference/_generated/types/text2text_generation.py +2 -2
  55. huggingface_hub/inference/_generated/types/text_generation.py +10 -10
  56. huggingface_hub/inference/_generated/types/text_to_video.py +2 -2
  57. huggingface_hub/inference/_generated/types/token_classification.py +2 -2
  58. huggingface_hub/inference/_generated/types/translation.py +2 -2
  59. huggingface_hub/inference/_generated/types/zero_shot_classification.py +2 -2
  60. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +2 -2
  61. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +1 -3
  62. huggingface_hub/inference/_mcp/__init__.py +0 -0
  63. huggingface_hub/inference/_mcp/_cli_hacks.py +88 -0
  64. huggingface_hub/inference/_mcp/agent.py +100 -0
  65. huggingface_hub/inference/_mcp/cli.py +247 -0
  66. huggingface_hub/inference/_mcp/constants.py +81 -0
  67. huggingface_hub/inference/_mcp/mcp_client.py +395 -0
  68. huggingface_hub/inference/_mcp/types.py +45 -0
  69. huggingface_hub/inference/_mcp/utils.py +128 -0
  70. huggingface_hub/inference/_providers/__init__.py +82 -7
  71. huggingface_hub/inference/_providers/_common.py +129 -27
  72. huggingface_hub/inference/_providers/black_forest_labs.py +6 -6
  73. huggingface_hub/inference/_providers/cerebras.py +1 -1
  74. huggingface_hub/inference/_providers/clarifai.py +13 -0
  75. huggingface_hub/inference/_providers/cohere.py +20 -3
  76. huggingface_hub/inference/_providers/fal_ai.py +183 -56
  77. huggingface_hub/inference/_providers/featherless_ai.py +38 -0
  78. huggingface_hub/inference/_providers/fireworks_ai.py +18 -0
  79. huggingface_hub/inference/_providers/groq.py +9 -0
  80. huggingface_hub/inference/_providers/hf_inference.py +69 -30
  81. huggingface_hub/inference/_providers/hyperbolic.py +4 -4
  82. huggingface_hub/inference/_providers/nebius.py +33 -5
  83. huggingface_hub/inference/_providers/novita.py +5 -5
  84. huggingface_hub/inference/_providers/nscale.py +44 -0
  85. huggingface_hub/inference/_providers/openai.py +3 -1
  86. huggingface_hub/inference/_providers/publicai.py +6 -0
  87. huggingface_hub/inference/_providers/replicate.py +31 -13
  88. huggingface_hub/inference/_providers/sambanova.py +18 -4
  89. huggingface_hub/inference/_providers/scaleway.py +28 -0
  90. huggingface_hub/inference/_providers/together.py +20 -5
  91. huggingface_hub/inference/_providers/wavespeed.py +138 -0
  92. huggingface_hub/inference/_providers/zai_org.py +17 -0
  93. huggingface_hub/lfs.py +33 -100
  94. huggingface_hub/repocard.py +34 -38
  95. huggingface_hub/repocard_data.py +57 -57
  96. huggingface_hub/serialization/__init__.py +0 -1
  97. huggingface_hub/serialization/_base.py +12 -15
  98. huggingface_hub/serialization/_dduf.py +8 -8
  99. huggingface_hub/serialization/_torch.py +69 -69
  100. huggingface_hub/utils/__init__.py +19 -8
  101. huggingface_hub/utils/_auth.py +7 -7
  102. huggingface_hub/utils/_cache_manager.py +92 -147
  103. huggingface_hub/utils/_chunk_utils.py +2 -3
  104. huggingface_hub/utils/_deprecation.py +1 -1
  105. huggingface_hub/utils/_dotenv.py +55 -0
  106. huggingface_hub/utils/_experimental.py +7 -5
  107. huggingface_hub/utils/_fixes.py +0 -10
  108. huggingface_hub/utils/_git_credential.py +5 -5
  109. huggingface_hub/utils/_headers.py +8 -30
  110. huggingface_hub/utils/_http.py +398 -239
  111. huggingface_hub/utils/_pagination.py +4 -4
  112. huggingface_hub/utils/_parsing.py +98 -0
  113. huggingface_hub/utils/_paths.py +5 -5
  114. huggingface_hub/utils/_runtime.py +61 -24
  115. huggingface_hub/utils/_safetensors.py +21 -21
  116. huggingface_hub/utils/_subprocess.py +9 -9
  117. huggingface_hub/utils/_telemetry.py +4 -4
  118. huggingface_hub/{commands/_cli_utils.py → utils/_terminal.py} +4 -4
  119. huggingface_hub/utils/_typing.py +25 -5
  120. huggingface_hub/utils/_validators.py +55 -74
  121. huggingface_hub/utils/_verification.py +167 -0
  122. huggingface_hub/utils/_xet.py +64 -17
  123. huggingface_hub/utils/_xet_progress_reporting.py +162 -0
  124. huggingface_hub/utils/insecure_hashlib.py +3 -5
  125. huggingface_hub/utils/logging.py +8 -11
  126. huggingface_hub/utils/tqdm.py +5 -4
  127. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/METADATA +94 -85
  128. huggingface_hub-1.1.3.dist-info/RECORD +155 -0
  129. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/WHEEL +1 -1
  130. huggingface_hub-1.1.3.dist-info/entry_points.txt +6 -0
  131. huggingface_hub/commands/delete_cache.py +0 -474
  132. huggingface_hub/commands/download.py +0 -200
  133. huggingface_hub/commands/huggingface_cli.py +0 -61
  134. huggingface_hub/commands/lfs.py +0 -200
  135. huggingface_hub/commands/repo_files.py +0 -128
  136. huggingface_hub/commands/scan_cache.py +0 -181
  137. huggingface_hub/commands/tag.py +0 -159
  138. huggingface_hub/commands/upload.py +0 -314
  139. huggingface_hub/commands/upload_large_folder.py +0 -129
  140. huggingface_hub/commands/user.py +0 -304
  141. huggingface_hub/commands/version.py +0 -37
  142. huggingface_hub/inference_api.py +0 -217
  143. huggingface_hub/keras_mixin.py +0 -500
  144. huggingface_hub/repository.py +0 -1477
  145. huggingface_hub/serialization/_tensorflow.py +0 -95
  146. huggingface_hub/utils/_hf_folder.py +0 -68
  147. huggingface_hub-0.31.0rc0.dist-info/RECORD +0 -135
  148. huggingface_hub-0.31.0rc0.dist-info/entry_points.txt +0 -6
  149. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info/licenses}/LICENSE +0 -0
  150. {huggingface_hub-0.31.0rc0.dist-info → huggingface_hub-1.1.3.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_client.py
@@ -34,18 +34,17 @@
34
34
  # - Only the main parameters are publicly exposed. Power users can always read the docs for more options.
35
35
  import base64
36
36
  import logging
37
+ import os
37
38
  import re
38
39
  import warnings
39
- from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload
40
-
41
- from requests import HTTPError
40
+ from contextlib import ExitStack
41
+ from typing import TYPE_CHECKING, Any, Iterable, Literal, Optional, Union, overload
42
42
 
43
43
  from huggingface_hub import constants
44
- from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
44
+ from huggingface_hub.errors import BadRequestError, HfHubHTTPError, InferenceTimeoutError
45
45
  from huggingface_hub.inference._common import (
46
46
  TASKS_EXPECTING_IMAGES,
47
47
  ContentT,
48
- ModelStatus,
49
48
  RequestParameters,
50
49
  _b64_encode,
51
50
  _b64_to_image,
@@ -54,7 +53,6 @@ from huggingface_hub.inference._common import (
54
53
  _bytes_to_list,
55
54
  _get_unsupported_text_generation_kwargs,
56
55
  _import_numpy,
57
- _open_as_binary,
58
56
  _set_unsupported_text_generation_kwargs,
59
57
  _stream_chat_completion_response,
60
58
  _stream_text_generation_response,
@@ -66,6 +64,7 @@ from huggingface_hub.inference._generated.types import (
66
64
  AudioToAudioOutputElement,
67
65
  AutomaticSpeechRecognitionOutput,
68
66
  ChatCompletionInputGrammarType,
67
+ ChatCompletionInputMessage,
69
68
  ChatCompletionInputStreamOptions,
70
69
  ChatCompletionInputTool,
71
70
  ChatCompletionInputToolChoiceClass,
@@ -80,6 +79,7 @@ from huggingface_hub.inference._generated.types import (
80
79
  ImageSegmentationSubtask,
81
80
  ImageToImageTargetSize,
82
81
  ImageToTextOutput,
82
+ ImageToVideoTargetSize,
83
83
  ObjectDetectionOutputElement,
84
84
  Padding,
85
85
  QuestionAnsweringOutputElement,
@@ -100,10 +100,14 @@ from huggingface_hub.inference._generated.types import (
100
100
  ZeroShotClassificationOutputElement,
101
101
  ZeroShotImageClassificationOutputElement,
102
102
  )
103
- from huggingface_hub.inference._providers import PROVIDER_T, get_provider_helper
104
- from huggingface_hub.utils import build_hf_headers, get_session, hf_raise_for_status
103
+ from huggingface_hub.inference._providers import PROVIDER_OR_POLICY_T, get_provider_helper
104
+ from huggingface_hub.utils import (
105
+ build_hf_headers,
106
+ get_session,
107
+ hf_raise_for_status,
108
+ validate_hf_hub_args,
109
+ )
105
110
  from huggingface_hub.utils._auth import get_token
106
- from huggingface_hub.utils._deprecation import _deprecate_method
107
111
 
108
112
 
109
113
  if TYPE_CHECKING:
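With the `requests.HTTPError` import removed and `HfHubHTTPError` imported from `huggingface_hub.errors` (first hunk above), client calls now surface HTTP failures as `HfHubHTTPError`. A minimal sketch of the updated error handling; no particular model is assumed:

```python
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError, InferenceTimeoutError

client = InferenceClient(timeout=30)

try:
    result = client.text_generation("The huggingface_hub library is ")
except InferenceTimeoutError:
    # Raised when the model is unavailable or the request times out.
    print("Inference call timed out")
except HfHubHTTPError as err:
    # 1.x replacement for the requests.HTTPError raised by 0.x clients.
    print(f"Request failed: {err}")
```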
@@ -129,11 +133,9 @@ class InferenceClient:
129
133
  or a URL to a deployed Inference Endpoint. Defaults to None, in which case a recommended model is
130
134
  automatically selected for the task.
131
135
  Note: for better compatibility with OpenAI's client, `model` has been aliased as `base_url`. Those 2
132
- arguments are mutually exclusive. If using `base_url` for chat completion, the `/chat/completions` suffix
133
- path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
134
- documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
136
+ arguments are mutually exclusive. If a URL is passed as `model` or `base_url` for chat completion, the `(/v1)/chat/completions` suffix path will be appended to the URL.
135
137
  provider (`str`, *optional*):
136
- Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"cohere"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"openai"`, `"replicate"`, "sambanova"` or `"together"`.
138
+ Name of the provider to use for inference. Can be `"black-forest-labs"`, `"cerebras"`, `"clarifai"`, `"cohere"`, `"fal-ai"`, `"featherless-ai"`, `"fireworks-ai"`, `"groq"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"nscale"`, `"openai"`, `"publicai"`, `"replicate"`, `"sambanova"`, `"scaleway"`, `"together"`, `"wavespeed"` or `"zai-org"`.
137
139
  Defaults to "auto" i.e. the first of the providers available for the model, sorted by the user's order in https://hf.co/settings/inference-providers.
138
140
  If model is a URL or `base_url` is passed, then `provider` is not used.
139
141
  token (`str`, *optional*):
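For illustration, a hedged sketch of forcing one of the providers listed in the docstring above; the organization passed to `bill_to` is a placeholder:

```python
from huggingface_hub import InferenceClient

# provider defaults to "auto": the first provider enabled for the model in
# https://hf.co/settings/inference-providers. Any provider from the list above
# can be selected explicitly; bill_to is optional.
client = InferenceClient(provider="together", bill_to="my-org-name")
```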
@@ -142,16 +144,14 @@ class InferenceClient:
142
144
  arguments are mutually exclusive and have the exact same behavior.
143
145
  timeout (`float`, `optional`):
144
146
  The maximum number of seconds to wait for a response from the server. Defaults to None, meaning it will loop until the server is available.
145
- headers (`Dict[str, str]`, `optional`):
147
+ headers (`dict[str, str]`, `optional`):
146
148
  Additional headers to send to the server. By default only the authorization and user-agent headers are sent.
147
149
  Values in this dictionary will override the default values.
148
150
  bill_to (`str`, `optional`):
149
151
  The billing account to use for the requests. By default the requests are billed on the user's account.
150
152
  Requests can only be billed to an organization the user is a member of, and which has subscribed to Enterprise Hub.
151
- cookies (`Dict[str, str]`, `optional`):
153
+ cookies (`dict[str, str]`, `optional`):
152
154
  Additional cookies to send to the server.
153
- proxies (`Any`, `optional`):
154
- Proxies to use for the request.
155
155
  base_url (`str`, `optional`):
156
156
  Base URL to run inference. This is a duplicated argument from `model` to make [`InferenceClient`]
157
157
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `model` is set. Defaults to None.
@@ -160,16 +160,16 @@ class InferenceClient:
160
160
  follow the same pattern as `openai.OpenAI` client. Cannot be used if `token` is set. Defaults to None.
161
161
  """
162
162
 
163
+ @validate_hf_hub_args
163
164
  def __init__(
164
165
  self,
165
166
  model: Optional[str] = None,
166
167
  *,
167
- provider: Union[Literal["auto"], PROVIDER_T, None] = None,
168
+ provider: Optional[PROVIDER_OR_POLICY_T] = None,
168
169
  token: Optional[str] = None,
169
170
  timeout: Optional[float] = None,
170
- headers: Optional[Dict[str, str]] = None,
171
- cookies: Optional[Dict[str, str]] = None,
172
- proxies: Optional[Any] = None,
171
+ headers: Optional[dict[str, str]] = None,
172
+ cookies: Optional[dict[str, str]] = None,
173
173
  bill_to: Optional[str] = None,
174
174
  # OpenAI compatibility
175
175
  base_url: Optional[str] = None,
@@ -190,7 +190,7 @@ class InferenceClient:
190
190
  )
191
191
  token = token if token is not None else api_key
192
192
  if isinstance(token, bool):
193
- # Legacy behavior: previously is was possible to pass `token=False` to disable authentication. This is not
193
+ # Legacy behavior: previously it was possible to pass `token=False` to disable authentication. This is not
194
194
  # supported anymore as authentication is required. Better to explicitly raise here rather than risking
195
195
  # sending the locally saved token without the user knowing about it.
196
196
  if token is False:
@@ -231,11 +231,21 @@ class InferenceClient:
231
231
 
232
232
  self.cookies = cookies
233
233
  self.timeout = timeout
234
- self.proxies = proxies
234
+
235
+ self.exit_stack = ExitStack()
235
236
 
236
237
  def __repr__(self):
237
238
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"
238
239
 
240
+ def __enter__(self):
241
+ return self
242
+
243
+ def __exit__(self, exc_type, exc_value, traceback):
244
+ self.exit_stack.close()
245
+
246
+ def close(self):
247
+ self.exit_stack.close()
248
+
239
249
  @overload
240
250
  def _inner_post( # type: ignore[misc]
241
251
  self, request_parameters: RequestParameters, *, stream: Literal[False] = ...
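The hunk above makes the client a context manager backed by an `ExitStack`, with an explicit `close()`. A short usage sketch:

```python
from huggingface_hub import InferenceClient

# Preferred: the context manager releases any streamed responses on exit.
with InferenceClient() as client:
    labels = client.text_classification("huggingface_hub 1.x looks great!")

# Equivalent manual form using the new close() method.
client = InferenceClient()
try:
    labels = client.text_classification("huggingface_hub 1.x looks great!")
finally:
    client.close()
```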
@@ -244,45 +254,46 @@ class InferenceClient:
244
254
  @overload
245
255
  def _inner_post( # type: ignore[misc]
246
256
  self, request_parameters: RequestParameters, *, stream: Literal[True] = ...
247
- ) -> Iterable[bytes]: ...
257
+ ) -> Iterable[str]: ...
248
258
 
249
259
  @overload
250
260
  def _inner_post(
251
261
  self, request_parameters: RequestParameters, *, stream: bool = False
252
- ) -> Union[bytes, Iterable[bytes]]: ...
262
+ ) -> Union[bytes, Iterable[str]]: ...
253
263
 
254
264
  def _inner_post(
255
265
  self, request_parameters: RequestParameters, *, stream: bool = False
256
- ) -> Union[bytes, Iterable[bytes]]:
266
+ ) -> Union[bytes, Iterable[str]]:
257
267
  """Make a request to the inference server."""
258
268
  # TODO: this should be handled in provider helpers directly
259
269
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
260
270
  request_parameters.headers["Accept"] = "image/png"
261
271
 
262
- with _open_as_binary(request_parameters.data) as data_as_binary:
263
- try:
264
- response = get_session().post(
272
+ try:
273
+ response = self.exit_stack.enter_context(
274
+ get_session().stream(
275
+ "POST",
265
276
  request_parameters.url,
266
277
  json=request_parameters.json,
267
- data=data_as_binary,
278
+ content=request_parameters.data,
268
279
  headers=request_parameters.headers,
269
280
  cookies=self.cookies,
270
281
  timeout=self.timeout,
271
- stream=stream,
272
- proxies=self.proxies,
273
282
  )
274
- except TimeoutError as error:
275
- # Convert any `TimeoutError` to a `InferenceTimeoutError`
276
- raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
277
-
278
- try:
283
+ )
279
284
  hf_raise_for_status(response)
280
- return response.iter_lines() if stream else response.content
281
- except HTTPError as error:
285
+ if stream:
286
+ return response.iter_lines()
287
+ else:
288
+ return response.read()
289
+ except TimeoutError as error:
290
+ # Convert any `TimeoutError` to a `InferenceTimeoutError`
291
+ raise InferenceTimeoutError(f"Inference call timed out: {request_parameters.url}") from error # type: ignore
292
+ except HfHubHTTPError as error:
282
293
  if error.response.status_code == 422 and request_parameters.task != "unknown":
283
294
  msg = str(error.args[0])
284
295
  if len(error.response.text) > 0:
285
- msg += f"\n{error.response.text}\n"
296
+ msg += f"{os.linesep}{error.response.text}{os.linesep}"
286
297
  error.args = (msg,) + error.args[1:]
287
298
  raise
288
299
 
@@ -293,7 +304,7 @@ class InferenceClient:
293
304
  model: Optional[str] = None,
294
305
  top_k: Optional[int] = None,
295
306
  function_to_apply: Optional["AudioClassificationOutputTransform"] = None,
296
- ) -> List[AudioClassificationOutputElement]:
307
+ ) -> list[AudioClassificationOutputElement]:
297
308
  """
298
309
  Perform audio classification on the provided audio content.
299
310
 
@@ -311,12 +322,12 @@ class InferenceClient:
311
322
  The function to apply to the model outputs in order to retrieve the scores.
312
323
 
313
324
  Returns:
314
- `List[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
325
+ `list[AudioClassificationOutputElement]`: List of [`AudioClassificationOutputElement`] items containing the predicted labels and their confidence.
315
326
 
316
327
  Raises:
317
328
  [`InferenceTimeoutError`]:
318
329
  If the model is unavailable or the request times out.
319
- `HTTPError`:
330
+ [`HfHubHTTPError`]:
320
331
  If the request fails with an HTTP error status code other than HTTP 503.
321
332
 
322
333
  Example:
@@ -348,7 +359,7 @@ class InferenceClient:
348
359
  audio: ContentT,
349
360
  *,
350
361
  model: Optional[str] = None,
351
- ) -> List[AudioToAudioOutputElement]:
362
+ ) -> list[AudioToAudioOutputElement]:
352
363
  """
353
364
  Performs multiple tasks related to audio-to-audio depending on the model (eg: speech enhancement, source separation).
354
365
 
@@ -362,12 +373,12 @@ class InferenceClient:
362
373
  audio_to_audio will be used.
363
374
 
364
375
  Returns:
365
- `List[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
376
+ `list[AudioToAudioOutputElement]`: A list of [`AudioToAudioOutputElement`] items containing audios label, content-type, and audio content in blob.
366
377
 
367
378
  Raises:
368
379
  `InferenceTimeoutError`:
369
380
  If the model is unavailable or the request times out.
370
- `HTTPError`:
381
+ [`HfHubHTTPError`]:
371
382
  If the request fails with an HTTP error status code other than HTTP 503.
372
383
 
373
384
  Example:
@@ -400,7 +411,7 @@ class InferenceClient:
400
411
  audio: ContentT,
401
412
  *,
402
413
  model: Optional[str] = None,
403
- extra_body: Optional[Dict] = None,
414
+ extra_body: Optional[dict] = None,
404
415
  ) -> AutomaticSpeechRecognitionOutput:
405
416
  """
406
417
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -411,7 +422,7 @@ class InferenceClient:
411
422
  model (`str`, *optional*):
412
423
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
413
424
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
414
- extra_body (`Dict`, *optional*):
425
+ extra_body (`dict`, *optional*):
415
426
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
416
427
  for supported parameters.
417
428
  Returns:
@@ -420,7 +431,7 @@ class InferenceClient:
420
431
  Raises:
421
432
  [`InferenceTimeoutError`]:
422
433
  If the model is unavailable or the request times out.
423
- `HTTPError`:
434
+ [`HfHubHTTPError`]:
424
435
  If the request fails with an HTTP error status code other than HTTP 503.
425
436
 
426
437
  Example:
@@ -446,121 +457,117 @@ class InferenceClient:
446
457
  @overload
447
458
  def chat_completion( # type: ignore
448
459
  self,
449
- messages: List[Dict],
460
+ messages: list[Union[dict, ChatCompletionInputMessage]],
450
461
  *,
451
462
  model: Optional[str] = None,
452
463
  stream: Literal[False] = False,
453
464
  frequency_penalty: Optional[float] = None,
454
- logit_bias: Optional[List[float]] = None,
465
+ logit_bias: Optional[list[float]] = None,
455
466
  logprobs: Optional[bool] = None,
456
467
  max_tokens: Optional[int] = None,
457
468
  n: Optional[int] = None,
458
469
  presence_penalty: Optional[float] = None,
459
470
  response_format: Optional[ChatCompletionInputGrammarType] = None,
460
471
  seed: Optional[int] = None,
461
- stop: Optional[List[str]] = None,
472
+ stop: Optional[list[str]] = None,
462
473
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
463
474
  temperature: Optional[float] = None,
464
475
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
465
476
  tool_prompt: Optional[str] = None,
466
- tools: Optional[List[ChatCompletionInputTool]] = None,
477
+ tools: Optional[list[ChatCompletionInputTool]] = None,
467
478
  top_logprobs: Optional[int] = None,
468
479
  top_p: Optional[float] = None,
469
- extra_body: Optional[Dict] = None,
480
+ extra_body: Optional[dict] = None,
470
481
  ) -> ChatCompletionOutput: ...
471
482
 
472
483
  @overload
473
484
  def chat_completion( # type: ignore
474
485
  self,
475
- messages: List[Dict],
486
+ messages: list[Union[dict, ChatCompletionInputMessage]],
476
487
  *,
477
488
  model: Optional[str] = None,
478
489
  stream: Literal[True] = True,
479
490
  frequency_penalty: Optional[float] = None,
480
- logit_bias: Optional[List[float]] = None,
491
+ logit_bias: Optional[list[float]] = None,
481
492
  logprobs: Optional[bool] = None,
482
493
  max_tokens: Optional[int] = None,
483
494
  n: Optional[int] = None,
484
495
  presence_penalty: Optional[float] = None,
485
496
  response_format: Optional[ChatCompletionInputGrammarType] = None,
486
497
  seed: Optional[int] = None,
487
- stop: Optional[List[str]] = None,
498
+ stop: Optional[list[str]] = None,
488
499
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
489
500
  temperature: Optional[float] = None,
490
501
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
491
502
  tool_prompt: Optional[str] = None,
492
- tools: Optional[List[ChatCompletionInputTool]] = None,
503
+ tools: Optional[list[ChatCompletionInputTool]] = None,
493
504
  top_logprobs: Optional[int] = None,
494
505
  top_p: Optional[float] = None,
495
- extra_body: Optional[Dict] = None,
506
+ extra_body: Optional[dict] = None,
496
507
  ) -> Iterable[ChatCompletionStreamOutput]: ...
497
508
 
498
509
  @overload
499
510
  def chat_completion(
500
511
  self,
501
- messages: List[Dict],
512
+ messages: list[Union[dict, ChatCompletionInputMessage]],
502
513
  *,
503
514
  model: Optional[str] = None,
504
515
  stream: bool = False,
505
516
  frequency_penalty: Optional[float] = None,
506
- logit_bias: Optional[List[float]] = None,
517
+ logit_bias: Optional[list[float]] = None,
507
518
  logprobs: Optional[bool] = None,
508
519
  max_tokens: Optional[int] = None,
509
520
  n: Optional[int] = None,
510
521
  presence_penalty: Optional[float] = None,
511
522
  response_format: Optional[ChatCompletionInputGrammarType] = None,
512
523
  seed: Optional[int] = None,
513
- stop: Optional[List[str]] = None,
524
+ stop: Optional[list[str]] = None,
514
525
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
515
526
  temperature: Optional[float] = None,
516
527
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
517
528
  tool_prompt: Optional[str] = None,
518
- tools: Optional[List[ChatCompletionInputTool]] = None,
529
+ tools: Optional[list[ChatCompletionInputTool]] = None,
519
530
  top_logprobs: Optional[int] = None,
520
531
  top_p: Optional[float] = None,
521
- extra_body: Optional[Dict] = None,
532
+ extra_body: Optional[dict] = None,
522
533
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...
523
534
 
524
535
  def chat_completion(
525
536
  self,
526
- messages: List[Dict],
537
+ messages: list[Union[dict, ChatCompletionInputMessage]],
527
538
  *,
528
539
  model: Optional[str] = None,
529
540
  stream: bool = False,
530
541
  # Parameters from ChatCompletionInput (handled manually)
531
542
  frequency_penalty: Optional[float] = None,
532
- logit_bias: Optional[List[float]] = None,
543
+ logit_bias: Optional[list[float]] = None,
533
544
  logprobs: Optional[bool] = None,
534
545
  max_tokens: Optional[int] = None,
535
546
  n: Optional[int] = None,
536
547
  presence_penalty: Optional[float] = None,
537
548
  response_format: Optional[ChatCompletionInputGrammarType] = None,
538
549
  seed: Optional[int] = None,
539
- stop: Optional[List[str]] = None,
550
+ stop: Optional[list[str]] = None,
540
551
  stream_options: Optional[ChatCompletionInputStreamOptions] = None,
541
552
  temperature: Optional[float] = None,
542
553
  tool_choice: Optional[Union[ChatCompletionInputToolChoiceClass, "ChatCompletionInputToolChoiceEnum"]] = None,
543
554
  tool_prompt: Optional[str] = None,
544
- tools: Optional[List[ChatCompletionInputTool]] = None,
555
+ tools: Optional[list[ChatCompletionInputTool]] = None,
545
556
  top_logprobs: Optional[int] = None,
546
557
  top_p: Optional[float] = None,
547
- extra_body: Optional[Dict] = None,
558
+ extra_body: Optional[dict] = None,
548
559
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
549
560
  """
550
561
  A method for completing conversations using a specified language model.
551
562
 
552
- <Tip>
553
-
554
- The `client.chat_completion` method is aliased as `client.chat.completions.create` for compatibility with OpenAI's client.
555
- Inputs and outputs are strictly the same and using either syntax will yield the same results.
556
- Check out the [Inference guide](https://huggingface.co/docs/huggingface_hub/guides/inference#openai-compatibility)
557
- for more details about OpenAI's compatibility.
558
-
559
- </Tip>
563
+ > [!TIP]
564
+ > The `client.chat_completion` method is aliased as `client.chat.completions.create` for compatibility with OpenAI's client.
565
+ > Inputs and outputs are strictly the same and using either syntax will yield the same results.
566
+ > Check out the [Inference guide](https://huggingface.co/docs/huggingface_hub/guides/inference#openai-compatibility)
567
+ > for more details about OpenAI's compatibility.
560
568
 
561
- <Tip>
562
- You can pass provider-specific parameters to the model by using the `extra_body` argument.
563
- </Tip>
569
+ > [!TIP]
570
+ > You can pass provider-specific parameters to the model by using the `extra_body` argument.
564
571
 
565
572
  Args:
566
573
  messages (List of [`ChatCompletionInputMessage`]):
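As the tip in the hunk above notes, `chat_completion` is aliased as `chat.completions.create` for OpenAI-client compatibility. A brief sketch of both call styles; the model id is a placeholder:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(model="meta-llama/Llama-3.1-8B-Instruct")  # placeholder model id
messages = [{"role": "user", "content": "What is the capital of France?"}]

# Native syntax ...
output = client.chat_completion(messages, max_tokens=64)
# ... and the OpenAI-compatible alias; both return the same ChatCompletionOutput.
output = client.chat.completions.create(messages=messages, max_tokens=64)

print(output.choices[0].message.content)
```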
@@ -574,7 +581,7 @@ class InferenceClient:
574
581
  frequency_penalty (`float`, *optional*):
575
582
  Penalizes new tokens based on their existing frequency
576
583
  in the text so far. Range: [-2.0, 2.0]. Defaults to 0.0.
577
- logit_bias (`List[float]`, *optional*):
584
+ logit_bias (`list[float]`, *optional*):
578
585
  Adjusts the likelihood of specific tokens appearing in the generated output.
579
586
  logprobs (`bool`, *optional*):
580
587
  Whether to return log probabilities of the output tokens or not. If true, returns the log
@@ -590,7 +597,7 @@ class InferenceClient:
590
597
  Grammar constraints. Can be either a JSONSchema or a regex.
591
598
  seed (Optional[`int`], *optional*):
592
599
  Seed for reproducible control flow. Defaults to None.
593
- stop (`List[str]`, *optional*):
600
+ stop (`list[str]`, *optional*):
594
601
  Up to four strings which trigger the end of the response.
595
602
  Defaults to None.
596
603
  stream (`bool`, *optional*):
@@ -614,7 +621,7 @@ class InferenceClient:
614
621
  tools (List of [`ChatCompletionInputTool`], *optional*):
615
622
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
616
623
  provide a list of functions the model may generate JSON inputs for.
617
- extra_body (`Dict`, *optional*):
624
+ extra_body (`dict`, *optional*):
618
625
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
619
626
  for supported parameters.
620
627
  Returns:
@@ -626,7 +633,7 @@ class InferenceClient:
626
633
  Raises:
627
634
  [`InferenceTimeoutError`]:
628
635
  If the model is unavailable or the request times out.
629
- `HTTPError`:
636
+ [`HfHubHTTPError`]:
630
637
  If the request fails with an HTTP error status code other than HTTP 503.
631
638
 
632
639
  Example:
@@ -852,7 +859,7 @@ class InferenceClient:
852
859
  >>> messages = [
853
860
  ... {
854
861
  ... "role": "user",
855
- ... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I saw and when?",
862
+ ... "content": "I saw a puppy a cat and a raccoon during my bike ride in the park. What did I see and when?",
856
863
  ... },
857
864
  ... ]
858
865
  >>> response_format = {
@@ -940,8 +947,8 @@ class InferenceClient:
940
947
  max_question_len: Optional[int] = None,
941
948
  max_seq_len: Optional[int] = None,
942
949
  top_k: Optional[int] = None,
943
- word_boxes: Optional[List[Union[List[float], str]]] = None,
944
- ) -> List[DocumentQuestionAnsweringOutputElement]:
950
+ word_boxes: Optional[list[Union[list[float], str]]] = None,
951
+ ) -> list[DocumentQuestionAnsweringOutputElement]:
945
952
  """
946
953
  Answer questions on document images.
947
954
 
@@ -971,16 +978,16 @@ class InferenceClient:
971
978
  top_k (`int`, *optional*):
972
979
  The number of answers to return (will be chosen by order of likelihood). Can return less than top_k
973
980
  answers if there are not enough options available within the context.
974
- word_boxes (`List[Union[List[float], str`, *optional*):
981
+ word_boxes (`list[Union[list[float], str`, *optional*):
975
982
  A list of words and bounding boxes (normalized 0->1000). If provided, the inference will skip the OCR
976
983
  step and use the provided bounding boxes instead.
977
984
  Returns:
978
- `List[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
985
+ `list[DocumentQuestionAnsweringOutputElement]`: a list of [`DocumentQuestionAnsweringOutputElement`] items containing the predicted label, associated probability, word ids, and page number.
979
986
 
980
987
  Raises:
981
988
  [`InferenceTimeoutError`]:
982
989
  If the model is unavailable or the request times out.
983
- `HTTPError`:
990
+ [`HfHubHTTPError`]:
984
991
  If the request fails with an HTTP error status code other than HTTP 503.
985
992
 
986
993
 
@@ -994,7 +1001,7 @@ class InferenceClient:
994
1001
  """
995
1002
  model_id = model or self.model
996
1003
  provider_helper = get_provider_helper(self.provider, task="document-question-answering", model=model_id)
997
- inputs: Dict[str, Any] = {"question": question, "image": _b64_encode(image)}
1004
+ inputs: dict[str, Any] = {"question": question, "image": _b64_encode(image)}
998
1005
  request_parameters = provider_helper.prepare_request(
999
1006
  inputs=inputs,
1000
1007
  parameters={
@@ -1055,7 +1062,7 @@ class InferenceClient:
1055
1062
  Raises:
1056
1063
  [`InferenceTimeoutError`]:
1057
1064
  If the model is unavailable or the request times out.
1058
- `HTTPError`:
1065
+ [`HfHubHTTPError`]:
1059
1066
  If the request fails with an HTTP error status code other than HTTP 503.
1060
1067
 
1061
1068
  Example:
@@ -1092,9 +1099,9 @@ class InferenceClient:
1092
1099
  text: str,
1093
1100
  *,
1094
1101
  model: Optional[str] = None,
1095
- targets: Optional[List[str]] = None,
1102
+ targets: Optional[list[str]] = None,
1096
1103
  top_k: Optional[int] = None,
1097
- ) -> List[FillMaskOutputElement]:
1104
+ ) -> list[FillMaskOutputElement]:
1098
1105
  """
1099
1106
  Fill in a hole with a missing word (token to be precise).
1100
1107
 
@@ -1104,20 +1111,20 @@ class InferenceClient:
1104
1111
  model (`str`, *optional*):
1105
1112
  The model to use for the fill mask task. Can be a model ID hosted on the Hugging Face Hub or a URL to
1106
1113
  a deployed Inference Endpoint. If not provided, the default recommended fill mask model will be used.
1107
- targets (`List[str`, *optional*):
1114
+ targets (`list[str`, *optional*):
1108
1115
  When passed, the model will limit the scores to the passed targets instead of looking up in the whole
1109
1116
  vocabulary. If the provided targets are not in the model vocab, they will be tokenized and the first
1110
1117
  resulting token will be used (with a warning, and that might be slower).
1111
1118
  top_k (`int`, *optional*):
1112
1119
  When passed, overrides the number of predictions to return.
1113
1120
  Returns:
1114
- `List[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1121
+ `list[FillMaskOutputElement]`: a list of [`FillMaskOutputElement`] items containing the predicted label, associated
1115
1122
  probability, token reference, and completed text.
1116
1123
 
1117
1124
  Raises:
1118
1125
  [`InferenceTimeoutError`]:
1119
1126
  If the model is unavailable or the request times out.
1120
- `HTTPError`:
1127
+ [`HfHubHTTPError`]:
1121
1128
  If the request fails with an HTTP error status code other than HTTP 503.
1122
1129
 
1123
1130
  Example:
@@ -1150,13 +1157,13 @@ class InferenceClient:
1150
1157
  model: Optional[str] = None,
1151
1158
  function_to_apply: Optional["ImageClassificationOutputTransform"] = None,
1152
1159
  top_k: Optional[int] = None,
1153
- ) -> List[ImageClassificationOutputElement]:
1160
+ ) -> list[ImageClassificationOutputElement]:
1154
1161
  """
1155
1162
  Perform image classification on the given image using the specified model.
1156
1163
 
1157
1164
  Args:
1158
- image (`Union[str, Path, bytes, BinaryIO]`):
1159
- The image to classify. It can be raw bytes, an image file, or a URL to an online image.
1165
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1166
+ The image to classify. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1160
1167
  model (`str`, *optional*):
1161
1168
  The model to use for image classification. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1162
1169
  deployed Inference Endpoint. If not provided, the default recommended model for image classification will be used.
@@ -1165,12 +1172,12 @@ class InferenceClient:
1165
1172
  top_k (`int`, *optional*):
1166
1173
  When specified, limits the output to the top K most probable classes.
1167
1174
  Returns:
1168
- `List[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1175
+ `list[ImageClassificationOutputElement]`: a list of [`ImageClassificationOutputElement`] items containing the predicted label and associated probability.
1169
1176
 
1170
1177
  Raises:
1171
1178
  [`InferenceTimeoutError`]:
1172
1179
  If the model is unavailable or the request times out.
1173
- `HTTPError`:
1180
+ [`HfHubHTTPError`]:
1174
1181
  If the request fails with an HTTP error status code other than HTTP 503.
1175
1182
 
1176
1183
  Example:
@@ -1202,19 +1209,16 @@ class InferenceClient:
1202
1209
  overlap_mask_area_threshold: Optional[float] = None,
1203
1210
  subtask: Optional["ImageSegmentationSubtask"] = None,
1204
1211
  threshold: Optional[float] = None,
1205
- ) -> List[ImageSegmentationOutputElement]:
1212
+ ) -> list[ImageSegmentationOutputElement]:
1206
1213
  """
1207
1214
  Perform image segmentation on the given image using the specified model.
1208
1215
 
1209
- <Tip warning={true}>
1210
-
1211
- You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1212
-
1213
- </Tip>
1216
+ > [!WARNING]
1217
+ > You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1214
1218
 
1215
1219
  Args:
1216
- image (`Union[str, Path, bytes, BinaryIO]`):
1217
- The image to segment. It can be raw bytes, an image file, or a URL to an online image.
1220
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1221
+ The image to segment. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1218
1222
  model (`str`, *optional*):
1219
1223
  The model to use for image segmentation. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1220
1224
  deployed Inference Endpoint. If not provided, the default recommended model for image segmentation will be used.
@@ -1227,12 +1231,12 @@ class InferenceClient:
1227
1231
  threshold (`float`, *optional*):
1228
1232
  Probability threshold to filter out predicted masks.
1229
1233
  Returns:
1230
- `List[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1234
+ `list[ImageSegmentationOutputElement]`: A list of [`ImageSegmentationOutputElement`] items containing the segmented masks and associated attributes.
1231
1235
 
1232
1236
  Raises:
1233
1237
  [`InferenceTimeoutError`]:
1234
1238
  If the model is unavailable or the request times out.
1235
- `HTTPError`:
1239
+ [`HfHubHTTPError`]:
1236
1240
  If the request fails with an HTTP error status code other than HTTP 503.
1237
1241
 
1238
1242
  Example:
@@ -1258,6 +1262,7 @@ class InferenceClient:
1258
1262
  api_key=self.token,
1259
1263
  )
1260
1264
  response = self._inner_post(request_parameters)
1265
+ response = provider_helper.get_response(response, request_parameters)
1261
1266
  output = ImageSegmentationOutputElement.parse_obj_as_list(response)
1262
1267
  for item in output:
1263
1268
  item.mask = _b64_to_image(item.mask) # type: ignore [assignment]
@@ -1278,15 +1283,12 @@ class InferenceClient:
1278
1283
  """
1279
1284
  Perform image-to-image translation using a specified model.
1280
1285
 
1281
- <Tip warning={true}>
1282
-
1283
- You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1284
-
1285
- </Tip>
1286
+ > [!WARNING]
1287
+ > You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1286
1288
 
1287
1289
  Args:
1288
- image (`Union[str, Path, bytes, BinaryIO]`):
1289
- The input image for translation. It can be raw bytes, an image file, or a URL to an online image.
1290
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1291
+ The input image for translation. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1290
1292
  prompt (`str`, *optional*):
1291
1293
  The text prompt to guide the image generation.
1292
1294
  negative_prompt (`str`, *optional*):
@@ -1301,7 +1303,8 @@ class InferenceClient:
1301
1303
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1302
1304
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
1303
1305
  target_size (`ImageToImageTargetSize`, *optional*):
1304
- The size in pixel of the output image.
1306
+ The size in pixels of the output image. This parameter is only supported by some providers and for
1307
+ specific models. It will be ignored when unsupported.
1305
1308
 
1306
1309
  Returns:
1307
1310
  `Image`: The translated image.
@@ -1309,7 +1312,7 @@ class InferenceClient:
1309
1312
  Raises:
1310
1313
  [`InferenceTimeoutError`]:
1311
1314
  If the model is unavailable or the request times out.
1312
- `HTTPError`:
1315
+ [`HfHubHTTPError`]:
1313
1316
  If the request fails with an HTTP error status code other than HTTP 503.
1314
1317
 
1315
1318
  Example:
@@ -1319,6 +1322,7 @@ class InferenceClient:
1319
1322
  >>> image = client.image_to_image("cat.jpg", prompt="turn the cat into a tiger")
1320
1323
  >>> image.save("tiger.jpg")
1321
1324
  ```
1325
+
1322
1326
  """
1323
1327
  model_id = model or self.model
1324
1328
  provider_helper = get_provider_helper(self.provider, task="image-to-image", model=model_id)
@@ -1337,18 +1341,98 @@ class InferenceClient:
1337
1341
  api_key=self.token,
1338
1342
  )
1339
1343
  response = self._inner_post(request_parameters)
1344
+ response = provider_helper.get_response(response, request_parameters)
1340
1345
  return _bytes_to_image(response)
1341
1346
 
1347
+ def image_to_video(
1348
+ self,
1349
+ image: ContentT,
1350
+ *,
1351
+ model: Optional[str] = None,
1352
+ prompt: Optional[str] = None,
1353
+ negative_prompt: Optional[str] = None,
1354
+ num_frames: Optional[float] = None,
1355
+ num_inference_steps: Optional[int] = None,
1356
+ guidance_scale: Optional[float] = None,
1357
+ seed: Optional[int] = None,
1358
+ target_size: Optional[ImageToVideoTargetSize] = None,
1359
+ **kwargs,
1360
+ ) -> bytes:
1361
+ """
1362
+ Generate a video from an input image.
1363
+
1364
+ Args:
1365
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1366
+ The input image to generate a video from. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1367
+ model (`str`, *optional*):
1368
+ The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1369
+ Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
1370
+ prompt (`str`, *optional*):
1371
+ The text prompt to guide the video generation.
1372
+ negative_prompt (`str`, *optional*):
1373
+ One prompt to guide what NOT to include in video generation.
1374
+ num_frames (`float`, *optional*):
1375
+ The num_frames parameter determines how many video frames are generated.
1376
+ num_inference_steps (`int`, *optional*):
1377
+ For diffusion models. The number of denoising steps. More denoising steps usually lead to a higher
1378
+ quality image at the expense of slower inference.
1379
+ guidance_scale (`float`, *optional*):
1380
+ For diffusion models. A higher guidance scale value encourages the model to generate videos closely
1381
+ linked to the text prompt at the expense of lower image quality.
1382
+ seed (`int`, *optional*):
1383
+ The seed to use for the video generation.
1384
+ target_size (`ImageToVideoTargetSize`, *optional*):
1385
+ The size in pixel of the output video frames.
1386
+ num_inference_steps (`int`, *optional*):
1387
+ The number of denoising steps. More denoising steps usually lead to a higher quality video at the
1388
+ expense of slower inference.
1389
+ seed (`int`, *optional*):
1390
+ Seed for the random number generator.
1391
+
1392
+ Returns:
1393
+ `bytes`: The generated video.
1394
+
1395
+ Examples:
1396
+ ```py
1397
+ >>> from huggingface_hub import InferenceClient
1398
+ >>> client = InferenceClient()
1399
+ >>> video = client.image_to_video("cat.jpg", model="Wan-AI/Wan2.2-I2V-A14B", prompt="turn the cat into a tiger")
1400
+ >>> with open("tiger.mp4", "wb") as f:
1401
+ ... f.write(video)
1402
+ ```
1403
+ """
1404
+ model_id = model or self.model
1405
+ provider_helper = get_provider_helper(self.provider, task="image-to-video", model=model_id)
1406
+ request_parameters = provider_helper.prepare_request(
1407
+ inputs=image,
1408
+ parameters={
1409
+ "prompt": prompt,
1410
+ "negative_prompt": negative_prompt,
1411
+ "num_frames": num_frames,
1412
+ "num_inference_steps": num_inference_steps,
1413
+ "guidance_scale": guidance_scale,
1414
+ "seed": seed,
1415
+ "target_size": target_size,
1416
+ **kwargs,
1417
+ },
1418
+ headers=self.headers,
1419
+ model=model_id,
1420
+ api_key=self.token,
1421
+ )
1422
+ response = self._inner_post(request_parameters)
1423
+ response = provider_helper.get_response(response, request_parameters)
1424
+ return response
1425
+
1342
1426
  def image_to_text(self, image: ContentT, *, model: Optional[str] = None) -> ImageToTextOutput:
1343
1427
  """
1344
1428
  Takes an input image and return text.
1345
1429
 
1346
1430
  Models can have very different outputs depending on your use case (image captioning, optical character recognition
1347
- (OCR), Pix2Struct, etc). Please have a look to the model card to learn more about a model's specificities.
1431
+ (OCR), Pix2Struct, etc.). Please have a look to the model card to learn more about a model's specificities.
1348
1432
 
1349
1433
  Args:
1350
- image (`Union[str, Path, bytes, BinaryIO]`):
1351
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image..
1434
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1435
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1352
1436
  model (`str`, *optional*):
1353
1437
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
1354
1438
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
@@ -1359,7 +1443,7 @@ class InferenceClient:
1359
1443
  Raises:
1360
1444
  [`InferenceTimeoutError`]:
1361
1445
  If the model is unavailable or the request times out.
1362
- `HTTPError`:
1446
+ [`HfHubHTTPError`]:
1363
1447
  If the request fails with an HTTP error status code other than HTTP 503.
1364
1448
 
1365
1449
  Example:
@@ -1382,36 +1466,33 @@ class InferenceClient:
1382
1466
  api_key=self.token,
1383
1467
  )
1384
1468
  response = self._inner_post(request_parameters)
1385
- output = ImageToTextOutput.parse_obj(response)
1386
- return output[0] if isinstance(output, list) else output
1469
+ output_list: list[ImageToTextOutput] = ImageToTextOutput.parse_obj_as_list(response)
1470
+ return output_list[0]
1387
1471
 
1388
1472
  def object_detection(
1389
1473
  self, image: ContentT, *, model: Optional[str] = None, threshold: Optional[float] = None
1390
- ) -> List[ObjectDetectionOutputElement]:
1474
+ ) -> list[ObjectDetectionOutputElement]:
1391
1475
  """
1392
1476
  Perform object detection on the given image using the specified model.
1393
1477
 
1394
- <Tip warning={true}>
1395
-
1396
- You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1397
-
1398
- </Tip>
1478
+ > [!WARNING]
1479
+ > You must have `PIL` installed if you want to work with images (`pip install Pillow`).
1399
1480
 
1400
1481
  Args:
1401
- image (`Union[str, Path, bytes, BinaryIO]`):
1402
- The image to detect objects on. It can be raw bytes, an image file, or a URL to an online image.
1482
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
1483
+ The image to detect objects on. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
1403
1484
  model (`str`, *optional*):
1404
1485
  The model to use for object detection. Can be a model ID hosted on the Hugging Face Hub or a URL to a
1405
1486
  deployed Inference Endpoint. If not provided, the default recommended model for object detection (DETR) will be used.
1406
1487
  threshold (`float`, *optional*):
1407
1488
  The probability necessary to make a prediction.
1408
1489
  Returns:
1409
- `List[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1490
+ `list[ObjectDetectionOutputElement]`: A list of [`ObjectDetectionOutputElement`] items containing the bounding boxes and associated attributes.
1410
1491
 
1411
1492
  Raises:
1412
1493
  [`InferenceTimeoutError`]:
1413
1494
  If the model is unavailable or the request times out.
1414
- `HTTPError`:
1495
+ [`HfHubHTTPError`]:
1415
1496
  If the request fails with an HTTP error status code other than HTTP 503.
1416
1497
  `ValueError`:
1417
1498
  If the request output is not a List.
@@ -1449,7 +1530,7 @@ class InferenceClient:
1449
1530
  max_question_len: Optional[int] = None,
1450
1531
  max_seq_len: Optional[int] = None,
1451
1532
  top_k: Optional[int] = None,
1452
- ) -> Union[QuestionAnsweringOutputElement, List[QuestionAnsweringOutputElement]]:
1533
+ ) -> Union[QuestionAnsweringOutputElement, list[QuestionAnsweringOutputElement]]:
1453
1534
  """
1454
1535
  Retrieve the answer to a question from a given text.
1455
1536
 
@@ -1481,13 +1562,13 @@ class InferenceClient:
1481
1562
  topk answers if there are not enough options available within the context.
1482
1563
 
1483
1564
  Returns:
1484
- Union[`QuestionAnsweringOutputElement`, List[`QuestionAnsweringOutputElement`]]:
1565
+ Union[`QuestionAnsweringOutputElement`, list[`QuestionAnsweringOutputElement`]]:
1485
1566
  When top_k is 1 or not provided, it returns a single `QuestionAnsweringOutputElement`.
1486
1567
  When top_k is greater than 1, it returns a list of `QuestionAnsweringOutputElement`.
1487
1568
  Raises:
1488
1569
  [`InferenceTimeoutError`]:
1489
1570
  If the model is unavailable or the request times out.
1490
- `HTTPError`:
1571
+ [`HfHubHTTPError`]:
1491
1572
  If the request fails with an HTTP error status code other than HTTP 503.
1492
1573
 
1493
1574
  Example:
@@ -1501,7 +1582,7 @@ class InferenceClient:
1501
1582
  model_id = model or self.model
1502
1583
  provider_helper = get_provider_helper(self.provider, task="question-answering", model=model_id)
1503
1584
  request_parameters = provider_helper.prepare_request(
1504
- inputs=None,
1585
+ inputs={"question": question, "context": context},
1505
1586
  parameters={
1506
1587
  "align_to_words": align_to_words,
1507
1588
  "doc_stride": doc_stride,
@@ -1511,7 +1592,6 @@ class InferenceClient:
1511
1592
  "max_seq_len": max_seq_len,
1512
1593
  "top_k": top_k,
1513
1594
  },
1514
- extra_payload={"question": question, "context": context},
1515
1595
  headers=self.headers,
1516
1596
  model=model_id,
1517
1597
  api_key=self.token,
@@ -1522,15 +1602,15 @@ class InferenceClient:
1522
1602
  return output
1523
1603
 
1524
1604
  def sentence_similarity(
1525
- self, sentence: str, other_sentences: List[str], *, model: Optional[str] = None
1526
- ) -> List[float]:
1605
+ self, sentence: str, other_sentences: list[str], *, model: Optional[str] = None
1606
+ ) -> list[float]:
1527
1607
  """
1528
1608
  Compute the semantic similarity between a sentence and a list of other sentences by comparing their embeddings.
1529
1609
 
1530
1610
  Args:
1531
1611
  sentence (`str`):
1532
1612
  The main sentence to compare to others.
1533
- other_sentences (`List[str]`):
1613
+ other_sentences (`list[str]`):
1534
1614
  The list of sentences to compare to.
1535
1615
  model (`str`, *optional*):
1536
1616
  The model to use for the sentence similarity task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1538,12 +1618,12 @@ class InferenceClient:
1538
1618
  Defaults to None.
1539
1619
 
1540
1620
  Returns:
1541
- `List[float]`: The embedding representing the input text.
1621
+ `list[float]`: The embedding representing the input text.
1542
1622
 
1543
1623
  Raises:
1544
1624
  [`InferenceTimeoutError`]:
1545
1625
  If the model is unavailable or the request times out.
1546
- `HTTPError`:
1626
+ [`HfHubHTTPError`]:
1547
1627
  If the request fails with an HTTP error status code other than HTTP 503.
1548
1628
 
1549
1629
  Example:
@@ -1580,7 +1660,7 @@ class InferenceClient:
1580
1660
  *,
1581
1661
  model: Optional[str] = None,
1582
1662
  clean_up_tokenization_spaces: Optional[bool] = None,
1583
- generate_parameters: Optional[Dict[str, Any]] = None,
1663
+ generate_parameters: Optional[dict[str, Any]] = None,
1584
1664
  truncation: Optional["SummarizationTruncationStrategy"] = None,
1585
1665
  ) -> SummarizationOutput:
1586
1666
  """
@@ -1594,7 +1674,7 @@ class InferenceClient:
1594
1674
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
1595
1675
  clean_up_tokenization_spaces (`bool`, *optional*):
1596
1676
  Whether to clean up the potential extra spaces in the text output.
1597
- generate_parameters (`Dict[str, Any]`, *optional*):
1677
+ generate_parameters (`dict[str, Any]`, *optional*):
1598
1678
  Additional parametrization of the text generation algorithm.
1599
1679
  truncation (`"SummarizationTruncationStrategy"`, *optional*):
1600
1680
  The truncation strategy to use.
@@ -1604,7 +1684,7 @@ class InferenceClient:
1604
1684
  Raises:
1605
1685
  [`InferenceTimeoutError`]:
1606
1686
  If the model is unavailable or the request times out.
1607
- `HTTPError`:
1687
+ [`HfHubHTTPError`]:
1608
1688
  If the request fails with an HTTP error status code other than HTTP 503.
1609
1689
 
1610
1690
  Example:
@@ -1634,7 +1714,7 @@ class InferenceClient:
1634
1714
 
1635
1715
  def table_question_answering(
1636
1716
  self,
1637
- table: Dict[str, Any],
1717
+ table: dict[str, Any],
1638
1718
  query: str,
1639
1719
  *,
1640
1720
  model: Optional[str] = None,
@@ -1669,7 +1749,7 @@ class InferenceClient:
1669
1749
  Raises:
1670
1750
  [`InferenceTimeoutError`]:
1671
1751
  If the model is unavailable or the request times out.
1672
- `HTTPError`:
1752
+ [`HfHubHTTPError`]:
1673
1753
  If the request fails with an HTTP error status code other than HTTP 503.
1674
1754
 
1675
1755
  Example:
@@ -1685,9 +1765,8 @@ class InferenceClient:
1685
1765
  model_id = model or self.model
1686
1766
  provider_helper = get_provider_helper(self.provider, task="table-question-answering", model=model_id)
1687
1767
  request_parameters = provider_helper.prepare_request(
1688
- inputs=None,
1768
+ inputs={"query": query, "table": table},
1689
1769
  parameters={"model": model, "padding": padding, "sequential": sequential, "truncation": truncation},
1690
- extra_payload={"query": query, "table": table},
1691
1770
  headers=self.headers,
1692
1771
  model=model_id,
1693
1772
  api_key=self.token,
@@ -1695,12 +1774,12 @@ class InferenceClient:
1695
1774
  response = self._inner_post(request_parameters)
1696
1775
  return TableQuestionAnsweringOutputElement.parse_obj_as_instance(response)
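Per this hunk, the query and table are now sent as the request `inputs` instead of a separate `extra_payload`. A hedged caller-side sketch (the model id is an illustrative choice):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
table = {
    "Repository": ["Transformers", "Datasets", "Tokenizers"],
    "Stars": ["36542", "4512", "3934"],
}
result = client.table_question_answering(
    table,
    query="How many stars does the transformers repository have?",
    model="google/tapas-base-finetuned-wtq",  # illustrative table-QA model
)
print(result.answer, result.cells)
```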
1697
1776
 
1698
- def tabular_classification(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[str]:
1777
+ def tabular_classification(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[str]:
1699
1778
  """
1700
1779
  Classifying a target category (a group) based on a set of attributes.
1701
1780
 
1702
1781
  Args:
1703
- table (`Dict[str, Any]`):
1782
+ table (`dict[str, Any]`):
1704
1783
  Set of attributes to classify.
1705
1784
  model (`str`, *optional*):
1706
1785
  The model to use for the tabular classification task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1713,7 +1792,7 @@ class InferenceClient:
1713
1792
  Raises:
1714
1793
  [`InferenceTimeoutError`]:
1715
1794
  If the model is unavailable or the request times out.
1716
- `HTTPError`:
1795
+ [`HfHubHTTPError`]:
1717
1796
  If the request fails with an HTTP error status code other than HTTP 503.
1718
1797
 
1719
1798
  Example:
@@ -1750,12 +1829,12 @@ class InferenceClient:
1750
1829
  response = self._inner_post(request_parameters)
1751
1830
  return _bytes_to_list(response)
1752
1831
 
1753
- def tabular_regression(self, table: Dict[str, Any], *, model: Optional[str] = None) -> List[float]:
1832
+ def tabular_regression(self, table: dict[str, Any], *, model: Optional[str] = None) -> list[float]:
1754
1833
  """
1755
1834
  Predicting a numerical target value given a set of attributes/features in a table.
1756
1835
 
1757
1836
  Args:
1758
- table (`Dict[str, Any]`):
1837
+ table (`dict[str, Any]`):
1759
1838
  Set of attributes stored in a table. The attributes used to predict the target can be both numerical and categorical.
1760
1839
  model (`str`, *optional*):
1761
1840
  The model to use for the tabular regression task. Can be a model ID hosted on the Hugging Face Hub or a URL to
@@ -1768,7 +1847,7 @@ class InferenceClient:
1768
1847
  Raises:
1769
1848
  [`InferenceTimeoutError`]:
1770
1849
  If the model is unavailable or the request times out.
1771
- `HTTPError`:
1850
+ [`HfHubHTTPError`]:
1772
1851
  If the request fails with an HTTP error status code other than HTTP 503.
1773
1852
 
1774
1853
  Example:
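A minimal sketch of `tabular_regression` with the `dict[str, Any]` table shown above; the feature columns and model id are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
table = {
    "Height": ["11.52", "12.48"],
    "Length1": ["23.2", "24.0"],
    "Length2": ["25.4", "26.3"],
}
weights = client.tabular_regression(table, model="scikit-learn/Fish-Weight")  # illustrative model id
print(weights)  # one predicted float per row
```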
@@ -1807,7 +1886,7 @@ class InferenceClient:
1807
1886
  model: Optional[str] = None,
1808
1887
  top_k: Optional[int] = None,
1809
1888
  function_to_apply: Optional["TextClassificationOutputTransform"] = None,
1810
- ) -> List[TextClassificationOutputElement]:
1889
+ ) -> list[TextClassificationOutputElement]:
1811
1890
  """
1812
1891
  Perform text classification (e.g. sentiment-analysis) on the given text.
1813
1892
 
@@ -1824,12 +1903,12 @@ class InferenceClient:
1824
1903
  The function to apply to the model outputs in order to retrieve the scores.
1825
1904
 
1826
1905
  Returns:
1827
- `List[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1906
+ `list[TextClassificationOutputElement]`: a list of [`TextClassificationOutputElement`] items containing the predicted label and associated probability.
1828
1907
 
1829
1908
  Raises:
1830
1909
  [`InferenceTimeoutError`]:
1831
1910
  If the model is unavailable or the request times out.
1832
- `HTTPError`:
1911
+ [`HfHubHTTPError`]:
1833
1912
  If the request fails with an HTTP error status code other than HTTP 503.
1834
1913
 
1835
1914
  Example:
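A short sketch of `text_classification` returning `list[TextClassificationOutputElement]` as documented above (the input text is an assumption):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
for item in client.text_classification("I like you. I love you."):
    print(item.label, round(item.score, 3))
```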
@@ -1859,26 +1938,26 @@ class InferenceClient:
1859
1938
  return TextClassificationOutputElement.parse_obj_as_list(response)[0] # type: ignore [return-value]
1860
1939
 
1861
1940
  @overload
1862
- def text_generation( # type: ignore
1941
+ def text_generation(
1863
1942
  self,
1864
1943
  prompt: str,
1865
1944
  *,
1866
- details: Literal[False] = ...,
1867
- stream: Literal[False] = ...,
1945
+ details: Literal[True],
1946
+ stream: Literal[True],
1868
1947
  model: Optional[str] = None,
1869
1948
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
1870
1949
  adapter_id: Optional[str] = None,
1871
1950
  best_of: Optional[int] = None,
1872
1951
  decoder_input_details: Optional[bool] = None,
1873
- do_sample: Optional[bool] = False, # Manual default value
1952
+ do_sample: Optional[bool] = None,
1874
1953
  frequency_penalty: Optional[float] = None,
1875
1954
  grammar: Optional[TextGenerationInputGrammarType] = None,
1876
1955
  max_new_tokens: Optional[int] = None,
1877
1956
  repetition_penalty: Optional[float] = None,
1878
- return_full_text: Optional[bool] = False, # Manual default value
1957
+ return_full_text: Optional[bool] = None,
1879
1958
  seed: Optional[int] = None,
1880
- stop: Optional[List[str]] = None,
1881
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
1959
+ stop: Optional[list[str]] = None,
1960
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1882
1961
  temperature: Optional[float] = None,
1883
1962
  top_k: Optional[int] = None,
1884
1963
  top_n_tokens: Optional[int] = None,
@@ -1886,29 +1965,29 @@ class InferenceClient:
1886
1965
  truncate: Optional[int] = None,
1887
1966
  typical_p: Optional[float] = None,
1888
1967
  watermark: Optional[bool] = None,
1889
- ) -> str: ...
1968
+ ) -> Iterable[TextGenerationStreamOutput]: ...
1890
1969
 
1891
1970
  @overload
1892
- def text_generation( # type: ignore
1971
+ def text_generation(
1893
1972
  self,
1894
1973
  prompt: str,
1895
1974
  *,
1896
- details: Literal[True] = ...,
1897
- stream: Literal[False] = ...,
1975
+ details: Literal[True],
1976
+ stream: Optional[Literal[False]] = None,
1898
1977
  model: Optional[str] = None,
1899
1978
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
1900
1979
  adapter_id: Optional[str] = None,
1901
1980
  best_of: Optional[int] = None,
1902
1981
  decoder_input_details: Optional[bool] = None,
1903
- do_sample: Optional[bool] = False, # Manual default value
1982
+ do_sample: Optional[bool] = None,
1904
1983
  frequency_penalty: Optional[float] = None,
1905
1984
  grammar: Optional[TextGenerationInputGrammarType] = None,
1906
1985
  max_new_tokens: Optional[int] = None,
1907
1986
  repetition_penalty: Optional[float] = None,
1908
- return_full_text: Optional[bool] = False, # Manual default value
1987
+ return_full_text: Optional[bool] = None,
1909
1988
  seed: Optional[int] = None,
1910
- stop: Optional[List[str]] = None,
1911
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
1989
+ stop: Optional[list[str]] = None,
1990
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1912
1991
  temperature: Optional[float] = None,
1913
1992
  top_k: Optional[int] = None,
1914
1993
  top_n_tokens: Optional[int] = None,
@@ -1919,26 +1998,26 @@ class InferenceClient:
1919
1998
  ) -> TextGenerationOutput: ...
1920
1999
 
1921
2000
  @overload
1922
- def text_generation( # type: ignore
2001
+ def text_generation(
1923
2002
  self,
1924
2003
  prompt: str,
1925
2004
  *,
1926
- details: Literal[False] = ...,
1927
- stream: Literal[True] = ...,
2005
+ details: Optional[Literal[False]] = None,
2006
+ stream: Literal[True],
1928
2007
  model: Optional[str] = None,
1929
2008
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
1930
2009
  adapter_id: Optional[str] = None,
1931
2010
  best_of: Optional[int] = None,
1932
2011
  decoder_input_details: Optional[bool] = None,
1933
- do_sample: Optional[bool] = False, # Manual default value
2012
+ do_sample: Optional[bool] = None,
1934
2013
  frequency_penalty: Optional[float] = None,
1935
2014
  grammar: Optional[TextGenerationInputGrammarType] = None,
1936
2015
  max_new_tokens: Optional[int] = None,
1937
2016
  repetition_penalty: Optional[float] = None,
1938
- return_full_text: Optional[bool] = False, # Manual default value
2017
+ return_full_text: Optional[bool] = None,
1939
2018
  seed: Optional[int] = None,
1940
- stop: Optional[List[str]] = None,
1941
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2019
+ stop: Optional[list[str]] = None,
2020
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1942
2021
  temperature: Optional[float] = None,
1943
2022
  top_k: Optional[int] = None,
1944
2023
  top_n_tokens: Optional[int] = None,
@@ -1949,26 +2028,26 @@ class InferenceClient:
1949
2028
  ) -> Iterable[str]: ...
1950
2029
 
1951
2030
  @overload
1952
- def text_generation( # type: ignore
2031
+ def text_generation(
1953
2032
  self,
1954
2033
  prompt: str,
1955
2034
  *,
1956
- details: Literal[True] = ...,
1957
- stream: Literal[True] = ...,
2035
+ details: Optional[Literal[False]] = None,
2036
+ stream: Optional[Literal[False]] = None,
1958
2037
  model: Optional[str] = None,
1959
2038
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
1960
2039
  adapter_id: Optional[str] = None,
1961
2040
  best_of: Optional[int] = None,
1962
2041
  decoder_input_details: Optional[bool] = None,
1963
- do_sample: Optional[bool] = False, # Manual default value
2042
+ do_sample: Optional[bool] = None,
1964
2043
  frequency_penalty: Optional[float] = None,
1965
2044
  grammar: Optional[TextGenerationInputGrammarType] = None,
1966
2045
  max_new_tokens: Optional[int] = None,
1967
2046
  repetition_penalty: Optional[float] = None,
1968
- return_full_text: Optional[bool] = False, # Manual default value
2047
+ return_full_text: Optional[bool] = None,
1969
2048
  seed: Optional[int] = None,
1970
- stop: Optional[List[str]] = None,
1971
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2049
+ stop: Optional[list[str]] = None,
2050
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
1972
2051
  temperature: Optional[float] = None,
1973
2052
  top_k: Optional[int] = None,
1974
2053
  top_n_tokens: Optional[int] = None,
@@ -1976,29 +2055,29 @@ class InferenceClient:
1976
2055
  truncate: Optional[int] = None,
1977
2056
  typical_p: Optional[float] = None,
1978
2057
  watermark: Optional[bool] = None,
1979
- ) -> Iterable[TextGenerationStreamOutput]: ...
2058
+ ) -> str: ...
1980
2059
 
1981
2060
  @overload
1982
2061
  def text_generation(
1983
2062
  self,
1984
2063
  prompt: str,
1985
2064
  *,
1986
- details: Literal[True] = ...,
1987
- stream: bool = ...,
2065
+ details: Optional[bool] = None,
2066
+ stream: Optional[bool] = None,
1988
2067
  model: Optional[str] = None,
1989
2068
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
1990
2069
  adapter_id: Optional[str] = None,
1991
2070
  best_of: Optional[int] = None,
1992
2071
  decoder_input_details: Optional[bool] = None,
1993
- do_sample: Optional[bool] = False, # Manual default value
2072
+ do_sample: Optional[bool] = None,
1994
2073
  frequency_penalty: Optional[float] = None,
1995
2074
  grammar: Optional[TextGenerationInputGrammarType] = None,
1996
2075
  max_new_tokens: Optional[int] = None,
1997
2076
  repetition_penalty: Optional[float] = None,
1998
- return_full_text: Optional[bool] = False, # Manual default value
2077
+ return_full_text: Optional[bool] = None,
1999
2078
  seed: Optional[int] = None,
2000
- stop: Optional[List[str]] = None,
2001
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2079
+ stop: Optional[list[str]] = None,
2080
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2002
2081
  temperature: Optional[float] = None,
2003
2082
  top_k: Optional[int] = None,
2004
2083
  top_n_tokens: Optional[int] = None,
@@ -2006,28 +2085,28 @@ class InferenceClient:
2006
2085
  truncate: Optional[int] = None,
2007
2086
  typical_p: Optional[float] = None,
2008
2087
  watermark: Optional[bool] = None,
2009
- ) -> Union[TextGenerationOutput, Iterable[TextGenerationStreamOutput]]: ...
2088
+ ) -> Union[str, TextGenerationOutput, Iterable[str], Iterable[TextGenerationStreamOutput]]: ...
2010
2089
 
2011
2090
  def text_generation(
2012
2091
  self,
2013
2092
  prompt: str,
2014
2093
  *,
2015
- details: bool = False,
2016
- stream: bool = False,
2094
+ details: Optional[bool] = None,
2095
+ stream: Optional[bool] = None,
2017
2096
  model: Optional[str] = None,
2018
2097
  # Parameters from `TextGenerationInputGenerateParameters` (maintained manually)
2019
2098
  adapter_id: Optional[str] = None,
2020
2099
  best_of: Optional[int] = None,
2021
2100
  decoder_input_details: Optional[bool] = None,
2022
- do_sample: Optional[bool] = False, # Manual default value
2101
+ do_sample: Optional[bool] = None,
2023
2102
  frequency_penalty: Optional[float] = None,
2024
2103
  grammar: Optional[TextGenerationInputGrammarType] = None,
2025
2104
  max_new_tokens: Optional[int] = None,
2026
2105
  repetition_penalty: Optional[float] = None,
2027
- return_full_text: Optional[bool] = False, # Manual default value
2106
+ return_full_text: Optional[bool] = None,
2028
2107
  seed: Optional[int] = None,
2029
- stop: Optional[List[str]] = None,
2030
- stop_sequences: Optional[List[str]] = None, # Deprecated, use `stop` instead
2108
+ stop: Optional[list[str]] = None,
2109
+ stop_sequences: Optional[list[str]] = None, # Deprecated, use `stop` instead
2031
2110
  temperature: Optional[float] = None,
2032
2111
  top_k: Optional[int] = None,
2033
2112
  top_n_tokens: Optional[int] = None,
@@ -2039,12 +2118,9 @@ class InferenceClient:
2039
2118
  """
2040
2119
  Given a prompt, generate the following text.
2041
2120
 
2042
- <Tip>
2043
-
2044
- If you want to generate a response from chat messages, you should use the [`InferenceClient.chat_completion`] method.
2045
- It accepts a list of messages instead of a single text prompt and handles the chat templating for you.
2046
-
2047
- </Tip>
2121
+ > [!TIP]
2122
+ > If you want to generate a response from chat messages, you should use the [`InferenceClient.chat_completion`] method.
2123
+ > It accepts a list of messages instead of a single text prompt and handles the chat templating for you.
2048
2124
 
2049
2125
  Args:
2050
2126
  prompt (`str`):
@@ -2083,9 +2159,9 @@ class InferenceClient:
2083
2159
  Whether to prepend the prompt to the generated text
2084
2160
  seed (`int`, *optional*):
2085
2161
  Random sampling seed
2086
- stop (`List[str]`, *optional*):
2162
+ stop (`list[str]`, *optional*):
2087
2163
  Stop generating tokens if a member of `stop` is generated.
2088
- stop_sequences (`List[str]`, *optional*):
2164
+ stop_sequences (`list[str]`, *optional*):
2089
2165
  Deprecated argument. Use `stop` instead.
2090
2166
  temperature (`float`, *optional*):
2091
2167
  The value used to module the logits distribution.
@@ -2102,7 +2178,7 @@ class InferenceClient:
2102
2178
  typical_p (`float`, *optional`):
2103
2179
  Typical Decoding mass
2104
2180
  See [Typical Decoding for Natural Language Generation](https://arxiv.org/abs/2202.00666) for more information
2105
- watermark (`bool`, *optional`):
2181
+ watermark (`bool`, *optional*):
2106
2182
  Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
2107
2183
 
2108
2184
  Returns:
@@ -2118,7 +2194,7 @@ class InferenceClient:
2118
2194
  If input values are not valid. No HTTP call is made to the server.
2119
2195
  [`InferenceTimeoutError`]:
2120
2196
  If the model is unavailable or the request times out.
2121
- `HTTPError`:
2197
+ [`HfHubHTTPError`]:
2122
2198
  If the request fails with an HTTP error status code other than HTTP 503.
2123
2199
 
2124
2200
  Example:
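The rewritten overloads above map `(details, stream)` to four return shapes; a hedged sketch exercising each combination (the prompt and `max_new_tokens` value are assumptions):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
prompt = "The huggingface_hub library is "

# details/stream unset -> plain `str`
print(client.text_generation(prompt, max_new_tokens=12))

# details=True -> `TextGenerationOutput` (generated text plus generation details)
detailed = client.text_generation(prompt, max_new_tokens=12, details=True)
print(detailed.generated_text)

# stream=True -> `Iterable[str]` of generated tokens
for token in client.text_generation(prompt, max_new_tokens=12, stream=True):
    print(token, end="")

# details=True and stream=True -> `Iterable[TextGenerationStreamOutput]`
for chunk in client.text_generation(prompt, max_new_tokens=12, details=True, stream=True):
    print(chunk.token.text, end="")
```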
@@ -2252,7 +2328,7 @@ class InferenceClient:
2252
2328
  "repetition_penalty": repetition_penalty,
2253
2329
  "return_full_text": return_full_text,
2254
2330
  "seed": seed,
2255
- "stop": stop if stop is not None else [],
2331
+ "stop": stop,
2256
2332
  "temperature": temperature,
2257
2333
  "top_k": top_k,
2258
2334
  "top_n_tokens": top_n_tokens,
@@ -2306,8 +2382,8 @@ class InferenceClient:
2306
2382
 
2307
2383
  # Handle errors separately for more precise error messages
2308
2384
  try:
2309
- bytes_output = self._inner_post(request_parameters, stream=stream)
2310
- except HTTPError as e:
2385
+ bytes_output = self._inner_post(request_parameters, stream=stream or False)
2386
+ except HfHubHTTPError as e:
2311
2387
  match = MODEL_KWARGS_NOT_USED_REGEX.search(str(e))
2312
2388
  if isinstance(e, BadRequestError) and match:
2313
2389
  unused_params = [kwarg.strip("' ") for kwarg in match.group(1).split(",")]
@@ -2362,20 +2438,16 @@ class InferenceClient:
2362
2438
  model: Optional[str] = None,
2363
2439
  scheduler: Optional[str] = None,
2364
2440
  seed: Optional[int] = None,
2365
- extra_body: Optional[Dict[str, Any]] = None,
2441
+ extra_body: Optional[dict[str, Any]] = None,
2366
2442
  ) -> "Image":
2367
2443
  """
2368
2444
  Generate an image based on a given text using a specified model.
2369
2445
 
2370
- <Tip warning={true}>
2371
-
2372
- You must have `PIL` installed if you want to work with images (`pip install Pillow`).
2373
-
2374
- </Tip>
2446
+ > [!WARNING]
2447
+ > You must have `PIL` installed if you want to work with images (`pip install Pillow`).
2375
2448
 
2376
- <Tip>
2377
- You can pass provider-specific parameters to the model by using the `extra_body` argument.
2378
- </Tip>
2449
+ > [!TIP]
2450
+ > You can pass provider-specific parameters to the model by using the `extra_body` argument.
2379
2451
 
2380
2452
  Args:
2381
2453
  prompt (`str`):
@@ -2400,7 +2472,7 @@ class InferenceClient:
2400
2472
  Override the scheduler with a compatible one.
2401
2473
  seed (`int`, *optional*):
2402
2474
  Seed for the random number generator.
2403
- extra_body (`Dict[str, Any]`, *optional*):
2475
+ extra_body (`dict[str, Any]`, *optional*):
2404
2476
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2405
2477
  for supported parameters.
2406
2478
 
@@ -2410,7 +2482,7 @@ class InferenceClient:
2410
2482
  Raises:
2411
2483
  [`InferenceTimeoutError`]:
2412
2484
  If the model is unavailable or the request times out.
2413
- `HTTPError`:
2485
+ [`HfHubHTTPError`]:
2414
2486
  If the request fails with an HTTP error status code other than HTTP 503.
2415
2487
 
2416
2488
  Example:
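A hedged sketch focused on the `extra_body` passthrough documented above; the provider name and the extra field are illustrative assumptions (check your provider's documentation):

```python
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai")  # illustrative provider choice
image = client.text_to_image(
    "An astronaut riding a horse on the moon",
    seed=42,
    extra_body={"output_format": "png"},  # hypothetical provider-specific field
)
image.save("astronaut.png")  # requires Pillow
```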
@@ -2470,6 +2542,7 @@ class InferenceClient:
2470
2542
  ... )
2471
2543
  >>> image.save("astronaut.png")
2472
2544
  ```
2545
+
2473
2546
  """
2474
2547
  model_id = model or self.model
2475
2548
  provider_helper = get_provider_helper(self.provider, task="text-to-image", model=model_id)
@@ -2490,7 +2563,7 @@ class InferenceClient:
2490
2563
  api_key=self.token,
2491
2564
  )
2492
2565
  response = self._inner_post(request_parameters)
2493
- response = provider_helper.get_response(response)
2566
+ response = provider_helper.get_response(response, request_parameters)
2494
2567
  return _bytes_to_image(response)
2495
2568
 
2496
2569
  def text_to_video(
@@ -2499,18 +2572,17 @@ class InferenceClient:
2499
2572
  *,
2500
2573
  model: Optional[str] = None,
2501
2574
  guidance_scale: Optional[float] = None,
2502
- negative_prompt: Optional[List[str]] = None,
2575
+ negative_prompt: Optional[list[str]] = None,
2503
2576
  num_frames: Optional[float] = None,
2504
2577
  num_inference_steps: Optional[int] = None,
2505
2578
  seed: Optional[int] = None,
2506
- extra_body: Optional[Dict[str, Any]] = None,
2579
+ extra_body: Optional[dict[str, Any]] = None,
2507
2580
  ) -> bytes:
2508
2581
  """
2509
2582
  Generate a video based on a given text.
2510
2583
 
2511
- <Tip>
2512
- You can pass provider-specific parameters to the model by using the `extra_body` argument.
2513
- </Tip>
2584
+ > [!TIP]
2585
+ > You can pass provider-specific parameters to the model by using the `extra_body` argument.
2514
2586
 
2515
2587
  Args:
2516
2588
  prompt (`str`):
@@ -2522,7 +2594,7 @@ class InferenceClient:
2522
2594
  guidance_scale (`float`, *optional*):
2523
2595
  A higher guidance scale value encourages the model to generate videos closely linked to the text
2524
2596
  prompt, but values too high may cause saturation and other artifacts.
2525
- negative_prompt (`List[str]`, *optional*):
2597
+ negative_prompt (`list[str]`, *optional*):
2526
2598
  One or several prompt to guide what NOT to include in video generation.
2527
2599
  num_frames (`float`, *optional*):
2528
2600
  The num_frames parameter determines how many video frames are generated.
@@ -2531,7 +2603,7 @@ class InferenceClient:
2531
2603
  expense of slower inference.
2532
2604
  seed (`int`, *optional*):
2533
2605
  Seed for the random number generator.
2534
- extra_body (`Dict[str, Any]`, *optional*):
2606
+ extra_body (`dict[str, Any]`, *optional*):
2535
2607
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2536
2608
  for supported parameters.
2537
2609
 
@@ -2569,6 +2641,7 @@ class InferenceClient:
2569
2641
  >>> with open("cat.mp4", "wb") as file:
2570
2642
  ... file.write(video)
2571
2643
  ```
2644
+
2572
2645
  """
2573
2646
  model_id = model or self.model
2574
2647
  provider_helper = get_provider_helper(self.provider, task="text-to-video", model=model_id)
@@ -2611,14 +2684,13 @@ class InferenceClient:
2611
2684
  top_p: Optional[float] = None,
2612
2685
  typical_p: Optional[float] = None,
2613
2686
  use_cache: Optional[bool] = None,
2614
- extra_body: Optional[Dict[str, Any]] = None,
2687
+ extra_body: Optional[dict[str, Any]] = None,
2615
2688
  ) -> bytes:
2616
2689
  """
2617
2690
  Synthesize an audio of a voice pronouncing a given text.
2618
2691
 
2619
- <Tip>
2620
- You can pass provider-specific parameters to the model by using the `extra_body` argument.
2621
- </Tip>
2692
+ > [!TIP]
2693
+ > You can pass provider-specific parameters to the model by using the `extra_body` argument.
2622
2694
 
2623
2695
  Args:
2624
2696
  text (`str`):
@@ -2673,7 +2745,7 @@ class InferenceClient:
2673
2745
  paper](https://hf.co/papers/2202.00666) for more details.
2674
2746
  use_cache (`bool`, *optional*):
2675
2747
  Whether the model should use the past last key/values attentions to speed up decoding
2676
- extra_body (`Dict[str, Any]`, *optional*):
2748
+ extra_body (`dict[str, Any]`, *optional*):
2677
2749
  Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
2678
2750
  for supported parameters.
2679
2751
  Returns:
@@ -2682,7 +2754,7 @@ class InferenceClient:
2682
2754
  Raises:
2683
2755
  [`InferenceTimeoutError`]:
2684
2756
  If the model is unavailable or the request times out.
2685
- `HTTPError`:
2757
+ [`HfHubHTTPError`]:
2686
2758
  If the request fails with an HTTP error status code other than HTTP 503.
2687
2759
 
2688
2760
  Example:
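A minimal sketch of `text_to_speech`, which returns raw audio bytes per the signature above; the output container format depends on the model, so the file extension is an assumption:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
audio = client.text_to_speech("Hello from the Hugging Face Hub!")
with open("speech.flac", "wb") as f:  # extension is an assumption; depends on the model/provider
    f.write(audio)
```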
@@ -2805,9 +2877,9 @@ class InferenceClient:
2805
2877
  *,
2806
2878
  model: Optional[str] = None,
2807
2879
  aggregation_strategy: Optional["TokenClassificationAggregationStrategy"] = None,
2808
- ignore_labels: Optional[List[str]] = None,
2880
+ ignore_labels: Optional[list[str]] = None,
2809
2881
  stride: Optional[int] = None,
2810
- ) -> List[TokenClassificationOutputElement]:
2882
+ ) -> list[TokenClassificationOutputElement]:
2811
2883
  """
2812
2884
  Perform token classification on the given text.
2813
2885
  Usually used for sentence parsing, either grammatical, or Named Entity Recognition (NER) to understand keywords contained within text.
@@ -2821,18 +2893,18 @@ class InferenceClient:
2821
2893
  Defaults to None.
2822
2894
  aggregation_strategy (`"TokenClassificationAggregationStrategy"`, *optional*):
2823
2895
  The strategy used to fuse tokens based on model predictions
2824
- ignore_labels (`List[str`, *optional*):
2896
+ ignore_labels (`list[str]`, *optional*):
2825
2897
  A list of labels to ignore
2826
2898
  stride (`int`, *optional*):
2827
2899
  The number of overlapping tokens between chunks when splitting the input text.
2828
2900
 
2829
2901
  Returns:
2830
- `List[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2902
+ `list[TokenClassificationOutputElement]`: List of [`TokenClassificationOutputElement`] items containing the entity group, confidence score, word, start and end index.
2831
2903
 
2832
2904
  Raises:
2833
2905
  [`InferenceTimeoutError`]:
2834
2906
  If the model is unavailable or the request times out.
2835
- `HTTPError`:
2907
+ [`HfHubHTTPError`]:
2836
2908
  If the request fails with an HTTP error status code other than HTTP 503.
2837
2909
 
2838
2910
  Example:
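A short sketch of `token_classification` returning `list[TokenClassificationOutputElement]` as documented above (the input sentence is an assumption):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
entities = client.token_classification("My name is Sarah Jessica Parker but you can call me Jessica")
for entity in entities:
    print(entity.entity_group, entity.word, round(entity.score, 3))
```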
@@ -2883,7 +2955,7 @@ class InferenceClient:
2883
2955
  tgt_lang: Optional[str] = None,
2884
2956
  clean_up_tokenization_spaces: Optional[bool] = None,
2885
2957
  truncation: Optional["TranslationTruncationStrategy"] = None,
2886
- generate_parameters: Optional[Dict[str, Any]] = None,
2958
+ generate_parameters: Optional[dict[str, Any]] = None,
2887
2959
  ) -> TranslationOutput:
2888
2960
  """
2889
2961
  Convert text from one language to another.
@@ -2908,7 +2980,7 @@ class InferenceClient:
2908
2980
  Whether to clean up the potential extra spaces in the text output.
2909
2981
  truncation (`"TranslationTruncationStrategy"`, *optional*):
2910
2982
  The truncation strategy to use.
2911
- generate_parameters (`Dict[str, Any]`, *optional*):
2983
+ generate_parameters (`dict[str, Any]`, *optional*):
2912
2984
  Additional parametrization of the text generation algorithm.
2913
2985
 
2914
2986
  Returns:
@@ -2917,7 +2989,7 @@ class InferenceClient:
2917
2989
  Raises:
2918
2990
  [`InferenceTimeoutError`]:
2919
2991
  If the model is unavailable or the request times out.
2920
- `HTTPError`:
2992
+ [`HfHubHTTPError`]:
2921
2993
  If the request fails with an HTTP error status code other than HTTP 503.
2922
2994
  `ValueError`:
2923
2995
  If only one of the `src_lang` and `tgt_lang` arguments are provided.
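A hedged sketch of `translation` with both `src_lang` and `tgt_lang` set, as required by the `ValueError` documented above; the model id and language codes are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
result = client.translation(
    "My name is Wolfgang and I live in Berlin",
    model="facebook/nllb-200-distilled-600M",  # illustrative multilingual model
    src_lang="eng_Latn",  # NLLB-style codes; src_lang and tgt_lang must be provided together
    tgt_lang="fra_Latn",
)
print(result.translation_text)
```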
@@ -2970,13 +3042,13 @@ class InferenceClient:
2970
3042
  *,
2971
3043
  model: Optional[str] = None,
2972
3044
  top_k: Optional[int] = None,
2973
- ) -> List[VisualQuestionAnsweringOutputElement]:
3045
+ ) -> list[VisualQuestionAnsweringOutputElement]:
2974
3046
  """
2975
3047
  Answering open-ended questions based on an image.
2976
3048
 
2977
3049
  Args:
2978
- image (`Union[str, Path, bytes, BinaryIO]`):
2979
- The input image for the context. It can be raw bytes, an image file, or a URL to an online image.
3050
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
3051
+ The input image for the context. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
2980
3052
  question (`str`):
2981
3053
  Question to be answered.
2982
3054
  model (`str`, *optional*):
@@ -2987,12 +3059,12 @@ class InferenceClient:
2987
3059
  The number of answers to return (will be chosen by order of likelihood). Note that we return less than
2988
3060
  topk answers if there are not enough options available within the context.
2989
3061
  Returns:
2990
- `List[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
3062
+ `list[VisualQuestionAnsweringOutputElement]`: a list of [`VisualQuestionAnsweringOutputElement`] items containing the predicted label and associated probability.
2991
3063
 
2992
3064
  Raises:
2993
3065
  `InferenceTimeoutError`:
2994
3066
  If the model is unavailable or the request times out.
2995
- `HTTPError`:
3067
+ [`HfHubHTTPError`]:
2996
3068
  If the request fails with an HTTP error status code other than HTTP 503.
2997
3069
 
2998
3070
  Example:
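A minimal sketch of `visual_question_answering`, which now accepts a PIL image as well as bytes, a file, or a URL; the image URL below is an illustrative assumption:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
answers = client.visual_question_answering(
    image="https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg",  # illustrative URL
    question="What animal is in the picture?",
    top_k=3,
)
for answer in answers:
    print(answer.answer, round(answer.score, 3))
```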
@@ -3025,21 +3097,21 @@ class InferenceClient:
3025
3097
  def zero_shot_classification(
3026
3098
  self,
3027
3099
  text: str,
3028
- candidate_labels: List[str],
3100
+ candidate_labels: list[str],
3029
3101
  *,
3030
3102
  multi_label: Optional[bool] = False,
3031
3103
  hypothesis_template: Optional[str] = None,
3032
3104
  model: Optional[str] = None,
3033
- ) -> List[ZeroShotClassificationOutputElement]:
3105
+ ) -> list[ZeroShotClassificationOutputElement]:
3034
3106
  """
3035
3107
  Provide as input a text and a set of candidate labels to classify the input text.
3036
3108
 
3037
3109
  Args:
3038
3110
  text (`str`):
3039
3111
  The input text to classify.
3040
- candidate_labels (`List[str]`):
3112
+ candidate_labels (`list[str]`):
3041
3113
  The set of possible class labels to classify the text into.
3042
- labels (`List[str]`, *optional*):
3114
+ labels (`list[str]`, *optional*):
3043
3115
  (deprecated) List of strings. Each string is the verbalization of a possible label for the input text.
3044
3116
  multi_label (`bool`, *optional*):
3045
3117
  Whether multiple candidate labels can be true. If false, the scores are normalized such that the sum of
@@ -3054,12 +3126,12 @@ class InferenceClient:
3054
3126
 
3055
3127
 
3056
3128
  Returns:
3057
- `List[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
3129
+ `list[ZeroShotClassificationOutputElement]`: List of [`ZeroShotClassificationOutputElement`] items containing the predicted labels and their confidence.
3058
3130
 
3059
3131
  Raises:
3060
3132
  [`InferenceTimeoutError`]:
3061
3133
  If the model is unavailable or the request times out.
3062
- `HTTPError`:
3134
+ [`HfHubHTTPError`]:
3063
3135
  If the request fails with an HTTP error status code other than HTTP 503.
3064
3136
 
3065
3137
  Example with `multi_label=False`:
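A hedged sketch of `zero_shot_classification` with `multi_label=False` (the input text and candidate labels are illustrative assumptions):

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_classification(
    "A new model offers an explanation for how the Galilean satellites formed.",
    candidate_labels=["space & cosmos", "microbiology", "robots", "archeology"],
    multi_label=False,
)
for result in results:
    print(result.label, round(result.score, 3))
```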
@@ -3131,22 +3203,22 @@ class InferenceClient:
3131
3203
  def zero_shot_image_classification(
3132
3204
  self,
3133
3205
  image: ContentT,
3134
- candidate_labels: List[str],
3206
+ candidate_labels: list[str],
3135
3207
  *,
3136
3208
  model: Optional[str] = None,
3137
3209
  hypothesis_template: Optional[str] = None,
3138
3210
  # deprecated argument
3139
- labels: List[str] = None, # type: ignore
3140
- ) -> List[ZeroShotImageClassificationOutputElement]:
3211
+ labels: list[str] = None, # type: ignore
3212
+ ) -> list[ZeroShotImageClassificationOutputElement]:
3141
3213
  """
3142
3214
  Provide input image and text labels to predict text labels for the image.
3143
3215
 
3144
3216
  Args:
3145
- image (`Union[str, Path, bytes, BinaryIO]`):
3146
- The input image to caption. It can be raw bytes, an image file, or a URL to an online image.
3147
- candidate_labels (`List[str]`):
3217
+ image (`Union[str, Path, bytes, BinaryIO, PIL.Image.Image]`):
3218
+ The input image to caption. It can be raw bytes, an image file, a URL to an online image, or a PIL Image.
3219
+ candidate_labels (`list[str]`):
3148
3220
  The candidate labels for this image
3149
- labels (`List[str]`, *optional*):
3221
+ labels (`list[str]`, *optional*):
3150
3222
  (deprecated) List of string possible labels. There must be at least 2 labels.
3151
3223
  model (`str`, *optional*):
3152
3224
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
@@ -3156,12 +3228,12 @@ class InferenceClient:
3156
3228
  replacing the placeholder with the candidate labels.
3157
3229
 
3158
3230
  Returns:
3159
- `List[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
3231
+ `list[ZeroShotImageClassificationOutputElement]`: List of [`ZeroShotImageClassificationOutputElement`] items containing the predicted labels and their confidence.
3160
3232
 
3161
3233
  Raises:
3162
3234
  [`InferenceTimeoutError`]:
3163
3235
  If the model is unavailable or the request times out.
3164
- `HTTPError`:
3236
+ [`HfHubHTTPError`]:
3165
3237
  If the request fails with an HTTP error status code other than HTTP 503.
3166
3238
 
3167
3239
  Example:
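A minimal sketch of `zero_shot_image_classification` with the documented `candidate_labels` argument; the image URL and labels are illustrative assumptions:

```python
from huggingface_hub import InferenceClient

client = InferenceClient()
results = client.zero_shot_image_classification(
    "https://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Cute_dog.jpg/320px-Cute_dog.jpg",  # illustrative URL
    candidate_labels=["dog", "cat", "horse"],
)
print(results[0].label, round(results[0].score, 3))
```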
@@ -3195,102 +3267,7 @@ class InferenceClient:
3195
3267
  response = self._inner_post(request_parameters)
3196
3268
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)
3197
3269
 
3198
- @_deprecate_method(
3199
- version="0.33.0",
3200
- message=(
3201
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
3202
- " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
3203
- ),
3204
- )
3205
- def list_deployed_models(
3206
- self, frameworks: Union[None, str, Literal["all"], List[str]] = None
3207
- ) -> Dict[str, List[str]]:
3208
- """
3209
- List models deployed on the HF Serverless Inference API service.
3210
-
3211
- This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
3212
- are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
3213
- specify `frameworks="all"` as input. Alternatively, if you know before-hand which framework you are interested
3214
- in, you can also restrict to search to this one (e.g. `frameworks="text-generation-inference"`). The more
3215
- frameworks are checked, the more time it will take.
3216
-
3217
- <Tip warning={true}>
3218
-
3219
- This endpoint method does not return a live list of all models available for the HF Inference API service.
3220
- It searches over a cached list of models that were recently available and the list may not be up to date.
3221
- If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].
3222
-
3223
- </Tip>
3224
-
3225
- <Tip>
3226
-
3227
- This endpoint method is mostly useful for discoverability. If you already know which model you want to use and want to
3228
- check its availability, you can directly use [`~InferenceClient.get_model_status`].
3229
-
3230
- </Tip>
3231
-
3232
- Args:
3233
- frameworks (`Literal["all"]` or `List[str]` or `str`, *optional*):
3234
- The frameworks to filter on. By default only a subset of the available frameworks are tested. If set to
3235
- "all", all available frameworks will be tested. It is also possible to provide a single framework or a
3236
- custom set of frameworks to check.
3237
-
3238
- Returns:
3239
- `Dict[str, List[str]]`: A dictionary mapping task names to a sorted list of model IDs.
3240
-
3241
- Example:
3242
- ```python
3243
- >>> from huggingface_hub import InferenceClient
3244
- >>> client = InferenceClient()
3245
-
3246
- # Discover zero-shot-classification models currently deployed
3247
- >>> models = client.list_deployed_models()
3248
- >>> models["zero-shot-classification"]
3249
- ['Narsil/deberta-large-mnli-zero-cls', 'facebook/bart-large-mnli', ...]
3250
-
3251
- # List from only 1 framework
3252
- >>> client.list_deployed_models("text-generation-inference")
3253
- {'text-generation': ['bigcode/starcoder', 'meta-llama/Llama-2-70b-chat-hf', ...], ...}
3254
- ```
3255
- """
3256
- if self.provider != "hf-inference":
3257
- raise ValueError(f"Listing deployed models is not supported on '{self.provider}'.")
3258
-
3259
- # Resolve which frameworks to check
3260
- if frameworks is None:
3261
- frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
3262
- elif frameworks == "all":
3263
- frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
3264
- elif isinstance(frameworks, str):
3265
- frameworks = [frameworks]
3266
- frameworks = list(set(frameworks))
3267
-
3268
- # Fetch them iteratively
3269
- models_by_task: Dict[str, List[str]] = {}
3270
-
3271
- def _unpack_response(framework: str, items: List[Dict]) -> None:
3272
- for model in items:
3273
- if framework == "sentence-transformers":
3274
- # Model running with the `sentence-transformers` framework can work with both tasks even if not
3275
- # branded as such in the API response
3276
- models_by_task.setdefault("feature-extraction", []).append(model["model_id"])
3277
- models_by_task.setdefault("sentence-similarity", []).append(model["model_id"])
3278
- else:
3279
- models_by_task.setdefault(model["task"], []).append(model["model_id"])
3280
-
3281
- for framework in frameworks:
3282
- response = get_session().get(
3283
- f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
3284
- )
3285
- hf_raise_for_status(response)
3286
- _unpack_response(framework, response.json())
3287
-
3288
- # Sort alphabetically for discoverability and return
3289
- for task, models in models_by_task.items():
3290
- models_by_task[task] = sorted(set(models), key=lambda x: x.lower())
3291
- return models_by_task
3292
-
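The removed `list_deployed_models` helper pointed callers to `HfApi.list_models` instead; a hedged sketch of that replacement path (the provider name and filters are illustrative assumptions):

```python
from huggingface_hub import HfApi

api = HfApi()
# Per the deprecation notice above: list warm models per provider instead of probing frameworks.
for model in api.list_models(inference_provider="together", pipeline_tag="text-generation", limit=5):
    print(model.id)
```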
3293
- def get_endpoint_info(self, *, model: Optional[str] = None) -> Dict[str, Any]:
3270
+ def get_endpoint_info(self, *, model: Optional[str] = None) -> dict[str, Any]:
3294
3271
  """
3295
3272
  Get information about the deployed endpoint.
3296
3273
 
@@ -3303,7 +3280,7 @@ class InferenceClient:
3303
3280
  Inference Endpoint. This parameter overrides the model defined at the instance level. Defaults to None.
3304
3281
 
3305
3282
  Returns:
3306
- `Dict[str, Any]`: Information about the endpoint.
3283
+ `dict[str, Any]`: Information about the endpoint.
3307
3284
 
3308
3285
  Example:
3309
3286
  ```py
@@ -3353,7 +3330,6 @@ class InferenceClient:
3353
3330
  Check the health of the deployed endpoint.
3354
3331
 
3355
3332
  Health check is only available with Inference Endpoints powered by Text-Generation-Inference (TGI) or Text-Embedding-Inference (TEI).
3356
- For Inference API, please use [`InferenceClient.get_model_status`] instead.
3357
3333
 
3358
3334
  Args:
3359
3335
  model (`str`, *optional*):
@@ -3377,75 +3353,12 @@ class InferenceClient:
3377
3353
  if model is None:
3378
3354
  raise ValueError("Model id not provided.")
3379
3355
  if not model.startswith(("http://", "https://")):
3380
- raise ValueError(
3381
- "Model must be an Inference Endpoint URL. For serverless Inference API, please use `InferenceClient.get_model_status`."
3382
- )
3356
+ raise ValueError("Model must be an Inference Endpoint URL.")
3383
3357
  url = model.rstrip("/") + "/health"
3384
3358
 
3385
3359
  response = get_session().get(url, headers=build_hf_headers(token=self.token))
3386
3360
  return response.status_code == 200
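A minimal sketch of `health_check` against a dedicated endpoint; plain model ids now raise a `ValueError` here, so the endpoint URL below is a hypothetical placeholder:

```python
from huggingface_hub import InferenceClient

client = InferenceClient(model="https://my-endpoint.endpoints.huggingface.cloud")  # hypothetical TGI/TEI endpoint URL
print("healthy" if client.health_check() else "unhealthy")
```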
3387
3361
 
3388
- @_deprecate_method(
3389
- version="0.33.0",
3390
- message=(
3391
- "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
3392
- " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
3393
- ),
3394
- )
3395
- def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
3396
- """
3397
- Get the status of a model hosted on the HF Inference API.
3398
-
3399
- <Tip>
3400
-
3401
- This endpoint is mostly useful when you already know which model you want to use and want to check its
3402
- availability. If you want to discover already deployed models, you should rather use [`~InferenceClient.list_deployed_models`].
3403
-
3404
- </Tip>
3405
-
3406
- Args:
3407
- model (`str`, *optional*):
3408
- Identifier of the model for witch the status gonna be checked. If model is not provided,
3409
- the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
3410
- identifier cannot be a URL.
3411
-
3412
-
3413
- Returns:
3414
- [`ModelStatus`]: An instance of ModelStatus dataclass, containing information,
3415
- about the state of the model: load, state, compute type and framework.
3416
-
3417
- Example:
3418
- ```py
3419
- >>> from huggingface_hub import InferenceClient
3420
- >>> client = InferenceClient()
3421
- >>> client.get_model_status("meta-llama/Meta-Llama-3-8B-Instruct")
3422
- ModelStatus(loaded=True, state='Loaded', compute_type='gpu', framework='text-generation-inference')
3423
- ```
3424
- """
3425
- if self.provider != "hf-inference":
3426
- raise ValueError(f"Getting model status is not supported on '{self.provider}'.")
3427
-
3428
- model = model or self.model
3429
- if model is None:
3430
- raise ValueError("Model id not provided.")
3431
- if model.startswith("https://"):
3432
- raise NotImplementedError("Model status is only available for Inference API endpoints.")
3433
- url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"
3434
-
3435
- response = get_session().get(url, headers=build_hf_headers(token=self.token))
3436
- hf_raise_for_status(response)
3437
- response_data = response.json()
3438
-
3439
- if "error" in response_data:
3440
- raise ValueError(response_data["error"])
3441
-
3442
- return ModelStatus(
3443
- loaded=response_data["loaded"],
3444
- state=response_data["state"],
3445
- compute_type=response_data["compute_type"],
3446
- framework=response_data["framework"],
3447
- )
3448
-
3449
3362
  @property
3450
3363
  def chat(self) -> "ProxyClientChat":
3451
3364
  return ProxyClientChat(self)