huggingface-hub 0.28.1__py3-none-any.whl → 0.29.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of huggingface-hub might be problematic.
Files changed (63)
  1. huggingface_hub/__init__.py +1 -4
  2. huggingface_hub/constants.py +16 -10
  3. huggingface_hub/file_download.py +10 -6
  4. huggingface_hub/hf_api.py +53 -23
  5. huggingface_hub/inference/_client.py +151 -84
  6. huggingface_hub/inference/_common.py +3 -27
  7. huggingface_hub/inference/_generated/_async_client.py +147 -83
  8. huggingface_hub/inference/_generated/types/__init__.py +1 -1
  9. huggingface_hub/inference/_generated/types/audio_classification.py +4 -5
  10. huggingface_hub/inference/_generated/types/audio_to_audio.py +3 -4
  11. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +7 -8
  12. huggingface_hub/inference/_generated/types/base.py +21 -0
  13. huggingface_hub/inference/_generated/types/chat_completion.py +29 -30
  14. huggingface_hub/inference/_generated/types/depth_estimation.py +3 -4
  15. huggingface_hub/inference/_generated/types/document_question_answering.py +5 -6
  16. huggingface_hub/inference/_generated/types/feature_extraction.py +5 -6
  17. huggingface_hub/inference/_generated/types/fill_mask.py +4 -5
  18. huggingface_hub/inference/_generated/types/image_classification.py +4 -5
  19. huggingface_hub/inference/_generated/types/image_segmentation.py +4 -5
  20. huggingface_hub/inference/_generated/types/image_to_image.py +5 -6
  21. huggingface_hub/inference/_generated/types/image_to_text.py +5 -6
  22. huggingface_hub/inference/_generated/types/object_detection.py +5 -6
  23. huggingface_hub/inference/_generated/types/question_answering.py +5 -6
  24. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -4
  25. huggingface_hub/inference/_generated/types/summarization.py +4 -5
  26. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -6
  27. huggingface_hub/inference/_generated/types/text2text_generation.py +4 -5
  28. huggingface_hub/inference/_generated/types/text_classification.py +4 -5
  29. huggingface_hub/inference/_generated/types/text_generation.py +12 -13
  30. huggingface_hub/inference/_generated/types/text_to_audio.py +5 -6
  31. huggingface_hub/inference/_generated/types/text_to_image.py +8 -15
  32. huggingface_hub/inference/_generated/types/text_to_speech.py +5 -6
  33. huggingface_hub/inference/_generated/types/text_to_video.py +4 -5
  34. huggingface_hub/inference/_generated/types/token_classification.py +4 -5
  35. huggingface_hub/inference/_generated/types/translation.py +4 -5
  36. huggingface_hub/inference/_generated/types/video_classification.py +4 -5
  37. huggingface_hub/inference/_generated/types/visual_question_answering.py +5 -6
  38. huggingface_hub/inference/_generated/types/zero_shot_classification.py +4 -5
  39. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +4 -5
  40. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +5 -6
  41. huggingface_hub/inference/_providers/__init__.py +44 -8
  42. huggingface_hub/inference/_providers/_common.py +239 -0
  43. huggingface_hub/inference/_providers/black_forest_labs.py +66 -0
  44. huggingface_hub/inference/_providers/fal_ai.py +31 -100
  45. huggingface_hub/inference/_providers/fireworks_ai.py +6 -0
  46. huggingface_hub/inference/_providers/hf_inference.py +58 -142
  47. huggingface_hub/inference/_providers/hyperbolic.py +43 -0
  48. huggingface_hub/inference/_providers/nebius.py +41 -0
  49. huggingface_hub/inference/_providers/novita.py +26 -0
  50. huggingface_hub/inference/_providers/replicate.py +24 -119
  51. huggingface_hub/inference/_providers/sambanova.py +3 -86
  52. huggingface_hub/inference/_providers/together.py +36 -130
  53. huggingface_hub/utils/_headers.py +5 -0
  54. huggingface_hub/utils/_hf_folder.py +4 -32
  55. huggingface_hub/utils/_http.py +85 -2
  56. huggingface_hub/utils/_typing.py +1 -1
  57. huggingface_hub/utils/logging.py +6 -0
  58. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/METADATA +1 -1
  59. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/RECORD +63 -57
  60. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/LICENSE +0 -0
  61. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/WHEEL +0 -0
  62. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/entry_points.txt +0 -0
  63. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py

@@ -22,11 +22,10 @@ import asyncio
  import base64
  import logging
  import re
- import time
  import warnings
  from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload

- from huggingface_hub.constants import ALL_INFERENCE_API_FRAMEWORKS, INFERENCE_ENDPOINT, MAIN_INFERENCE_API_FRAMEWORKS
+ from huggingface_hub import constants
  from huggingface_hub.errors import InferenceTimeoutError
  from huggingface_hub.inference._common import (
      TASKS_EXPECTING_IMAGES,
@@ -77,7 +76,6 @@ from huggingface_hub.inference._generated.types import (
      TextGenerationInputGrammarType,
      TextGenerationOutput,
      TextGenerationStreamOutput,
-     TextToImageTargetSize,
      TextToSpeechEarlyStoppingEnum,
      TokenClassificationAggregationStrategy,
      TokenClassificationOutputElement,
@@ -122,9 +120,9 @@ class AsyncInferenceClient:
              path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
              documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
          provider (`str`, *optional*):
-             Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`.
-             defaults to hf-inference (Hugging Face Serverless Inference API).
-             If model is a URL or `base_url` is passed, then `provider` is not used.
+             Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
+             defaults to hf-inference (Hugging Face Serverless Inference API).
+             If model is a URL or `base_url` is passed, then `provider` is not used.
          token (`str` or `bool`, *optional*):
              Hugging Face token. Will default to the locally saved token if not provided.
              Pass `token=False` if you don't want to send your token to the server.
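
For context, a minimal usage sketch of the expanded `provider` option documented above. The provider, model ID, and token below are illustrative; actual availability and billing depend on the chosen provider.

```py
# Illustrative sketch only: provider, model ID and token are placeholders.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(
        provider="fireworks-ai",  # any provider listed in the docstring above
        api_key="hf_...",         # your Hugging Face token
    )
    out = await client.chat_completion(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "Say hello."}],
        max_tokens=16,
    )
    print(out.choices[0].message.content)

asyncio.run(main())
```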
@@ -179,7 +177,7 @@ class AsyncInferenceClient:
              " It has the exact same behavior as `token`."
          )

-         self.model: Optional[str] = model
+         self.model: Optional[str] = base_url or model
          self.token: Optional[str] = token if token is not None else api_key
          self.headers = headers if headers is not None else {}

@@ -191,9 +189,6 @@ class AsyncInferenceClient:
          self.trust_env = trust_env
          self.proxies = proxies

-         # OpenAI compatibility
-         self.base_url = base_url
-
          # Keep track of the sessions to close them properly
          self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()

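The constructor change above folds `base_url` into `self.model`, so passing `base_url` is now equivalent to passing a URL as `model`. A small sketch of that equivalence:

```py
# Sketch of the equivalence implied by `self.model = base_url or model`.
from huggingface_hub import AsyncInferenceClient

a = AsyncInferenceClient(base_url="http://localhost:8080")
b = AsyncInferenceClient(model="http://localhost:8080")
assert a.model == b.model == "http://localhost:8080"
```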
@@ -262,8 +257,9 @@ class AsyncInferenceClient:
              "`InferenceClient.post` is deprecated and should not be used directly anymore."
          )
          provider_helper = HFInferenceTask(task or "unknown")
-         url = provider_helper.build_url(provider_helper.map_model(model))
-         headers = provider_helper.prepare_headers(headers=self.headers, api_key=self.token)
+         mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+         url = provider_helper._prepare_url(self.token, mapped_model)  # type: ignore[arg-type]
+         headers = provider_helper._prepare_headers(self.headers, self.token)  # type: ignore[arg-type]
          return await self._inner_post(
              request_parameters=RequestParameters(
                  url=url,
@@ -302,8 +298,6 @@ class AsyncInferenceClient:
          if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
              request_parameters.headers["Accept"] = "image/png"

-         t0 = time.time()
-         timeout = self.timeout
          while True:
              with _open_as_binary(request_parameters.data) as data_as_binary:
                  # Do not use context manager as we don't want to close the connection immediately when returning
@@ -334,27 +328,6 @@ class AsyncInferenceClient:
              except aiohttp.ClientResponseError as error:
                  error.response_error_payload = response_error_payload
                  await session.close()
-                 if response.status == 422 and request_parameters.task != "unknown":
-                     error.message += f". Make sure '{request_parameters.task}' task is supported by the model."
-                 if response.status == 503:
-                     # If Model is unavailable, either raise a TimeoutError...
-                     if timeout is not None and time.time() - t0 > timeout:
-                         raise InferenceTimeoutError(
-                             f"Model not loaded on the server: {request_parameters.url}. Please retry with a higher timeout"
-                             f" (current: {self.timeout}).",
-                             request=error.request,
-                             response=error.response,
-                         ) from error
-                     # ...or wait 1s and retry
-                     logger.info(f"Waiting for model to be loaded on the server: {error}")
-                     if "X-wait-for-model" not in request_parameters.headers and request_parameters.url.startswith(
-                         INFERENCE_ENDPOINT
-                     ):
-                         request_parameters.headers["X-wait-for-model"] = "1"
-                     await asyncio.sleep(1)
-                     if timeout is not None:
-                         timeout = max(self.timeout - (time.time() - t0), 1)  # type: ignore
-                     continue
                  raise error
              except Exception:
                  await session.close()
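
With the built-in wait-and-retry loop on HTTP 503 removed, callers that relied on the client waiting for a cold model can retry on their side. A hedged, illustrative sketch (not part of the library; adapt the exception type and backoff to your needs):

```py
# Not part of the library: a caller-side retry, since the client no longer waits
# for a cold model on HTTP 503.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def transcribe_with_retry(client: AsyncInferenceClient, audio_path: str, retries: int = 5):
    for attempt in range(retries):
        try:
            return await client.automatic_speech_recognition(audio_path)
        except Exception:  # narrow this to the HTTP error type you actually observe
            if attempt == retries - 1:
                raise
            await asyncio.sleep(2 ** attempt)  # simple exponential backoff
```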
@@ -500,6 +473,7 @@ class AsyncInferenceClient:
          audio: ContentT,
          *,
          model: Optional[str] = None,
+         extra_body: Optional[Dict] = None,
      ) -> AutomaticSpeechRecognitionOutput:
          """
          Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -510,8 +484,9 @@ class AsyncInferenceClient:
              model (`str`, *optional*):
                  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-             parameters (Dict[str, Any], *optional*):
-                 Additional parameters to pass to the model.
+             extra_body (`Dict`, *optional*):
+                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                 for supported parameters.
          Returns:
              [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.

@@ -533,7 +508,7 @@ class AsyncInferenceClient:
          provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
          request_parameters = provider_helper.prepare_request(
              inputs=audio,
-             parameters={},
+             parameters={**(extra_body or {})},
              headers=self.headers,
              model=model or self.model,
              api_key=self.token,
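
A hedged sketch of the new `extra_body` pass-through on `automatic_speech_recognition`; the extra key shown is illustrative and provider-dependent:

```py
# Illustrative: the extra key depends on the provider; it is forwarded as-is.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(provider="fal-ai", api_key="hf_...")
    out = await client.automatic_speech_recognition(
        "sample.flac",
        extra_body={"language": "en"},  # provider-specific, not validated client-side
    )
    print(out.text)

asyncio.run(main())
```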
@@ -564,6 +539,7 @@ class AsyncInferenceClient:
          tools: Optional[List[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
+         extra_body: Optional[Dict] = None,
      ) -> ChatCompletionOutput: ...

      @overload
@@ -589,6 +565,7 @@ class AsyncInferenceClient:
          tools: Optional[List[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
+         extra_body: Optional[Dict] = None,
      ) -> AsyncIterable[ChatCompletionStreamOutput]: ...

      @overload
@@ -614,6 +591,7 @@ class AsyncInferenceClient:
          tools: Optional[List[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
+         extra_body: Optional[Dict] = None,
      ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...

      async def chat_completion(
@@ -639,6 +617,7 @@ class AsyncInferenceClient:
          tools: Optional[List[ChatCompletionInputTool]] = None,
          top_logprobs: Optional[int] = None,
          top_p: Optional[float] = None,
+         extra_body: Optional[Dict] = None,
      ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
          """
          A method for completing conversations using a specified language model.
@@ -653,7 +632,7 @@ class AsyncInferenceClient:
          </Tip>

          <Tip>
-         Some parameters might not be supported by some providers.
+         You can pass provider-specific parameters to the model by using the `extra_body` argument.
          </Tip>

          Args:
@@ -708,7 +687,9 @@ class AsyncInferenceClient:
              tools (List of [`ChatCompletionInputTool`], *optional*):
                  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                  provide a list of functions the model may generate JSON inputs for.
-
+             extra_body (`Dict`, *optional*):
+                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                 for supported parameters.
          Returns:
              [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
                  Generated text returned from the server:
@@ -796,7 +777,7 @@ class AsyncInferenceClient:
              print(chunk.choices[0].delta.content)
          ```

-         Example using a third-party provider directly. Usage will be billed on your Together AI account.
+         Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
          ```py
          >>> from huggingface_hub import InferenceClient
          >>> client = InferenceClient(
@@ -806,6 +787,7 @@ class AsyncInferenceClient:
          >>> client.chat_completion(
          ...     model="meta-llama/Meta-Llama-3-8B-Instruct",
          ...     messages=[{"role": "user", "content": "What is the capital of France?"}],
+         ...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
          ... )
          ```

@@ -977,9 +959,9 @@ class AsyncInferenceClient:
          provider_helper = get_provider_helper(self.provider, task="conversational")

          # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
-         # `self.base_url` and `self.model` takes precedence over 'model' argument for building URL.
+         # `self.model` takes precedence over 'model' argument for building URL.
          # `model` takes precedence for payload value.
-         model_id_or_url = self.base_url or self.model or model
+         model_id_or_url = self.model or model
          payload_model = model or self.model

          # Prepare the payload
@@ -1002,6 +984,7 @@ class AsyncInferenceClient:
              "top_p": top_p,
              "stream": stream,
              "stream_options": stream_options,
+             **(extra_body or {}),
          }
          request_parameters = provider_helper.prepare_request(
              inputs=messages,
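
A hedged async variant of the `extra_body` example shown in the docstring above; extra keys are merged into the request payload for the selected provider:

```py
# Async counterpart of the docstring example; `safety_model` is a Together-specific key.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(provider="together", api_key="hf_...")
    out = await client.chat_completion(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},  # merged into the payload
    )
    print(out.choices[0].message.content)

asyncio.run(main())
```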
@@ -1664,19 +1647,10 @@ class AsyncInferenceClient:
          response = await self._inner_post(request_parameters)
          return _bytes_to_list(response)

-     @_deprecate_arguments(
-         version="0.29",
-         deprecated_args=["parameters"],
-         custom_message=(
-             "The `parameters` argument is deprecated and will be removed in a future version. "
-             "Provide individual parameters instead: `clean_up_tokenization_spaces`, `generate_parameters`, and `truncation`."
-         ),
-     )
      async def summarization(
          self,
          text: str,
          *,
-         parameters: Optional[Dict[str, Any]] = None,
          model: Optional[str] = None,
          clean_up_tokenization_spaces: Optional[bool] = None,
          generate_parameters: Optional[Dict[str, Any]] = None,
@@ -1688,9 +1662,6 @@ class AsyncInferenceClient:
          Args:
              text (`str`):
                  The input text to summarize.
-             parameters (`Dict[str, Any]`, *optional*):
-                 Additional parameters for summarization. Check out this [page](https://huggingface.co/docs/api-inference/detailed_parameters#summarization-task)
-                 for more details.
              model (`str`, *optional*):
                  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
@@ -1718,12 +1689,11 @@ class AsyncInferenceClient:
          SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
          ```
          """
-         if parameters is None:
-             parameters = {
-                 "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
-                 "generate_parameters": generate_parameters,
-                 "truncation": truncation,
-             }
+         parameters = {
+             "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+             "generate_parameters": generate_parameters,
+             "truncation": truncation,
+         }
          provider_helper = get_provider_helper(self.provider, task="summarization")
          request_parameters = provider_helper.prepare_request(
              inputs=text,
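
A hedged sketch of calling `summarization` with the individual keyword arguments that replace the removed `parameters` dict (model and values are illustrative):

```py
# Illustrative: summarization options are now plain keyword arguments.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient()
    out = await client.summarization(
        "The Eiffel tower is one of the most famous landmarks in the world...",
        model="facebook/bart-large-cnn",
        clean_up_tokenization_spaces=True,
        truncation="longest_first",  # one of the accepted truncation strategies
    )
    print(out.generated_text)

asyncio.run(main())
```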
@@ -2458,15 +2428,14 @@ class AsyncInferenceClient:
          prompt: str,
          *,
          negative_prompt: Optional[str] = None,
-         height: Optional[float] = None,
-         width: Optional[float] = None,
+         height: Optional[int] = None,
+         width: Optional[int] = None,
          num_inference_steps: Optional[int] = None,
          guidance_scale: Optional[float] = None,
          model: Optional[str] = None,
          scheduler: Optional[str] = None,
-         target_size: Optional[TextToImageTargetSize] = None,
          seed: Optional[int] = None,
-         **kwargs,
+         extra_body: Optional[Dict[str, Any]] = None,
      ) -> "Image":
          """
          Generate an image based on a given text using a specified model.
@@ -2477,15 +2446,19 @@ class AsyncInferenceClient:

          </Tip>

+         <Tip>
+         You can pass provider-specific parameters to the model by using the `extra_body` argument.
+         </Tip>
+
          Args:
              prompt (`str`):
                  The prompt to generate an image from.
              negative_prompt (`str`, *optional*):
                  One prompt to guide what NOT to include in image generation.
-             height (`float`, *optional*):
-                 The height in pixels of the image to generate.
-             width (`float`, *optional*):
-                 The width in pixels of the image to generate.
+             height (`int`, *optional*):
+                 The height in pixels of the output image
+             width (`int`, *optional*):
+                 The width in pixels of the output image
              num_inference_steps (`int`, *optional*):
                  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                  expense of slower inference.
@@ -2498,10 +2471,11 @@ class AsyncInferenceClient:
                  Defaults to None.
              scheduler (`str`, *optional*):
                  Override the scheduler with a compatible one.
-             target_size (`TextToImageTargetSize`, *optional*):
-                 The size in pixel of the output image
              seed (`int`, *optional*):
                  Seed for the random number generator.
+             extra_body (`Dict[str, Any]`, *optional*):
+                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                 for supported parameters.

          Returns:
              `Image`: The generated image.
@@ -2555,6 +2529,21 @@ class AsyncInferenceClient:
          ... )
          >>> image.save("astronaut.png")
          ```
+
+         Example using Replicate provider with extra parameters
+         ```py
+         >>> from huggingface_hub import InferenceClient
+         >>> client = InferenceClient(
+         ...     provider="replicate",  # Use replicate provider
+         ...     api_key="hf_...",  # Pass your HF token
+         ... )
+         >>> image = client.text_to_image(
+         ...     "An astronaut riding a horse on the moon.",
+         ...     model="black-forest-labs/FLUX.1-schnell",
+         ...     extra_body={"output_quality": 100},
+         ... )
+         >>> image.save("astronaut.png")
+         ```
          """
          provider_helper = get_provider_helper(self.provider, task="text-to-image")
          request_parameters = provider_helper.prepare_request(
@@ -2566,9 +2555,8 @@ class AsyncInferenceClient:
                  "num_inference_steps": num_inference_steps,
                  "guidance_scale": guidance_scale,
                  "scheduler": scheduler,
-                 "target_size": target_size,
                  "seed": seed,
-                 **kwargs,
+                 **(extra_body or {}),
              },
              headers=self.headers,
              model=model or self.model,
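
A hedged async variant of the docstring example above; note that `height` and `width` are now typed as `int`, and that calls on this client must be awaited. Model and extra key are illustrative:

```py
# Illustrative async variant; `height`/`width` are ints in this release.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(provider="replicate", api_key="hf_...")
    image = await client.text_to_image(
        "An astronaut riding a horse on the moon.",
        model="black-forest-labs/FLUX.1-schnell",
        height=512,
        width=512,
        extra_body={"output_quality": 100},  # provider-specific knob
    )
    image.save("astronaut.png")

asyncio.run(main())
```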
@@ -2588,10 +2576,15 @@ class AsyncInferenceClient:
          num_frames: Optional[float] = None,
          num_inference_steps: Optional[int] = None,
          seed: Optional[int] = None,
+         extra_body: Optional[Dict[str, Any]] = None,
      ) -> bytes:
          """
          Generate a video based on a given text.

+         <Tip>
+         You can pass provider-specific parameters to the model by using the `extra_body` argument.
+         </Tip>
+
          Args:
              prompt (`str`):
                  The prompt to generate a video from.
@@ -2611,6 +2604,9 @@ class AsyncInferenceClient:
                  expense of slower inference.
              seed (`int`, *optional*):
                  Seed for the random number generator.
+             extra_body (`Dict[str, Any]`, *optional*):
+                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                 for supported parameters.

          Returns:
              `bytes`: The generated video.
@@ -2656,6 +2652,7 @@ class AsyncInferenceClient:
                  "num_frames": num_frames,
                  "num_inference_steps": num_inference_steps,
                  "seed": seed,
+                 **(extra_body or {}),
              },
              headers=self.headers,
              model=model or self.model,
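
A hedged sketch of `text_to_video` with the new `extra_body` argument; provider, model, and extra key are illustrative:

```py
# Illustrative: provider, model and extra key are placeholders.
import asyncio
from huggingface_hub import AsyncInferenceClient

async def main():
    client = AsyncInferenceClient(provider="fal-ai", api_key="hf_...")
    video = await client.text_to_video(
        "A young man walking on the street",
        model="genmo/mochi-1-preview",
        num_frames=64,
        extra_body={"fps": 24},  # forwarded as-is to the provider
    )
    with open("video.mp4", "wb") as f:
        f.write(video)

asyncio.run(main())
```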
@@ -2686,10 +2683,15 @@ class AsyncInferenceClient:
          top_p: Optional[float] = None,
          typical_p: Optional[float] = None,
          use_cache: Optional[bool] = None,
+         extra_body: Optional[Dict[str, Any]] = None,
      ) -> bytes:
          """
          Synthesize an audio of a voice pronouncing a given text.

+         <Tip>
+         You can pass provider-specific parameters to the model by using the `extra_body` argument.
+         </Tip>
+
          Args:
              text (`str`):
                  The text to synthesize.
@@ -2743,7 +2745,9 @@ class AsyncInferenceClient:
                  paper](https://hf.co/papers/2202.00666) for more details.
              use_cache (`bool`, *optional*):
                  Whether the model should use the past last key/values attentions to speed up decoding
-
+             extra_body (`Dict[str, Any]`, *optional*):
+                 Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                 for supported parameters.
          Returns:
              `bytes`: The generated audio.

@@ -2791,6 +2795,51 @@ class AsyncInferenceClient:
          ... )
          >>> Path("hello_world.flac").write_bytes(audio)
          ```
+         Example using Replicate provider with extra parameters
+         ```py
+         >>> from huggingface_hub import InferenceClient
+         >>> client = InferenceClient(
+         ...     provider="replicate",  # Use replicate provider
+         ...     api_key="hf_...",  # Pass your HF token
+         ... )
+         >>> audio = client.text_to_speech(
+         ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
+         ...     model="hexgrad/Kokoro-82M",
+         ...     extra_body={"voice": "af_nicole"},
+         ... )
+         >>> Path("hello.flac").write_bytes(audio)
+         ```
+
+         Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
+         ```py
+         >>> from huggingface_hub import InferenceClient
+         >>> lyrics = '''
+         ... [verse]
+         ... In the town where I was born
+         ... Lived a man who sailed to sea
+         ... And he told us of his life
+         ... In the land of submarines
+         ... So we sailed on to the sun
+         ... 'Til we found a sea of green
+         ... And we lived beneath the waves
+         ... In our yellow submarine
+
+         ... [chorus]
+         ... We all live in a yellow submarine
+         ... Yellow submarine, yellow submarine
+         ... We all live in a yellow submarine
+         ... Yellow submarine, yellow submarine
+         ... '''
+         >>> genres = "pavarotti-style tenor voice"
+         >>> client = InferenceClient(
+         ...     provider="fal-ai",
+         ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
+         ...     api_key=...,
+         ... )
+         >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
+         >>> with open("output.mp3", "wb") as f:
+         ...     f.write(audio)
+         ```
          """
          provider_helper = get_provider_helper(self.provider, task="text-to-speech")
          request_parameters = provider_helper.prepare_request(
@@ -2812,6 +2861,7 @@ class AsyncInferenceClient:
                  "top_p": top_p,
                  "typical_p": typical_p,
                  "use_cache": use_cache,
+                 **(extra_body or {}),
              },
              headers=self.headers,
              model=model or self.model,
@@ -3251,11 +3301,18 @@ class AsyncInferenceClient:
          response = await self._inner_post(request_parameters)
          return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

+     @_deprecate_method(
+         version="0.33.0",
+         message=(
+             "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+             " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
+         ),
+     )
      async def list_deployed_models(
          self, frameworks: Union[None, str, Literal["all"], List[str]] = None
      ) -> Dict[str, List[str]]:
          """
-         List models deployed on the Serverless Inference API service.
+         List models deployed on the HF Serverless Inference API service.

          This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
          are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
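
A hedged sketch of the replacement suggested by the deprecation message above, using `HfApi.list_models` with the `inference_provider` filter (provider name illustrative):

```py
# Sketch of the suggested replacement; the provider name is illustrative.
from huggingface_hub import HfApi

api = HfApi()
for model in api.list_models(inference_provider="together", limit=10):
    print(model.id)
```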
@@ -3265,7 +3322,7 @@ class AsyncInferenceClient:

          <Tip warning={true}>

-         This endpoint method does not return a live list of all models available for the Serverless Inference API service.
+         This endpoint method does not return a live list of all models available for the HF Inference API service.
          It searches over a cached list of models that were recently available and the list may not be up to date.
          If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].

@@ -3308,9 +3365,9 @@ class AsyncInferenceClient:

          # Resolve which frameworks to check
          if frameworks is None:
-             frameworks = MAIN_INFERENCE_API_FRAMEWORKS
+             frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
          elif frameworks == "all":
-             frameworks = ALL_INFERENCE_API_FRAMEWORKS
+             frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
          elif isinstance(frameworks, str):
              frameworks = [frameworks]
          frameworks = list(set(frameworks))
@@ -3330,7 +3387,7 @@ class AsyncInferenceClient:

          for framework in frameworks:
              response = get_session().get(
-                 f"{INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
+                 f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
              )
              hf_raise_for_status(response)
              _unpack_response(framework, response.json())
@@ -3434,7 +3491,7 @@ class AsyncInferenceClient:
          if model.startswith(("http://", "https://")):
              url = model.rstrip("/") + "/info"
          else:
-             url = f"{INFERENCE_ENDPOINT}/models/{model}/info"
+             url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

          async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
              response = await client.get(url, proxy=self.proxies)
@@ -3480,9 +3537,16 @@ class AsyncInferenceClient:
              response = await client.get(url, proxy=self.proxies)
              return response.status == 200

+     @_deprecate_method(
+         version="0.33.0",
+         message=(
+             "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+             " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
+         ),
+     )
      async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
          """
-         Get the status of a model hosted on the Inference API.
+         Get the status of a model hosted on the HF Inference API.

          <Tip>

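A hedged sketch of the replacement suggested by the deprecation message above, using `HfApi.model_info`; the `expand=["inference"]` argument and the `inference` attribute are assumed from the Hub API, not shown in this diff:

```py
# Sketch of the suggested replacement; `expand=["inference"]` and the `inference`
# attribute are assumed from the Hub API, not from this diff.
from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm" or "cold"
```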
@@ -3494,7 +3558,7 @@ class AsyncInferenceClient:
          Args:
              model (`str`, *optional*):
                  Identifier of the model for witch the status gonna be checked. If model is not provided,
-                 the model associated with this instance of [`InferenceClient`] will be used. Only InferenceAPI service can be checked so the
+                 the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
                  identifier cannot be a URL.


@@ -3519,7 +3583,7 @@ class AsyncInferenceClient:
              raise ValueError("Model id not provided.")
          if model.startswith("https://"):
              raise NotImplementedError("Model status is only available for Inference API endpoints.")
-         url = f"{INFERENCE_ENDPOINT}/status/{model}"
+         url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"

          async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
              response = await client.get(url, proxy=self.proxies)

huggingface_hub/inference/_generated/types/__init__.py

@@ -141,7 +141,7 @@ from .text_to_audio import (
      TextToAudioOutput,
      TextToAudioParameters,
  )
- from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters, TextToImageTargetSize
+ from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
  from .text_to_speech import (
      TextToSpeechEarlyStoppingEnum,
      TextToSpeechGenerationParameters,

huggingface_hub/inference/_generated/types/audio_classification.py

@@ -3,16 +3,15 @@
  # See:
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
- from dataclasses import dataclass
  from typing import Literal, Optional

- from .base import BaseInferenceType
+ from .base import BaseInferenceType, dataclass_with_extra


  AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


- @dataclass
+ @dataclass_with_extra
  class AudioClassificationParameters(BaseInferenceType):
      """Additional inference parameters for Audio Classification"""

@@ -22,7 +21,7 @@ class AudioClassificationParameters(BaseInferenceType):
      """When specified, limits the output to the top K most probable classes."""


- @dataclass
+ @dataclass_with_extra
  class AudioClassificationInput(BaseInferenceType):
      """Inputs for Audio Classification inference"""

@@ -34,7 +33,7 @@ class AudioClassificationInput(BaseInferenceType):
      """Additional inference parameters for Audio Classification"""


- @dataclass
+ @dataclass_with_extra
  class AudioClassificationOutputElement(BaseInferenceType):
      """Outputs for Audio Classification inference"""


huggingface_hub/inference/_generated/types/audio_to_audio.py

@@ -3,13 +3,12 @@
  # See:
  # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
  # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
- from dataclasses import dataclass
  from typing import Any

- from .base import BaseInferenceType
+ from .base import BaseInferenceType, dataclass_with_extra


- @dataclass
+ @dataclass_with_extra
  class AudioToAudioInput(BaseInferenceType):
      """Inputs for Audio to Audio inference"""

@@ -17,7 +16,7 @@ class AudioToAudioInput(BaseInferenceType):
      """The input audio data"""


- @dataclass
+ @dataclass_with_extra
  class AudioToAudioOutputElement(BaseInferenceType):
      """Outputs of inference for the Audio To Audio task
      A generated audio file with its label.
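
A hedged reading of the `@dataclass` to `@dataclass_with_extra` switch shown in these generated types (the decorator is added in `types/base.py` in this release): the assumption, taken from the name alone, is that provider payloads carrying keys not declared on the dataclass still parse cleanly. Illustrative only:

```py
# Assumption (from the decorator name only): extra keys in provider payloads are
# tolerated by the generated dataclasses instead of breaking parsing.
from huggingface_hub.inference._generated.types import AudioClassificationOutputElement

payload = [{"label": "dog", "score": 0.9, "provider_specific_field": 42}]
elements = AudioClassificationOutputElement.parse_obj_as_list(payload)
print(elements[0].label, elements[0].score)
```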