huggingface-hub 0.28.1__py3-none-any.whl → 0.29.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this version of huggingface-hub has been flagged by the registry as a potentially problematic release.
Files changed (63)
  1. huggingface_hub/__init__.py +1 -4
  2. huggingface_hub/constants.py +16 -10
  3. huggingface_hub/file_download.py +10 -6
  4. huggingface_hub/hf_api.py +53 -23
  5. huggingface_hub/inference/_client.py +151 -84
  6. huggingface_hub/inference/_common.py +3 -27
  7. huggingface_hub/inference/_generated/_async_client.py +147 -83
  8. huggingface_hub/inference/_generated/types/__init__.py +1 -1
  9. huggingface_hub/inference/_generated/types/audio_classification.py +4 -5
  10. huggingface_hub/inference/_generated/types/audio_to_audio.py +3 -4
  11. huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +7 -8
  12. huggingface_hub/inference/_generated/types/base.py +21 -0
  13. huggingface_hub/inference/_generated/types/chat_completion.py +29 -30
  14. huggingface_hub/inference/_generated/types/depth_estimation.py +3 -4
  15. huggingface_hub/inference/_generated/types/document_question_answering.py +5 -6
  16. huggingface_hub/inference/_generated/types/feature_extraction.py +5 -6
  17. huggingface_hub/inference/_generated/types/fill_mask.py +4 -5
  18. huggingface_hub/inference/_generated/types/image_classification.py +4 -5
  19. huggingface_hub/inference/_generated/types/image_segmentation.py +4 -5
  20. huggingface_hub/inference/_generated/types/image_to_image.py +5 -6
  21. huggingface_hub/inference/_generated/types/image_to_text.py +5 -6
  22. huggingface_hub/inference/_generated/types/object_detection.py +5 -6
  23. huggingface_hub/inference/_generated/types/question_answering.py +5 -6
  24. huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -4
  25. huggingface_hub/inference/_generated/types/summarization.py +4 -5
  26. huggingface_hub/inference/_generated/types/table_question_answering.py +5 -6
  27. huggingface_hub/inference/_generated/types/text2text_generation.py +4 -5
  28. huggingface_hub/inference/_generated/types/text_classification.py +4 -5
  29. huggingface_hub/inference/_generated/types/text_generation.py +12 -13
  30. huggingface_hub/inference/_generated/types/text_to_audio.py +5 -6
  31. huggingface_hub/inference/_generated/types/text_to_image.py +8 -15
  32. huggingface_hub/inference/_generated/types/text_to_speech.py +5 -6
  33. huggingface_hub/inference/_generated/types/text_to_video.py +4 -5
  34. huggingface_hub/inference/_generated/types/token_classification.py +4 -5
  35. huggingface_hub/inference/_generated/types/translation.py +4 -5
  36. huggingface_hub/inference/_generated/types/video_classification.py +4 -5
  37. huggingface_hub/inference/_generated/types/visual_question_answering.py +5 -6
  38. huggingface_hub/inference/_generated/types/zero_shot_classification.py +4 -5
  39. huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +4 -5
  40. huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +5 -6
  41. huggingface_hub/inference/_providers/__init__.py +44 -8
  42. huggingface_hub/inference/_providers/_common.py +239 -0
  43. huggingface_hub/inference/_providers/black_forest_labs.py +66 -0
  44. huggingface_hub/inference/_providers/fal_ai.py +31 -100
  45. huggingface_hub/inference/_providers/fireworks_ai.py +6 -0
  46. huggingface_hub/inference/_providers/hf_inference.py +58 -142
  47. huggingface_hub/inference/_providers/hyperbolic.py +43 -0
  48. huggingface_hub/inference/_providers/nebius.py +41 -0
  49. huggingface_hub/inference/_providers/novita.py +26 -0
  50. huggingface_hub/inference/_providers/replicate.py +24 -119
  51. huggingface_hub/inference/_providers/sambanova.py +3 -86
  52. huggingface_hub/inference/_providers/together.py +36 -130
  53. huggingface_hub/utils/_headers.py +5 -0
  54. huggingface_hub/utils/_hf_folder.py +4 -32
  55. huggingface_hub/utils/_http.py +85 -2
  56. huggingface_hub/utils/_typing.py +1 -1
  57. huggingface_hub/utils/logging.py +6 -0
  58. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/METADATA +1 -1
  59. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/RECORD +63 -57
  60. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/LICENSE +0 -0
  61. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/WHEEL +0 -0
  62. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/entry_points.txt +0 -0
  63. {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0rc0.dist-info}/top_level.txt +0 -0
@@ -35,13 +35,12 @@
  import base64
  import logging
  import re
- import time
  import warnings
  from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload

  from requests import HTTPError

- from huggingface_hub.constants import ALL_INFERENCE_API_FRAMEWORKS, INFERENCE_ENDPOINT, MAIN_INFERENCE_API_FRAMEWORKS
+ from huggingface_hub import constants
  from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
  from huggingface_hub.inference._common import (
  TASKS_EXPECTING_IMAGES,
@@ -92,7 +91,6 @@ from huggingface_hub.inference._generated.types import (
  TextGenerationInputGrammarType,
  TextGenerationOutput,
  TextGenerationStreamOutput,
- TextToImageTargetSize,
  TextToSpeechEarlyStoppingEnum,
  TokenClassificationAggregationStrategy,
  TokenClassificationOutputElement,
@@ -134,9 +132,9 @@ class InferenceClient:
  path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
  documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
  provider (`str`, *optional*):
- Name of the provider to use for inference. Can be `"replicate"`, `"together"`, `"fal-ai"`, `"sambanova"` or `"hf-inference"`.
- defaults to hf-inference (Hugging Face Serverless Inference API).
- If model is a URL or `base_url` is passed, then `provider` is not used.
+ Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
+ defaults to hf-inference (Hugging Face Serverless Inference API).
+ If model is a URL or `base_url` is passed, then `provider` is not used.
  token (`str` or `bool`, *optional*):
  Hugging Face token. Will default to the locally saved token if not provided.
  Pass `token=False` if you don't want to send your token to the server.
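
The hunk above broadens the documented `provider` options. As a minimal sketch (not part of the diff itself), selecting a provider at construction time looks like this; the model ID and prompt are illustrative:

```py
from huggingface_hub import InferenceClient

# Route requests through a specific provider instead of the default "hf-inference".
client = InferenceClient(
    provider="together",  # any provider from the list documented above
    api_key="hf_...",     # Hugging Face token
)
completion = client.chat_completion(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(completion.choices[0].message)
```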
@@ -188,7 +186,7 @@ class InferenceClient:
  " It has the exact same behavior as `token`."
  )

- self.model: Optional[str] = model
+ self.model: Optional[str] = base_url or model
  self.token: Optional[str] = token if token is not None else api_key
  self.headers = headers if headers is not None else {}

@@ -199,9 +197,6 @@ class InferenceClient:
  self.timeout = timeout
  self.proxies = proxies

- # OpenAI compatibility
- self.base_url = base_url
-
  def __repr__(self):
  return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

@@ -267,8 +262,9 @@ class InferenceClient:
  "`InferenceClient.post` is deprecated and should not be used directly anymore."
  )
  provider_helper = HFInferenceTask(task or "unknown")
- url = provider_helper.build_url(provider_helper.map_model(model))
- headers = provider_helper.prepare_headers(headers=self.headers, api_key=self.token)
+ mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+ url = provider_helper._prepare_url(self.token, mapped_model) # type: ignore[arg-type]
+ headers = provider_helper._prepare_headers(self.headers, self.token) # type: ignore[arg-type]
  return self._inner_post(
  request_parameters=RequestParameters(
  url=url,
@@ -304,8 +300,6 @@ class InferenceClient:
  if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
  request_parameters.headers["Accept"] = "image/png"

- t0 = time.time()
- timeout = self.timeout
  while True:
  with _open_as_binary(request_parameters.data) as data_as_binary:
  try:
@@ -328,28 +322,10 @@ class InferenceClient:
  return response.iter_lines() if stream else response.content
  except HTTPError as error:
  if error.response.status_code == 422 and request_parameters.task != "unknown":
- error.args = (
- f"{error.args[0]}\nMake sure '{request_parameters.task}' task is supported by the model.",
- ) + error.args[1:]
- if error.response.status_code == 503:
- # If Model is unavailable, either raise a TimeoutError...
- if timeout is not None and time.time() - t0 > timeout:
- raise InferenceTimeoutError(
- f"Model not loaded on the server: {request_parameters.url}. Please retry with a higher timeout (current:"
- f" {self.timeout}).",
- request=error.request,
- response=error.response,
- ) from error
- # ...or wait 1s and retry
- logger.info(f"Waiting for model to be loaded on the server: {error}")
- time.sleep(1)
- if "X-wait-for-model" not in request_parameters.headers and request_parameters.url.startswith(
- INFERENCE_ENDPOINT
- ):
- request_parameters.headers["X-wait-for-model"] = "1"
- if timeout is not None:
- timeout = max(self.timeout - (time.time() - t0), 1) # type: ignore
- continue
+ msg = str(error.args[0])
+ if len(error.response.text) > 0:
+ msg += f"\n{error.response.text}\n"
+ error.args = (msg,) + error.args[1:]
  raise

  def audio_classification(
@@ -464,6 +440,7 @@ class InferenceClient:
  audio: ContentT,
  *,
  model: Optional[str] = None,
+ extra_body: Optional[Dict] = None,
  ) -> AutomaticSpeechRecognitionOutput:
  """
  Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -474,8 +451,9 @@ class InferenceClient:
  model (`str`, *optional*):
  The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. If not provided, the default recommended model for ASR will be used.
- parameters (Dict[str, Any], *optional*):
- Additional parameters to pass to the model.
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
  Returns:
  [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.

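The `parameters` argument of `automatic_speech_recognition` is replaced by `extra_body`, which is forwarded to the provider as-is. A short sketch under that assumption; the provider, model ID and the `"language"` key are illustrative, not taken from this diff:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai", api_key="hf_...")
output = client.automatic_speech_recognition(
    "sample.flac",                    # local audio file, URL, or raw bytes
    model="openai/whisper-large-v3",  # illustrative model ID
    extra_body={"language": "en"},    # hypothetical provider-specific parameter
)
print(output.text)
```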
@@ -496,7 +474,7 @@ class InferenceClient:
  provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
  request_parameters = provider_helper.prepare_request(
  inputs=audio,
- parameters={},
+ parameters={**(extra_body or {})},
  headers=self.headers,
  model=model or self.model,
  api_key=self.token,
@@ -527,6 +505,7 @@ class InferenceClient:
  tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
  ) -> ChatCompletionOutput: ...

  @overload
@@ -552,6 +531,7 @@ class InferenceClient:
  tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
  ) -> Iterable[ChatCompletionStreamOutput]: ...

  @overload
@@ -577,6 +557,7 @@ class InferenceClient:
  tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

  def chat_completion(
@@ -602,6 +583,7 @@ class InferenceClient:
  tools: Optional[List[ChatCompletionInputTool]] = None,
  top_logprobs: Optional[int] = None,
  top_p: Optional[float] = None,
+ extra_body: Optional[Dict] = None,
  ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
  """
  A method for completing conversations using a specified language model.
@@ -616,7 +598,7 @@ class InferenceClient:
  </Tip>

  <Tip>
- Some parameters might not be supported by some providers.
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
  </Tip>

  Args:
@@ -671,7 +653,9 @@ class InferenceClient:
  tools (List of [`ChatCompletionInputTool`], *optional*):
  A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
  provide a list of functions the model may generate JSON inputs for.
-
+ extra_body (`Dict`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
  Returns:
  [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
  Generated text returned from the server:
@@ -756,7 +740,7 @@ class InferenceClient:
  print(chunk.choices[0].delta.content)
  ```

- Example using a third-party provider directly. Usage will be billed on your Together AI account.
+ Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
  ```py
  >>> from huggingface_hub import InferenceClient
  >>> client = InferenceClient(
@@ -766,6 +750,7 @@ class InferenceClient:
  >>> client.chat_completion(
  ... model="meta-llama/Meta-Llama-3-8B-Instruct",
  ... messages=[{"role": "user", "content": "What is the capital of France?"}],
+ ... extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
  ... )
  ```

@@ -934,9 +919,9 @@ class InferenceClient:
  provider_helper = get_provider_helper(self.provider, task="conversational")

  # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
- # `self.base_url` and `self.model` takes precedence over 'model' argument for building URL.
+ # `self.model` takes precedence over 'model' argument for building URL.
  # `model` takes precedence for payload value.
- model_id_or_url = self.base_url or self.model or model
+ model_id_or_url = self.model or model
  payload_model = model or self.model

  # Prepare the payload
@@ -959,6 +944,7 @@ class InferenceClient:
  "top_p": top_p,
  "stream": stream,
  "stream_options": stream_options,
+ **(extra_body or {}),
  }
  request_parameters = provider_helper.prepare_request(
  inputs=messages,
@@ -1611,19 +1597,10 @@ class InferenceClient:
  response = self._inner_post(request_parameters)
  return _bytes_to_list(response)

- @_deprecate_arguments(
- version="0.29",
- deprecated_args=["parameters"],
- custom_message=(
- "The `parameters` argument is deprecated and will be removed in a future version. "
- "Provide individual parameters instead: `clean_up_tokenization_spaces`, `generate_parameters`, and `truncation`."
- ),
- )
  def summarization(
  self,
  text: str,
  *,
- parameters: Optional[Dict[str, Any]] = None,
  model: Optional[str] = None,
  clean_up_tokenization_spaces: Optional[bool] = None,
  generate_parameters: Optional[Dict[str, Any]] = None,
@@ -1635,9 +1612,6 @@ class InferenceClient:
  Args:
  text (`str`):
  The input text to summarize.
- parameters (`Dict[str, Any]`, *optional*):
- Additional parameters for summarization. Check out this [page](https://huggingface.co/docs/api-inference/detailed_parameters#summarization-task)
- for more details.
  model (`str`, *optional*):
  The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
  Inference Endpoint. If not provided, the default recommended model for summarization will be used.
@@ -1664,12 +1638,11 @@ class InferenceClient:
  SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
  ```
  """
- if parameters is None:
- parameters = {
- "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
- "generate_parameters": generate_parameters,
- "truncation": truncation,
- }
+ parameters = {
+ "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+ "generate_parameters": generate_parameters,
+ "truncation": truncation,
+ }
  provider_helper = get_provider_helper(self.provider, task="summarization")
  request_parameters = provider_helper.prepare_request(
  inputs=text,
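With the deprecated `parameters` dict gone, summarization options are now always passed as individual keyword arguments, as the rewritten body above shows. A minimal sketch; the model ID is illustrative:

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
summary = client.summarization(
    "The Eiffel tower is one of the most famous landmarks in the world...",
    model="facebook/bart-large-cnn",    # illustrative model ID
    clean_up_tokenization_spaces=True,  # previously nested inside the `parameters` dict
)
print(summary.generated_text)
```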
@@ -2399,15 +2372,14 @@ class InferenceClient:
  prompt: str,
  *,
  negative_prompt: Optional[str] = None,
- height: Optional[float] = None,
- width: Optional[float] = None,
+ height: Optional[int] = None,
+ width: Optional[int] = None,
  num_inference_steps: Optional[int] = None,
  guidance_scale: Optional[float] = None,
  model: Optional[str] = None,
  scheduler: Optional[str] = None,
- target_size: Optional[TextToImageTargetSize] = None,
  seed: Optional[int] = None,
- **kwargs,
+ extra_body: Optional[Dict[str, Any]] = None,
  ) -> "Image":
  """
  Generate an image based on a given text using a specified model.
@@ -2418,15 +2390,19 @@ class InferenceClient:

  </Tip>

+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
  Args:
  prompt (`str`):
  The prompt to generate an image from.
  negative_prompt (`str`, *optional*):
  One prompt to guide what NOT to include in image generation.
- height (`float`, *optional*):
- The height in pixels of the image to generate.
- width (`float`, *optional*):
- The width in pixels of the image to generate.
+ height (`int`, *optional*):
+ The height in pixels of the output image
+ width (`int`, *optional*):
+ The width in pixels of the output image
  num_inference_steps (`int`, *optional*):
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
  expense of slower inference.
@@ -2439,10 +2415,11 @@ class InferenceClient:
  Defaults to None.
  scheduler (`str`, *optional*):
  Override the scheduler with a compatible one.
- target_size (`TextToImageTargetSize`, *optional*):
- The size in pixel of the output image
  seed (`int`, *optional*):
  Seed for the random number generator.
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.

  Returns:
  `Image`: The generated image.
@@ -2495,6 +2472,21 @@ class InferenceClient:
  ... )
  >>> image.save("astronaut.png")
  ```
+
+ Example using Replicate provider with extra parameters
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Use replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> image = client.text_to_image(
+ ... "An astronaut riding a horse on the moon.",
+ ... model="black-forest-labs/FLUX.1-schnell",
+ ... extra_body={"output_quality": 100},
+ ... )
+ >>> image.save("astronaut.png")
+ ```
  """
  provider_helper = get_provider_helper(self.provider, task="text-to-image")
  request_parameters = provider_helper.prepare_request(
@@ -2506,9 +2498,8 @@ class InferenceClient:
  "num_inference_steps": num_inference_steps,
  "guidance_scale": guidance_scale,
  "scheduler": scheduler,
- "target_size": target_size,
  "seed": seed,
- **kwargs,
+ **(extra_body or {}),
  },
  headers=self.headers,
  model=model or self.model,
@@ -2528,10 +2519,15 @@ class InferenceClient:
  num_frames: Optional[float] = None,
  num_inference_steps: Optional[int] = None,
  seed: Optional[int] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
  ) -> bytes:
  """
  Generate a video based on a given text.

+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
  Args:
  prompt (`str`):
  The prompt to generate a video from.
@@ -2551,6 +2547,9 @@ class InferenceClient:
  expense of slower inference.
  seed (`int`, *optional*):
  Seed for the random number generator.
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.

  Returns:
  `bytes`: The generated video.
@@ -2596,6 +2595,7 @@ class InferenceClient:
  "num_frames": num_frames,
  "num_inference_steps": num_inference_steps,
  "seed": seed,
+ **(extra_body or {}),
  },
  headers=self.headers,
  model=model or self.model,
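`text_to_video` gains the same `extra_body` pass-through, as the hunks above show. A sketch assuming a provider that serves text-to-video models; the provider, model ID and the `"fps"` key are illustrative, not taken from this diff:

```py
from pathlib import Path

from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai", api_key="hf_...")
video = client.text_to_video(
    "A young astronaut walking on the moon",
    model="tencent/HunyuanVideo",  # illustrative model ID
    num_inference_steps=30,
    extra_body={"fps": 24},        # hypothetical provider-specific parameter
)
Path("astronaut.mp4").write_bytes(video)
```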
@@ -2626,10 +2626,15 @@ class InferenceClient:
  top_p: Optional[float] = None,
  typical_p: Optional[float] = None,
  use_cache: Optional[bool] = None,
+ extra_body: Optional[Dict[str, Any]] = None,
  ) -> bytes:
  """
  Synthesize an audio of a voice pronouncing a given text.

+ <Tip>
+ You can pass provider-specific parameters to the model by using the `extra_body` argument.
+ </Tip>
+
  Args:
  text (`str`):
  The text to synthesize.
@@ -2683,7 +2688,9 @@ class InferenceClient:
  paper](https://hf.co/papers/2202.00666) for more details.
  use_cache (`bool`, *optional*):
  Whether the model should use the past last key/values attentions to speed up decoding
-
+ extra_body (`Dict[str, Any]`, *optional*):
+ Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+ for supported parameters.
  Returns:
  `bytes`: The generated audio.

@@ -2730,6 +2737,51 @@ class InferenceClient:
  ... )
  >>> Path("hello_world.flac").write_bytes(audio)
  ```
+ Example using Replicate provider with extra parameters
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> client = InferenceClient(
+ ... provider="replicate", # Use replicate provider
+ ... api_key="hf_...", # Pass your HF token
+ ... )
+ >>> audio = client.text_to_speech(
+ ... "Hello, my name is Kororo, an awesome text-to-speech model.",
+ ... model="hexgrad/Kokoro-82M",
+ ... extra_body={"voice": "af_nicole"},
+ ... )
+ >>> Path("hello.flac").write_bytes(audio)
+ ```
+
+ Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
+ ```py
+ >>> from huggingface_hub import InferenceClient
+ >>> lyrics = '''
+ ... [verse]
+ ... In the town where I was born
+ ... Lived a man who sailed to sea
+ ... And he told us of his life
+ ... In the land of submarines
+ ... So we sailed on to the sun
+ ... 'Til we found a sea of green
+ ... And we lived beneath the waves
+ ... In our yellow submarine
+
+ ... [chorus]
+ ... We all live in a yellow submarine
+ ... Yellow submarine, yellow submarine
+ ... We all live in a yellow submarine
+ ... Yellow submarine, yellow submarine
+ ... '''
+ >>> genres = "pavarotti-style tenor voice"
+ >>> client = InferenceClient(
+ ... provider="fal-ai",
+ ... model="m-a-p/YuE-s1-7B-anneal-en-cot",
+ ... api_key=...,
+ ... )
+ >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
+ >>> with open("output.mp3", "wb") as f:
+ ... f.write(audio)
+ ```
  """
  provider_helper = get_provider_helper(self.provider, task="text-to-speech")
  request_parameters = provider_helper.prepare_request(
@@ -2751,6 +2803,7 @@ class InferenceClient:
  "top_p": top_p,
  "typical_p": typical_p,
  "use_cache": use_cache,
+ **(extra_body or {}),
  },
  headers=self.headers,
  model=model or self.model,
@@ -3184,11 +3237,18 @@ class InferenceClient:
  response = self._inner_post(request_parameters)
  return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

+ @_deprecate_method(
+ version="0.33.0",
+ message=(
+ "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+ " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
+ ),
+ )
  def list_deployed_models(
  self, frameworks: Union[None, str, Literal["all"], List[str]] = None
  ) -> Dict[str, List[str]]:
  """
- List models deployed on the Serverless Inference API service.
+ List models deployed on the HF Serverless Inference API service.

  This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
  are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
@@ -3198,7 +3258,7 @@ class InferenceClient:

  <Tip warning={true}>

- This endpoint method does not return a live list of all models available for the Serverless Inference API service.
+ This endpoint method does not return a live list of all models available for the HF Inference API service.
  It searches over a cached list of models that were recently available and the list may not be up to date.
  If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].

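`list_deployed_models` is now deprecated (removal planned for 0.33.0). Based on the deprecation message added above, the replacement is `HfApi.list_models` with the `inference_provider` filter; a sketch, with an illustrative provider name:

```py
from huggingface_hub import HfApi

api = HfApi()
# List warm models served by a given provider, as suggested by the deprecation message.
for model in api.list_models(inference_provider="together", limit=5):
    print(model.id)
```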
@@ -3240,9 +3300,9 @@ class InferenceClient:

  # Resolve which frameworks to check
  if frameworks is None:
- frameworks = MAIN_INFERENCE_API_FRAMEWORKS
+ frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
  elif frameworks == "all":
- frameworks = ALL_INFERENCE_API_FRAMEWORKS
+ frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
  elif isinstance(frameworks, str):
  frameworks = [frameworks]
  frameworks = list(set(frameworks))
@@ -3262,7 +3322,7 @@ class InferenceClient:

  for framework in frameworks:
  response = get_session().get(
- f"{INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
+ f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
  )
  hf_raise_for_status(response)
  _unpack_response(framework, response.json())
@@ -3324,7 +3384,7 @@ class InferenceClient:
  if model.startswith(("http://", "https://")):
  url = model.rstrip("/") + "/info"
  else:
- url = f"{INFERENCE_ENDPOINT}/models/{model}/info"
+ url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  hf_raise_for_status(response)
@@ -3367,9 +3427,16 @@ class InferenceClient:
  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  return response.status_code == 200

+ @_deprecate_method(
+ version="0.33.0",
+ message=(
+ "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+ " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
+ ),
+ )
  def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
  """
- Get the status of a model hosted on the Inference API.
+ Get the status of a model hosted on the HF Inference API.

  <Tip>

@@ -3381,7 +3448,7 @@ class InferenceClient:
  Args:
  model (`str`, *optional*):
  Identifier of the model for witch the status gonna be checked. If model is not provided,
- the model associated with this instance of [`InferenceClient`] will be used. Only InferenceAPI service can be checked so the
+ the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
  identifier cannot be a URL.


@@ -3405,7 +3472,7 @@ class InferenceClient:
  raise ValueError("Model id not provided.")
  if model.startswith("https://"):
  raise NotImplementedError("Model status is only available for Inference API endpoints.")
- url = f"{INFERENCE_ENDPOINT}/status/{model}"
+ url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"

  response = get_session().get(url, headers=build_hf_headers(token=self.token))
  hf_raise_for_status(response)
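`get_model_status` is likewise deprecated in favor of `HfApi.model_info`, per the decorator added above. A sketch; the model ID is illustrative and the `inference` attribute on the returned `ModelInfo` is an assumption, since only the pointer to `HfApi.model_info` appears in this diff:

```py
from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct")  # illustrative model ID
# Assumed field: `ModelInfo.inference` reports whether the model is warm or cold.
print(getattr(info, "inference", None))
```

The remaining hunks below are from `huggingface_hub/inference/_common.py` (item 6 in the file list), where the `TaskProviderHelper` ABC is removed, presumably superseded by the new `huggingface_hub/inference/_providers/_common.py` module listed above.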
@@ -18,7 +18,6 @@ import base64
  import io
  import json
  import logging
- from abc import ABC, abstractmethod
  from contextlib import contextmanager
  from dataclasses import dataclass
  from pathlib import Path
@@ -50,12 +49,7 @@ from huggingface_hub.errors import (
  ValidationError,
  )

- from ..utils import (
- get_session,
- is_aiohttp_available,
- is_numpy_available,
- is_pillow_available,
- )
+ from ..utils import get_session, is_aiohttp_available, is_numpy_available, is_pillow_available
  from ._generated.types import ChatCompletionStreamOutput, TextGenerationStreamOutput


@@ -85,33 +79,15 @@ class RequestParameters:
  headers: Dict[str, Any]


- class TaskProviderHelper(ABC):
- """Protocol defining the interface for task-specific provider helpers."""
-
- @abstractmethod
- def prepare_request(
- self,
- *,
- inputs: Any,
- parameters: Dict[str, Any],
- headers: Dict,
- model: Optional[str],
- api_key: Optional[str],
- extra_payload: Optional[Dict[str, Any]] = None,
- ) -> RequestParameters: ...
- @abstractmethod
- def get_response(self, response: Union[bytes, Dict]) -> Any: ...
-
-
  # Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
  @dataclass
  class ModelStatus:
  """
- This Dataclass represents the model status in the Hugging Face Inference API.
+ This Dataclass represents the model status in the HF Inference API.

  Args:
  loaded (`bool`):
- If the model is currently loaded into Hugging Face's InferenceAPI. Models
+ If the model is currently loaded into HF's Inference API. Models
  are loaded on-demand, leading to the user's first request taking longer.
  If a model is loaded, you can be assured that it is in a healthy state.
  state (`str`):