huggingface-hub 0.28.1__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of huggingface-hub might be problematic.
- huggingface_hub/__init__.py +1 -4
- huggingface_hub/constants.py +16 -10
- huggingface_hub/file_download.py +10 -6
- huggingface_hub/hf_api.py +53 -23
- huggingface_hub/inference/_client.py +151 -84
- huggingface_hub/inference/_common.py +3 -27
- huggingface_hub/inference/_generated/_async_client.py +147 -83
- huggingface_hub/inference/_generated/types/__init__.py +1 -1
- huggingface_hub/inference/_generated/types/audio_classification.py +4 -5
- huggingface_hub/inference/_generated/types/audio_to_audio.py +3 -4
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +7 -8
- huggingface_hub/inference/_generated/types/base.py +21 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +29 -30
- huggingface_hub/inference/_generated/types/depth_estimation.py +3 -4
- huggingface_hub/inference/_generated/types/document_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/feature_extraction.py +5 -6
- huggingface_hub/inference/_generated/types/fill_mask.py +4 -5
- huggingface_hub/inference/_generated/types/image_classification.py +4 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +4 -5
- huggingface_hub/inference/_generated/types/image_to_image.py +5 -6
- huggingface_hub/inference/_generated/types/image_to_text.py +5 -6
- huggingface_hub/inference/_generated/types/object_detection.py +5 -6
- huggingface_hub/inference/_generated/types/question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -4
- huggingface_hub/inference/_generated/types/summarization.py +4 -5
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/text2text_generation.py +4 -5
- huggingface_hub/inference/_generated/types/text_classification.py +4 -5
- huggingface_hub/inference/_generated/types/text_generation.py +12 -13
- huggingface_hub/inference/_generated/types/text_to_audio.py +5 -6
- huggingface_hub/inference/_generated/types/text_to_image.py +8 -15
- huggingface_hub/inference/_generated/types/text_to_speech.py +5 -6
- huggingface_hub/inference/_generated/types/text_to_video.py +4 -5
- huggingface_hub/inference/_generated/types/token_classification.py +4 -5
- huggingface_hub/inference/_generated/types/translation.py +4 -5
- huggingface_hub/inference/_generated/types/video_classification.py +4 -5
- huggingface_hub/inference/_generated/types/visual_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +4 -5
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +4 -5
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +5 -6
- huggingface_hub/inference/_providers/__init__.py +44 -8
- huggingface_hub/inference/_providers/_common.py +239 -0
- huggingface_hub/inference/_providers/black_forest_labs.py +66 -0
- huggingface_hub/inference/_providers/fal_ai.py +31 -100
- huggingface_hub/inference/_providers/fireworks_ai.py +6 -0
- huggingface_hub/inference/_providers/hf_inference.py +58 -142
- huggingface_hub/inference/_providers/hyperbolic.py +43 -0
- huggingface_hub/inference/_providers/nebius.py +41 -0
- huggingface_hub/inference/_providers/novita.py +26 -0
- huggingface_hub/inference/_providers/replicate.py +24 -119
- huggingface_hub/inference/_providers/sambanova.py +3 -86
- huggingface_hub/inference/_providers/together.py +36 -130
- huggingface_hub/utils/_headers.py +5 -0
- huggingface_hub/utils/_hf_folder.py +4 -32
- huggingface_hub/utils/_http.py +85 -2
- huggingface_hub/utils/_typing.py +1 -1
- huggingface_hub/utils/logging.py +6 -0
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/METADATA +1 -1
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/RECORD +63 -57
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.28.1.dist-info → huggingface_hub-0.29.0.dist-info}/top_level.txt +0 -0
huggingface_hub/inference/_generated/_async_client.py

@@ -22,11 +22,10 @@ import asyncio
 import base64
 import logging
 import re
-import time
 import warnings
 from typing import TYPE_CHECKING, Any, AsyncIterable, Dict, List, Literal, Optional, Set, Union, overload

-from huggingface_hub
+from huggingface_hub import constants
 from huggingface_hub.errors import InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,
@@ -77,7 +76,6 @@ from huggingface_hub.inference._generated.types import (
     TextGenerationInputGrammarType,
     TextGenerationOutput,
     TextGenerationStreamOutput,
-    TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
     TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
@@ -122,9 +120,9 @@ class AsyncInferenceClient:
             path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
             documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
         provider (`str`, *optional*):
-
-
-
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
+            defaults to hf-inference (Hugging Face Serverless Inference API).
+            If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str` or `bool`, *optional*):
             Hugging Face token. Will default to the locally saved token if not provided.
             Pass `token=False` if you don't want to send your token to the server.
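The expanded provider list above is the user-facing change in this hunk; a minimal usage sketch of picking a provider at construction time (the token is a placeholder, and the model and prompt are taken from the examples later in this diff):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    # provider defaults to "hf-inference"; it is ignored when `model` is a URL or `base_url` is set
    client = AsyncInferenceClient(provider="together", api_key="hf_...")  # placeholder token
    out = await client.chat_completion(
        model="meta-llama/Meta-Llama-3-8B-Instruct",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
    )
    print(out.choices[0].message.content)


asyncio.run(main())
```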
@@ -179,7 +177,7 @@ class AsyncInferenceClient:
                 " It has the exact same behavior as `token`."
             )

-        self.model: Optional[str] = model
+        self.model: Optional[str] = base_url or model
         self.token: Optional[str] = token if token is not None else api_key
         self.headers = headers if headers is not None else {}

@@ -191,9 +189,6 @@ class AsyncInferenceClient:
         self.trust_env = trust_env
         self.proxies = proxies

-        # OpenAI compatibility
-        self.base_url = base_url
-
         # Keep track of the sessions to close them properly
         self._sessions: Dict["ClientSession", Set["ClientResponse"]] = dict()

@@ -262,8 +257,9 @@ class AsyncInferenceClient:
             "`InferenceClient.post` is deprecated and should not be used directly anymore."
         )
         provider_helper = HFInferenceTask(task or "unknown")
-
-
+        mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+        url = provider_helper._prepare_url(self.token, mapped_model)  # type: ignore[arg-type]
+        headers = provider_helper._prepare_headers(self.headers, self.token)  # type: ignore[arg-type]
         return await self._inner_post(
             request_parameters=RequestParameters(
                 url=url,
@@ -302,8 +298,6 @@ class AsyncInferenceClient:
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"

-        t0 = time.time()
-        timeout = self.timeout
         while True:
             with _open_as_binary(request_parameters.data) as data_as_binary:
                 # Do not use context manager as we don't want to close the connection immediately when returning
@@ -334,27 +328,6 @@ class AsyncInferenceClient:
                 except aiohttp.ClientResponseError as error:
                     error.response_error_payload = response_error_payload
                     await session.close()
-                    if response.status == 422 and request_parameters.task != "unknown":
-                        error.message += f". Make sure '{request_parameters.task}' task is supported by the model."
-                    if response.status == 503:
-                        # If Model is unavailable, either raise a TimeoutError...
-                        if timeout is not None and time.time() - t0 > timeout:
-                            raise InferenceTimeoutError(
-                                f"Model not loaded on the server: {request_parameters.url}. Please retry with a higher timeout"
-                                f" (current: {self.timeout}).",
-                                request=error.request,
-                                response=error.response,
-                            ) from error
-                        # ...or wait 1s and retry
-                        logger.info(f"Waiting for model to be loaded on the server: {error}")
-                        if "X-wait-for-model" not in request_parameters.headers and request_parameters.url.startswith(
-                            INFERENCE_ENDPOINT
-                        ):
-                            request_parameters.headers["X-wait-for-model"] = "1"
-                        await asyncio.sleep(1)
-                        if timeout is not None:
-                            timeout = max(self.timeout - (time.time() - t0), 1)  # type: ignore
-                        continue
                     raise error
                 except Exception:
                     await session.close()
@@ -500,6 +473,7 @@ class AsyncInferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.
@@ -510,8 +484,9 @@ class AsyncInferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-
-                Additional parameters to pass to the model.
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.

@@ -533,7 +508,7 @@ class AsyncInferenceClient:
         provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
         request_parameters = provider_helper.prepare_request(
             inputs=audio,
-            parameters={},
+            parameters={**(extra_body or {})},
             headers=self.headers,
             model=model or self.model,
             api_key=self.token,
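A minimal sketch of the new `extra_body` pass-through on `automatic_speech_recognition`; the provider choice and the `language` key are illustrative assumptions, not documented values:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient(provider="fal-ai", api_key="hf_...")  # placeholder token
    output = await client.automatic_speech_recognition(
        "sample.flac",
        model="openai/whisper-large-v3",
        extra_body={"language": "en"},  # hypothetical provider-specific parameter
    )
    print(output.text)


asyncio.run(main())
```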
@@ -564,6 +539,7 @@ class AsyncInferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> ChatCompletionOutput: ...

     @overload
@@ -589,6 +565,7 @@ class AsyncInferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AsyncIterable[ChatCompletionStreamOutput]: ...

     @overload
@@ -614,6 +591,7 @@ class AsyncInferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]: ...

     async def chat_completion(
@@ -639,6 +617,7 @@ class AsyncInferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, AsyncIterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.
@@ -653,7 +632,7 @@ class AsyncInferenceClient:
         </Tip>

         <Tip>
-
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
         </Tip>

         Args:
@@ -708,7 +687,9 @@ class AsyncInferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
                 Generated text returned from the server:
@@ -796,7 +777,7 @@ class AsyncInferenceClient:
             print(chunk.choices[0].delta.content)
         ```

-        Example using a third-party provider directly. Usage will be billed on your Together AI account.
+        Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
         ```py
         >>> from huggingface_hub import InferenceClient
         >>> client = InferenceClient(
@@ -806,6 +787,7 @@ class AsyncInferenceClient:
         >>> client.chat_completion(
         ...     model="meta-llama/Meta-Llama-3-8B-Instruct",
         ...     messages=[{"role": "user", "content": "What is the capital of France?"}],
+        ...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
         ... )
         ```

@@ -977,9 +959,9 @@ class AsyncInferenceClient:
         provider_helper = get_provider_helper(self.provider, task="conversational")

         # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
-        # `self.
+        # `self.model` takes precedence over 'model' argument for building URL.
         # `model` takes precedence for payload value.
-        model_id_or_url = self.
+        model_id_or_url = self.model or model
         payload_model = model or self.model

         # Prepare the payload
@@ -1002,6 +984,7 @@ class AsyncInferenceClient:
             "top_p": top_p,
             "stream": stream,
             "stream_options": stream_options,
+            **(extra_body or {}),
         }
         request_parameters = provider_helper.prepare_request(
             inputs=messages,
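Since `**(extra_body or {})` is unpacked last when the payload is assembled, provider-specific keys simply sit next to the standard OpenAI-style fields; a small sketch of the resulting payload shape, mirroring the values used in the docstring example above:

```py
extra_body = {"safety_model": "Meta-Llama/Llama-Guard-7b"}  # provider-specific key from the docstring example
payload = {
    "model": "meta-llama/Meta-Llama-3-8B-Instruct",
    "messages": [{"role": "user", "content": "What is the capital of France?"}],
    "stream": False,
    **(extra_body or {}),  # extra keys are merged into the same flat payload
}
print(sorted(payload))
```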
@@ -1664,19 +1647,10 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return _bytes_to_list(response)

-    @_deprecate_arguments(
-        version="0.29",
-        deprecated_args=["parameters"],
-        custom_message=(
-            "The `parameters` argument is deprecated and will be removed in a future version. "
-            "Provide individual parameters instead: `clean_up_tokenization_spaces`, `generate_parameters`, and `truncation`."
-        ),
-    )
     async def summarization(
         self,
         text: str,
         *,
-        parameters: Optional[Dict[str, Any]] = None,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,
@@ -1688,9 +1662,6 @@ class AsyncInferenceClient:
         Args:
             text (`str`):
                 The input text to summarize.
-            parameters (`Dict[str, Any]`, *optional*):
-                Additional parameters for summarization. Check out this [page](https://huggingface.co/docs/api-inference/detailed_parameters#summarization-task)
-                for more details.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
@@ -1718,12 +1689,11 @@ class AsyncInferenceClient:
         SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
         ```
         """
-
-
-
-
-
-            }
+        parameters = {
+            "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+            "generate_parameters": generate_parameters,
+            "truncation": truncation,
+        }
         provider_helper = get_provider_helper(self.provider, task="summarization")
         request_parameters = provider_helper.prepare_request(
             inputs=text,
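With the deprecated `parameters` argument gone, the same settings are passed as individual keyword arguments; a minimal sketch (the model id is an illustrative choice):

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient(api_key="hf_...")  # placeholder token
    summary = await client.summarization(
        "The Eiffel tower is one of the most famous landmarks in the world.",
        model="facebook/bart-large-cnn",  # illustrative model choice
        clean_up_tokenization_spaces=True,
    )
    print(summary.generated_text)


asyncio.run(main())
```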
@@ -2458,15 +2428,14 @@ class AsyncInferenceClient:
         prompt: str,
         *,
         negative_prompt: Optional[str] = None,
-        height: Optional[
-        width: Optional[
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
-        target_size: Optional[TextToImageTargetSize] = None,
         seed: Optional[int] = None,
-
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.
@@ -2477,15 +2446,19 @@ class AsyncInferenceClient:

         </Tip>

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
             negative_prompt (`str`, *optional*):
                 One prompt to guide what NOT to include in image generation.
-            height (`
-                The height in pixels of the image
-            width (`
-                The width in pixels of the image
+            height (`int`, *optional*):
+                The height in pixels of the output image
+            width (`int`, *optional*):
+                The width in pixels of the output image
             num_inference_steps (`int`, *optional*):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
@@ -2498,10 +2471,11 @@ class AsyncInferenceClient:
                 Defaults to None.
             scheduler (`str`, *optional*):
                 Override the scheduler with a compatible one.
-            target_size (`TextToImageTargetSize`, *optional*):
-                The size in pixel of the output image
             seed (`int`, *optional*):
                 Seed for the random number generator.
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.

         Returns:
             `Image`: The generated image.
@@ -2555,6 +2529,21 @@ class AsyncInferenceClient:
         ... )
         >>> image.save("astronaut.png")
         ```
+
+        Example using Replicate provider with extra parameters
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient(
+        ...     provider="replicate",  # Use replicate provider
+        ...     api_key="hf_...",  # Pass your HF token
+        ... )
+        >>> image = client.text_to_image(
+        ...     "An astronaut riding a horse on the moon.",
+        ...     model="black-forest-labs/FLUX.1-schnell",
+        ...     extra_body={"output_quality": 100},
+        ... )
+        >>> image.save("astronaut.png")
+        ```
         """
         provider_helper = get_provider_helper(self.provider, task="text-to-image")
         request_parameters = provider_helper.prepare_request(
@@ -2566,9 +2555,8 @@ class AsyncInferenceClient:
                 "num_inference_steps": num_inference_steps,
                 "guidance_scale": guidance_scale,
                 "scheduler": scheduler,
-                "target_size": target_size,
                 "seed": seed,
-                **
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -2588,10 +2576,15 @@ class AsyncInferenceClient:
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate a video from.
@@ -2611,6 +2604,9 @@ class AsyncInferenceClient:
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.

         Returns:
             `bytes`: The generated video.
@@ -2656,6 +2652,7 @@ class AsyncInferenceClient:
                 "num_frames": num_frames,
                 "num_inference_steps": num_inference_steps,
                 "seed": seed,
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
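`text_to_video` gains the same `extra_body` escape hatch; a minimal sketch in which the provider, model id and extra key are all illustrative placeholders rather than documented values:

```py
import asyncio

from huggingface_hub import AsyncInferenceClient


async def main() -> None:
    client = AsyncInferenceClient(provider="fal-ai", api_key="hf_...")  # placeholder token
    video = await client.text_to_video(
        "A cat surfing a wave at sunset.",
        model="tencent/HunyuanVideo",  # illustrative model id
        num_frames=49,
        extra_body={"num_videos": 1},  # hypothetical provider-specific key
    )
    with open("cat.mp4", "wb") as f:
        f.write(video)  # text_to_video returns raw bytes


asyncio.run(main())
```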
@@ -2686,10 +2683,15 @@ class AsyncInferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             text (`str`):
                 The text to synthesize.
@@ -2743,7 +2745,9 @@ class AsyncInferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             `bytes`: The generated audio.

@@ -2791,6 +2795,51 @@ class AsyncInferenceClient:
         ... )
         >>> Path("hello_world.flac").write_bytes(audio)
         ```
+        Example using Replicate provider with extra parameters
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient(
+        ...     provider="replicate",  # Use replicate provider
+        ...     api_key="hf_...",  # Pass your HF token
+        ... )
+        >>> audio = client.text_to_speech(
+        ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
+        ...     model="hexgrad/Kokoro-82M",
+        ...     extra_body={"voice": "af_nicole"},
+        ... )
+        >>> Path("hello.flac").write_bytes(audio)
+        ```
+
+        Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> lyrics = '''
+        ... [verse]
+        ... In the town where I was born
+        ... Lived a man who sailed to sea
+        ... And he told us of his life
+        ... In the land of submarines
+        ... So we sailed on to the sun
+        ... 'Til we found a sea of green
+        ... And we lived beneath the waves
+        ... In our yellow submarine
+
+        ... [chorus]
+        ... We all live in a yellow submarine
+        ... Yellow submarine, yellow submarine
+        ... We all live in a yellow submarine
+        ... Yellow submarine, yellow submarine
+        ... '''
+        >>> genres = "pavarotti-style tenor voice"
+        >>> client = InferenceClient(
+        ...     provider="fal-ai",
+        ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
+        ...     api_key=...,
+        ... )
+        >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
+        >>> with open("output.mp3", "wb") as f:
+        ...     f.write(audio)
+        ```
         """
         provider_helper = get_provider_helper(self.provider, task="text-to-speech")
         request_parameters = provider_helper.prepare_request(
@@ -2812,6 +2861,7 @@ class AsyncInferenceClient:
                 "top_p": top_p,
                 "typical_p": typical_p,
                 "use_cache": use_cache,
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -3251,11 +3301,18 @@ class AsyncInferenceClient:
         response = await self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

+    @_deprecate_method(
+        version="0.33.0",
+        message=(
+            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
+        ),
+    )
     async def list_deployed_models(
         self, frameworks: Union[None, str, Literal["all"], List[str]] = None
     ) -> Dict[str, List[str]]:
         """
-        List models deployed on the Serverless Inference API service.
+        List models deployed on the HF Serverless Inference API service.

         This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
         are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
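The deprecation message names `HfApi.list_models(..., inference_provider='...')` as the replacement for `list_deployed_models`; a minimal sketch of that call (the provider name and limit are placeholders):

```py
from huggingface_hub import HfApi

api = HfApi()
# List warm models served by a given provider, as suggested by the deprecation message.
for model in api.list_models(inference_provider="together", limit=5):
    print(model.id)
```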
@@ -3265,7 +3322,7 @@ class AsyncInferenceClient:

         <Tip warning={true}>

-        This endpoint method does not return a live list of all models available for the
+        This endpoint method does not return a live list of all models available for the HF Inference API service.
         It searches over a cached list of models that were recently available and the list may not be up to date.
         If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].

@@ -3308,9 +3365,9 @@ class AsyncInferenceClient:

         # Resolve which frameworks to check
         if frameworks is None:
-            frameworks = MAIN_INFERENCE_API_FRAMEWORKS
+            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
         elif frameworks == "all":
-            frameworks = ALL_INFERENCE_API_FRAMEWORKS
+            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
         elif isinstance(frameworks, str):
             frameworks = [frameworks]
         frameworks = list(set(frameworks))
@@ -3330,7 +3387,7 @@ class AsyncInferenceClient:

         for framework in frameworks:
             response = get_session().get(
-                f"{INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
+                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
             )
             hf_raise_for_status(response)
             _unpack_response(framework, response.json())
@@ -3434,7 +3491,7 @@ class AsyncInferenceClient:
         if model.startswith(("http://", "https://")):
             url = model.rstrip("/") + "/info"
         else:
-            url = f"{INFERENCE_ENDPOINT}/models/{model}/info"
+            url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

         async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
             response = await client.get(url, proxy=self.proxies)
@@ -3480,9 +3537,16 @@ class AsyncInferenceClient:
             response = await client.get(url, proxy=self.proxies)
             return response.status == 200

+    @_deprecate_method(
+        version="0.33.0",
+        message=(
+            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
+        ),
+    )
     async def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
         """
-        Get the status of a model hosted on the Inference API.
+        Get the status of a model hosted on the HF Inference API.

         <Tip>

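Likewise, `get_model_status` now points users to `HfApi.model_info`; a minimal sketch of checking a model's inference status that way (passing `expand=["inference"]` and reading the `inference` attribute are assumptions about `HfApi`, not something shown in this diff):

```py
from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])  # assumed expand value
print(info.inference)  # assumed attribute reporting warm/cold status
```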
@@ -3494,7 +3558,7 @@ class AsyncInferenceClient:
         Args:
             model (`str`, *optional*):
                 Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only
+                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
                 identifier cannot be a URL.


@@ -3519,7 +3583,7 @@ class AsyncInferenceClient:
             raise ValueError("Model id not provided.")
         if model.startswith("https://"):
             raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{INFERENCE_ENDPOINT}/status/{model}"
+        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"

         async with self._get_client_session(headers=build_hf_headers(token=self.token)) as client:
             response = await client.get(url, proxy=self.proxies)
huggingface_hub/inference/_generated/types/__init__.py

@@ -141,7 +141,7 @@ from .text_to_audio import (
     TextToAudioOutput,
     TextToAudioParameters,
 )
-from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
+from .text_to_image import TextToImageInput, TextToImageOutput, TextToImageParameters
 from .text_to_speech import (
     TextToSpeechEarlyStoppingEnum,
     TextToSpeechGenerationParameters,
huggingface_hub/inference/_generated/types/audio_classification.py

@@ -3,16 +3,15 @@
 # See:
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from dataclasses import dataclass
 from typing import Literal, Optional

-from .base import BaseInferenceType
+from .base import BaseInferenceType, dataclass_with_extra


 AudioClassificationOutputTransform = Literal["sigmoid", "softmax", "none"]


-@dataclass
+@dataclass_with_extra
 class AudioClassificationParameters(BaseInferenceType):
     """Additional inference parameters for Audio Classification"""

@@ -22,7 +21,7 @@ class AudioClassificationParameters(BaseInferenceType):
     """When specified, limits the output to the top K most probable classes."""


-@dataclass
+@dataclass_with_extra
 class AudioClassificationInput(BaseInferenceType):
     """Inputs for Audio Classification inference"""

@@ -34,7 +33,7 @@ class AudioClassificationInput(BaseInferenceType):
     """Additional inference parameters for Audio Classification"""


-@dataclass
+@dataclass_with_extra
 class AudioClassificationOutputElement(BaseInferenceType):
     """Outputs for Audio Classification inference"""

huggingface_hub/inference/_generated/types/audio_to_audio.py

@@ -3,13 +3,12 @@
 # See:
 # - script: https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/scripts/inference-codegen.ts
 # - specs: https://github.com/huggingface/huggingface.js/tree/main/packages/tasks/src/tasks.
-from dataclasses import dataclass
 from typing import Any

-from .base import BaseInferenceType
+from .base import BaseInferenceType, dataclass_with_extra


-@dataclass
+@dataclass_with_extra
 class AudioToAudioInput(BaseInferenceType):
     """Inputs for Audio to Audio inference"""

@@ -17,7 +16,7 @@ class AudioToAudioInput(BaseInferenceType):
     """The input audio data"""


-@dataclass
+@dataclass_with_extra
 class AudioToAudioOutputElement(BaseInferenceType):
     """Outputs of inference for the Audio To Audio task
     A generated audio file with its label.
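Every generated type now uses `dataclass_with_extra` (added to `base.py`, +21 lines) instead of the plain `@dataclass` decorator. The implementation is not part of this diff; purely as an illustration of the idea, a decorator of this kind could keep unknown server fields instead of rejecting them, roughly like this sketch (not the actual huggingface_hub code):

```py
from dataclasses import dataclass, fields
from typing import Any, Dict, Type, TypeVar

T = TypeVar("T")


def dataclass_with_extra(cls: Type[T]) -> Type[T]:
    """Illustrative sketch only: behave like @dataclass but keep unknown keyword fields."""
    cls = dataclass(cls)  # type: ignore[arg-type]
    known = {f.name for f in fields(cls)}
    original_init = cls.__init__

    def __init__(self, **kwargs: Any) -> None:
        extra: Dict[str, Any] = {k: v for k, v in kwargs.items() if k not in known}
        original_init(self, **{k: v for k, v in kwargs.items() if k in known})
        for key, value in extra.items():
            setattr(self, key, value)  # keep provider-specific extras as plain attributes

    cls.__init__ = __init__  # type: ignore[method-assign]
    return cls


@dataclass_with_extra
class Example:
    label: str = ""


e = Example(label="speech", confidence=0.9)  # unknown key is kept rather than raising
print(e.label, e.confidence)
```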