huggingface-hub 0.28.0rc5__py3-none-any.whl → 0.29.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- huggingface_hub/__init__.py +1 -4
- huggingface_hub/constants.py +16 -11
- huggingface_hub/file_download.py +10 -6
- huggingface_hub/hf_api.py +53 -23
- huggingface_hub/inference/_client.py +151 -84
- huggingface_hub/inference/_common.py +3 -27
- huggingface_hub/inference/_generated/_async_client.py +147 -83
- huggingface_hub/inference/_generated/types/__init__.py +1 -1
- huggingface_hub/inference/_generated/types/audio_classification.py +4 -5
- huggingface_hub/inference/_generated/types/audio_to_audio.py +3 -4
- huggingface_hub/inference/_generated/types/automatic_speech_recognition.py +7 -8
- huggingface_hub/inference/_generated/types/base.py +21 -0
- huggingface_hub/inference/_generated/types/chat_completion.py +29 -30
- huggingface_hub/inference/_generated/types/depth_estimation.py +3 -4
- huggingface_hub/inference/_generated/types/document_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/feature_extraction.py +5 -6
- huggingface_hub/inference/_generated/types/fill_mask.py +4 -5
- huggingface_hub/inference/_generated/types/image_classification.py +4 -5
- huggingface_hub/inference/_generated/types/image_segmentation.py +4 -5
- huggingface_hub/inference/_generated/types/image_to_image.py +5 -6
- huggingface_hub/inference/_generated/types/image_to_text.py +5 -6
- huggingface_hub/inference/_generated/types/object_detection.py +5 -6
- huggingface_hub/inference/_generated/types/question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/sentence_similarity.py +3 -4
- huggingface_hub/inference/_generated/types/summarization.py +4 -5
- huggingface_hub/inference/_generated/types/table_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/text2text_generation.py +4 -5
- huggingface_hub/inference/_generated/types/text_classification.py +4 -5
- huggingface_hub/inference/_generated/types/text_generation.py +12 -13
- huggingface_hub/inference/_generated/types/text_to_audio.py +5 -6
- huggingface_hub/inference/_generated/types/text_to_image.py +8 -15
- huggingface_hub/inference/_generated/types/text_to_speech.py +5 -6
- huggingface_hub/inference/_generated/types/text_to_video.py +4 -5
- huggingface_hub/inference/_generated/types/token_classification.py +4 -5
- huggingface_hub/inference/_generated/types/translation.py +4 -5
- huggingface_hub/inference/_generated/types/video_classification.py +4 -5
- huggingface_hub/inference/_generated/types/visual_question_answering.py +5 -6
- huggingface_hub/inference/_generated/types/zero_shot_classification.py +4 -5
- huggingface_hub/inference/_generated/types/zero_shot_image_classification.py +4 -5
- huggingface_hub/inference/_generated/types/zero_shot_object_detection.py +5 -6
- huggingface_hub/inference/_providers/__init__.py +44 -8
- huggingface_hub/inference/_providers/_common.py +239 -0
- huggingface_hub/inference/_providers/black_forest_labs.py +66 -0
- huggingface_hub/inference/_providers/fal_ai.py +31 -100
- huggingface_hub/inference/_providers/fireworks_ai.py +6 -0
- huggingface_hub/inference/_providers/hf_inference.py +58 -142
- huggingface_hub/inference/_providers/hyperbolic.py +43 -0
- huggingface_hub/inference/_providers/nebius.py +41 -0
- huggingface_hub/inference/_providers/novita.py +26 -0
- huggingface_hub/inference/_providers/replicate.py +24 -119
- huggingface_hub/inference/_providers/sambanova.py +3 -86
- huggingface_hub/inference/_providers/together.py +36 -130
- huggingface_hub/utils/_headers.py +5 -0
- huggingface_hub/utils/_hf_folder.py +4 -32
- huggingface_hub/utils/_http.py +85 -2
- huggingface_hub/utils/_typing.py +1 -1
- huggingface_hub/utils/logging.py +6 -0
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/METADATA +1 -1
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/RECORD +63 -57
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/LICENSE +0 -0
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/WHEEL +0 -0
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/entry_points.txt +0 -0
- {huggingface_hub-0.28.0rc5.dist-info → huggingface_hub-0.29.0.dist-info}/top_level.txt +0 -0
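The headline changes in this release are the expanded multi-provider support in `InferenceClient` (new `black_forest_labs.py`, `fireworks_ai.py`, `hyperbolic.py`, `nebius.py`, `novita.py` provider modules plus a shared `_providers/_common.py`) and the new `extra_body` pass-through on several task methods, both visible in the hunks below. A hedged sketch of selecting one of the newly supported providers (the model ID is illustrative; pick one the chosen provider actually serves):

```py
from huggingface_hub import InferenceClient

# One of the providers now accepted by `provider=` (see the docstring change further down):
# "black-forest-labs", "fal-ai", "fireworks-ai", "hf-inference", "hyperbolic",
# "nebius", "novita", "replicate", "sambanova" or "together".
client = InferenceClient(provider="novita", api_key="hf_...")
completion = client.chat_completion(
    model="deepseek-ai/DeepSeek-R1",  # illustrative model; pick one the provider actually serves
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(completion.choices[0].message.content)
```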
huggingface_hub/inference/_client.py

@@ -35,13 +35,12 @@
 import base64
 import logging
 import re
-import time
 import warnings
 from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Literal, Optional, Union, overload

 from requests import HTTPError

-from huggingface_hub
+from huggingface_hub import constants
 from huggingface_hub.errors import BadRequestError, InferenceTimeoutError
 from huggingface_hub.inference._common import (
     TASKS_EXPECTING_IMAGES,

@@ -92,7 +91,6 @@ from huggingface_hub.inference._generated.types import (
     TextGenerationInputGrammarType,
     TextGenerationOutput,
     TextGenerationStreamOutput,
-    TextToImageTargetSize,
     TextToSpeechEarlyStoppingEnum,
     TokenClassificationAggregationStrategy,
     TokenClassificationOutputElement,
@@ -134,9 +132,9 @@ class InferenceClient:
             path will be appended to the base URL (see the [TGI Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
             documentation for details). When passing a URL as `model`, the client will not append any suffix path to it.
         provider (`str`, *optional*):
-
-
-
+            Name of the provider to use for inference. Can be `"black-forest-labs"`, `"fal-ai"`, `"fireworks-ai"`, `"hf-inference"`, `"hyperbolic"`, `"nebius"`, `"novita"`, `"replicate"`, `"sambanova"` or `"together"`.
+            defaults to hf-inference (Hugging Face Serverless Inference API).
+            If model is a URL or `base_url` is passed, then `provider` is not used.
         token (`str` or `bool`, *optional*):
             Hugging Face token. Will default to the locally saved token if not provided.
             Pass `token=False` if you don't want to send your token to the server.
@@ -188,7 +186,7 @@ class InferenceClient:
                 " It has the exact same behavior as `token`."
             )

-        self.model: Optional[str] = model
+        self.model: Optional[str] = base_url or model
         self.token: Optional[str] = token if token is not None else api_key
         self.headers = headers if headers is not None else {}

@@ -199,9 +197,6 @@ class InferenceClient:
         self.timeout = timeout
         self.proxies = proxies

-        # OpenAI compatibility
-        self.base_url = base_url
-
     def __repr__(self):
         return f"<InferenceClient(model='{self.model if self.model else ''}', timeout={self.timeout})>"

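The two hunks above fold `base_url` into `self.model` at construction time instead of keeping a separate `self.base_url` attribute. A minimal sketch of the resulting behavior (the endpoint URL is a placeholder):

```py
from huggingface_hub import InferenceClient

# Passing an OpenAI-style base_url: the client now stores it in `self.model`
# (previously kept in a separate `self.base_url` attribute).
client = InferenceClient(base_url="https://my-endpoint.example/v1")  # placeholder endpoint URL
print(client.model)  # -> "https://my-endpoint.example/v1"
```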
@@ -267,8 +262,9 @@ class InferenceClient:
             "`InferenceClient.post` is deprecated and should not be used directly anymore."
         )
         provider_helper = HFInferenceTask(task or "unknown")
-
-
+        mapped_model = provider_helper._prepare_mapped_model(model or self.model)
+        url = provider_helper._prepare_url(self.token, mapped_model)  # type: ignore[arg-type]
+        headers = provider_helper._prepare_headers(self.headers, self.token)  # type: ignore[arg-type]
         return self._inner_post(
             request_parameters=RequestParameters(
                 url=url,

@@ -304,8 +300,6 @@ class InferenceClient:
         if request_parameters.task in TASKS_EXPECTING_IMAGES and "Accept" not in request_parameters.headers:
             request_parameters.headers["Accept"] = "image/png"

-        t0 = time.time()
-        timeout = self.timeout
         while True:
             with _open_as_binary(request_parameters.data) as data_as_binary:
                 try:
@@ -328,28 +322,10 @@ class InferenceClient:
                     return response.iter_lines() if stream else response.content
                 except HTTPError as error:
                     if error.response.status_code == 422 and request_parameters.task != "unknown":
-
-
-
-
-                    # If Model is unavailable, either raise a TimeoutError...
-                    if timeout is not None and time.time() - t0 > timeout:
-                        raise InferenceTimeoutError(
-                            f"Model not loaded on the server: {request_parameters.url}. Please retry with a higher timeout (current:"
-                            f" {self.timeout}).",
-                            request=error.request,
-                            response=error.response,
-                        ) from error
-                    # ...or wait 1s and retry
-                    logger.info(f"Waiting for model to be loaded on the server: {error}")
-                    time.sleep(1)
-                    if "X-wait-for-model" not in request_parameters.headers and request_parameters.url.startswith(
-                        INFERENCE_ENDPOINT
-                    ):
-                        request_parameters.headers["X-wait-for-model"] = "1"
-                    if timeout is not None:
-                        timeout = max(self.timeout - (time.time() - t0), 1)  # type: ignore
-                    continue
+                        msg = str(error.args[0])
+                        if len(error.response.text) > 0:
+                            msg += f"\n{error.response.text}\n"
+                        error.args = (msg,) + error.args[1:]
                     raise

     def audio_classification(
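This hunk drops the old wait-and-retry loop for cold models (`InferenceTimeoutError`, the `X-wait-for-model` header, `time.sleep(1)`): the HTTP error is now re-raised immediately, with the server's response body appended to its message. A hedged sketch of how calling code might surface that error (the model ID is a placeholder):

```py
from requests import HTTPError

from huggingface_hub import InferenceClient

client = InferenceClient()
try:
    result = client.text_classification("I love this!", model="some-org/some-model")  # placeholder model ID
except HTTPError as err:
    # 0.29.0 re-raises immediately; the server's response body is appended to the
    # error message instead of the client sleeping and retrying until the model loads.
    print(err)
```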
@@ -464,6 +440,7 @@ class InferenceClient:
         audio: ContentT,
         *,
         model: Optional[str] = None,
+        extra_body: Optional[Dict] = None,
     ) -> AutomaticSpeechRecognitionOutput:
         """
         Perform automatic speech recognition (ASR or audio-to-text) on the given audio content.

@@ -474,8 +451,9 @@ class InferenceClient:
             model (`str`, *optional*):
                 The model to use for ASR. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for ASR will be used.
-
-                Additional parameters to pass to the model.
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`AutomaticSpeechRecognitionOutput`]: An item containing the transcribed text and optionally the timestamp chunks.

@@ -496,7 +474,7 @@ class InferenceClient:
         provider_helper = get_provider_helper(self.provider, task="automatic-speech-recognition")
         request_parameters = provider_helper.prepare_request(
             inputs=audio,
-            parameters={},
+            parameters={**(extra_body or {})},
             headers=self.headers,
             model=model or self.model,
             api_key=self.token,
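With the hunk above, `automatic_speech_recognition` forwards `extra_body` as the request parameters. A minimal usage sketch (the provider/model pairing and the `language` key are illustrative assumptions, not documented values):

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai", api_key="hf_...")
output = client.automatic_speech_recognition(
    "sample.flac",                     # local path, URL, or raw bytes
    model="openai/whisper-large-v3",   # illustrative model ID
    extra_body={"language": "en"},     # hypothetical provider-specific parameter
)
print(output.text)
```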
@@ -527,6 +505,7 @@ class InferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> ChatCompletionOutput: ...

     @overload

@@ -552,6 +531,7 @@ class InferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Iterable[ChatCompletionStreamOutput]: ...

     @overload

@@ -577,6 +557,7 @@ class InferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]: ...

     def chat_completion(

@@ -602,6 +583,7 @@ class InferenceClient:
         tools: Optional[List[ChatCompletionInputTool]] = None,
         top_logprobs: Optional[int] = None,
         top_p: Optional[float] = None,
+        extra_body: Optional[Dict] = None,
     ) -> Union[ChatCompletionOutput, Iterable[ChatCompletionStreamOutput]]:
         """
         A method for completing conversations using a specified language model.

@@ -616,7 +598,7 @@ class InferenceClient:
         </Tip>

         <Tip>
-
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
         </Tip>

         Args:

@@ -671,7 +653,9 @@ class InferenceClient:
             tools (List of [`ChatCompletionInputTool`], *optional*):
                 A list of tools the model may call. Currently, only functions are supported as a tool. Use this to
                 provide a list of functions the model may generate JSON inputs for.
-
+            extra_body (`Dict`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             [`ChatCompletionOutput`] or Iterable of [`ChatCompletionStreamOutput`]:
                 Generated text returned from the server:

@@ -756,7 +740,7 @@ class InferenceClient:
             print(chunk.choices[0].delta.content)
         ```

-        Example using a third-party provider directly. Usage will be billed on your Together AI account.
+        Example using a third-party provider directly with extra (provider-specific) parameters. Usage will be billed on your Together AI account.
         ```py
         >>> from huggingface_hub import InferenceClient
         >>> client = InferenceClient(

@@ -766,6 +750,7 @@ class InferenceClient:
         >>> client.chat_completion(
         ...     model="meta-llama/Meta-Llama-3-8B-Instruct",
         ...     messages=[{"role": "user", "content": "What is the capital of France?"}],
+        ...     extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},
         ... )
         ```

@@ -934,9 +919,9 @@ class InferenceClient:
         provider_helper = get_provider_helper(self.provider, task="conversational")

         # Since `chat_completion(..., model=xxx)` is also a payload parameter for the server, we need to handle 'model' differently.
-        # `self.
+        # `self.model` takes precedence over 'model' argument for building URL.
         # `model` takes precedence for payload value.
-        model_id_or_url = self.
+        model_id_or_url = self.model or model
         payload_model = model or self.model

         # Prepare the payload

@@ -959,6 +944,7 @@ class InferenceClient:
             "top_p": top_p,
             "stream": stream,
             "stream_options": stream_options,
+            **(extra_body or {}),
         }
         request_parameters = provider_helper.prepare_request(
             inputs=messages,
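Because `extra_body` is unpacked into the request payload, its keys are sent alongside the named arguments. A hedged sketch of the resulting call, reusing the `safety_model` key from the Together AI example in the docstring above:

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="together", api_key="hf_...")
response = client.chat_completion(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    max_tokens=64,
    extra_body={"safety_model": "Meta-Llama/Llama-Guard-7b"},  # forwarded verbatim in the payload
)
print(response.choices[0].message.content)
```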
@@ -1611,19 +1597,10 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return _bytes_to_list(response)

-    @_deprecate_arguments(
-        version="0.29",
-        deprecated_args=["parameters"],
-        custom_message=(
-            "The `parameters` argument is deprecated and will be removed in a future version. "
-            "Provide individual parameters instead: `clean_up_tokenization_spaces`, `generate_parameters`, and `truncation`."
-        ),
-    )
     def summarization(
         self,
         text: str,
         *,
-        parameters: Optional[Dict[str, Any]] = None,
         model: Optional[str] = None,
         clean_up_tokenization_spaces: Optional[bool] = None,
         generate_parameters: Optional[Dict[str, Any]] = None,

@@ -1635,9 +1612,6 @@ class InferenceClient:
         Args:
             text (`str`):
                 The input text to summarize.
-            parameters (`Dict[str, Any]`, *optional*):
-                Additional parameters for summarization. Check out this [page](https://huggingface.co/docs/api-inference/detailed_parameters#summarization-task)
-                for more details.
             model (`str`, *optional*):
                 The model to use for inference. Can be a model ID hosted on the Hugging Face Hub or a URL to a deployed
                 Inference Endpoint. If not provided, the default recommended model for summarization will be used.
@@ -1664,12 +1638,11 @@ class InferenceClient:
         SummarizationOutput(generated_text="The Eiffel tower is one of the most famous landmarks in the world....")
         ```
         """
-
-
-
-
-
-        }
+        parameters = {
+            "clean_up_tokenization_spaces": clean_up_tokenization_spaces,
+            "generate_parameters": generate_parameters,
+            "truncation": truncation,
+        }
         provider_helper = get_provider_helper(self.provider, task="summarization")
         request_parameters = provider_helper.prepare_request(
             inputs=text,
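With the deprecated `parameters` argument removed, `summarization` now assembles the payload from the individual keyword arguments. A hedged usage sketch (the model choice is illustrative):

```py
from huggingface_hub import InferenceClient

client = InferenceClient()
summary = client.summarization(
    "The Eiffel tower is one of the most famous landmarks in the world...",
    model="facebook/bart-large-cnn",     # illustrative model ID
    clean_up_tokenization_spaces=True,   # replaces the old catch-all `parameters` dict
    truncation="do_not_truncate",
)
print(summary.generated_text)
```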
@@ -2399,15 +2372,14 @@ class InferenceClient:
         prompt: str,
         *,
         negative_prompt: Optional[str] = None,
-        height: Optional[
-        width: Optional[
+        height: Optional[int] = None,
+        width: Optional[int] = None,
         num_inference_steps: Optional[int] = None,
         guidance_scale: Optional[float] = None,
         model: Optional[str] = None,
         scheduler: Optional[str] = None,
-        target_size: Optional[TextToImageTargetSize] = None,
         seed: Optional[int] = None,
-
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> "Image":
         """
         Generate an image based on a given text using a specified model.

@@ -2418,15 +2390,19 @@ class InferenceClient:

         </Tip>

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate an image from.
             negative_prompt (`str`, *optional*):
                 One prompt to guide what NOT to include in image generation.
-            height (`
-                The height in pixels of the image
-            width (`
-                The width in pixels of the image
+            height (`int`, *optional*):
+                The height in pixels of the output image
+            width (`int`, *optional*):
+                The width in pixels of the output image
             num_inference_steps (`int`, *optional*):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.

@@ -2439,10 +2415,11 @@ class InferenceClient:
                 Defaults to None.
             scheduler (`str`, *optional*):
                 Override the scheduler with a compatible one.
-            target_size (`TextToImageTargetSize`, *optional*):
-                The size in pixel of the output image
             seed (`int`, *optional*):
                 Seed for the random number generator.
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.

         Returns:
             `Image`: The generated image.

@@ -2495,6 +2472,21 @@ class InferenceClient:
         ... )
         >>> image.save("astronaut.png")
         ```
+
+        Example using Replicate provider with extra parameters
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient(
+        ...     provider="replicate",  # Use replicate provider
+        ...     api_key="hf_...",  # Pass your HF token
+        ... )
+        >>> image = client.text_to_image(
+        ...     "An astronaut riding a horse on the moon.",
+        ...     model="black-forest-labs/FLUX.1-schnell",
+        ...     extra_body={"output_quality": 100},
+        ... )
+        >>> image.save("astronaut.png")
+        ```
         """
         provider_helper = get_provider_helper(self.provider, task="text-to-image")
         request_parameters = provider_helper.prepare_request(
@@ -2506,9 +2498,8 @@ class InferenceClient:
                 "num_inference_steps": num_inference_steps,
                 "guidance_scale": guidance_scale,
                 "scheduler": scheduler,
-                "target_size": target_size,
                 "seed": seed,
-                **
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
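Note that `**(extra_body or {})` is unpacked after the named parameters, so on a key collision the `extra_body` value wins. A minimal sketch of that merge semantics (values are illustrative only):

```py
# Unpacking order decides which value wins when keys collide.
named = {"seed": 42, "scheduler": None}
extra_body = {"seed": 7, "output_quality": 100}
payload = {**named, **(extra_body or {})}
print(payload)  # {'seed': 7, 'scheduler': None, 'output_quality': 100}
```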
@@ -2528,10 +2519,15 @@ class InferenceClient:
         num_frames: Optional[float] = None,
         num_inference_steps: Optional[int] = None,
         seed: Optional[int] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Generate a video based on a given text.

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             prompt (`str`):
                 The prompt to generate a video from.

@@ -2551,6 +2547,9 @@ class InferenceClient:
                 expense of slower inference.
             seed (`int`, *optional*):
                 Seed for the random number generator.
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.

         Returns:
             `bytes`: The generated video.
@@ -2596,6 +2595,7 @@ class InferenceClient:
                 "num_frames": num_frames,
                 "num_inference_steps": num_inference_steps,
                 "seed": seed,
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
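`text_to_video` gains the same `extra_body` pass-through. A hedged usage sketch (the model and the `fps` key are illustrative placeholders; check the chosen provider's documentation for the keys it actually accepts):

```py
from huggingface_hub import InferenceClient

client = InferenceClient(provider="fal-ai", api_key="hf_...")
video = client.text_to_video(
    "A young man walking on the street",
    model="genmo/mochi-1-preview",   # illustrative model ID
    extra_body={"fps": 24},          # hypothetical provider-specific key
)
with open("video.mp4", "wb") as f:
    f.write(video)
```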
@@ -2626,10 +2626,15 @@ class InferenceClient:
         top_p: Optional[float] = None,
         typical_p: Optional[float] = None,
         use_cache: Optional[bool] = None,
+        extra_body: Optional[Dict[str, Any]] = None,
     ) -> bytes:
         """
         Synthesize an audio of a voice pronouncing a given text.

+        <Tip>
+        You can pass provider-specific parameters to the model by using the `extra_body` argument.
+        </Tip>
+
         Args:
             text (`str`):
                 The text to synthesize.

@@ -2683,7 +2688,9 @@ class InferenceClient:
                 paper](https://hf.co/papers/2202.00666) for more details.
             use_cache (`bool`, *optional*):
                 Whether the model should use the past last key/values attentions to speed up decoding
-
+            extra_body (`Dict[str, Any]`, *optional*):
+                Additional provider-specific parameters to pass to the model. Refer to the provider's documentation
+                for supported parameters.
         Returns:
             `bytes`: The generated audio.

@@ -2730,6 +2737,51 @@ class InferenceClient:
         ... )
         >>> Path("hello_world.flac").write_bytes(audio)
         ```
+        Example using Replicate provider with extra parameters
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> client = InferenceClient(
+        ...     provider="replicate",  # Use replicate provider
+        ...     api_key="hf_...",  # Pass your HF token
+        ... )
+        >>> audio = client.text_to_speech(
+        ...     "Hello, my name is Kororo, an awesome text-to-speech model.",
+        ...     model="hexgrad/Kokoro-82M",
+        ...     extra_body={"voice": "af_nicole"},
+        ... )
+        >>> Path("hello.flac").write_bytes(audio)
+        ```
+
+        Example music-gen using "YuE-s1-7B-anneal-en-cot" on fal.ai
+        ```py
+        >>> from huggingface_hub import InferenceClient
+        >>> lyrics = '''
+        ... [verse]
+        ... In the town where I was born
+        ... Lived a man who sailed to sea
+        ... And he told us of his life
+        ... In the land of submarines
+        ... So we sailed on to the sun
+        ... 'Til we found a sea of green
+        ... And we lived beneath the waves
+        ... In our yellow submarine
+
+        ... [chorus]
+        ... We all live in a yellow submarine
+        ... Yellow submarine, yellow submarine
+        ... We all live in a yellow submarine
+        ... Yellow submarine, yellow submarine
+        ... '''
+        >>> genres = "pavarotti-style tenor voice"
+        >>> client = InferenceClient(
+        ...     provider="fal-ai",
+        ...     model="m-a-p/YuE-s1-7B-anneal-en-cot",
+        ...     api_key=...,
+        ... )
+        >>> audio = client.text_to_speech(lyrics, extra_body={"genres": genres})
+        >>> with open("output.mp3", "wb") as f:
+        ...     f.write(audio)
+        ```
         """
         provider_helper = get_provider_helper(self.provider, task="text-to-speech")
         request_parameters = provider_helper.prepare_request(

@@ -2751,6 +2803,7 @@ class InferenceClient:
                 "top_p": top_p,
                 "typical_p": typical_p,
                 "use_cache": use_cache,
+                **(extra_body or {}),
             },
             headers=self.headers,
             model=model or self.model,
@@ -3184,11 +3237,18 @@ class InferenceClient:
         response = self._inner_post(request_parameters)
         return ZeroShotImageClassificationOutputElement.parse_obj_as_list(response)

+    @_deprecate_method(
+        version="0.33.0",
+        message=(
+            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+            " Use `HfApi.list_models(..., inference_provider='...')` to list warm models per provider."
+        ),
+    )
     def list_deployed_models(
         self, frameworks: Union[None, str, Literal["all"], List[str]] = None
     ) -> Dict[str, List[str]]:
         """
-        List models deployed on the Serverless Inference API service.
+        List models deployed on the HF Serverless Inference API service.

         This helper checks deployed models framework by framework. By default, it will check the 4 main frameworks that
         are supported and account for 95% of the hosted models. However, if you want a complete list of models you can
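`list_deployed_models` is now scheduled for removal in 0.33.0. Following the deprecation message above, `HfApi.list_models` with the `inference_provider` filter is the suggested replacement; a hedged sketch:

```py
from huggingface_hub import HfApi

api = HfApi()
# Replacement suggested by the deprecation message above.
for model in api.list_models(inference_provider="together", limit=5):
    print(model.id)
```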
@@ -3198,7 +3258,7 @@ class InferenceClient:

         <Tip warning={true}>

-        This endpoint method does not return a live list of all models available for the
+        This endpoint method does not return a live list of all models available for the HF Inference API service.
         It searches over a cached list of models that were recently available and the list may not be up to date.
         If you want to know the live status of a specific model, use [`~InferenceClient.get_model_status`].

@@ -3240,9 +3300,9 @@ class InferenceClient:

         # Resolve which frameworks to check
         if frameworks is None:
-            frameworks = MAIN_INFERENCE_API_FRAMEWORKS
+            frameworks = constants.MAIN_INFERENCE_API_FRAMEWORKS
         elif frameworks == "all":
-            frameworks = ALL_INFERENCE_API_FRAMEWORKS
+            frameworks = constants.ALL_INFERENCE_API_FRAMEWORKS
         elif isinstance(frameworks, str):
             frameworks = [frameworks]
         frameworks = list(set(frameworks))

@@ -3262,7 +3322,7 @@ class InferenceClient:

         for framework in frameworks:
             response = get_session().get(
-                f"{INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
+                f"{constants.INFERENCE_ENDPOINT}/framework/{framework}", headers=build_hf_headers(token=self.token)
             )
             hf_raise_for_status(response)
             _unpack_response(framework, response.json())

@@ -3324,7 +3384,7 @@ class InferenceClient:
         if model.startswith(("http://", "https://")):
             url = model.rstrip("/") + "/info"
         else:
-            url = f"{INFERENCE_ENDPOINT}/models/{model}/info"
+            url = f"{constants.INFERENCE_ENDPOINT}/models/{model}/info"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         hf_raise_for_status(response)
@@ -3367,9 +3427,16 @@ class InferenceClient:
         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         return response.status_code == 200

+    @_deprecate_method(
+        version="0.33.0",
+        message=(
+            "HF Inference API is getting revamped and will only support warm models in the future (no cold start allowed)."
+            " Use `HfApi.model_info` to get the model status both with HF Inference API and external providers."
+        ),
+    )
     def get_model_status(self, model: Optional[str] = None) -> ModelStatus:
         """
-        Get the status of a model hosted on the Inference API.
+        Get the status of a model hosted on the HF Inference API.

         <Tip>

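`get_model_status` receives the same 0.33.0 deprecation, with `HfApi.model_info` named as the replacement. A hedged sketch of reading the inference state from it (the `expand` value and the `inference` field are assumptions to verify against the `hf_api.py` changes in this release):

```py
from huggingface_hub import HfApi

api = HfApi()
info = api.model_info("meta-llama/Meta-Llama-3-8B-Instruct", expand=["inference"])
print(info.inference)  # e.g. "warm" or "cold"; assumed field, check hf_api.py in this release
```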
@@ -3381,7 +3448,7 @@ class InferenceClient:
         Args:
             model (`str`, *optional*):
                 Identifier of the model for witch the status gonna be checked. If model is not provided,
-                the model associated with this instance of [`InferenceClient`] will be used. Only
+                the model associated with this instance of [`InferenceClient`] will be used. Only HF Inference API service can be checked so the
                 identifier cannot be a URL.


@@ -3405,7 +3472,7 @@ class InferenceClient:
             raise ValueError("Model id not provided.")
         if model.startswith("https://"):
             raise NotImplementedError("Model status is only available for Inference API endpoints.")
-        url = f"{INFERENCE_ENDPOINT}/status/{model}"
+        url = f"{constants.INFERENCE_ENDPOINT}/status/{model}"

         response = get_session().get(url, headers=build_hf_headers(token=self.token))
         hf_raise_for_status(response)
huggingface_hub/inference/_common.py

@@ -18,7 +18,6 @@ import base64
 import io
 import json
 import logging
-from abc import ABC, abstractmethod
 from contextlib import contextmanager
 from dataclasses import dataclass
 from pathlib import Path

@@ -50,12 +49,7 @@ from huggingface_hub.errors import (
     ValidationError,
 )

-from ..utils import (
-    get_session,
-    is_aiohttp_available,
-    is_numpy_available,
-    is_pillow_available,
-)
+from ..utils import get_session, is_aiohttp_available, is_numpy_available, is_pillow_available
 from ._generated.types import ChatCompletionStreamOutput, TextGenerationStreamOutput


@@ -85,33 +79,15 @@ class RequestParameters:
     headers: Dict[str, Any]


-class TaskProviderHelper(ABC):
-    """Protocol defining the interface for task-specific provider helpers."""
-
-    @abstractmethod
-    def prepare_request(
-        self,
-        *,
-        inputs: Any,
-        parameters: Dict[str, Any],
-        headers: Dict,
-        model: Optional[str],
-        api_key: Optional[str],
-        extra_payload: Optional[Dict[str, Any]] = None,
-    ) -> RequestParameters: ...
-    @abstractmethod
-    def get_response(self, response: Union[bytes, Dict]) -> Any: ...
-
-
 # Add dataclass for ModelStatus. We use this dataclass in get_model_status function.
 @dataclass
 class ModelStatus:
     """
-    This Dataclass represents the model status in the
+    This Dataclass represents the model status in the HF Inference API.

     Args:
         loaded (`bool`):
-            If the model is currently loaded into
+            If the model is currently loaded into HF's Inference API. Models
             are loaded on-demand, leading to the user's first request taking longer.
             If a model is loaded, you can be assured that it is in a healthy state.
         state (`str`):
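The abstract `TaskProviderHelper` removed above does not disappear from the package: per the file list at the top, provider plumbing now lives in `huggingface_hub/inference/_providers/_common.py` and the new per-provider modules. The sketch below only approximates such a helper, inferred from the methods this diff references (`prepare_request`, `get_response`, `_prepare_mapped_model`, `_prepare_url`, `_prepare_headers`); the real base class in `_providers/_common.py` may differ.

```py
from typing import Any, Dict, Optional

from huggingface_hub.inference._common import RequestParameters


class SketchProviderHelper:
    """Illustrative sketch only; not the base class actually shipped in _providers/_common.py."""

    def __init__(self, base_url: str, task: str) -> None:
        self.base_url = base_url
        self.task = task

    def prepare_request(
        self,
        *,
        inputs: Any,
        parameters: Dict[str, Any],
        headers: Dict,
        model: Optional[str],
        api_key: Optional[str],
        extra_payload: Optional[Dict[str, Any]] = None,
    ) -> RequestParameters:
        # Drop unset parameters and merge everything into a single JSON payload.
        payload: Dict[str, Any] = {
            "inputs": inputs,
            "parameters": {k: v for k, v in parameters.items() if v is not None},
            **(extra_payload or {}),
        }
        # `url`, `task`, `headers` and `data` appear in this diff; `model` and `json`
        # are assumed field names of RequestParameters.
        return RequestParameters(
            url=f"{self.base_url}/models/{model}",
            task=self.task,
            model=model,
            json=payload,
            data=None,
            headers={**headers, "authorization": f"Bearer {api_key}"},
        )

    def get_response(self, response: Any) -> Any:
        # Providers typically post-process or decode the raw response here.
        return response
```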
|